Merge branch 'master' of https://github.com/jgm/pandoc

author: Igor Pashev <pashev.igor@gmail.com> 2021-07-17 18:10:34 +0200
committer: Igor Pashev <pashev.igor@gmail.com> 2021-07-17 18:46:16 +0200
commit: 48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch)
tree: 1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc
parent: 0c39509d9b6a58958228cebf5d643598e5c98950 (diff)
parent: 46099e79defe662e541b12548200caf29063c1c6 (diff)
download: pandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz
197 files changed, 11652 insertions, 8040 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index e6d5c93d4..98b072ffb 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -1,9 +1,10 @@
+{-# LANGUAGE LambdaCase          #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE CPP                 #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.App
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -19,6 +20,7 @@ module Text.Pandoc.App (
           , Filter(..)
           , defaultOpts
           , parseOptions
+          , parseOptionsFromArgs
           , options
           , applyFilters
           ) where
@@ -27,6 +29,7 @@ import Control.Monad ( (>=>), when )
 import Control.Monad.Trans ( MonadIO(..) )
 import Control.Monad.Except (throwError)
 import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as B8
 import qualified Data.ByteString.Lazy as BL
 import Data.Char (toLower)
 import Data.Maybe (fromMaybe, isJust, isNothing)
@@ -44,19 +47,21 @@ import System.FilePath ( takeBaseName, takeExtension )
 import System.IO (nativeNewline, stdout)
 import qualified System.IO as IO (Newline (..))
 import Text.Pandoc
+import Text.Pandoc.Builder (setMeta)
+import Text.Pandoc.MIME (getCharset)
 import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
 import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
-                            IpynbOutput (..) )
-import Text.Pandoc.App.CommandLineOptions (parseOptions, options)
+                            IpynbOutput (..))
+import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
+                                           options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
-import Text.Pandoc.Builder (setMeta)
+import Text.Collate.Lang (Lang (..), parseLang)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
-import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
+import Text.Pandoc.SelfContained (makeSelfContained)
 import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
          headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput,
-         defaultUserDataDirs, tshow, findM)
+         defaultUserDataDir, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString)
 import Text.Pandoc.Readers.Markdown (yamlToMeta)
 import qualified Text.Pandoc.UTF8 as UTF8
@@ -67,25 +72,29 @@ import System.Posix.Terminal (queryTerminal)
 
 convertWithOpts :: Opt -> IO ()
 convertWithOpts opts = do
+  datadir <- case optDataDir opts of
+                  Nothing   -> do
+                    d <- defaultUserDataDir
+                    exists <- doesDirectoryExist d
+                    return $ if exists
+                                then Just d
+                                else Nothing
+                  Just _    -> return $ optDataDir opts
+
   let outputFile = fromMaybe "-" (optOutputFile opts)
   let filters = optFilters opts
   let verbosity = optVerbosity opts
 
   when (optDumpArgs opts) $
-    do UTF8.hPutStrLn stdout outputFile
-       mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts)
+    do UTF8.hPutStrLn stdout (T.pack outputFile)
+       mapM_ (UTF8.hPutStrLn stdout . T.pack)
+             (fromMaybe ["-"] $ optInputFiles opts)
        exitSuccess
 
   let sources = case optInputFiles opts of
                      Just xs | not (optIgnoreArgs opts) -> xs
                      _ -> ["-"]
 
-  datadir <- case optDataDir opts of
-                  Nothing   -> do
-                    ds <- defaultUserDataDirs
-                    findM doesDirectoryExist ds
-                  Just _    -> return $ optDataDir opts
-
   let runIO' :: PandocIO a -> IO a
       runIO' f = do
         (res, reports) <- runIOorExplode $ do
@@ -151,9 +160,11 @@ convertWithOpts opts = do
                                     else optTabStop opts)
 
 
-    let readSources :: [FilePath] -> PandocIO Text
-        readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
-                              mapM readSource srcs
+    let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
+        readSources srcs =
+          mapM (\fp -> do
+                   t <- readSource fp
+                   return (if fp == "-" then "" else fp, convertTabs t)) srcs
 
 
     outputSettings <- optToOutputSettings opts
@@ -190,20 +201,9 @@ convertWithOpts opts = do
                     Nothing -> readDataFile "abbreviations"
                     Just f  -> readFileStrict f
 
-    metadata <- if format == "jats" &&
-                   isNothing (lookupMeta "csl" (optMetadata opts)) &&
-                   isNothing (lookupMeta "citation-style"
-                                               (optMetadata opts))
-                   then do
-                     jatsCSL <- readDataFile "jats.csl"
-                     let jatsEncoded = makeDataURI
-                                         ("application/xml", jatsCSL)
-                     return $ setMeta "csl" jatsEncoded $ optMetadata opts
-                   else return $ optMetadata opts
-
     case lookupMetaString "lang" (optMetadata opts) of
-           ""      -> setTranslations $ Lang "en" "" "US" []
-           l       -> case parseBCP47 l of
+           ""      -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
+           l       -> case parseLang l of
                            Left _   -> report $ InvalidLang l
                            Right l' -> setTranslations l'
 
@@ -257,13 +257,17 @@ convertWithOpts opts = do
     let sourceToDoc :: [FilePath] -> PandocIO Pandoc
         sourceToDoc sources' =
            case reader of
-                TextReader r
-                  | optFileScope opts || readerNameBase == "json" ->
-                      mconcat <$> mapM (readSource >=> r readerOpts) sources'
-                  | otherwise ->
-                      readSources sources' >>= r readerOpts
-                ByteStringReader r ->
-                  mconcat <$> mapM (readFile' >=> r readerOpts) sources'
+             TextReader r
+               | readerNameBase == "json" ->
+                   mconcat <$> mapM (readSource >=> r readerOpts) sources'
+               | optFileScope opts  ->
+                   -- Read source and convert tabs (see #6709)
+                   let readSource' = fmap convertTabs . readSource
+                   in mconcat <$> mapM (readSource' >=> r readerOpts) sources'
+               | otherwise ->
+                   readSources sources' >>= r readerOpts
+             ByteStringReader r ->
+               mconcat <$> mapM (readFile' >=> r readerOpts) sources'
 
 
     when (readerNameBase == "markdown_github" ||
@@ -281,12 +285,21 @@ convertWithOpts opts = do
       report $ Deprecated "pandoc-citeproc filter"
                "Use --citeproc instead."
 
+    let cslMetadata =
+          maybe id (setMeta "csl") (optCSL opts) .
+          (case optBibliography opts of
+             [] -> id
+             xs -> setMeta "bibliography" xs) .
+          maybe id (setMeta "citation-abbreviations")
+                         (optCitationAbbreviations opts) $ mempty
+
     doc <- sourceToDoc sources >>=
               (   (if isJust (optExtractMedia opts)
                       then fillMediaBag
                       else return)
               >=> return . adjustMetadata (metadataFromFile <>)
-              >=> return . adjustMetadata (<> metadata)
+              >=> return . adjustMetadata (<> optMetadata opts)
+              >=> return . adjustMetadata (<> cslMetadata)
               >=> applyTransforms transforms
               >=> applyFilters readerOpts filters [T.unpack format]
               >=> maybe return extractMedia (optExtractMedia opts)
@@ -353,7 +366,18 @@ readSource src = case parseURI src of
                          _ -> PandocAppError (tshow e))
 
 readURI :: FilePath -> PandocIO Text
-readURI src = UTF8.toText . fst <$> openURL (T.pack src)
+readURI src = do
+  (bs, mt) <- openURL (T.pack src)
+  case mt >>= getCharset of
+    Just "UTF-8"      -> return $ UTF8.toText bs
+    Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs
+    Just charset      -> throwError $ PandocUnsupportedCharsetError charset
+    Nothing           -> liftIO $ -- try first as UTF-8, then as latin1
+                          E.catch (return $! UTF8.toText bs)
+                                  (\case
+                                      TSE.DecodeError{} ->
+                                        return $ T.pack $ B8.unpack bs
+                                      e -> E.throwIO e)
 
 readFile' :: MonadIO m => FilePath -> m BL.ByteString
 readFile' "-" = liftIO BL.getContents
@@ -364,6 +388,5 @@ writeFnBinary "-" = liftIO . BL.putStr
 writeFnBinary f   = liftIO . BL.writeFile (UTF8.encodePath f)
 
 writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
--- TODO this implementation isn't maximally efficient:
-writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack
-writerFn eol f   = liftIO . UTF8.writeFileWith eol f . T.unpack
+writerFn eol "-" = liftIO . UTF8.putStrWith eol
+writerFn eol f   = liftIO . UTF8.writeFileWith eol f
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 906fcc4c0..a6df12715 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE FlexibleContexts    #-}
 {- |
    Module      : Text.Pandoc.App.CommandLineOptions
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -17,6 +17,7 @@ Does a pandoc conversion based on command-line options.
 -}
 module Text.Pandoc.App.CommandLineOptions (
             parseOptions
+          , parseOptionsFromArgs
           , options
           , engines
           , lookupHighlightStyle
@@ -25,11 +26,12 @@ module Text.Pandoc.App.CommandLineOptions (
 import Control.Monad
 import Control.Monad.Trans
 import Control.Monad.Except (throwError)
+import Control.Monad.State.Strict
 import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder,
          defConfig, Indent(..), NumberFormat(..))
 import Data.Bifunctor (second)
 import Data.Char (toLower)
-import Data.List (intercalate, sort)
+import Data.List (intercalate, sort, foldl')
 #ifdef _WINDOWS
 #if MIN_VERSION_base(4,12,0)
 import Data.List (isPrefixOf)
@@ -46,10 +48,13 @@ import System.FilePath
 import System.IO (stdout)
 import Text.DocTemplates (Context (..), ToContext (toVal), Val (..))
 import Text.Pandoc
-import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..), addMeta)
+import Text.Pandoc.Builder (setMeta)
+import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..),
+                            DefaultsState (..), applyDefaults,
+                            fullDefaultsPath)
 import Text.Pandoc.Filter (Filter (..))
 import Text.Pandoc.Highlighting (highlightingStyles)
-import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs, findM)
+import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDir)
 import Text.Printf
 
 #ifdef EMBED_DATA_FILES
@@ -64,16 +69,19 @@ import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as B
 import qualified Data.Map as M
 import qualified Data.Text as T
-import qualified Data.YAML as Y
 import qualified Text.Pandoc.UTF8 as UTF8
 
 parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt
 parseOptions options' defaults = do
   rawArgs <- map UTF8.decodeArg <$> getArgs
   prg <- getProgName
+  parseOptionsFromArgs options' defaults prg rawArgs
 
+parseOptionsFromArgs
+  :: [OptDescr (Opt -> IO Opt)] -> Opt -> String -> [String] -> IO Opt
+parseOptionsFromArgs options' defaults prg rawArgs = do
   let (actions, args, unrecognizedOpts, errors) =
-           getOpt' Permute options' rawArgs
+           getOpt' Permute options' (map UTF8.decodeArg rawArgs)
 
   let unknownOptionErrors =
        foldr (handleUnrecognizedOption . takeWhile (/= '=')) []
@@ -85,7 +93,7 @@ parseOptions options' defaults = do
         ("Try " ++ prg ++ " --help for more information.")
 
   -- thread option data structure through all supplied option actions
-  opts <- foldl (>>=) (return defaults) actions
+  opts <- foldl' (>>=) (return defaults) actions
   let mbArgs = case args of
                  [] -> Nothing
                  xs -> Just xs
@@ -166,7 +174,11 @@ options =
 
     , Option "d" ["defaults"]
                  (ReqArg
-                  (\arg opt -> applyDefaults opt arg
+                  (\arg opt -> runIOorExplode $ do
+                     let defsState = DefaultsState { curDefaults = Nothing,
+                                                     inheritanceGraph = [] }
+                     fp <- fullDefaultsPath (optDataDir opt) arg
+                     evalStateT (applyDefaults opt fp) defsState
                   )
                   "FILE")
                 ""
@@ -276,7 +288,8 @@ options =
     , Option "" ["resource-path"]
                 (ReqArg
                   (\arg opt -> return opt { optResourcePath =
-                                   splitSearchPath arg })
+                                   splitSearchPath arg ++
+                                    optResourcePath opt })
                    "SEARCHPATH")
                   "" -- "Paths to search for images and other resources"
 
@@ -801,10 +814,10 @@ options =
                            map (\c -> ['-',c]) shorts ++
                            map ("--" ++) longs
                      let allopts = unwords (concatMap optnames options)
-                     UTF8.hPutStrLn stdout $ printf tpl allopts
-                         (unwords readersNames)
-                         (unwords writersNames)
-                         (unwords $ map (T.unpack . fst) highlightingStyles)
+                     UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts
+                         (T.unpack $ T.unwords readersNames)
+                         (T.unpack $ T.unwords writersNames)
+                         (T.unpack $ T.unwords $ map fst highlightingStyles)
                          (unwords datafiles)
                      exitSuccess ))
                  "" -- "Print bash completion script"
@@ -843,7 +856,7 @@ options =
                                else if extensionEnabled x allExts
                                        then '-'
                                        else ' ') : drop 4 (show x)
-                     mapM_ (UTF8.hPutStrLn stdout . showExt)
+                     mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt)
                        [ex | ex <- extList, extensionEnabled ex allExts]
                      exitSuccess )
                   "FORMAT")
@@ -857,14 +870,14 @@ options =
                                  , sShortname s `notElem`
                                     [T.pack "Alert", T.pack "Alert_indent"]
                                  ]
-                     mapM_ (UTF8.hPutStrLn stdout) (sort langs)
+                     mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs)
                      exitSuccess ))
                  ""
 
     , Option "" ["list-highlight-styles"]
                  (NoArg
                   (\_ -> do
-                     mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles
+                     mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles
                      exitSuccess ))
                  ""
 
@@ -882,7 +895,7 @@ options =
                             | T.null t -> -- e.g. for docx, odt, json:
                                 E.throwIO $ PandocCouldNotFindDataFileError $ T.pack
                                   ("templates/default." ++ arg)
-                            | otherwise -> write . T.unpack $ t
+                            | otherwise -> write t
                           Left e  -> E.throwIO e
                      exitSuccess)
                   "FORMAT")
@@ -928,12 +941,13 @@ options =
                  (NoArg
                   (\_ -> do
                      prg <- getProgName
-                     defaultDatadirs <- defaultUserDataDirs
-                     UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++
-                       compileInfo ++
-                       "\nUser data directory: " ++
-                       intercalate " or " defaultDatadirs ++
-                       ('\n':copyrightMessage))
+                     defaultDatadir <- defaultUserDataDir
+                     UTF8.hPutStrLn stdout
+                      $ T.pack
+                      $ prg ++ " " ++ T.unpack pandocVersion ++
+                        compileInfo ++
+                        "\nUser data directory: " ++ defaultDatadir ++
+                        ('\n':copyrightMessage)
                      exitSuccess ))
                  "" -- "Print version"
 
@@ -941,7 +955,7 @@ options =
                  (NoArg
                   (\_ -> do
                      prg <- getProgName
-                     UTF8.hPutStr stdout (usageMessage prg options)
+                     UTF8.hPutStr stdout (T.pack $ usageMessage prg options)
                      exitSuccess ))
                  "" -- "Show help"
     ]
@@ -962,7 +976,7 @@ usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]")
 
 copyrightMessage :: String
 copyrightMessage = intercalate "\n" [
- "Copyright (C) 2006-2020 John MacFarlane. Web:  https://pandoc.org",
+ "Copyright (C) 2006-2021 John MacFarlane. Web:  https://pandoc.org",
  "This is free software; see the source for copying conditions. There is no",
  "warranty, not even for merchantability or fitness for a particular purpose." ]
 
@@ -1002,38 +1016,16 @@ handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
 handleUnrecognizedOption x =
   (("Unknown option " ++ x ++ ".") :)
 
-readersNames :: [String]
-readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)]))
+readersNames :: [Text]
+readersNames = sort (map fst (readers :: [(Text, Reader PandocIO)]))
 
-writersNames :: [String]
+writersNames :: [Text]
 writersNames = sort
-  ("pdf" : map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)]))
+  ("pdf" : map fst (writers :: [(Text, Writer PandocIO)]))
 
 splitField :: String -> (String, String)
 splitField = second (tailDef "true") . break (`elemText` ":=")
 
--- | Apply defaults from --defaults file.
-applyDefaults :: Opt -> FilePath -> IO Opt
-applyDefaults opt file = runIOorExplode $ do
-  let fp = if null (takeExtension file)
-              then addExtension file "yaml"
-              else file
-  setVerbosity $ optVerbosity opt
-  dataDirs <- liftIO defaultUserDataDirs
-  let fps = fp : case optDataDir opt of
-              Nothing -> map (</> ("defaults" </> fp))
-                               dataDirs
-              Just dd -> [dd </> "defaults" </> fp]
-  fp' <- fromMaybe fp <$> findM fileExists fps
-  inp <- readFileLazy fp'
-  case Y.decode1 inp of
-      Right (f :: Opt -> Opt) -> return $ f opt
-      Left (errpos, errmsg)  -> throwError $
-         PandocParseError $ T.pack $
-         "Error parsing " ++ fp' ++ " line " ++
-          show (Y.posLine errpos) ++ " column " ++
-          show (Y.posColumn errpos) ++ ":\n" ++ errmsg
-
 lookupHighlightStyle :: PandocMonad m => String -> m Style
 lookupHighlightStyle s
   | takeExtension s == ".theme" = -- attempt to load KDE theme
@@ -1062,6 +1054,27 @@ setVariable key val (Context ctx) = Context $ M.alter go key ctx
         go (Just (ListVal xs)) = Just $ ListVal $ xs ++ [toVal val]
         go (Just x)            = Just $ ListVal [x, toVal val]
 
+addMeta :: String -> String -> Meta -> Meta
+addMeta k v meta =
+  case lookupMeta k' meta of
+       Nothing -> setMeta k' v' meta
+       Just (MetaList xs) ->
+                  setMeta k' (MetaList (xs ++ [v'])) meta
+       Just x  -> setMeta k' (MetaList [x, v']) meta
+ where
+  v' = readMetaValue v
+  k' = T.pack k
+
+readMetaValue :: String -> MetaValue
+readMetaValue s
+  | s == "true"  = MetaBool True
+  | s == "True"  = MetaBool True
+  | s == "TRUE"  = MetaBool True
+  | s == "false" = MetaBool False
+  | s == "False" = MetaBool False
+  | s == "FALSE" = MetaBool False
+  | otherwise    = MetaString $ T.pack s
+
 -- On Windows with ghc 8.6+, we need to rewrite paths
 -- beginning with \\ to \\?\UNC\. -- See #5127.
 normalizePath :: FilePath -> FilePath
diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
index 155b7e586..bdf8c6667 100644
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ b/src/Text/Pandoc/App/FormatHeuristics.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.App.FormatHeuristics
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -15,18 +15,24 @@ module Text.Pandoc.App.FormatHeuristics
   ) where
 
 import Data.Char (toLower)
+import Data.Foldable (asum)
 import Data.Text (Text)
 import System.FilePath (takeExtension)
 
--- Determine default format based on file extensions.
+-- | Determines default format based on file extensions; uses the format
+-- of the first extension that's associated with a format.
+--
+-- Examples:
+--
+-- > formatFromFilePaths ["text.unknown", "no-extension"]
+-- Nothing
+--
+-- > formatFromFilePaths ["my.md", "other.rst"]
+-- Just "markdown"
 formatFromFilePaths :: [FilePath] -> Maybe Text
-formatFromFilePaths [] = Nothing
-formatFromFilePaths (x:xs) =
-  case formatFromFilePath x of
-    Just f     -> Just f
-    Nothing    -> formatFromFilePaths xs
+formatFromFilePaths = asum . map formatFromFilePath
 
--- Determine format based on file extension
+-- | Determines format based on file extension.
 formatFromFilePath :: FilePath -> Maybe Text
 formatFromFilePath x =
   case takeExtension (map toLower x) of
@@ -48,6 +54,11 @@ formatFromFilePath x =
     ".lhs"      -> Just "markdown+lhs"
     ".ltx"      -> Just "latex"
     ".markdown" -> Just "markdown"
+    ".mkdn"     -> Just "markdown"
+    ".mkd"      -> Just "markdown"
+    ".mdwn"     -> Just "markdown"
+    ".mdown"    -> Just "markdown"
+    ".Rmd"      -> Just "markdown"
     ".md"       -> Just "markdown"
     ".ms"       -> Just "ms"
     ".muse"     -> Just "muse"
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 00b4b5523..d54d932b7 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -7,7 +7,7 @@
 {-# LANGUAGE FlexibleContexts    #-}
 {- |
    Module      : Text.Pandoc.App.Opt
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -20,21 +20,31 @@ module Text.Pandoc.App.Opt (
             Opt(..)
           , LineEnding (..)
           , IpynbOutput (..)
+          , DefaultsState (..)
           , defaultOpts
-          , addMeta
+          , applyDefaults
+          , fullDefaultsPath
           ) where
+import Control.Monad.Except (MonadIO, liftIO, throwError, (>=>), foldM)
+import Control.Monad.State.Strict (StateT, modify, gets)
+import System.FilePath ( addExtension, (</>), takeExtension, takeDirectory )
+import System.Directory ( canonicalizePath )
 import Data.Char (isLower, toLower)
+import Data.Maybe (fromMaybe)
 import GHC.Generics hiding (Meta)
-import Text.Pandoc.Builder (setMeta)
 import Text.Pandoc.Filter (Filter (..))
-import Text.Pandoc.Logging (Verbosity (WARNING))
+import Text.Pandoc.Logging (Verbosity (WARNING), LogMessage(..))
 import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault),
                             TrackChanges (AcceptChanges),
                             WrapOption (WrapAuto), HTMLMathMethod (PlainMath),
                             ReferenceLocation (EndOfDocument),
                             ObfuscationMethod (NoObfuscation),
                             CiteMethod (Citeproc))
-import Text.Pandoc.Shared (camelCaseStrToHyphenated)
+import Text.Pandoc.Class (readFileLazy, fileExists, setVerbosity, report,
+                          PandocMonad(lookupEnv), getUserDataDir)
+import Text.Pandoc.Error (PandocError (PandocParseError, PandocSomeError))
+import Text.Pandoc.Shared (camelCaseStrToHyphenated, defaultUserDataDir,
+                           findM, ordNub)
 import qualified Text.Pandoc.Parsing as P
 import Text.Pandoc.Readers.Metadata (yamlMap)
 import Text.Pandoc.Class.PandocPure
@@ -43,7 +53,7 @@ import Data.Text (Text, unpack)
 import Data.Default (def)
 import qualified Data.Text as T
 import qualified Data.Map as M
-import Text.Pandoc.Definition (Meta(..), MetaValue(..), lookupMeta)
+import Text.Pandoc.Definition (Meta(..), MetaValue(..))
 import Data.Aeson (defaultOptions, Options(..))
 import Data.Aeson.TH (deriveJSON)
 import Control.Applicative ((<|>))
@@ -147,19 +157,194 @@ data Opt = Opt
     , optNoCheckCertificate    :: Bool       -- ^ Disable certificate validation
     , optEol                   :: LineEnding -- ^ Style of line-endings to use
     , optStripComments         :: Bool       -- ^ Skip HTML comments
+    , optCSL                   :: Maybe FilePath -- ^ CSL stylesheet
+    , optBibliography          :: [FilePath]  -- ^ Bibliography files
+    , optCitationAbbreviations :: Maybe FilePath -- ^ Citation abbreviations
     } deriving (Generic, Show)
 
 instance FromYAML (Opt -> Opt) where
-  parseYAML (Mapping _ _ m) =
-    foldr (.) id <$> mapM doOpt (M.toList m)
+  parseYAML (Mapping _ _ m) = chain doOpt (M.toList m)
   parseYAML n = failAtNode n "Expected a mapping"
 
+data DefaultsState = DefaultsState
+    {
+      curDefaults      :: Maybe FilePath -- currently parsed file
+    , inheritanceGraph :: [[FilePath]]   -- defaults file inheritance graph
+    } deriving (Show)
+
+instance (PandocMonad m, MonadIO m)
+      => FromYAML (Opt -> StateT DefaultsState m Opt) where
+  parseYAML (Mapping _ _ m) = do
+    let opts = M.mapKeys toText m
+    dataDir <- case M.lookup "data-dir" opts of
+      Nothing -> return Nothing
+      Just v -> Just . unpack <$> parseYAML v
+    f <- parseOptions (M.toList m)
+    case M.lookup "defaults" opts of
+      Just v -> do
+        g <- parseDefaults v dataDir
+        return  $ g >=> f >=> resolveVarsInOpt
+      Nothing -> return $ f >=> resolveVarsInOpt
+    where
+      toText (Scalar _ (SStr s)) = s
+      toText _ = ""
+  parseYAML n = failAtNode n "Expected a mapping"
+
+resolveVarsInOpt :: forall m. (PandocMonad m, MonadIO m)
+                 => Opt -> StateT DefaultsState m Opt
+resolveVarsInOpt
+    opt@Opt
+    { optTemplate              = oTemplate
+    , optMetadataFiles         = oMetadataFiles
+    , optOutputFile            = oOutputFile
+    , optInputFiles            = oInputFiles
+    , optSyntaxDefinitions     = oSyntaxDefinitions
+    , optAbbreviations         = oAbbreviations
+    , optReferenceDoc          = oReferenceDoc
+    , optEpubMetadata          = oEpubMetadata
+    , optEpubFonts             = oEpubFonts
+    , optEpubCoverImage        = oEpubCoverImage
+    , optLogFile               = oLogFile
+    , optFilters               = oFilters
+    , optDataDir               = oDataDir
+    , optExtractMedia          = oExtractMedia
+    , optCss                   = oCss
+    , optIncludeBeforeBody     = oIncludeBeforeBody
+    , optIncludeAfterBody      = oIncludeAfterBody
+    , optIncludeInHeader       = oIncludeInHeader
+    , optResourcePath          = oResourcePath
+    , optCSL                   = oCSL
+    , optBibliography          = oBibliography
+    , optCitationAbbreviations = oCitationAbbreviations
+    }
+  = do
+      oTemplate' <- mapM resolveVars oTemplate
+      oMetadataFiles' <- mapM resolveVars oMetadataFiles
+      oOutputFile' <- mapM resolveVars oOutputFile
+      oInputFiles' <- mapM (mapM resolveVars) oInputFiles
+      oSyntaxDefinitions' <- mapM resolveVars oSyntaxDefinitions
+      oAbbreviations' <- mapM resolveVars oAbbreviations
+      oReferenceDoc' <- mapM resolveVars oReferenceDoc
+      oEpubMetadata' <- mapM resolveVars oEpubMetadata
+      oEpubFonts' <- mapM resolveVars oEpubFonts
+      oEpubCoverImage' <- mapM resolveVars oEpubCoverImage
+      oLogFile' <- mapM resolveVars oLogFile
+      oFilters' <- mapM resolveVarsInFilter oFilters
+      oDataDir' <- mapM resolveVars oDataDir
+      oExtractMedia' <- mapM resolveVars oExtractMedia
+      oCss' <- mapM resolveVars oCss
+      oIncludeBeforeBody' <- mapM resolveVars oIncludeBeforeBody
+      oIncludeAfterBody' <- mapM resolveVars oIncludeAfterBody
+      oIncludeInHeader' <- mapM resolveVars oIncludeInHeader
+      oResourcePath' <- mapM resolveVars oResourcePath
+      oCSL' <- mapM resolveVars oCSL
+      oBibliography' <- mapM resolveVars oBibliography
+      oCitationAbbreviations' <- mapM resolveVars oCitationAbbreviations
+      return opt{ optTemplate              = oTemplate'
+                , optMetadataFiles         = oMetadataFiles'
+                , optOutputFile            = oOutputFile'
+                , optInputFiles            = oInputFiles'
+                , optSyntaxDefinitions     = oSyntaxDefinitions'
+                , optAbbreviations         = oAbbreviations'
+                , optReferenceDoc          = oReferenceDoc'
+                , optEpubMetadata          = oEpubMetadata'
+                , optEpubFonts             = oEpubFonts'
+                , optEpubCoverImage        = oEpubCoverImage'
+                , optLogFile               = oLogFile'
+                , optFilters               = oFilters'
+                , optDataDir               = oDataDir'
+                , optExtractMedia          = oExtractMedia'
+                , optCss                   = oCss'
+                , optIncludeBeforeBody     = oIncludeBeforeBody'
+                , optIncludeAfterBody      = oIncludeAfterBody'
+                , optIncludeInHeader       = oIncludeInHeader'
+                , optResourcePath          = oResourcePath'
+                , optCSL                   = oCSL'
+                , optBibliography          = oBibliography'
+                , optCitationAbbreviations = oCitationAbbreviations'
+                }
+
+ where
+  resolveVars :: FilePath -> StateT DefaultsState m FilePath
+  resolveVars [] = return []
+  resolveVars ('$':'{':xs) =
+    let (ys, zs) = break (=='}') xs
+     in if null zs
+           then return $ '$':'{':xs
+           else do
+             val <- lookupEnv' ys
+             (val ++) <$> resolveVars (drop 1 zs)
+  resolveVars (c:cs) = (c:) <$> resolveVars cs
+  lookupEnv' :: String -> StateT DefaultsState m String
+  lookupEnv' "." = do
+    mbCurDefaults <- gets curDefaults
+    maybe (return "")
+          (fmap takeDirectory . liftIO . canonicalizePath)
+          mbCurDefaults
+  lookupEnv' "USERDATA" = do
+    mbodatadir <- mapM resolveVars oDataDir
+    mbdatadir  <- getUserDataDir
+    defdatadir <- liftIO defaultUserDataDir
+    return $ fromMaybe defdatadir (mbodatadir <|> mbdatadir)
+  lookupEnv' v = do
+    mbval <- fmap T.unpack <$> lookupEnv (T.pack v)
+    case mbval of
+      Nothing -> do
+        report $ EnvironmentVariableUndefined (T.pack v)
+        return mempty
+      Just x  -> return x
+  resolveVarsInFilter (JSONFilter fp) =
+    JSONFilter <$> resolveVars fp
+  resolveVarsInFilter (LuaFilter fp) =
+    LuaFilter <$> resolveVars fp
+  resolveVarsInFilter CiteprocFilter = return CiteprocFilter
+
+
+
+parseDefaults :: (PandocMonad m, MonadIO m)
+              => Node Pos
+              -> Maybe FilePath
+              -> Parser (Opt -> StateT DefaultsState m Opt)
+parseDefaults n dataDir = parseDefsNames n >>= \ds -> return $ \o -> do
+  -- get parent defaults:
+  defsParent <- gets $ fromMaybe "" . curDefaults
+  -- get child defaults:
+  defsChildren <- mapM (fullDefaultsPath dataDir) ds
+  -- expand parent in defaults inheritance graph by children:
+  defsGraph <- gets inheritanceGraph
+  let defsGraphExp = expand defsGraph defsChildren defsParent
+  modify $ \defsState -> defsState{ inheritanceGraph = defsGraphExp }
+  -- check for cyclic inheritance:
+  if cyclic defsGraphExp
+    then throwError $
+      PandocSomeError $ T.pack $
+        "Error: Circular defaults file reference in " ++
+        "'" ++ defsParent ++ "'"
+    else foldM applyDefaults o defsChildren
+  where parseDefsNames x = (parseYAML x >>= \xs -> return $ map unpack xs)
+                       <|> (parseYAML x >>= \x' -> return [unpack x'])
+
+parseOptions :: Monad m
+             => [(Node Pos, Node Pos)]
+             -> Parser (Opt -> StateT DefaultsState m Opt)
+parseOptions ns = do
+  f <- chain doOpt' ns
+  return $ return . f
+
+chain :: Monad m => (a -> m (b -> b)) -> [a] -> m (b -> b)
+chain f = foldM g id
+  where g o n = f n >>= \o' -> return $ o' . o
+
+doOpt' :: (Node Pos, Node Pos) -> Parser (Opt -> Opt)
+doOpt' (k',v) = do
+  k <- parseStringKey k'
+  case k of
+    "defaults" -> return id
+    _ -> doOpt (k',v)
+
 doOpt :: (Node Pos, Node Pos) -> Parser (Opt -> Opt)
 doOpt (k',v) = do
-  k <- case k' of
-         Scalar _ (SStr t) -> return t
-         Scalar _ _ -> failAtNode k' "Non-string key"
-         _ -> failAtNode k' "Non-scalar key"
+  k <- parseStringKey k'
   case k of
     "tab-stop" ->
       parseYAML v >>= \x -> return (\o -> o{ optTabStop = x })
@@ -358,26 +543,18 @@ doOpt (k',v) = do
       (parseYAML v >>= \x -> return (\o -> o{ optCss = optCss o <>
                                                 [unpack x] }))
     "bibliography" ->
-      do let addItem x o = o{ optMetadata =
-                                 addMeta "bibliography" (T.unpack x)
-                                    (optMetadata o) }
-         (parseYAML v >>= \(xs :: [Text]) -> return $ \o ->
-                                                    foldr addItem o xs)
-          <|>
-          (parseYAML v >>= \(x :: Text) -> return $ \o -> addItem x o)
+      (parseYAML v >>= \x -> return (\o ->
+                               o{ optBibliography = optBibliography o <>
+                                                      map unpack x }))
+      <|>
+      (parseYAML v >>= \x -> return (\o ->
+                               o{ optBibliography = optBibliography o <>
+                                                       [unpack x] }))
     "csl" ->
-      do let addItem x o = o{ optMetadata =
-                                 addMeta "csl" (T.unpack x)
-                                   (optMetadata o) }
-         (parseYAML v >>= \(xs :: [Text]) -> return $ \o ->
-                                                    foldr addItem o xs)
-          <|>
-          (parseYAML v >>= \(x :: Text) -> return $ \o -> addItem x o)
+      parseYAML v >>= \x -> return (\o -> o{ optCSL = unpack <$> x })
     "citation-abbreviations" ->
-      parseYAML v >>= \x ->
-             return (\o -> o{ optMetadata =
-                                addMeta "citation-abbreviations" (T.unpack x)
-                                  (optMetadata o) })
+      parseYAML v >>= \x -> return (\o -> o{ optCitationAbbreviations =
+                                                  unpack <$> x })
     "ipynb-output" ->
       parseYAML v >>= \x -> return (\o -> o{ optIpynbOutput = x })
     "include-before-body" ->
@@ -406,7 +583,8 @@ doOpt (k',v) = do
                                 optIncludeInHeader o <> [unpack x] }))
     "resource-path" ->
       parseYAML v >>= \x ->
-             return (\o -> o{ optResourcePath = map unpack x })
+             return (\o -> o{ optResourcePath = map unpack x <>
+                                 optResourcePath o })
     "request-headers" ->
       parseYAML v >>= \x ->
              return (\o -> o{ optRequestHeaders = x })
@@ -492,38 +670,70 @@ defaultOpts = Opt
     , optNoCheckCertificate    = False
     , optEol                   = Native
     , optStripComments         = False
+    , optCSL                   = Nothing
+    , optBibliography          = []
+    , optCitationAbbreviations = Nothing
     }
 
+parseStringKey ::  Node Pos -> Parser Text
+parseStringKey k = case k of
+  Scalar _ (SStr t) -> return t
+  Scalar _ _ -> failAtNode k "Non-string key"
+  _ -> failAtNode k "Non-scalar key"
+
 yamlToMeta :: Node Pos -> Parser Meta
 yamlToMeta (Mapping _ _ m) =
     either (fail . show) return $ runEverything (yamlMap pMetaString m)
   where
     pMetaString = pure . MetaString <$> P.manyChar P.anyChar
-    runEverything p = runPure (P.readWithM p def "")
+    runEverything p =
+      runPure (P.readWithM p (def :: P.ParserState) ("" :: Text))
       >>= fmap (Meta . flip P.runF def)
 yamlToMeta _ = return mempty
 
-addMeta :: String -> String -> Meta -> Meta
-addMeta k v meta =
-  case lookupMeta k' meta of
-       Nothing -> setMeta k' v' meta
-       Just (MetaList xs) ->
-                  setMeta k' (MetaList (xs ++ [v'])) meta
-       Just x  -> setMeta k' (MetaList [x, v']) meta
- where
-  v' = readMetaValue v
-  k' = T.pack k
+-- | Apply defaults from --defaults file.
+applyDefaults :: (PandocMonad m, MonadIO m)
+              => Opt
+              -> FilePath
+              -> StateT DefaultsState m Opt
+applyDefaults opt file = do
+  setVerbosity $ optVerbosity opt
+  modify $ \defsState -> defsState{ curDefaults = Just file }
+  inp <- readFileLazy file
+  case decode1 inp of
+      Right f -> f opt
+      Left (errpos, errmsg)  -> throwError $
+         PandocParseError $ T.pack $
+         "Error parsing " ++ file ++ " line " ++
+          show (posLine errpos) ++ " column " ++
+          show (posColumn errpos) ++ ":\n" ++ errmsg
 
-readMetaValue :: String -> MetaValue
-readMetaValue s
-  | s == "true"  = MetaBool True
-  | s == "True"  = MetaBool True
-  | s == "TRUE"  = MetaBool True
-  | s == "false" = MetaBool False
-  | s == "False" = MetaBool False
-  | s == "FALSE" = MetaBool False
-  | otherwise    = MetaString $ T.pack s
+fullDefaultsPath :: (PandocMonad m, MonadIO m)
+                 => Maybe FilePath
+                 -> FilePath
+                 -> m FilePath
+fullDefaultsPath dataDir file = do
+  let fp = if null (takeExtension file)
+              then addExtension file "yaml"
+              else file
+  defaultDataDir <- liftIO defaultUserDataDir
+  let defaultFp = fromMaybe defaultDataDir dataDir </> "defaults" </> fp
+  fromMaybe fp <$> findM fileExists [fp, defaultFp]
 
+-- | In a list of lists, append another list in front of every list which
+-- starts with specific element.
+expand :: Ord a => [[a]] -> [a] -> a -> [[a]]
+expand [] ns n = fmap (\x -> x : [n]) ns
+expand ps ns n = concatMap (ext n ns) ps
+  where
+    ext x xs p = case p of
+      (l : _) | x == l -> fmap (: p) xs
+      _ -> [p]
+
+cyclic :: Ord a => [[a]] -> Bool
+cyclic = any hasDuplicate
+  where
+    hasDuplicate xs = length (ordNub xs) /= length xs
 
 -- see https://github.com/jgm/pandoc/pull/4083
 -- using generic deriving caused long compilation times
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 139b408cb..3864ab188 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE TupleSections       #-}
 {- |
    Module      : Text.Pandoc.App
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -59,8 +59,8 @@ optToOutputSettings opts = do
   let outputFile = fromMaybe "-" (optOutputFile opts)
 
   when (optDumpArgs opts) . liftIO $ do
-    UTF8.hPutStrLn stdout outputFile
-    mapM_ (UTF8.hPutStrLn stdout) (fromMaybe [] $ optInputFiles opts)
+    UTF8.hPutStrLn stdout (T.pack outputFile)
+    mapM_ (UTF8.hPutStrLn stdout . T.pack) (fromMaybe [] $ optInputFiles opts)
     exitSuccess
 
   epubMetadata <- traverse readUtf8File $ optEpubMetadata opts
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 9e9cc8d9b..620546c13 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Asciify
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -10,396 +10,19 @@
 Function to convert accented latin letters to their unaccented
 ascii equivalents (used in constructing HTML identifiers).
 -}
-module Text.Pandoc.Asciify (toAsciiChar)
+module Text.Pandoc.Asciify (toAsciiChar, toAsciiText)
 where
-import Data.Char (isAscii)
-import qualified Data.Map as M
+import Data.Char (isAscii, isMark)
+import qualified Data.Text.Normalize as TN
+import Data.Text (Text)
+import qualified Data.Text as T
 
-toAsciiChar :: Char -> Maybe Char
-toAsciiChar c | isAscii c = Just c
-              | otherwise = M.lookup c asciiMap
+toAsciiText :: Text -> Text
+toAsciiText = T.filter isAscii . TN.normalize (TN.NFD)
 
-asciiMap :: M.Map Char Char
-asciiMap = M.fromList
-  [('\192','A')
-  ,('\193','A')
-  ,('\194','A')
-  ,('\195','A')
-  ,('\196','A')
-  ,('\197','A')
-  ,('\199','C')
-  ,('\200','E')
-  ,('\201','E')
-  ,('\202','E')
-  ,('\203','E')
-  ,('\204','I')
-  ,('\205','I')
-  ,('\206','I')
-  ,('\207','I')
-  ,('\209','N')
-  ,('\210','O')
-  ,('\211','O')
-  ,('\212','O')
-  ,('\213','O')
-  ,('\214','O')
-  ,('\217','U')
-  ,('\218','U')
-  ,('\219','U')
-  ,('\220','U')
-  ,('\221','Y')
-  ,('\224','a')
-  ,('\225','a')
-  ,('\226','a')
-  ,('\227','a')
-  ,('\228','a')
-  ,('\229','a')
-  ,('\231','c')
-  ,('\232','e')
-  ,('\233','e')
-  ,('\234','e')
-  ,('\235','e')
-  ,('\236','i')
-  ,('\237','i')
-  ,('\238','i')
-  ,('\239','i')
-  ,('\241','n')
-  ,('\242','o')
-  ,('\243','o')
-  ,('\244','o')
-  ,('\245','o')
-  ,('\246','o')
-  ,('\249','u')
-  ,('\250','u')
-  ,('\251','u')
-  ,('\252','u')
-  ,('\253','y')
-  ,('\255','y')
-  ,('\256','A')
-  ,('\257','a')
-  ,('\258','A')
-  ,('\259','a')
-  ,('\260','A')
-  ,('\261','a')
-  ,('\262','C')
-  ,('\263','c')
-  ,('\264','C')
-  ,('\265','c')
-  ,('\266','C')
-  ,('\267','c')
-  ,('\268','C')
-  ,('\269','c')
-  ,('\270','D')
-  ,('\271','d')
-  ,('\274','E')
-  ,('\275','e')
-  ,('\276','E')
-  ,('\277','e')
-  ,('\278','E')
-  ,('\279','e')
-  ,('\280','E')
-  ,('\281','e')
-  ,('\282','E')
-  ,('\283','e')
-  ,('\284','G')
-  ,('\285','g')
-  ,('\286','G')
-  ,('\287','g')
-  ,('\288','G')
-  ,('\289','g')
-  ,('\290','G')
-  ,('\291','g')
-  ,('\292','H')
-  ,('\293','h')
-  ,('\296','I')
-  ,('\297','i')
-  ,('\298','I')
-  ,('\299','i')
-  ,('\300','I')
-  ,('\301','i')
-  ,('\302','I')
-  ,('\303','i')
-  ,('\304','I')
-  ,('\305','i')
-  ,('\308','J')
-  ,('\309','j')
-  ,('\310','K')
-  ,('\311','k')
-  ,('\313','L')
-  ,('\314','l')
-  ,('\315','L')
-  ,('\316','l')
-  ,('\317','L')
-  ,('\318','l')
-  ,('\323','N')
-  ,('\324','n')
-  ,('\325','N')
-  ,('\326','n')
-  ,('\327','N')
-  ,('\328','n')
-  ,('\332','O')
-  ,('\333','o')
-  ,('\334','O')
-  ,('\335','o')
-  ,('\336','O')
-  ,('\337','o')
-  ,('\340','R')
-  ,('\341','r')
-  ,('\342','R')
-  ,('\343','r')
-  ,('\344','R')
-  ,('\345','r')
-  ,('\346','S')
-  ,('\347','s')
-  ,('\348','S')
-  ,('\349','s')
-  ,('\350','S')
-  ,('\351','s')
-  ,('\352','S')
-  ,('\353','s')
-  ,('\354','T')
-  ,('\355','t')
-  ,('\356','T')
-  ,('\357','t')
-  ,('\360','U')
-  ,('\361','u')
-  ,('\362','U')
-  ,('\363','u')
-  ,('\364','U')
-  ,('\365','u')
-  ,('\366','U')
-  ,('\367','u')
-  ,('\368','U')
-  ,('\369','u')
-  ,('\370','U')
-  ,('\371','u')
-  ,('\372','W')
-  ,('\373','w')
-  ,('\374','Y')
-  ,('\375','y')
-  ,('\376','Y')
-  ,('\377','Z')
-  ,('\378','z')
-  ,('\379','Z')
-  ,('\380','z')
-  ,('\381','Z')
-  ,('\382','z')
-  ,('\416','O')
-  ,('\417','o')
-  ,('\431','U')
-  ,('\432','u')
-  ,('\461','A')
-  ,('\462','a')
-  ,('\463','I')
-  ,('\464','i')
-  ,('\465','O')
-  ,('\466','o')
-  ,('\467','U')
-  ,('\468','u')
-  ,('\486','G')
-  ,('\487','g')
-  ,('\488','K')
-  ,('\489','k')
-  ,('\490','O')
-  ,('\491','o')
-  ,('\496','j')
-  ,('\500','G')
-  ,('\501','g')
-  ,('\504','N')
-  ,('\505','n')
-  ,('\512','A')
-  ,('\513','a')
-  ,('\514','A')
-  ,('\515','a')
-  ,('\516','E')
-  ,('\517','e')
-  ,('\518','E')
-  ,('\519','e')
-  ,('\520','I')
-  ,('\521','i')
-  ,('\522','I')
-  ,('\523','i')
-  ,('\524','O')
-  ,('\525','o')
-  ,('\526','O')
-  ,('\527','o')
-  ,('\528','R')
-  ,('\529','r')
-  ,('\530','R')
-  ,('\531','r')
-  ,('\532','U')
-  ,('\533','u')
-  ,('\534','U')
-  ,('\535','u')
-  ,('\536','S')
-  ,('\537','s')
-  ,('\538','T')
-  ,('\539','t')
-  ,('\542','H')
-  ,('\543','h')
-  ,('\550','A')
-  ,('\551','a')
-  ,('\552','E')
-  ,('\553','e')
-  ,('\558','O')
-  ,('\559','o')
-  ,('\562','Y')
-  ,('\563','y')
-  ,('\894',';')
-  ,('\7680','A')
-  ,('\7681','a')
-  ,('\7682','B')
-  ,('\7683','b')
-  ,('\7684','B')
-  ,('\7685','b')
-  ,('\7686','B')
-  ,('\7687','b')
-  ,('\7690','D')
-  ,('\7691','d')
-  ,('\7692','D')
-  ,('\7693','d')
-  ,('\7694','D')
-  ,('\7695','d')
-  ,('\7696','D')
-  ,('\7697','d')
-  ,('\7698','D')
-  ,('\7699','d')
-  ,('\7704','E')
-  ,('\7705','e')
-  ,('\7706','E')
-  ,('\7707','e')
-  ,('\7710','F')
-  ,('\7711','f')
-  ,('\7712','G')
-  ,('\7713','g')
-  ,('\7714','H')
-  ,('\7715','h')
-  ,('\7716','H')
-  ,('\7717','h')
-  ,('\7718','H')
-  ,('\7719','h')
-  ,('\7720','H')
-  ,('\7721','h')
-  ,('\7722','H')
-  ,('\7723','h')
-  ,('\7724','I')
-  ,('\7725','i')
-  ,('\7728','K')
-  ,('\7729','k')
-  ,('\7730','K')
-  ,('\7731','k')
-  ,('\7732','K')
-  ,('\7733','k')
-  ,('\7734','L')
-  ,('\7735','l')
-  ,('\7738','L')
-  ,('\7739','l')
-  ,('\7740','L')
-  ,('\7741','l')
-  ,('\7742','M')
-  ,('\7743','m')
-  ,('\7744','M')
-  ,('\7745','m')
-  ,('\7746','M')
-  ,('\7747','m')
-  ,('\7748','N')
-  ,('\7749','n')
-  ,('\7750','N')
-  ,('\7751','n')
-  ,('\7752','N')
-  ,('\7753','n')
-  ,('\7754','N')
-  ,('\7755','n')
-  ,('\7764','P')
-  ,('\7765','p')
-  ,('\7766','P')
-  ,('\7767','p')
-  ,('\7768','R')
-  ,('\7769','r')
-  ,('\7770','R')
-  ,('\7771','r')
-  ,('\7774','R')
-  ,('\7775','r')
-  ,('\7776','S')
-  ,('\7777','s')
-  ,('\7778','S')
-  ,('\7779','s')
-  ,('\7786','T')
-  ,('\7787','t')
-  ,('\7788','T')
-  ,('\7789','t')
-  ,('\7790','T')
-  ,('\7791','t')
-  ,('\7792','T')
-  ,('\7793','t')
-  ,('\7794','U')
-  ,('\7795','u')
-  ,('\7796','U')
-  ,('\7797','u')
-  ,('\7798','U')
-  ,('\7799','u')
-  ,('\7804','V')
-  ,('\7805','v')
-  ,('\7806','V')
-  ,('\7807','v')
-  ,('\7808','W')
-  ,('\7809','w')
-  ,('\7810','W')
-  ,('\7811','w')
-  ,('\7812','W')
-  ,('\7813','w')
-  ,('\7814','W')
-  ,('\7815','w')
-  ,('\7816','W')
-  ,('\7817','w')
-  ,('\7818','X')
-  ,('\7819','x')
-  ,('\7820','X')
-  ,('\7821','x')
-  ,('\7822','Y')
-  ,('\7823','y')
-  ,('\7824','Z')
-  ,('\7825','z')
-  ,('\7826','Z')
-  ,('\7827','z')
-  ,('\7828','Z')
-  ,('\7829','z')
-  ,('\7830','h')
-  ,('\7831','t')
-  ,('\7832','w')
-  ,('\7833','y')
-  ,('\7840','A')
-  ,('\7841','a')
-  ,('\7842','A')
-  ,('\7843','a')
-  ,('\7864','E')
-  ,('\7865','e')
-  ,('\7866','E')
-  ,('\7867','e')
-  ,('\7868','E')
-  ,('\7869','e')
-  ,('\7880','I')
-  ,('\7881','i')
-  ,('\7882','I')
-  ,('\7883','i')
-  ,('\7884','O')
-  ,('\7885','o')
-  ,('\7886','O')
-  ,('\7887','o')
-  ,('\7908','U')
-  ,('\7909','u')
-  ,('\7910','U')
-  ,('\7911','u')
-  ,('\7922','Y')
-  ,('\7923','y')
-  ,('\7924','Y')
-  ,('\7925','y')
-  ,('\7926','Y')
-  ,('\7927','y')
-  ,('\7928','Y')
-  ,('\7929','y')
-  ,('\8175','`')
-  ,('\8490','K')
-  ,('\8800','=')
-  ,('\8814','<')
-  ,('\8815','>')
-  ]
+toAsciiChar :: Char -> Maybe Char
+toAsciiChar c = case T.unpack (TN.normalize TN.NFD (T.singleton c)) of
+                  (x:xs) | isAscii x
+                         , all isMark xs
+                         -> Just x
+                  _      -> Nothing
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
deleted file mode 100644
index b41e93125..000000000
--- a/src/Text/Pandoc/BCP47.hs
+++ /dev/null
@@ -1,112 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.BCP47
-   Copyright   : Copyright (C) 2017–2020 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for parsing and rendering BCP47 language identifiers.
--}
-module Text.Pandoc.BCP47 (
-                       getLang
-                     , parseBCP47
-                     , Lang(..)
-                     , renderLang
-                     )
-where
-import Control.Monad (guard)
-import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.DocTemplates (FromContext(..))
-import qualified Data.Text as T
-import qualified Text.Parsec as P
-
--- | Represents BCP 47 language/country code.
-data Lang = Lang{ langLanguage :: T.Text
-                , langScript   :: T.Text
-                , langRegion   :: T.Text
-                , langVariants :: [T.Text] }
-                deriving (Eq, Ord, Show)
-
--- | Render a Lang as BCP 47.
-renderLang :: Lang -> T.Text
-renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
-                    ([langScript lang, langRegion lang] ++ langVariants lang))
-
--- | Get the contents of the `lang` metadata field or variable.
-getLang :: WriterOptions -> Meta -> Maybe T.Text
-getLang opts meta =
-  case lookupContext "lang" (writerVariables opts) of
-        Just s -> Just s
-        _      ->
-          case lookupMeta "lang" meta of
-               Just (MetaBlocks [Para [Str s]])  -> Just s
-               Just (MetaBlocks [Plain [Str s]]) -> Just s
-               Just (MetaInlines [Str s])        -> Just s
-               Just (MetaString s)               -> Just s
-               _                                 -> Nothing
-
--- | Parse a BCP 47 string as a Lang.  Currently we parse
--- extensions and private-use fields as "variants," even
--- though officially they aren't.
-parseBCP47 :: T.Text -> Either T.Text Lang
-parseBCP47 lang =
-  case P.parse bcp47 "lang" lang of
-       Right r -> Right r
-       Left e  -> Left $ T.pack $ show e
-  where bcp47 = do
-          language <- pLanguage
-          script <- P.option "" pScript
-          region <- P.option "" pRegion
-          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
-          P.eof
-          return Lang{   langLanguage = language
-                       , langScript = script
-                       , langRegion = region
-                       , langVariants = variants }
-        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
-        pLanguage = do
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toLower $ T.pack cs
-        pScript = P.try $ do
-          P.char '-'
-          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
-          xs <- P.count 3
-                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
-          return $ T.toLower $ T.pack (x:xs)
-        pRegion = P.try $ do
-          P.char '-'
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toUpper $ T.pack cs
-        pVariant = P.try $ do
-          P.char '-'
-          ds <- P.option "" (P.count 1 P.digit)
-          cs <- P.many1 asciiLetter
-          let var = ds ++ cs
-              lv = length var
-          guard $ if null ds
-                     then lv >= 5 && lv <= 8
-                     else lv == 4
-          return $ T.toLower $ T.pack var
-        pExtension = P.try $ do
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          let lcs = length cs
-          guard $ lcs >= 2 && lcs <= 8
-          return $ T.toLower $ T.pack cs
-        pPrivateUse = P.try $ do
-          P.char '-'
-          P.char 'x'
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          guard $ not (null cs) && length cs <= 8
-          let var = "x-" ++ cs
-          return $ T.toLower $ T.pack var
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
index d98c85147..625feadbb 100644
--- a/src/Text/Pandoc/CSS.hs
+++ b/src/Text/Pandoc/CSS.hs
@@ -1,6 +1,6 @@
 {- |
 Module      : Text.Pandoc.CSS
-Copyright   : © 2006-2020 John MacFarlane <jgm@berkeley.edu>,
+Copyright   : © 2006-2021 John MacFarlane <jgm@berkeley.edu>,
                 2015-2016 Mauro Bieg,
                 2015      Ophir Lifshitz <hangfromthefloor@gmail.com>
 License     : GNU GPL, version 2 or above
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
index 10812644f..2bd21bcfb 100644
--- a/src/Text/Pandoc/CSV.hs
+++ b/src/Text/Pandoc/CSV.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.CSV
-   Copyright   : Copyright (C) 2017–2020 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
@@ -68,8 +68,7 @@ escaped opts = try $
 
 pCSVUnquotedCell :: CSVOptions -> Parser Text
 pCSVUnquotedCell opts = T.pack <$>
-  many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n'
-                  && c /= csvQuote opts))
+  many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n'))
 
 pCSVDelim :: CSVOptions -> Parser ()
 pCSVDelim opts = do
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index a48f97c3b..246f54516 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -5,7 +5,10 @@
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Citeproc
-  ( processCitations )
+  ( processCitations,
+    getReferences,
+    getStyle
+  )
 where
 
 import Citeproc
@@ -15,38 +18,104 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
 import Text.Pandoc.Readers.Markdown (yamlToRefs)
-import Text.Pandoc.Class (setResourcePath, getResourcePath, getUserDataDir)
-import Data.ByteString (ByteString)
-import qualified Data.ByteString.Lazy as L
+import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta)
+import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Definition as Pandoc
-import Text.Pandoc.Walk
-import Text.Pandoc.Builder as B
-import Text.Pandoc (PandocMonad(..), PandocError(..),
-                    readDataFile, ReaderOptions(..), pandocExtensions,
-                    report, LogMessage(..), fetchItem)
-import Text.Pandoc.Shared (stringify, ordNub, blocksToInlines, tshow)
+import Text.Pandoc.Class (PandocMonad(..), getResourcePath, getUserDataDir,
+                          fetchItem, readDataFile, report, setResourcePath)
+import Text.Pandoc.Error (PandocError(..))
+import Text.Pandoc.Extensions (pandocExtensions)
+import Text.Pandoc.Logging (LogMessage(..))
+import Text.Pandoc.Options (ReaderOptions(..))
+import Text.Pandoc.Shared (stringify, ordNub, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Walk (query, walk, walkM)
+import Control.Applicative ((<|>))
+import Control.Monad.Except (catchError, throwError)
+import Control.Monad.State (State, evalState, get, put, runState)
 import Data.Aeson (eitherDecode)
-import Data.Default
-import Data.Ord ()
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Lazy as L
+import Data.Char (isPunctuation, isUpper)
+import Data.Default (Default(def))
+import qualified Data.Foldable as Foldable
 import qualified Data.Map as M
+import Data.Maybe (mapMaybe, fromMaybe)
+import Data.Ord ()
+import qualified Data.Sequence as Seq
 import qualified Data.Set as Set
-import Data.Char (isPunctuation, isUpper)
 import Data.Text (Text)
 import qualified Data.Text as T
-import Control.Monad.State
-import qualified Data.Sequence as Seq
-import qualified Data.Foldable as Foldable
-import System.FilePath
-import Control.Applicative
-import Control.Monad.Except
-import Data.Maybe (mapMaybe, fromMaybe)
+import System.FilePath (takeExtension)
 import Safe (lastMay, initSafe)
--- import Debug.Trace as Trace (trace, traceShowId)
 
 
-processCitations :: PandocMonad m => Pandoc -> m Pandoc
+processCitations  :: PandocMonad m => Pandoc -> m Pandoc
 processCitations (Pandoc meta bs) = do
+  style <- getStyle (Pandoc meta bs)
+
+  mblang <- getLang meta
+  let locale = Citeproc.mergeLocales mblang style
+
+  refs <- getReferences (Just locale) (Pandoc meta bs)
+
+  let otherIdsMap = foldr (\ref m ->
+                             case T.words . extractText <$>
+                                  M.lookup "other-ids"
+                                      (referenceVariables ref) of
+                                Nothing  -> m
+                                Just ids -> foldr
+                                  (\id' ->
+                                    M.insert id' (referenceId ref)) m ids)
+                          M.empty refs
+  let meta' = deleteMeta "nocite" meta
+  let citations = getCitations locale otherIdsMap $ Pandoc meta' bs
+
+
+  let linkCites = maybe False truish $ lookupMeta "link-citations" meta
+  let opts = defaultCiteprocOptions{ linkCitations = linkCites }
+  let result = Citeproc.citeproc opts style mblang refs citations
+  mapM_ (report . CiteprocWarning) (resultWarnings result)
+  let sopts = styleOptions style
+  let classes = "references" : -- TODO remove this or keep for compatibility?
+                "csl-bib-body" :
+                ["hanging-indent" | styleHangingIndent sopts]
+  let refkvs = (case styleEntrySpacing sopts of
+                   Just es | es > 0 -> (("entry-spacing",T.pack $ show es):)
+                   _ -> id) .
+               (case styleLineSpacing sopts of
+                   Just ls | ls > 1 -> (("line-spacing",T.pack $ show ls):)
+                   _ -> id) $ []
+  let bibs = mconcat $ map (\(ident, out) ->
+                     B.divWith ("ref-" <> ident,["csl-entry"],[]) . B.para .
+                         walk (convertQuotes locale) .
+                         insertSpace $ out)
+                      (resultBibliography result)
+  let moveNotes = styleIsNoteStyle sopts &&
+           maybe True truish (lookupMeta "notes-after-punctuation" meta)
+  let cits = map (walk (convertQuotes locale)) $
+               resultCitations result
+
+  let metanocites = lookupMeta "nocite" meta
+  let Pandoc meta'' bs' =
+         maybe id (setMeta "nocite") metanocites .
+         walk (mvPunct moveNotes locale) .
+         (if styleIsNoteStyle sopts
+             then walk addNote .  walk deNote
+             else id) .
+         evalState (walkM insertResolvedCitations $ Pandoc meta' bs)
+         $ cits
+  return $ Pandoc meta''
+         $ insertRefs refkvs classes meta''
+            (walk fixLinks $ B.toList bibs) bs'
+
+-- | Retrieve the CSL style specified by the csl or citation-style
+-- metadata field in a pandoc document, or the default CSL style
+-- if none is specified.  Retrieve the parent style
+-- if the style is a dependent style.  Add abbreviations defined
+-- in an abbreviation file if one has been specified.
+getStyle :: PandocMonad m => Pandoc -> m (Style Inlines)
+getStyle (Pandoc meta _) = do
   let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta)
                 >>= metaValueToText
 
@@ -87,86 +156,60 @@ processCitations (Pandoc meta bs) = do
           catchError (getFile ".csl" basename) (\_ -> fst <$> fetchItem url)
 
   styleRes <- Citeproc.parseStyle getParentStyle cslContents
-  style <-
-    case styleRes of
-       Left err    -> throwError $ PandocAppError $ prettyCiteprocError err
-       Right style -> return style{ styleAbbreviations = mbAbbrevs }
-  let mblang = parseLang <$>
-         ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= metaValueToText)
-  let locale = Citeproc.mergeLocales mblang style
+  case styleRes of
+     Left err    -> throwError $ PandocAppError $ prettyCiteprocError err
+     Right style -> return style{ styleAbbreviations = mbAbbrevs }
+
+
+-- Retrieve citeproc lang based on metadata.
+getLang :: PandocMonad m => Meta -> m (Maybe Lang)
+getLang meta = maybe (return Nothing) bcp47LangToIETF
+                 ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>=
+                   metaValueToText)
+
+-- | Get references defined inline in the metadata and via an external
+-- bibliography.  Only references that are actually cited in the document
+-- (either with a genuine citation or with `nocite`) are returned.
+-- URL variables are converted to links.
+getReferences :: PandocMonad m
+              => Maybe Locale -> Pandoc -> m [Reference Inlines]
+getReferences mblocale (Pandoc meta bs) = do
+  locale <- case mblocale of
+                Just l  -> return l
+                Nothing -> do
+                  mblang <- getLang meta
+                  case mblang of
+                    Just lang -> return $ either mempty id $ getLocale lang
+                    Nothing   -> return mempty
+
   let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs
       getCiteId _ = mempty
   let metanocites = lookupMeta "nocite" meta
-  let meta' = deleteMeta "nocite" meta
   let nocites = maybe mempty (query getCiteId) metanocites
   let citeIds = query getCiteId (Pandoc meta bs)
   let idpred = if "*" `Set.member` nocites
                   then const True
                   else (`Set.member` citeIds)
-  refs <- map (linkifyVariables . legacyDateRanges) <$>
-          case lookupMeta "references" meta of
-            Just (MetaList rs) -> return $ mapMaybe metaValueToReference rs
-            _                  ->
-              case lookupMeta "bibliography" meta of
-                 Just (MetaList xs) ->
-                   mconcat <$>
-                     mapM (getRefsFromBib locale idpred)
-                       (mapMaybe metaValueToText xs)
-                 Just x ->
-                   case metaValueToText x of
-                     Just fp -> getRefsFromBib locale idpred fp
-                     Nothing -> return []
-                 Nothing -> return []
-  let otherIdsMap = foldr (\ref m ->
-                             case T.words . extractText <$>
-                                  M.lookup "other-ids"
-                                      (referenceVariables ref) of
-                                Nothing  -> m
-                                Just ids -> foldr
-                                  (\id' ->
-                                    M.insert id' (referenceId ref)) m ids)
-                          M.empty refs
-  -- TODO: issue warning if no refs defined
-  let citations = getCitations locale otherIdsMap $ Pandoc meta' bs
-  let linkCites = maybe False truish $ lookupMeta "link-citations" meta
-  let opts = defaultCiteprocOptions{ linkCitations = linkCites }
-  let result = Citeproc.citeproc opts style (localeLanguage locale)
-                  refs citations
-  mapM_ (report . CiteprocWarning) (resultWarnings result)
-  let sopts = styleOptions style
-  let classes = "references" : -- TODO remove this or keep for compatibility?
-                "csl-bib-body" :
-                ["hanging-indent" | styleHangingIndent sopts]
-  let refkvs = (case styleEntrySpacing sopts of
-                   Just es | es > 0 -> (("entry-spacing",T.pack $ show es):)
-                   _ -> id) .
-               (case styleLineSpacing sopts of
-                   Just ls | ls > 1 -> (("line-spacing",T.pack $ show ls):)
-                   _ -> id) $ []
-  let bibs = mconcat $ map (\(ident, out) ->
-                     B.divWith ("ref-" <> ident,["csl-entry"],[]) . B.para .
-                       walk (convertQuotes locale) .  insertSpace $ out)
-                      (resultBibliography result)
-  let moveNotes = maybe True truish $
-                        lookupMeta "notes-after-punctuation" meta
-  let cits = map (walk fixLinks . walk (convertQuotes locale)) $
-               resultCitations result
+  let inlineRefs = case lookupMeta "references" meta of
+                    Just (MetaList rs) ->
+                      filter (idpred . unItemId . referenceId)
+                         $  mapMaybe metaValueToReference rs
+                    _                  -> []
+  externalRefs <- case lookupMeta "bibliography" meta of
+                    Just (MetaList xs) ->
+                      mconcat <$>
+                        mapM (getRefsFromBib locale idpred)
+                          (mapMaybe metaValueToText xs)
+                    Just x ->
+                      case metaValueToText x of
+                        Just fp -> getRefsFromBib locale idpred fp
+                        Nothing -> return []
+                    Nothing -> return []
+  return $ map (linkifyVariables . legacyDateRanges)
+               (externalRefs ++ inlineRefs)
+            -- note that inlineRefs can override externalRefs
 
-  let fixQuotes = case localePunctuationInQuote locale of
-                    Just True ->
-                      B.toList . movePunctuationInsideQuotes .  B.fromList
-                    _ -> id
 
-  let Pandoc meta'' bs' =
-         maybe id (setMeta "nocite") metanocites .
-         walk (map capitalizeNoteCitation .
-                fixQuotes .  mvPunct moveNotes locale) .
-         walk deNote .
-         evalState (walkM insertResolvedCitations $ Pandoc meta' bs)
-         $ cits
-  return $ Pandoc meta''
-         $ insertRefs refkvs classes meta''
-            (walk fixLinks $ B.toList bibs) bs'
 
 -- If we have a span.csl-left-margin followed by span.csl-right-inline,
 -- we insert a space. This ensures that they will be separated by a space,
@@ -187,24 +230,23 @@ insertSpace ils =
 
 getRefsFromBib :: PandocMonad m
                => Locale -> (Text -> Bool) -> Text -> m [Reference Inlines]
-getRefsFromBib locale idpred t = do
-  let fp = T.unpack t
-  raw <- readFileStrict fp
-  case formatFromExtension fp of
+getRefsFromBib locale idpred fp = do
+  (raw, _) <- fetchItem fp
+  case formatFromExtension (T.unpack fp) of
     Just f -> getRefs locale f idpred (Just fp) raw
     Nothing -> throwError $ PandocAppError $
-                 "Could not determine bibliography format for " <> t
+                 "Could not determine bibliography format for " <> fp
 
 getRefs :: PandocMonad m
         => Locale
         -> BibFormat
         -> (Text -> Bool)
-        -> Maybe FilePath
+        -> Maybe Text
         -> ByteString
         -> m [Reference Inlines]
 getRefs locale format idpred mbfp raw = do
   let err' = throwError .
-             PandocBibliographyError (maybe mempty T.pack mbfp)
+             PandocBibliographyError (fromMaybe mempty mbfp)
   case format of
     Format_bibtex ->
       either (err' . tshow) return .
@@ -219,7 +261,7 @@ getRefs locale format idpred mbfp raw = do
     Format_yaml -> do
       rs <- yamlToRefs idpred
               def{ readerExtensions = pandocExtensions }
-              mbfp
+              (T.unpack <$> mbfp)
               (L.fromStrict raw)
       return $ mapMaybe metaValueToReference rs
 
@@ -248,7 +290,7 @@ insertResolvedCitations (Cite cs ils) = do
     [] -> return (Cite cs ils)
     (x:xs) -> do
       put xs
-      return $ Cite cs (B.toList x)
+      return $ Cite cs (walk fixLinks $ B.toList x)
 insertResolvedCitations x = return x
 
 getCitations :: Locale
@@ -330,7 +372,6 @@ formatFromExtension fp = case dropWhile (== '.') $ takeExtension fp of
 
 
 isNote :: Inline -> Bool
-isNote (Note _)          = True
 isNote (Cite _ [Note _]) = True
  -- the following allows citation styles that are "in-text" but use superscript
  -- references to be treated as if they are "notes" for the purposes of moving
@@ -343,6 +384,12 @@ isSpacy Space     = True
 isSpacy SoftBreak = True
 isSpacy _         = False
 
+movePunctInsideQuotes :: Locale -> [Inline] -> [Inline]
+movePunctInsideQuotes locale
+  | localePunctuationInQuote locale == Just True
+    = B.toList . movePunctuationInsideQuotes . B.fromList
+  | otherwise
+    = id
 
 mvPunct :: Bool -> Locale -> [Inline] -> [Inline]
 mvPunct moveNotes locale (x : xs)
@@ -355,7 +402,8 @@ mvPunct moveNotes locale (q : s : x : ys)
     in  if moveNotes
            then if T.null spunct
                    then q : x : mvPunct moveNotes locale ys
-                   else q : Str spunct : x : mvPunct moveNotes locale
+                   else movePunctInsideQuotes locale
+                        [q , Str spunct , x] ++ mvPunct moveNotes locale
                         (B.toList
                           (dropTextWhile isPunctuation (B.fromList ys)))
            else q : x : mvPunct moveNotes locale ys
@@ -367,9 +415,10 @@ mvPunct moveNotes locale (Cite cs ils : ys)
    , moveNotes
    = let s = stringify ys
          spunct = T.takeWhile isPunctuation s
-     in  Cite cs (init ils
-                  ++ [Str spunct | not (endWithPunct False (init ils))]
-                  ++ [last ils]) :
+     in  Cite cs (movePunctInsideQuotes locale $
+                    init ils
+                    ++ [Str spunct | not (endWithPunct False (init ils))]
+                    ++ [last ils]) :
          mvPunct moveNotes locale
            (B.toList (dropTextWhile isPunctuation (B.fromList ys)))
 mvPunct moveNotes locale (s : x : ys) | isSpacy s, isNote x =
@@ -385,7 +434,7 @@ mvPunct _ _ [] = []
 -- move https://doi.org etc. prefix inside link text (#6723):
 fixLinks :: [Inline] -> [Inline]
 fixLinks (Str t : Link attr [Str u1] (u2,tit) : xs)
-  | t <> u1 == u2
+  | u2 == t <> u1
   = Link attr [Str (t <> u1)] (u2,tit) : fixLinks xs
 fixLinks (x:xs) = x : fixLinks xs
 fixLinks [] = []
@@ -455,7 +504,8 @@ insertRefs refkvs refclasses meta refs bs =
      put True
      -- refHeader isn't used if you have an explicit references div
      let cs' = ordNub $ cs ++ refclasses
-     return $ Div ("refs",cs',kvs) (xs ++ refs)
+     let kvs' = ordNub $ kvs ++ refkvs
+     return $ Div ("refs",cs',kvs') (xs ++ refs)
    go x = return x
 
 refTitle :: Meta -> Maybe [Inline]
@@ -498,13 +548,15 @@ linkifyVariables ref =
   fixShortDOI x = let x' = extractText x
                   in  if "10/" `T.isPrefixOf` x'
                          then TextVal $ T.drop 3 x'
-                              -- see http://shortdoi.org
+                              -- see https://shortdoi.org
                          else TextVal x'
   tolink pref x = let x' = extractText x
                       x'' = if "://" `T.isInfixOf` x'
                                then x'
                                else pref <> x'
-                  in  FancyVal (B.link x'' "" (B.str x'))
+                  in  if T.null x'
+                         then x
+                         else FancyVal (B.link x'' "" (B.str x'))
 
 extractText :: Val Inlines -> Text
 extractText (TextVal x)  = x
@@ -512,42 +564,62 @@ extractText (FancyVal x) = toText x
 extractText (NumVal n)   = T.pack (show n)
 extractText _            = mempty
 
-capitalizeNoteCitation :: Inline -> Inline
-capitalizeNoteCitation (Cite cs [Note [Para ils]]) =
-  Cite cs
-  [Note [Para $ B.toList $ addTextCase Nothing CapitalizeFirst
-              $ B.fromList ils]]
-capitalizeNoteCitation x = x
-
-deNote :: [Inline] -> [Inline]
-deNote [] = []
-deNote (Note bs:rest) =
-  Note (walk go bs) : deNote rest
+-- Here we take the Spans with class csl-note that are left
+-- after deNote has removed nested ones, and convert them
+-- into real notes.
+addNote :: Inline -> Inline
+addNote (Span ("",["csl-note"],[]) ils) =
+  Note [Para $
+         B.toList . addTextCase Nothing CapitalizeFirst . B.fromList $ ils]
+addNote x = x
+
+-- Here we handle citation notes that occur inside footnotes
+-- or other citation notes, in a note style.  We don't want
+-- notes inside notes, so we convert these to parenthesized
+-- or comma-separated citations.
+deNote :: Inline -> Inline
+deNote (Note bs) =
+  case bs of
+    [Para (cit@(Cite (c:_) _) : ils)]
+       | citationMode c /= AuthorInText ->
+         -- if citation is first in note, no need to parenthesize.
+         Note [Para (walk removeNotes $ cit : walk addParens ils)]
+    _ -> Note (walk removeNotes . walk addParens $ bs)
+
  where
-  go [] = []
-  go (Cite (c:cs) ils : zs)
+  addParens [] = []
+  addParens (Cite (c:cs) ils : zs)
     | citationMode c == AuthorInText
-      = Cite (c:cs) (concatMap (noteAfterComma (needsPeriod zs)) ils) : go zs
+      = Cite (c:cs) (concatMap (noteAfterComma (needsPeriod zs)) ils) :
+        addParens zs
     | otherwise
-      = Cite (c:cs) (concatMap noteInParens ils) : go zs
-  go (x:xs) = x : go xs
+      = Cite (c:cs) (concatMap noteInParens ils) : addParens zs
+  addParens (x:xs) = x : addParens xs
+
+  removeNotes (Span ("",["csl-note"],[]) ils) = Span ("",[],[]) ils
+  removeNotes x = x
+
   needsPeriod [] = True
   needsPeriod (Str t:_) = case T.uncons t of
                             Nothing    -> False
                             Just (c,_) -> isUpper c
   needsPeriod (Space:zs) = needsPeriod zs
   needsPeriod _ = False
-  noteInParens (Note bs')
+
+  noteInParens (Span ("",["csl-note"],[]) ils)
        = Space : Str "(" :
-         removeFinalPeriod (blocksToInlines bs') ++ [Str ")"]
+         removeFinalPeriod ils ++ [Str ")"]
   noteInParens x = [x]
-  noteAfterComma needsPer (Note bs')
+
+  noteAfterComma needsPer (Span ("",["csl-note"],[]) ils)
+    | not (null ils)
        = Str "," : Space :
-         (if needsPer
-             then id
-             else removeFinalPeriod) (blocksToInlines bs')
+         if needsPer
+            then ils
+            else removeFinalPeriod ils
   noteAfterComma _ x = [x]
-deNote (x:xs) = x : deNote xs
+
+deNote x = x
 
 -- Note: we can't use dropTextWhileEnd indiscriminately,
 -- because this would remove the final period on abbreviations like Ibid.
@@ -579,3 +651,11 @@ removeFinalPeriod ils =
   isRightQuote "\8217" = True
   isRightQuote "\187"  = True
   isRightQuote _       = False
+
+bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang)
+bcp47LangToIETF bcplang =
+  case parseLang bcplang of
+    Left _ -> do
+      report $ InvalidLang bcplang
+      return Nothing
+    Right lang -> return $ Just lang
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 552339df0..c178de6e9 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE ViewPatterns      #-}
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE FlexibleContexts #-}
@@ -17,6 +18,7 @@
 module Text.Pandoc.Citeproc.BibTeX
     ( Variant(..)
     , readBibtexString
+    , writeBibtexString
     )
     where
 
@@ -24,10 +26,11 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Readers.LaTeX (readLaTeX)
 import Text.Pandoc.Extensions (Extension(..), extensionsFromList)
-import Text.Pandoc.Options (ReaderOptions(..))
-import Text.Pandoc.Class (runPure)
+import Text.Pandoc.Options (ReaderOptions(..), WriterOptions)
 import Text.Pandoc.Error (PandocError)
 import Text.Pandoc.Shared (stringify)
+import Text.Pandoc.Writers.LaTeX (writeLaTeX)
+import Text.Pandoc.Class (runPure)
 import qualified Text.Pandoc.Walk       as Walk
 import Citeproc.Types
 import Citeproc.Pandoc ()
@@ -46,17 +49,21 @@ import qualified Data.Sequence          as Seq
 import           Data.Char              (isAlphaNum, isDigit, isLetter,
                                          isUpper, toLower, toUpper,
                                          isLower, isPunctuation)
-import           Data.List              (foldl', intercalate)
+import           Data.List              (foldl', intercalate, intersperse)
 import           Safe                   (readMay)
+import           Text.Printf            (printf)
+import           Text.DocLayout         (literal, hsep, nest, hang, Doc(..),
+                                         braces, ($$), cr)
 
 data Variant = Bibtex | Biblatex
   deriving (Show, Eq, Ord)
 
 -- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
-readBibtexString :: Variant           -- ^ bibtex or biblatex
+readBibtexString :: ToSources a
+                 => Variant           -- ^ bibtex or biblatex
                  -> Locale            -- ^ Locale
                  -> (Text -> Bool)    -- ^ Filter on citation ids
-                 -> Text              -- ^ bibtex/biblatex text
+                 -> a                 -- ^ bibtex/biblatex text
                  -> Either ParseError [Reference Inlines]
 readBibtexString variant locale idpred contents = do
   case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
@@ -64,17 +71,280 @@ readBibtexString variant locale idpred contents = do
                       filter (\item -> idpred (identifier item) &&
                                         entryType item /= "xdata"))
            (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
-           "" contents of
+           (initialSourceName sources) sources of
           Left err -> Left err
           Right xs -> return xs
+ where
+  sources = toSources contents
+
+-- | Write BibTeX or BibLaTeX given given a 'Reference'.
+writeBibtexString :: WriterOptions       -- ^ options (for writing LaTex)
+                  -> Variant             -- ^ bibtex or biblatex
+                  -> Maybe Lang          -- ^ Language
+                  -> Reference Inlines   -- ^ Reference to write
+                  -> Doc Text
+writeBibtexString opts variant mblang ref =
+  "@" <> bibtexType <> "{" <> literal (unItemId (referenceId ref)) <> ","
+  $$ nest 2 (renderFields fs)
+  $$ "}" <> cr
+
+ where
+  bibtexType =
+    case referenceType ref of
+      "article-magazine"  -> "article"
+      "article-newspaper" -> "article"
+      "article-journal"   -> "article"
+      "book"              -> "book"
+      "pamphlet"          -> "booklet"
+      "dataset" | variant == Biblatex -> "dataset"
+      "webpage" | variant == Biblatex -> "online"
+      "chapter"           -> case getVariable "editor" of
+                                Just _  -> "incollection"
+                                Nothing -> "inbook"
+      "entry-encyclopedia" | variant == Biblatex -> "inreference"
+                           | otherwise -> "inbook"
+      "paper-conference"  -> "inproceedings"
+      "thesis" -> case getVariableAsText "genre" of
+                    Just "mathesis" -> "mastersthesis"
+                    _               -> "phdthesis"
+      "patent"            | variant == Biblatex -> "patent"
+      "report"            | variant == Biblatex -> "report"
+                          | otherwise -> "techreport"
+      "speech"            -> "unpublished"
+      "manuscript"        -> "unpublished"
+      "graphic"           | variant == Biblatex -> "artwork"
+      "song"              | variant == Biblatex -> "music"
+      "legal_case"        | variant == Biblatex -> "jurisdictionN"
+      "legislation"       | variant == Biblatex -> "legislation"
+      "treaty"            | variant == Biblatex -> "legal"
+      "personal_communication" | variant == Biblatex -> "letter"
+      "motion_picture"    | variant == Biblatex -> "movie"
+      "review"             | variant == Biblatex -> "review"
+      _                   -> "misc"
+
+  mbSubtype =
+    case referenceType ref of
+      "article-magazine"  -> Just "magazine"
+      "article-newspaper" -> Just "newspaper"
+      _ -> Nothing
+
+  fs =
+    case variant of
+      Biblatex ->
+           [ "author"
+           , "editor"
+           , "translator"
+           , "publisher"
+           , "title"
+           , "booktitle"
+           , "journal"
+           , "series"
+           , "edition"
+           , "volume"
+           , "volumes"
+           , "number"
+           , "pages"
+           , "date"
+           , "eventdate"
+           , "urldate"
+           , "address"
+           , "url"
+           , "doi"
+           , "isbn"
+           , "issn"
+           , "type"
+           , "entrysubtype"
+           , "note"
+           , "langid"
+           , "abstract"
+           , "keywords"
+           , "annote"
+           ]
+      Bibtex ->
+           [ "author"
+           , "editor"
+           , "translator"
+           , "publisher"
+           , "title"
+           , "booktitle"
+           , "journal"
+           , "series"
+           , "edition"
+           , "volume"
+           , "number"
+           , "pages"
+           , "year"
+           , "month"
+           , "address"
+           , "type"
+           , "note"
+           , "annote"
+           ]
+
+  valToInlines (TextVal t) = B.text t
+  valToInlines (FancyVal ils) = ils
+  valToInlines (NumVal n) = B.text (T.pack $ show n)
+  valToInlines (NamesVal names) =
+    mconcat $ intersperse (B.space <> B.text "and" <> B.space)
+            $ map renderName names
+  valToInlines (DateVal date) = B.text $
+    case dateLiteral date of
+      Just t  -> t
+      Nothing -> T.intercalate "/" (map renderDatePart (dateParts date)) <>
+                    (if dateCirca date then "~" else mempty)
+
+  renderDatePart (DateParts xs) = T.intercalate "-" $
+                                    map (T.pack . printf "%02d") xs
+
+  renderName name =
+    case nameLiteral name of
+      Just t  -> B.text t
+      Nothing -> spacedMaybes
+                  [ nameNonDroppingParticle name
+                  , nameFamily name
+                  , if nameCommaSuffix name
+                        then (", " <>) <$> nameSuffix name
+                        else nameSuffix name ]
+                  <>
+                  spacedMaybes
+                   [ (", " <>) <$> nameGiven name,
+                     nameDroppingParticle name ]
+
+  mblang' = case getVariableAsText "language" of
+              Just l  -> either (const Nothing) Just $ parseLang l
+              Nothing -> mblang
+
+  titlecase = case mblang' of
+                Just lang | langLanguage lang == "en"
+                                   -> titlecase'
+                Nothing            -> titlecase'
+                _                  ->
+                  case variant of
+                    Bibtex         -> B.spanWith nullAttr
+                     -- BibTex lacks a language field, so we wrap non-English
+                     -- titles in {} to protect case.
+                    Biblatex       -> id
+
+  titlecase' = addTextCase mblang' TitleCase .
+    (\ils -> B.fromList
+               (case B.toList ils of
+                  Str t : xs -> Str t : Walk.walk spanAroundCapitalizedWords xs
+                  xs         -> Walk.walk spanAroundCapitalizedWords xs))
+
+  -- protect capitalized words when we titlecase
+  spanAroundCapitalizedWords (Str t)
+    | not (T.all (\c -> isLower c || not (isLetter c)) t) =
+       Span ("",["nocase"],[]) [Str t]
+  spanAroundCapitalizedWords x = x
+
+  spacedMaybes = mconcat . intersperse B.space . mapMaybe (fmap B.text)
+
+  toLaTeX x =
+    case runPure (writeLaTeX opts $ doc (B.plain x)) of
+           Left _  -> Nothing
+           Right t -> Just $ hsep . map literal $ T.words t
+
+  renderField :: Text -> Maybe (Doc Text)
+  renderField name =
+    (((literal name) <>) . hang 2 " = " . braces)
+      <$> getContentsFor name
+
+  getVariable v = lookupVariable (toVariable v) ref
+
+  getVariableAsText v = (stringify . valToInlines) <$> getVariable v
+
+  getYear val =
+    case val of
+       DateVal date ->
+         case dateLiteral date of
+           Just t -> toLaTeX (B.text t)
+           Nothing ->
+             case dateParts date of
+               [DateParts (y1:_), DateParts (y2:_)] ->
+                 Just $ literal (T.pack (printf "%04d" y1) <> "--" <>
+                        T.pack (printf "%04d" y2))
+               [DateParts (y1:_)] ->
+                 Just $ literal (T.pack (printf "%04d" y1))
+               _ -> Nothing
+       _ -> Nothing
+
+  toMonth 1 = "jan"
+  toMonth 2 = "feb"
+  toMonth 3 = "mar"
+  toMonth 4 = "apr"
+  toMonth 5 = "may"
+  toMonth 6 = "jun"
+  toMonth 7 = "jul"
+  toMonth 8 = "aug"
+  toMonth 9 = "sep"
+  toMonth 10 = "oct"
+  toMonth 11 = "nov"
+  toMonth 12 = "dec"
+  toMonth x  = T.pack $ show x
+
+  getMonth val =
+    case val of
+       DateVal date ->
+         case dateParts date of
+           [DateParts (_:m1:_), DateParts (_:m2:_)] ->
+             Just $ literal (toMonth m1 <> "--" <> toMonth m2)
+           [DateParts (_:m1:_)] -> Just $ literal (toMonth m1)
+           _ -> Nothing
+       _ -> Nothing
+
+  getContentsFor :: Text -> Maybe (Doc Text)
+  getContentsFor "type" =
+    getVariableAsText "genre" >>=
+       \case
+          "mathesis"  -> Just "mastersthesis"
+          "phdthesis" -> Just "phdthesis"
+          _           -> Nothing
+  getContentsFor "entrysubtype" = literal <$> mbSubtype
+  getContentsFor "journal"
+    | bibtexType `elem` ["article", "periodical", "suppperiodical", "review"]
+      = getVariable "container-title" >>= toLaTeX . valToInlines
+    | otherwise = Nothing
+  getContentsFor "booktitle"
+    | bibtexType `elem`
+       ["inbook","incollection","inproceedings","inreference","bookinbook"]
+    = (getVariable "volume-title" <|> getVariable "container-title")
+                               >>= toLaTeX . valToInlines
+    | otherwise = Nothing
+  getContentsFor "series" = getVariable "collection-title"
+                               >>= toLaTeX . valToInlines
+  getContentsFor "address" = getVariable "publisher-place"
+                               >>= toLaTeX . valToInlines
+  getContentsFor "date"  = getVariable "issued" >>= toLaTeX . valToInlines
+  getContentsFor "eventdate" = getVariable "event-date" >>= toLaTeX . valToInlines
+  getContentsFor "urldate"  = getVariable "accessed" >>= toLaTeX . valToInlines
+  getContentsFor "year"  = getVariable "issued" >>= getYear
+  getContentsFor "month"  = getVariable "issued" >>= getMonth
+  getContentsFor "pages"  = getVariable "page" >>= toLaTeX . valToInlines
+  getContentsFor "langid"  = getVariable "language" >>= toLaTeX . valToInlines
+  getContentsFor "number" = (getVariable "number"
+                         <|> getVariable "collection-number"
+                         <|> getVariable "issue") >>= toLaTeX . valToInlines
+
+  getContentsFor x = getVariable x >>=
+    if isURL x
+       then Just . literal . stringify . valToInlines
+       else toLaTeX .
+            (if x == "title"
+                then titlecase
+                else id) .
+            valToInlines
+
+  isURL x = x `elem` ["url","doi","issn","isbn"]
+
+  renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField
 
 defaultLang :: Lang
-defaultLang = Lang "en" (Just "US")
+defaultLang = Lang "en" Nothing (Just "US") [] [] []
 
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
 
-type BibParser = Parser Text (Lang, StringMap)
+type BibParser = Parser Sources (Lang, StringMap)
 
 data Item = Item{ identifier :: Text
                 , sourcePos  :: SourcePos
@@ -89,9 +359,7 @@ itemToReference locale variant item = do
   bib item $ do
     let lang = fromMaybe defaultLang $ localeLanguage locale
     modify $ \st -> st{ localeLang = lang,
-                        untitlecase = case lang of
-                                           (Lang "en" _) -> True
-                                           _             -> False }
+                        untitlecase = langLanguage lang == "en" }
 
     id' <- asks identifier
     otherIds <- (Just <$> getRawField "ids")
@@ -315,10 +583,10 @@ itemToReference locale variant item = do
                  eprint <- getRawField "eprint"
                  let baseUrl =
                        case T.toLower etype of
-                         "arxiv"       -> "http://arxiv.org/abs/"
-                         "jstor"       -> "http://www.jstor.org/stable/"
-                         "pubmed"      -> "http://www.ncbi.nlm.nih.gov/pubmed/"
-                         "googlebooks" -> "http://books.google.com?id="
+                         "arxiv"       -> "https://arxiv.org/abs/"
+                         "jstor"       -> "https://www.jstor.org/stable/"
+                         "pubmed"      -> "https://www.ncbi.nlm.nih.gov/pubmed/"
+                         "googlebooks" -> "https://books.google.com?id="
                          _             -> ""
                  if T.null baseUrl
                     then mzero
@@ -449,7 +717,7 @@ itemToReference locale variant item = do
 
 
 bib :: Item -> Bib a -> BibParser a
-bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
+bib entry m = fst <$> evalRWST m entry (BibState True defaultLang)
 
 resolveCrossRefs :: Variant -> [Item] -> [Item]
 resolveCrossRefs variant entries =
@@ -502,41 +770,10 @@ blocksToInlines bs =
        _          -> B.fromList $ Walk.query (:[]) bs
 
 adjustSpans :: Lang -> Inline -> Inline
-adjustSpans lang (RawInline (Format "latex") s)
-  | s == "\\hyphen" || s == "\\hyphen " = Str "-"
-  | otherwise = parseRawLaTeX lang s
+adjustSpans lang (Span ("",[],[("bibstring",s)]) _) = Str $ resolveKey' lang s
 adjustSpans _ SoftBreak = Space
 adjustSpans _ x = x
 
-parseRawLaTeX :: Lang -> Text -> Inline
-parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
-  case parseLaTeX lang contents of
-          Right [Para ys]  -> f command ys
-          Right [Plain ys] -> f command ys
-          Right []         -> f command []
-          _                -> RawInline (Format "latex") t
-   where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
-         command  = T.strip command'
-         contents = T.drop 1 $ T.dropEnd 1 contents'
-         f "mkbibquote"    ils = Span nullAttr [Quoted DoubleQuote ils]
-         f "mkbibemph"     ils = Span nullAttr [Emph ils]
-         f "mkbibitalic"   ils = Span nullAttr [Emph ils]
-         f "mkbibbold"     ils = Span nullAttr [Strong ils]
-         f "mkbibparens"   ils = Span nullAttr $
-                                  [Str "("] ++ ils ++ [Str ")"]
-         f "mkbibbrackets" ils = Span nullAttr $
-                                  [Str "["] ++ ils ++ [Str "]"]
-         -- ... both should be nestable & should work in year fields
-         f "autocap"    ils    = Span nullAttr ils
-           -- TODO: should work in year fields
-         f "textnormal" ils    = Span ("",["nodecor"],[]) ils
-         f "bibstring" [Str s] = Str $ resolveKey' lang s
-         f "adddot"    []      = Str "."
-         f "adddotspace" []    = Span nullAttr [Str ".", Space]
-         f "addabbrvspace" []  = Space
-         f _            ils    = Span nullAttr ils
-parseRawLaTeX _ t = RawInline (Format "latex") t
-
 latex' :: Text -> Bib [Block]
 latex' t = do
   lang <- gets localeLang
@@ -572,7 +809,7 @@ bibEntries = do
                        (bibComment <|> bibPreamble <|> bibString))
 
 bibSkip :: BibParser ()
-bibSkip = () <$ take1WhileP (/='@')
+bibSkip = skipMany1 (satisfy (/='@'))
 
 bibComment :: BibParser ()
 bibComment = do
@@ -597,11 +834,14 @@ bibString = do
   updateState (\(l,m) -> (l, Map.insert k v m))
   return ()
 
+take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text
+take1WhileP f = T.pack <$> many1 (satisfy f)
+
 inBraces :: BibParser Text
 inBraces = do
   char '{'
   res <- manyTill
-         (  (T.pack <$> many1 (noneOf "{}\\"))
+         (  take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\')
         <|> (char '\\' >> (  (char '{' >> return "\\{")
                          <|> (char '}' >> return "\\}")
                          <|> return "\\"))
@@ -616,7 +856,7 @@ inQuotes :: BibParser Text
 inQuotes = do
   char '"'
   T.concat <$> manyTill
-             (  (T.pack <$> many1 (noneOf "\"\\{"))
+             ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\')
                <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
                <|> braced <$> inBraces
             ) (char '"')
@@ -628,7 +868,7 @@ fieldName = resolveAlias . T.toLower
 
 isBibtexKeyChar :: Char -> Bool
 isBibtexKeyChar c =
-  isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char])
+  isAlphaNum c || c `elem` (".:;?!`'()$/*@_+=-[]*&" :: [Char])
 
 bibItem :: BibParser Item
 bibItem = do
@@ -812,14 +1052,14 @@ getOldDate prefix = do
   let dateparts = filter (\x -> x /= DateParts [])
                   $ map toDateParts [(year',month',day'),
                                      (endyear',endmonth',endday')]
-  literal <- if null dateparts
-                then Just <$> getRawField (prefix <> "year")
-                else return Nothing
+  literal' <- if null dateparts
+                 then Just <$> getRawField (prefix <> "year")
+                 else return Nothing
   return $
     Date { dateParts = dateparts
          , dateCirca = False
          , dateSeason = Nothing
-         , dateLiteral = literal }
+         , dateLiteral = literal' }
 
 getRawField :: Text -> Bib Text
 getRawField f = do
@@ -1225,8 +1465,9 @@ resolveKey lang ils = Walk.walk go ils
         go x       = x
 
 resolveKey' :: Lang -> Text -> Text
-resolveKey' lang@(Lang l _) k =
-  case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
+resolveKey' lang k =
+  case Map.lookup (langLanguage lang) biblatexStringMap >>=
+        Map.lookup (T.toLower k) of
     Nothing     -> k
     Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
 
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index dfdaf2598..848a83a1e 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE
 import qualified Data.Text as T
 import Data.Text (Text)
 import Text.Pandoc.Citeproc.Util (toIETF)
-import Citeproc (Lang(..), parseLang)
+import Text.Collate.Lang (Lang(..), parseLang)
 
 biblatexLocalizations :: [(FilePath, ByteString)]
 biblatexLocalizations = $(embedDir "citeproc/biblatex-localization")
@@ -21,11 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text))
 biblatexStringMap = foldr go mempty biblatexLocalizations
  where
   go (fp, bs) =
-    let Lang lang _ = parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
-        ls = T.lines $ TE.decodeUtf8 bs
-     in if length ls > 4
-           then M.insert lang (toStringMap $ map (T.splitOn "|") ls)
-           else id
+    let ls = T.lines $ TE.decodeUtf8 bs
+     in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of
+          Right lang | length ls > 4
+            -> M.insert (langLanguage lang)
+                        (toStringMap $ map (T.splitOn "|") ls)
+          _ -> id
   toStringMap = foldr go' mempty
   go' [term, x, y] = M.insert term (x, y)
   go' _ = id
diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
index dba762c02..f8931d7b5 100644
--- a/src/Text/Pandoc/Citeproc/Locator.hs
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -7,6 +7,7 @@ where
 import Citeproc.Types
 import Data.Text (Text)
 import qualified Data.Text as T
+import Data.List (foldl')
 import Text.Parsec
 import Text.Pandoc.Definition
 import Text.Pandoc.Parsing (romanNumeral)
@@ -19,7 +20,7 @@ parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline])
 parseLocator locale inp =
   case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of
        Right r -> r
-       Left _  -> (Nothing, inp)
+       Left _  -> (Nothing, maybeAddComma inp)
 
 splitInp :: [Inline] -> [Inline]
 splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':'))
@@ -41,9 +42,17 @@ pLocatorWords locMap = do
   -- i.e. the first one will be " 9"
   return $
     if T.null la && T.null lo
-       then (Nothing, s)
+       then (Nothing, maybeAddComma s)
        else (Just (la, T.strip lo), s)
 
+maybeAddComma :: [Inline] -> [Inline]
+maybeAddComma [] = []
+maybeAddComma ils@(Space : _) = ils
+maybeAddComma ils@(Str t : _)
+  | Just (c, _) <- T.uncons t
+  , isPunctuation c = ils
+maybeAddComma ils = Str "," : Space : ils
+
 pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text)
 pLocatorDelimited locMap = try $ do
   _ <- pMatchChar "{" (== '{')
@@ -96,7 +105,7 @@ pLocatorLabel' locMap lim = go ""
           t <- anyToken
           ts <- manyTill anyToken (try $ lookAhead lim)
           let s = acc <> stringify (t:ts)
-          case M.lookup (T.strip s) locMap of
+          case M.lookup (T.toCaseFold $ T.strip s) locMap of
             -- try to find a longer one, or return this one
             Just l -> go s <|> return (l, False)
             Nothing -> go s
@@ -139,7 +148,7 @@ pBalancedBraces braces p = try $ do
   where
       except = notFollowedBy pBraces >> p
       -- outer and inner
-      surround = foldl (\a (open, close) -> sur open close except <|> a)
+      surround = foldl' (\a (open, close) -> sur open close except <|> a)
                        except
                        braces
 
@@ -180,6 +189,7 @@ pPageUnit = roman <|> plainUnit
       plainUnit = do
           ts <- many1 (notFollowedBy pSpace >>
                        notFollowedBy pLocatorPunct >>
+                       notFollowedBy pMath >>
                        anyToken)
           let s = stringify ts
           -- otherwise look for actual digits or -s
@@ -210,6 +220,12 @@ pMatchChar msg f = satisfyTok f' <?> msg
 pSpace :: LocatorParser Inline
 pSpace = satisfyTok (\t -> isSpacey t || t == Str "\160") <?> "space"
 
+pMath :: LocatorParser Inline
+pMath = satisfyTok isMath
+ where
+  isMath (Math{}) = True
+  isMath _ = False
+
 satisfyTok :: (Inline -> Bool) -> LocatorParser Inline
 satisfyTok f = tokenPrim show (\sp _ _ -> sp) (\tok -> if f tok
                                                           then Just tok
diff --git a/src/Text/Pandoc/Citeproc/MetaValue.hs b/src/Text/Pandoc/Citeproc/MetaValue.hs
index f5a49f49e..b43ca7314 100644
--- a/src/Text/Pandoc/Citeproc/MetaValue.hs
+++ b/src/Text/Pandoc/Citeproc/MetaValue.hs
@@ -135,12 +135,13 @@ metaValueToVal k v
       MetaMap  _   -> TextVal mempty
 
 metaValueToDate :: MetaValue -> Date
-metaValueToDate (MetaMap m) =
-  Date
+metaValueToDate (MetaMap m) = fromMaybe
+  (Date
    { dateParts = dateparts
    , dateCirca = circa
    , dateSeason = season
-   , dateLiteral = literal }
+   , dateLiteral = literal })
+  rawdate
  where
   dateparts = case M.lookup "date-parts" m of
                 Just (MetaList xs) ->
@@ -152,6 +153,7 @@ metaValueToDate (MetaMap m) =
             M.lookup "circa" m >>= metaValueToBool
   season = M.lookup "season" m >>= metaValueToInt
   literal = M.lookup "literal" m >>= metaValueToText
+  rawdate = M.lookup "raw" m >>= metaValueToText >>= rawDateEDTF
 metaValueToDate (MetaList xs) =
   Date{ dateParts = mapMaybe metaValueToDateParts xs
       , dateCirca = False
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 7e1735c2b..796a4afd5 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -19,7 +19,7 @@ where
 
 import Data.Default (Default (def))
 import Data.Text (Text)
-import Text.Pandoc.BCP47 (Lang)
+import Text.Collate.Lang (Lang)
 import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
 import Text.Pandoc.Translations (Translations)
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index eecda5711..f4cfc8682 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image))
 import Text.Pandoc.Error (PandocError (..))
 import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage)
 import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems)
 import Text.Pandoc.Walk (walk)
 import qualified Control.Exception as E
 import qualified Data.ByteString as B
@@ -183,7 +183,7 @@ getModificationTime = liftIOError System.Directory.getModificationTime
 logOutput :: (PandocMonad m, MonadIO m) => LogMessage -> m ()
 logOutput msg = liftIO $ do
   UTF8.hPutStr stderr $
-      "[" ++ show (messageVerbosity msg) ++ "] "
+      "[" <> T.pack (show (messageVerbosity msg)) <> "] "
   alertIndent $ T.lines $ showLogMessage msg
 
 -- | Prints the list of lines to @stderr@, indenting every but the first
@@ -191,42 +191,41 @@ logOutput msg = liftIO $ do
 alertIndent :: [Text] -> IO ()
 alertIndent [] = return ()
 alertIndent (l:ls) = do
-  UTF8.hPutStrLn stderr $ unpack l
+  UTF8.hPutStrLn stderr l
   mapM_ go ls
   where go l' = do UTF8.hPutStr stderr "  "
-                   UTF8.hPutStrLn stderr $ unpack l'
+                   UTF8.hPutStrLn stderr l'
 
 -- | Extract media from the mediabag into a directory.
 extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
 extractMedia dir d = do
   media <- getMediaBag
-  case [fp | (fp, _, _) <- mediaDirectory media] of
-    []  -> return d
-    fps -> do
-      mapM_ (writeMedia dir media) fps
-      return $ walk (adjustImagePath dir fps) d
+  let items = mediaItems media
+  if null items
+    then return d
+    else do
+      mapM_ (writeMedia dir) items
+      return $ walk (adjustImagePath dir media) d
 
 -- | Write the contents of a media bag to a path.
 writeMedia :: (PandocMonad m, MonadIO m)
-           => FilePath -> MediaBag -> FilePath
+           => FilePath
+           -> (FilePath, MimeType, BL.ByteString)
            -> m ()
-writeMedia dir mediabag subpath = do
-  -- we join and split to convert a/b/c to a\b\c on Windows;
-  -- in zip containers all paths use /
-  let fullpath = dir </> unEscapeString (normalise subpath)
-  let mbcontents = lookupMedia subpath mediabag
-  case mbcontents of
-       Nothing -> throwError $ PandocResourceNotFound $ pack subpath
-       Just (_, bs) -> do
-         report $ Extracting $ pack fullpath
-         liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
-         logIOError $ BL.writeFile fullpath bs
+writeMedia dir (fp, _mt, bs) = do
+  -- we normalize to get proper path separators for the platform
+  let fullpath = normalise $ dir </> fp
+  liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+  logIOError $ BL.writeFile fullpath bs
 
 -- | If the given Inline element is an image with a @src@ path equal to
 -- one in the list of @paths@, then prepends @dir@ to the image source;
 -- returns the element unchanged otherwise.
-adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
-adjustImagePath dir paths (Image attr lab (src, tit))
-   | unpack src `elem` paths
-     = Image attr lab (pack (normalise $ dir </> unpack src), tit)
+adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline
+adjustImagePath dir mediabag (Image attr lab (src, tit)) =
+  case lookupMedia (T.unpack src) mediabag of
+    Nothing -> Image attr lab (src, tit)
+    Just item ->
+      let fullpath = dir <> "/" <> mediaPath item
+      in  Image attr lab (T.pack fullpath, tit)
 adjustImagePath _ _ x = x
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 374da161b..439aec071 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE CPP #-}
+{-# LANGUAGE TupleSections #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
@@ -37,7 +38,6 @@ module Text.Pandoc.Class.PandocMonad
   , setUserDataDir
   , getUserDataDir
   , fetchItem
-  , fetchMediaResource
   , getInputFiles
   , setInputFiles
   , getOutputFile
@@ -51,30 +51,31 @@ module Text.Pandoc.Class.PandocMonad
   , setTranslations
   , translateTerm
   , makeCanonical
+  , getTimestamp
   ) where
 
 import Codec.Archive.Zip
 import Control.Monad.Except (MonadError (catchError, throwError),
                              MonadTrans, lift, when)
-import Data.Digest.Pure.SHA (sha1, showDigest)
-import Data.Maybe (fromMaybe)
+import Data.List (foldl')
 import Data.Time (UTCTime)
-import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds)
+import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds,
+                             posixSecondsToUTCTime)
 import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime)
 import Network.URI ( escapeURIString, nonStrictRelativeTo,
                      unEscapeString, parseURIReference, isAllowedInURI,
                      parseURI, URI(..) )
-import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
-                        isRelative, splitDirectories)
+import System.FilePath ((</>), takeExtension, dropExtension,
+                        isRelative, splitDirectories, makeRelative)
 import System.Random (StdGen)
-import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import Text.Collate.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
-import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
-import Text.Pandoc.Shared (uriPathToPath)
+import Text.Pandoc.MIME (MimeType, getMimeType)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, MediaItem(..))
+import Text.Pandoc.Shared (uriPathToPath, safeRead)
 import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
                                  readTranslations)
 import Text.Pandoc.Walk (walkM)
@@ -175,6 +176,21 @@ report msg = do
   when (level <= verbosity) $ logOutput msg
   modifyCommonState $ \st -> st{ stLog = msg : stLog st }
 
+-- | Get the time from the @SOURCE_DATE_EPOCH@
+-- environment variable. The variable should contain a
+-- unix time stamp, the number of seconds since midnight Jan 01
+-- 1970 UTC.  If the variable is not set or cannot be
+-- parsed as a unix time stamp, the current time is returned.
+-- This function is designed to make possible reproducible
+-- builds in formats that include a creation timestamp.
+getTimestamp :: PandocMonad m => m UTCTime
+getTimestamp = do
+  mbSourceDateEpoch <- lookupEnv "SOURCE_DATE_EPOCH"
+  case mbSourceDateEpoch >>= safeRead of
+    Just (epoch :: Integer) ->
+      return $ posixSecondsToUTCTime $ fromIntegral epoch
+    Nothing -> getCurrentTime
+
 -- | Determine whether tracing is enabled.  This affects
 -- the behavior of 'trace'.  If tracing is not enabled,
 -- 'trace' does nothing.
@@ -267,7 +283,7 @@ readFileFromDirs (d:ds) f = catchError
 toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
 toLang Nothing = return Nothing
 toLang (Just s) =
-  case parseBCP47 s of
+  case parseLang s of
        Left _ -> do
          report $ InvalidLang s
          return Nothing
@@ -358,7 +374,8 @@ fetchItem :: PandocMonad m
 fetchItem s = do
   mediabag <- getMediaBag
   case lookupMedia (T.unpack s) mediabag of
-    Just (mime, bs) -> return (BL.toStrict bs, Just mime)
+    Just item -> return (BL.toStrict (mediaContents item),
+                         Just (mediaMimeType item))
     Nothing -> downloadOrRead s
 
 -- | Returns the content and, if available, the MIME type of a resource.
@@ -393,9 +410,10 @@ downloadOrRead s = do
             _ -> readLocalFile fp -- get from local file system
    where readLocalFile f = do
              resourcePath <- getResourcePath
-             cont <- if isRelative f
-                        then withPaths resourcePath readFileStrict f
-                        else readFileStrict f
+             (fp', cont) <- if isRelative f
+                               then withPaths resourcePath readFileStrict f
+                               else (f,) <$> readFileStrict f
+             report $ LoadedResource f (makeRelative "." fp')
              return (cont, mime)
          httpcolon = URI{ uriScheme = "http:",
                           uriAuthority = Nothing,
@@ -595,7 +613,7 @@ checkExistence fn = do
 -- | Canonicalizes a file path by removing redundant @.@ and @..@.
 makeCanonical :: FilePath -> FilePath
 makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
- where  transformPathParts = reverse . foldl go []
+ where  transformPathParts = reverse . foldl' go []
         go as     "."  = as
         go (_:as) ".." = as
         go as     x    = x : as
@@ -605,25 +623,13 @@ makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
 -- that filepath. Returns the result of the first successful execution
 -- of the action, or throws a @PandocResourceNotFound@ exception if the
 -- action errors for all filepaths.
-withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
+withPaths :: PandocMonad m
+          => [FilePath] -> (FilePath -> m a) -> FilePath -> m (FilePath, a)
 withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp
 withPaths (p:ps) action fp =
-  catchError (action (p </> fp))
+  catchError ((p </> fp,) <$> action (p </> fp))
              (\_ -> withPaths ps action fp)
 
--- | Fetch local or remote resource (like an image) and provide data suitable
--- for adding it to the MediaBag.
-fetchMediaResource :: PandocMonad m
-              => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString)
-fetchMediaResource src = do
-  (bs, mt) <- downloadOrRead src
-  let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src)
-                      (mt >>= extensionFromMimeType)
-  let bs' = BL.fromChunks [bs]
-  let basename = showDigest $ sha1 bs'
-  let fname = basename <.> T.unpack ext
-  return (fname, mt, bs')
-
 -- | Traverse tree, filling media bag for any images that
 -- aren't already in the media bag.
 fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
@@ -631,12 +637,13 @@ fillMediaBag d = walkM handleImage d
   where handleImage :: PandocMonad m => Inline -> m Inline
         handleImage (Image attr lab (src, tit)) = catchError
           (do mediabag <- getMediaBag
-              case lookupMedia (T.unpack src) mediabag of
-                Just (_, _) -> return $ Image attr lab (src, tit)
+              let fp = T.unpack src
+              case lookupMedia fp mediabag of
+                Just _ -> return ()
                 Nothing -> do
-                  (fname, mt, bs) <- fetchMediaResource src
-                  insertMedia fname mt bs
-                  return $ Image attr lab (T.pack fname, tit))
+                  (bs, mt) <- fetchItem src
+                  insertMedia fp mt (BL.fromStrict bs)
+              return $ Image attr lab (src, tit))
           (\e ->
               case e of
                 PandocResourceNotFound _ -> do
diff --git a/src/Text/Pandoc/Data.hs b/src/Text/Pandoc/Data.hs
index 38682b9f9..55ed3f5bf 100644
--- a/src/Text/Pandoc/Data.hs
+++ b/src/Text/Pandoc/Data.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE TemplateHaskell #-}
 {- |
 Module      : Text.Pandoc.Data
-Copyright   : Copyright (C) 2013-2020 John MacFarlane
+Copyright   : Copyright (C) 2013-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 2c311bb49..9dee8356b 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Error
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -16,38 +16,41 @@ encountered during parsing.
 -}
 module Text.Pandoc.Error (
   PandocError(..),
+  renderError,
   handleError) where
 
-import Control.Exception (Exception)
+import Control.Exception (Exception, displayException)
 import Data.Typeable (Typeable)
 import Data.Word (Word8)
 import Data.Text (Text)
+import Data.List (sortOn)
 import qualified Data.Text as T
+import Data.Ord (Down(..))
 import GHC.Generics (Generic)
 import Network.HTTP.Client (HttpException)
 import System.Exit (ExitCode (..), exitWith)
 import System.IO (stderr)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (Sources(..))
 import Text.Printf (printf)
 import Text.Parsec.Error
 import Text.Parsec.Pos hiding (Line)
 import Text.Pandoc.Shared (tshow)
 import Citeproc (CiteprocError, prettyCiteprocError)
 
-type Input = Text
-
 data PandocError = PandocIOError Text IOError
                  | PandocHttpError Text HttpException
                  | PandocShouldNeverHappenError Text
                  | PandocSomeError Text
                  | PandocParseError Text
-                 | PandocParsecError Input ParseError
+                 | PandocParsecError Sources ParseError
                  | PandocMakePDFError Text
                  | PandocOptionError Text
                  | PandocSyntaxMapError Text
                  | PandocFailOnWarningError
                  | PandocPDFProgramNotFoundError Text
                  | PandocPDFError Text
+                 | PandocXMLError Text Text
                  | PandocFilterError Text Text
                  | PandocLuaError Text
                  | PandocCouldNotFindDataFileError Text
@@ -58,6 +61,7 @@ data PandocError = PandocIOError Text IOError
                  | PandocMacroLoop Text
                  | PandocUTF8DecodingError Text Int Word8
                  | PandocIpynbDecodingError Text
+                 | PandocUnsupportedCharsetError Text
                  | PandocUnknownReaderError Text
                  | PandocUnknownWriterError Text
                  | PandocUnsupportedExtensionError Text Text
@@ -67,62 +71,70 @@ data PandocError = PandocIOError Text IOError
 
 instance Exception PandocError
 
--- | Handle PandocError by exiting with an error message.
-handleError :: Either PandocError a -> IO a
-handleError (Right r) = return r
-handleError (Left e) =
+renderError :: PandocError -> Text
+renderError e =
   case e of
-    PandocIOError _ err' -> ioError err'
-    PandocHttpError u err' -> err 61 $
+    PandocIOError _ err' -> T.pack $ displayException err'
+    PandocHttpError u err' ->
       "Could not fetch " <> u <> "\n" <> tshow err'
-    PandocShouldNeverHappenError s -> err 62 $
+    PandocShouldNeverHappenError s ->
       "Something we thought was impossible happened!\n" <>
       "Please report this to pandoc's developers: " <> s
-    PandocSomeError s -> err 63 s
-    PandocParseError s -> err 64 s
-    PandocParsecError input err' ->
+    PandocSomeError s -> s
+    PandocParseError s -> s
+    PandocParsecError (Sources inputs) err' ->
         let errPos = errorPos err'
             errLine = sourceLine errPos
             errColumn = sourceColumn errPos
-            ls = T.lines input <> [""]
-            errorInFile = if length ls > errLine - 1
-                            then T.concat ["\n", ls !! (errLine - 1)
-                                          ,"\n", T.replicate (errColumn - 1) " "
-                                          ,"^"]
-                        else ""
-        in  err 65 $ "\nError at " <> tshow  err' <>
-                     -- if error comes from a chunk or included file,
-                     -- then we won't get the right text this way:
-                     if sourceName errPos == "source"
-                        then errorInFile
-                        else ""
-    PandocMakePDFError s -> err 66 s
-    PandocOptionError s -> err 6 s
-    PandocSyntaxMapError s -> err 67 s
-    PandocFailOnWarningError -> err 3 "Failing because there were warnings."
-    PandocPDFProgramNotFoundError pdfprog -> err 47 $
+            errFile = sourceName errPos
+            errorInFile =
+              case sortOn (Down . sourceLine . fst)
+                      [ (pos,t)
+                        | (pos,t) <- inputs
+                        , sourceName pos == errFile
+                        , sourceLine pos <= errLine
+                      ] of
+                []  -> ""
+                ((pos,txt):_) ->
+                  let ls = T.lines txt <> [""]
+                      ln = (errLine - sourceLine pos) + 1
+                   in if length ls > ln && ln >= 1
+                         then T.concat ["\n", ls !! (ln - 1)
+                                       ,"\n", T.replicate (errColumn - 1) " "
+                                       ,"^"]
+                         else ""
+        in  "Error at " <> tshow  err' <> errorInFile
+    PandocMakePDFError s -> s
+    PandocOptionError s -> s
+    PandocSyntaxMapError s -> s
+    PandocFailOnWarningError -> "Failing because there were warnings."
+    PandocPDFProgramNotFoundError pdfprog ->
         pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog
-    PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg
-    PandocFilterError filtername msg -> err 83 $ "Error running filter " <>
+    PandocPDFError logmsg -> "Error producing PDF.\n" <> logmsg
+    PandocXMLError fp logmsg -> "Invalid XML" <>
+        (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg
+    PandocFilterError filtername msg -> "Error running filter " <>
         filtername <> ":\n" <> msg
-    PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg
-    PandocCouldNotFindDataFileError fn -> err 97 $
+    PandocLuaError msg -> "Error running Lua:\n" <> msg
+    PandocCouldNotFindDataFileError fn ->
         "Could not find data file " <> fn
-    PandocResourceNotFound fn -> err 99 $
+    PandocResourceNotFound fn ->
         "File " <> fn <> " not found in resource path"
-    PandocTemplateError s -> err 5 $ "Error compiling template " <> s
-    PandocAppError s -> err 4 s
-    PandocEpubSubdirectoryError s -> err 31 $
+    PandocTemplateError s -> "Error compiling template " <> s
+    PandocAppError s -> s
+    PandocEpubSubdirectoryError s ->
       "EPUB subdirectory name '" <> s <> "' contains illegal characters"
-    PandocMacroLoop s -> err 91 $
+    PandocMacroLoop s ->
       "Loop encountered in expanding macro " <> s
-    PandocUTF8DecodingError f offset w -> err 92 $
+    PandocUTF8DecodingError f offset w ->
       "UTF-8 decoding error in " <> f <> " at byte offset " <> tshow offset <>
       " (" <> T.pack (printf "%2x" w) <> ").\n" <>
       "The input must be a UTF-8 encoded text."
-    PandocIpynbDecodingError w -> err 93 $
+    PandocIpynbDecodingError w ->
       "ipynb decoding error: " <> w
-    PandocUnknownReaderError r -> err 21 $
+    PandocUnsupportedCharsetError charset ->
+      "Unsupported charset " <> charset
+    PandocUnknownReaderError r ->
       "Unknown input format " <> r <>
       case r of
         "doc" -> "\nPandoc can convert from DOCX, but not from DOC." <>
@@ -130,7 +142,7 @@ handleError (Left e) =
                  " and convert that with pandoc."
         "pdf" -> "\nPandoc can convert to PDF, but not from PDF."
         _     -> ""
-    PandocUnknownWriterError w -> err 22 $
+    PandocUnknownWriterError w ->
        "Unknown output format " <> w <>
        case w of
          "pdf" -> "To create a pdf using pandoc, use" <>
@@ -139,16 +151,57 @@ handleError (Left e) =
                  ".pdf extension (-o filename.pdf)."
          "doc" -> "\nPandoc can convert to DOCX, but not to DOC."
          _     -> ""
-    PandocUnsupportedExtensionError ext f -> err 23 $
+    PandocUnsupportedExtensionError ext f ->
       "The extension " <> ext <> " is not supported " <>
       "for " <> f
-    PandocCiteprocError e' -> err 24 $
+    PandocCiteprocError e' ->
       prettyCiteprocError e'
-    PandocBibliographyError fp msg -> err 25 $
+    PandocBibliographyError fp msg ->
       "Error reading bibliography file " <> fp <> ":\n" <> msg
 
+
+-- | Handle PandocError by exiting with an error message.
+handleError :: Either PandocError a -> IO a
+handleError (Right r) = return r
+handleError (Left e) =
+  case e of
+    PandocIOError _ err' -> ioError err'
+    _ -> err exitCode (renderError e)
+ where
+  exitCode =
+    case e of
+      PandocIOError{} -> 1
+      PandocHttpError{} -> 61
+      PandocShouldNeverHappenError{} -> 62
+      PandocSomeError{} -> 63
+      PandocParseError{} -> 64
+      PandocParsecError{} -> 65
+      PandocMakePDFError{} -> 66
+      PandocOptionError{} -> 6
+      PandocSyntaxMapError{} -> 67
+      PandocFailOnWarningError{} -> 3
+      PandocPDFProgramNotFoundError{} -> 47
+      PandocPDFError{} -> 43
+      PandocXMLError{} -> 44
+      PandocFilterError{} -> 83
+      PandocLuaError{} -> 84
+      PandocCouldNotFindDataFileError{} -> 97
+      PandocResourceNotFound{} -> 99
+      PandocTemplateError{} -> 5
+      PandocAppError{} -> 4
+      PandocEpubSubdirectoryError{} -> 31
+      PandocMacroLoop{} -> 91
+      PandocUTF8DecodingError{} -> 92
+      PandocIpynbDecodingError{} -> 93
+      PandocUnsupportedCharsetError{} -> 94
+      PandocUnknownReaderError{} -> 21
+      PandocUnknownWriterError{} -> 22
+      PandocUnsupportedExtensionError{} -> 23
+      PandocCiteprocError{} -> 24
+      PandocBibliographyError{} -> 25
+
 err :: Int -> Text -> IO a
 err exitCode msg = do
-  UTF8.hPutStrLn stderr (T.unpack msg)
+  UTF8.hPutStrLn stderr msg
   exitWith $ ExitFailure exitCode
   return undefined
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 646f7abfb..9c55d0a7a 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Extensions
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -34,6 +34,7 @@ module Text.Pandoc.Extensions ( Extension(..)
 where
 import Data.Bits (clearBit, setBit, testBit, (.|.))
 import Data.Data (Data)
+import Data.List (foldl')
 import qualified Data.Text as T
 import Data.Typeable (Typeable)
 import GHC.Generics (Generic)
@@ -88,6 +89,7 @@ data Extension =
                                   --   does not affect readers/writers directly; it causes
                                   --   the eastAsianLineBreakFilter to be applied after
                                   --   parsing, in Text.Pandoc.App.convertWithOpts.
+    | Ext_element_citations   -- ^ Use element-citation elements for JATS citations
     | Ext_emoji               -- ^ Support emoji like :smile:
     | Ext_empty_paragraphs -- ^ Allow empty paragraphs
     | Ext_epub_html_exts      -- ^ Recognise the EPUB extended version of HTML
@@ -134,6 +136,8 @@ data Extension =
     | Ext_raw_html            -- ^ Allow raw HTML
     | Ext_raw_tex             -- ^ Allow raw TeX (other than math)
     | Ext_raw_markdown        -- ^ Parse markdown in ipynb as raw markdown
+    | Ext_rebase_relative_paths -- ^ Rebase relative image and link paths,
+                                -- relative to directory of containing file
     | Ext_shortcut_reference_links -- ^ Shortcut reference links
     | Ext_simple_tables       -- ^ Pandoc-style simple tables
     | Ext_smart               -- ^ "Smart" quotes, apostrophes, ellipses, dashes
@@ -149,9 +153,12 @@ data Extension =
     | Ext_tex_math_dollars    -- ^ TeX math between $..$ or $$..$$
     | Ext_tex_math_double_backslash  -- ^ TeX math btw \\(..\\) \\[..\\]
     | Ext_tex_math_single_backslash  -- ^ TeX math btw \(..\) \[..\]
+    | Ext_xrefs_name          -- ^ Use xrefs with names
+    | Ext_xrefs_number        -- ^ Use xrefs with numbers
     | Ext_yaml_metadata_block -- ^ YAML metadata block
     | Ext_gutenberg           -- ^ Use Project Gutenberg conventions for plain
     | Ext_attributes          -- ^ Generic attribute syntax
+    | Ext_sourcepos           -- ^ Include source position attributes
     deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
 
 -- | Extensions to be used with pandoc-flavored markdown.
@@ -349,6 +356,7 @@ getDefaultExtensions "gfm"             = extensionsFromList
   , Ext_strikeout
   , Ext_task_lists
   , Ext_emoji
+  , Ext_yaml_metadata_block
   ]
 getDefaultExtensions "commonmark"      = extensionsFromList
                                           [Ext_raw_html]
@@ -374,10 +382,11 @@ getDefaultExtensions "commonmark_x"    = extensionsFromList
   , Ext_raw_attribute
   , Ext_implicit_header_references
   , Ext_attributes
-  , Ext_fenced_code_attributes
+  , Ext_yaml_metadata_block
   ]
 getDefaultExtensions "org"             = extensionsFromList
                                           [Ext_citations,
+                                           Ext_task_lists,
                                            Ext_auto_identifiers]
 getDefaultExtensions "html"            = extensionsFromList
                                           [Ext_auto_identifiers,
@@ -409,6 +418,11 @@ getDefaultExtensions "textile"         = extensionsFromList
                                            Ext_smart,
                                            Ext_raw_html,
                                            Ext_auto_identifiers]
+getDefaultExtensions "jats"            = extensionsFromList
+                                          [Ext_auto_identifiers]
+getDefaultExtensions "jats_archiving"  = getDefaultExtensions "jats"
+getDefaultExtensions "jats_publishing" = getDefaultExtensions "jats"
+getDefaultExtensions "jats_articleauthoring" = getDefaultExtensions "jats"
 getDefaultExtensions "opml"            = pandocExtensions -- affects notes
 getDefaultExtensions _                 = extensionsFromList
                                           [Ext_auto_identifiers]
@@ -450,6 +464,7 @@ getAllExtensions f = universalExtensions <> getAll f
        , Ext_gutenberg
        , Ext_smart
        , Ext_literate_haskell
+       , Ext_rebase_relative_paths
        ]
   getAll "markdown_strict"   = allMarkdownExtensions
   getAll "markdown_phpextra" = allMarkdownExtensions
@@ -465,6 +480,8 @@ getAllExtensions f = universalExtensions <> getAll f
   getAll "opendocument"    = extensionsFromList
     [ Ext_empty_paragraphs
     , Ext_native_numbering
+    , Ext_xrefs_name
+    , Ext_xrefs_number
     ]
   getAll "odt"             = getAll "opendocument" <> autoIdExtensions
   getAll "muse"            = autoIdExtensions <>
@@ -498,13 +515,16 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_raw_attribute
     , Ext_implicit_header_references
     , Ext_attributes
-    , Ext_fenced_code_attributes
+    , Ext_sourcepos
+    , Ext_yaml_metadata_block
+    , Ext_rebase_relative_paths
     ]
   getAll "commonmark_x"    = getAll "commonmark"
   getAll "org"             = autoIdExtensions <>
     extensionsFromList
     [ Ext_citations
     , Ext_smart
+    , Ext_task_lists
     ]
   getAll "html"            = autoIdExtensions <>
     extensionsFromList
@@ -548,6 +568,14 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_smart
     , Ext_raw_tex
     ]
+  getAll "jats"            =
+    extensionsFromList
+    [ Ext_auto_identifiers
+    , Ext_element_citations
+    ]
+  getAll "jats_archiving"  = getAll "jats"
+  getAll "jats_publishing" = getAll "jats"
+  getAll "jats_articleauthoring" = getAll "jats"
   getAll "opml"            = allMarkdownExtensions -- affects notes
   getAll "twiki"           = autoIdExtensions <>
     extensionsFromList
@@ -573,7 +601,7 @@ parseFormatSpec :: T.Text
 parseFormatSpec = parse formatSpec ""
   where formatSpec = do
           name <- formatName
-          (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$>
+          (extsToEnable, extsToDisable) <- foldl' (flip ($)) ([],[]) <$>
                                              many extMod
           return (T.pack name, reverse extsToEnable, reverse extsToDisable)
         formatName = many1 $ noneOf "-+"
diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs
index 6d4846f98..1209ceeb7 100644
--- a/src/Text/Pandoc/Filter.hs
+++ b/src/Text/Pandoc/Filter.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Filter
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs
index 83ec9a97c..d2323fac4 100644
--- a/src/Text/Pandoc/Filter/JSON.hs
+++ b/src/Text/Pandoc/Filter/JSON.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Filter
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs
index a76c8da2f..c238e53d9 100644
--- a/src/Text/Pandoc/Filter/Lua.hs
+++ b/src/Text/Pandoc/Filter/Lua.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Filter.Lua
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/Path.hs b/src/Text/Pandoc/Filter/Path.hs
index 9e5e8fa77..1d16c53b9 100644
--- a/src/Text/Pandoc/Filter/Path.hs
+++ b/src/Text/Pandoc/Filter/Path.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Filter.Path
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index ce8880f84..62a261e50 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Highlighting
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -52,12 +52,12 @@ highlightingStyles =
    ("breezedark", breezeDark),
    ("haddock", haddock)]
 
-languages :: [T.Text]
-languages = [T.toLower (sName s) | s <- M.elems defaultSyntaxMap]
+languages :: SyntaxMap -> [T.Text]
+languages syntaxmap = [T.toLower (sName s) | s <- M.elems syntaxmap]
 
-languagesByExtension :: T.Text -> [T.Text]
-languagesByExtension ext =
-  [T.toLower (sName s) | s <- syntaxesByExtension defaultSyntaxMap (T.unpack ext)]
+languagesByExtension :: SyntaxMap -> T.Text -> [T.Text]
+languagesByExtension syntaxmap ext =
+  [T.toLower (sName s) | s <- syntaxesByExtension syntaxmap (T.unpack ext)]
 
 highlight :: SyntaxMap
           -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
diff --git a/src/Text/Pandoc/Image.hs b/src/Text/Pandoc/Image.hs
index e37de4e00..e0c938938 100644
--- a/src/Text/Pandoc/Image.hs
+++ b/src/Text/Pandoc/Image.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
 {- |
 Module      : Text.Pandoc.Image
-Copyright   : Copyright (C) 2020 John MacFarlane
+Copyright   : Copyright (C) 2020-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index fc9e1854b..2b7d10611 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -1,9 +1,9 @@
-{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
+{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
 {-# LANGUAGE ViewPatterns      #-}
 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
 {- |
 Module      : Text.Pandoc.ImageSize
-Copyright   : Copyright (C) 2011-2020 John MacFarlane
+Copyright   : Copyright (C) 2011-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -32,33 +32,33 @@ module Text.Pandoc.ImageSize ( ImageType(..)
                              , showInPixel
                              , showFl
                              ) where
-import Data.ByteString (ByteString, unpack)
+import Data.ByteString (ByteString)
 import qualified Data.ByteString.Char8 as B
 import qualified Data.ByteString.Lazy as BL
+import Data.Binary.Get
 import Data.Char (isDigit)
 import Control.Monad
-import Data.Bits
-import Data.Binary
-import Data.Binary.Get
 import Text.Pandoc.Shared (safeRead)
 import Data.Default (Default)
 import Numeric (showFFloat)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
-import qualified Text.XML.Light as Xml
-import qualified Data.Map as M
+import Text.Pandoc.XML.Light hiding (Attr)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
-import Control.Monad.Except
 import Control.Applicative
-import Data.Maybe (fromMaybe)
 import qualified Data.Attoparsec.ByteString.Char8 as A
+import qualified Codec.Picture.Metadata as Metadata
+import qualified Codec.Picture.Metadata.Exif as Exif
+import Codec.Picture (decodeImageWithMetadata)
 
 -- quick and dirty functions to get image sizes
 -- algorithms borrowed from wwwis.pl
 
-data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf deriving Show
+data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf | Tiff
+                 deriving Show
 data Direction = Width | Height
 instance Show Direction where
   show Width  = "width"
@@ -101,6 +101,8 @@ imageType :: ByteString -> Maybe ImageType
 imageType img = case B.take 4 img of
                      "\x89\x50\x4e\x47" -> return Png
                      "\x47\x49\x46\x38" -> return Gif
+                     "\x49\x49\x2a\x00" -> return Tiff
+                     "\x4D\x4D\x00\x2a" -> return Tiff
                      "\xff\xd8\xff\xe0" -> return Jpeg  -- JFIF
                      "\xff\xd8\xff\xe1" -> return Jpeg  -- Exif
                      "%PDF"             -> return Pdf
@@ -122,9 +124,10 @@ findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img
 imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize
 imageSize opts img = checkDpi <$>
   case imageType img of
-       Just Png  -> mbToEither "could not determine PNG size" $ pngSize img
-       Just Gif  -> mbToEither "could not determine GIF size" $ gifSize img
-       Just Jpeg -> jpegSize img
+       Just Png  -> getSize img
+       Just Gif  -> getSize img
+       Just Jpeg -> getSize img
+       Just Tiff -> getSize img
        Just Svg  -> mbToEither "could not determine SVG size" $ svgSize opts img
        Just Eps  -> mbToEither "could not determine EPS size" $ epsSize img
        Just Pdf  -> mbToEither "could not determine PDF size" $ pdfSize img
@@ -139,9 +142,6 @@ imageSize opts img = checkDpi <$>
               , dpiY = if dpiY size == 0 then 72 else dpiY size }
 
 
-defaultSize :: (Integer, Integer)
-defaultSize = (72, 72)
-
 sizeInPixels :: ImageSize -> (Integer, Integer)
 sizeInPixels s = (pxX s, pxY s)
 
@@ -300,59 +300,50 @@ pPdfSize = do
             , dpiY = 72 }
    ) <|> pPdfSize
 
-pngSize :: ByteString -> Maybe ImageSize
-pngSize img = do
-  let (h, rest) = B.splitAt 8 img
-  guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" ||
-          h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
-  let (i, rest') = B.splitAt 4 $ B.drop 4 rest
-  guard $ i == "MHDR" || i == "IHDR"
-  let (sizes, rest'') = B.splitAt 8 rest'
-  (x,y) <- case map fromIntegral $unpack sizes of
-                ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
-                    (shift w1 24 + shift w2 16 + shift w3 8 + w4,
-                     shift h1 24 + shift h2 16 + shift h3 8 + h4)
-                _ -> Nothing -- "PNG parse error"
-  (dpix, dpiy) <- findpHYs rest''
-  return ImageSize { pxX  = x, pxY = y, dpiX = dpix, dpiY = dpiy }
-
-findpHYs :: ByteString -> Maybe (Integer, Integer)
-findpHYs x
-  | B.null x || "IDAT" `B.isPrefixOf` x = return (72,72)
-  | "pHYs" `B.isPrefixOf` x =
-    case map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x of
-         [x1,x2,x3,x4,y1,y2,y3,y4,u] -> do
-           let factor = if u == 1 -- dots per meter
-                          then \z -> z * 254 `div` 10000
-                          else const 72
-           return
-              ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4,
-                factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 )
-         _ -> mzero
-  | otherwise = findpHYs $ B.drop 1 x  -- read another byte
-
-gifSize :: ByteString -> Maybe ImageSize
-gifSize img = do
-  let (h, rest) = B.splitAt 6 img
-  guard $ h == "GIF87a" || h == "GIF89a"
-  case map fromIntegral $ unpack $ B.take 4 rest of
-       [w2,w1,h2,h1] -> return ImageSize {
-                          pxX  = shift w1 8 + w2,
-                          pxY  = shift h1 8 + h2,
-                          dpiX = 72,
-                          dpiY = 72
-                          }
-       _             -> Nothing -- "GIF parse error"
+getSize :: ByteString -> Either T.Text ImageSize
+getSize img =
+  case decodeImageWithMetadata img of
+    Left e -> Left (T.pack e)
+    Right (_, meta) -> do
+      pxx <- maybe (Left "Could not determine width") Right $
+                   -- first look for exif image width, then width
+                   (Metadata.lookup
+                     (Metadata.Exif (Exif.TagUnknown 0xA002)) meta >>=
+                       exifDataToWord) <|>
+                   Metadata.lookup Metadata.Width meta
+      pxy <- maybe (Left "Could not determine height") Right $
+                  -- first look for exif image height, then height
+                  (Metadata.lookup
+                     (Metadata.Exif (Exif.TagUnknown 0xA003)) meta >>=
+                       exifDataToWord) <|>
+                  Metadata.lookup Metadata.Height meta
+      dpix <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiX meta
+      dpiy <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiY meta
+      return $ ImageSize
+                { pxX = fromIntegral pxx
+                , pxY = fromIntegral pxy
+                , dpiX = fromIntegral dpix
+                , dpiY = fromIntegral dpiy }
+ where
+  exifDataToWord (Exif.ExifLong x) = Just $ fromIntegral x
+  exifDataToWord (Exif.ExifShort x) = Just $ fromIntegral x
+  exifDataToWord _ = Nothing
+
 
 svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
 svgSize opts img = do
-  doc <- Xml.parseXMLDoc $ UTF8.toString img
+  doc <- either (const mzero) return $ parseXMLElement
+                                     $ TL.fromStrict $ UTF8.toText img
+  let viewboxSize = do
+        vb <- findAttrBy (== QName "viewBox" Nothing Nothing) doc
+        [_,_,w,h] <- mapM safeRead (T.words vb)
+        return (w,h)
   let dpi = fromIntegral $ writerDpi opts
   let dirToInt dir = do
-        dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack
+        dim <- findAttrBy (== QName dir Nothing Nothing) doc >>= lengthToDim
         return $ inPixel opts dim
-  w <- dirToInt "width"
-  h <- dirToInt "height"
+  w <- dirToInt "width" <|> (fst <$> viewboxSize)
+  h <- dirToInt "height" <|> (snd <$> viewboxSize)
   return ImageSize {
     pxX  = w
   , pxY  = h
@@ -390,280 +381,3 @@ emfSize img =
     case parseheader . BL.fromStrict $ img of
       Left _ -> Nothing
       Right (_, _, size) -> Just size
-
-
-jpegSize :: ByteString -> Either T.Text ImageSize
-jpegSize img =
-  let (hdr, rest) = B.splitAt 4 img
-  in if B.length rest < 14
-        then Left "unable to determine JPEG size"
-        else case hdr of
-               "\xff\xd8\xff\xe0" -> jfifSize rest
-               "\xff\xd8\xff\xe1" -> exifSize rest
-               _                  -> Left "unable to determine JPEG size"
-
-jfifSize :: ByteString -> Either T.Text ImageSize
-jfifSize rest =
-  case map fromIntegral $ unpack $ B.take 5 $ B.drop 9 rest of
-    [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] ->
-      let factor = case dpiDensity of
-                        1 -> id
-                        2 -> \x -> x * 254 `div` 10
-                        _ -> const 72
-          dpix = factor (shift dpix1 8 + dpix2)
-          dpiy = factor (shift dpiy1 8 + dpiy2)
-      in case findJfifSize rest of
-         Left msg    -> Left msg
-         Right (w,h) -> Right ImageSize { pxX = w
-                                        , pxY = h
-                                        , dpiX = dpix
-                                        , dpiY = dpiy }
-    _ -> Left "unable to determine JFIF size"
-
-findJfifSize :: ByteString -> Either T.Text (Integer,Integer)
-findJfifSize bs =
-  let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
-  in case B.uncons bs' of
-       Just (c,bs'') | c >= '\xc0' && c <= '\xc3' ->
-         case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of
-              [h1,h2,w1,w2] -> Right (shift w1 8 + w2, shift h1 8 + h2)
-              _             -> Left "JFIF parse error"
-       Just (_,bs'') ->
-         case map fromIntegral $ unpack $ B.take 2 bs'' of
-              [c1,c2] ->
-                let len = shift c1 8 + c2
-                -- skip variables
-                in  findJfifSize $ B.drop len bs''
-              _       -> Left "JFIF parse error"
-       Nothing -> Left "Did not find JFIF length record"
-
-runGet' :: Get (Either T.Text a) -> BL.ByteString -> Either T.Text a
-runGet' p bl =
-#if MIN_VERSION_binary(0,7,0)
-  case runGetOrFail p bl of
-       Left (_,_,msg) -> Left $ T.pack msg
-       Right (_,_,x)  -> x
-#else
-  runGet p bl
-#endif
-
-exifSize :: ByteString -> Either T.Text ImageSize
-exifSize bs = runGet' header bl
-  where bl = BL.fromChunks [bs]
-        header = runExceptT $ exifHeader bl
--- NOTE:  It would be nicer to do
--- runGet ((Just <$> exifHeader) <|> return Nothing)
--- which would prevent pandoc from raising an error when an exif header can't
--- be parsed.  But we only get an Alternative instance for Get in binary 0.6,
--- and binary 0.5 ships with ghc 7.6.
-
-exifHeader :: BL.ByteString -> ExceptT T.Text Get ImageSize
-exifHeader hdr = do
-  _app1DataSize <- lift getWord16be
-  exifHdr <- lift getWord32be
-  unless (exifHdr == 0x45786966) $ throwError "Did not find exif header"
-  zeros <- lift getWord16be
-  unless (zeros == 0) $ throwError "Expected zeros after exif header"
-  -- beginning of tiff header -- we read whole thing to use
-  -- in getting data from offsets:
-  let tiffHeader = BL.drop 8 hdr
-  byteAlign <- lift getWord16be
-  let bigEndian = byteAlign == 0x4d4d
-  let (getWord16, getWord32, getWord64) =
-        if bigEndian
-           then (getWord16be, getWord32be, getWord64be)
-           else (getWord16le, getWord32le, getWord64le)
-  let getRational = do
-        num <- getWord32
-        den <- getWord32
-        return $ fromIntegral num / fromIntegral den
-  tagmark <- lift getWord16
-  unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check"
-  ifdOffset <- lift getWord32
-  lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF
-  numentries <- lift  getWord16
-  let ifdEntry :: ExceptT T.Text Get (TagType, DataFormat)
-      ifdEntry = do
-       tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable
-                <$> lift getWord16
-       dataFormat <- lift getWord16
-       numComponents <- lift getWord32
-       (fmt, bytesPerComponent) <-
-             case dataFormat of
-                  1  -> return (UnsignedByte <$> getWord8, 1)
-                  2  -> return (AsciiString <$>
-                                getLazyByteString
-                                (fromIntegral numComponents), 1)
-                  3  -> return (UnsignedShort <$> getWord16, 2)
-                  4  -> return (UnsignedLong <$> getWord32, 4)
-                  5  -> return (UnsignedRational <$> getRational, 8)
-                  6  -> return (SignedByte <$> getWord8, 1)
-                  7  -> return (Undefined <$> getLazyByteString
-                                (fromIntegral numComponents), 1)
-                  8  -> return (SignedShort <$> getWord16, 2)
-                  9  -> return (SignedLong <$> getWord32, 4)
-                  10 -> return (SignedRational <$> getRational, 8)
-                  11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4)
-                  12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8)
-                  _  -> throwError $ "Unknown data format " <> T.pack (show dataFormat)
-       let totalBytes = fromIntegral $ numComponents * bytesPerComponent
-       payload <- if totalBytes <= 4 -- data is right here
-                     then lift $ fmt <* skip (4 - totalBytes)
-                     else do  -- get data from offset
-                          offs <- lift getWord32
-                          let bytesAtOffset =
-                                 BL.take (fromIntegral totalBytes)
-                                 $ BL.drop (fromIntegral offs) tiffHeader
-                          case runGet' (Right <$> fmt) bytesAtOffset of
-                               Left msg -> throwError msg
-                               Right x  -> return x
-       return (tag, payload)
-  entries <- replicateM (fromIntegral numentries) ifdEntry
-  subentries <- case lookup ExifOffset entries of
-                      Just (UnsignedLong offset') -> do
-                        pos <- lift bytesRead
-                        lift $ skip (fromIntegral offset' - (fromIntegral pos - 8))
-                        numsubentries <- lift getWord16
-                        replicateM (fromIntegral numsubentries) ifdEntry
-                      _ -> return []
-  let allentries = entries ++ subentries
-  (wdth, hght) <- case (lookup ExifImageWidth allentries,
-                        lookup ExifImageHeight allentries) of
-                       (Just (UnsignedLong w), Just (UnsignedLong h)) ->
-                         return (fromIntegral w, fromIntegral h)
-                       _ -> return defaultSize
-                            -- we return a default width and height when
-                            -- the exif header doesn't contain these
-  let resfactor = case lookup ResolutionUnit allentries of
-                        Just (UnsignedShort 1) -> 100 / 254
-                        _ -> 1
-  let xres = case lookup XResolution allentries of
-                  Just (UnsignedRational x) -> floor (x * resfactor)
-                  _ -> 72
-  let yres = case lookup YResolution allentries of
-                  Just (UnsignedRational y) -> floor (y * resfactor)
-                  _ -> 72
-  return ImageSize{
-                    pxX  = wdth
-                  , pxY  = hght
-                  , dpiX = xres
-                  , dpiY = yres }
-
-data DataFormat = UnsignedByte Word8
-                | AsciiString BL.ByteString
-                | UnsignedShort Word16
-                | UnsignedLong Word32
-                | UnsignedRational Rational
-                | SignedByte Word8
-                | Undefined BL.ByteString
-                | SignedShort Word16
-                | SignedLong Word32
-                | SignedRational Rational
-                | SingleFloat Word32
-                | DoubleFloat Word64
-                deriving (Show)
-
-data TagType = ImageDescription
-             | Make
-             | Model
-             | Orientation
-             | XResolution
-             | YResolution
-             | ResolutionUnit
-             | Software
-             | DateTime
-             | WhitePoint
-             | PrimaryChromaticities
-             | YCbCrCoefficients
-             | YCbCrPositioning
-             | ReferenceBlackWhite
-             | Copyright
-             | ExifOffset
-             | ExposureTime
-             | FNumber
-             | ExposureProgram
-             | ISOSpeedRatings
-             | ExifVersion
-             | DateTimeOriginal
-             | DateTimeDigitized
-             | ComponentConfiguration
-             | CompressedBitsPerPixel
-             | ShutterSpeedValue
-             | ApertureValue
-             | BrightnessValue
-             | ExposureBiasValue
-             | MaxApertureValue
-             | SubjectDistance
-             | MeteringMode
-             | LightSource
-             | Flash
-             | FocalLength
-             | MakerNote
-             | UserComment
-             | FlashPixVersion
-             | ColorSpace
-             | ExifImageWidth
-             | ExifImageHeight
-             | RelatedSoundFile
-             | ExifInteroperabilityOffset
-             | FocalPlaneXResolution
-             | FocalPlaneYResolution
-             | FocalPlaneResolutionUnit
-             | SensingMethod
-             | FileSource
-             | SceneType
-             | UnknownTagType
-             deriving (Show, Eq, Ord)
-
-tagTypeTable :: M.Map Word16 TagType
-tagTypeTable = M.fromList
-  [ (0x010e, ImageDescription)
-  , (0x010f, Make)
-  , (0x0110, Model)
-  , (0x0112, Orientation)
-  , (0x011a, XResolution)
-  , (0x011b, YResolution)
-  , (0x0128, ResolutionUnit)
-  , (0x0131, Software)
-  , (0x0132, DateTime)
-  , (0x013e, WhitePoint)
-  , (0x013f, PrimaryChromaticities)
-  , (0x0211, YCbCrCoefficients)
-  , (0x0213, YCbCrPositioning)
-  , (0x0214, ReferenceBlackWhite)
-  , (0x8298, Copyright)
-  , (0x8769, ExifOffset)
-  , (0x829a, ExposureTime)
-  , (0x829d, FNumber)
-  , (0x8822, ExposureProgram)
-  , (0x8827, ISOSpeedRatings)
-  , (0x9000, ExifVersion)
-  , (0x9003, DateTimeOriginal)
-  , (0x9004, DateTimeDigitized)
-  , (0x9101, ComponentConfiguration)
-  , (0x9102, CompressedBitsPerPixel)
-  , (0x9201, ShutterSpeedValue)
-  , (0x9202, ApertureValue)
-  , (0x9203, BrightnessValue)
-  , (0x9204, ExposureBiasValue)
-  , (0x9205, MaxApertureValue)
-  , (0x9206, SubjectDistance)
-  , (0x9207, MeteringMode)
-  , (0x9208, LightSource)
-  , (0x9209, Flash)
-  , (0x920a, FocalLength)
-  , (0x927c, MakerNote)
-  , (0x9286, UserComment)
-  , (0xa000, FlashPixVersion)
-  , (0xa001, ColorSpace)
-  , (0xa002, ExifImageWidth)
-  , (0xa003, ExifImageHeight)
-  , (0xa004, RelatedSoundFile)
-  , (0xa005, ExifInteroperabilityOffset)
-  , (0xa20e, FocalPlaneXResolution)
-  , (0xa20f, FocalPlaneYResolution)
-  , (0xa210, FocalPlaneResolutionUnit)
-  , (0xa217, SensingMethod)
-  , (0xa300, FileSource)
-  , (0xa301, SceneType)
-  ]
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 825fdaadb..193b8b61c 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Logging
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -36,6 +36,7 @@ import Data.Typeable (Typeable)
 import GHC.Generics (Generic)
 import Text.Pandoc.Definition
 import Text.Parsec.Pos
+import Text.Pandoc.Shared (tshow)
 
 -- | Verbosity level.
 data Verbosity = ERROR | WARNING | INFO
@@ -84,6 +85,7 @@ data LogMessage =
   | CouldNotParseCSS Text
   | Fetching Text
   | Extracting Text
+  | LoadedResource FilePath FilePath
   | NoTitleElement Text
   | NoLangSpecified
   | InvalidLang Text
@@ -100,6 +102,8 @@ data LogMessage =
   | FilterCompleted FilePath Integer
   | CiteprocWarning Text
   | ATXHeadingInLHS Int Text
+  | EnvironmentVariableUndefined Text
+  | DuplicateAttribute Text Text
   deriving (Show, Eq, Data, Ord, Typeable, Generic)
 
 instance ToJSON LogMessage where
@@ -192,6 +196,9 @@ instance ToJSON LogMessage where
            ["path" .= fp]
       Extracting fp ->
            ["path" .= fp]
+      LoadedResource orig found ->
+           ["for"  .= orig
+           ,"from" .= found]
       NoTitleElement fallback ->
            ["fallback" .= fallback]
       NoLangSpecified -> []
@@ -229,13 +236,20 @@ instance ToJSON LogMessage where
       ATXHeadingInLHS lvl contents ->
            ["level" .= lvl
            ,"contents" .= contents]
+      EnvironmentVariableUndefined var ->
+           ["variable" .= var ]
+      DuplicateAttribute attr val ->
+           ["attribute" .= attr
+           ,"value" .= val]
 
 showPos :: SourcePos -> Text
 showPos pos = Text.pack $ sn ++ "line " ++
      show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
-  where sn = if sourceName pos == "source" || sourceName pos == ""
-                then ""
-                else sourceName pos ++ " "
+  where
+    sn' = sourceName pos
+    sn = if sn' == "source" || sn' == "" || sn' == "-"
+            then ""
+            else sn' ++ " "
 
 encodeLogMessages :: [LogMessage] -> BL.ByteString
 encodeLogMessages ms =
@@ -268,7 +282,7 @@ showLogMessage msg =
        ParsingUnescaped s pos ->
          "Parsing unescaped '" <> s <> "' at " <> showPos pos
        CouldNotLoadIncludeFile fp pos ->
-         "Could not load include file '" <> fp <> "' at " <> showPos pos
+         "Could not load include file " <> fp <> " at " <> showPos pos
        MacroAlreadyDefined name pos ->
          "Macro '" <> name <> "' already defined, ignoring at " <> showPos pos
        InlineNotRendered il ->
@@ -280,18 +294,18 @@ showLogMessage msg =
        IgnoredIOError s ->
          "IO Error (ignored): " <> s
        CouldNotFetchResource fp s ->
-         "Could not fetch resource '" <> fp <> "'" <>
+         "Could not fetch resource " <> fp <>
            if Text.null s then "" else ": " <> s
        CouldNotDetermineImageSize fp s ->
-         "Could not determine image size for '" <> fp <> "'" <>
+         "Could not determine image size for " <> fp <>
            if Text.null s then "" else ": " <> s
        CouldNotConvertImage fp s ->
-         "Could not convert image '" <> fp <> "'" <>
+         "Could not convert image " <> fp <>
            if Text.null s then "" else ": " <> s
        CouldNotDetermineMimeType fp ->
-         "Could not determine mime type for '" <> fp <> "'"
+         "Could not determine mime type for " <> fp
        CouldNotConvertTeXMath s m ->
-         "Could not convert TeX math '" <> s <> "', rendering as TeX" <>
+         "Could not convert TeX math " <> s <> ", rendering as TeX" <>
            if Text.null m then "" else ":\n" <> m
        CouldNotParseCSS m ->
          "Could not parse CSS" <> if Text.null m then "" else ":\n" <> m
@@ -299,6 +313,8 @@ showLogMessage msg =
          "Fetching " <> fp <> "..."
        Extracting fp ->
          "Extracting " <> fp <> "..."
+       LoadedResource orig found ->
+         "Loaded " <> Text.pack orig <> " from " <> Text.pack found
        NoTitleElement fallback ->
          "This document format requires a nonempty <title> element.\n" <>
          "Defaulting to '" <> fallback <> "' as the title.\n" <>
@@ -345,6 +361,10 @@ showLogMessage msg =
          if lvl < 3
             then " Consider using --markdown-headings=setext."
             else ""
+       EnvironmentVariableUndefined var ->
+         "Undefined environment variable " <> var <> " in defaults file."
+       DuplicateAttribute attr val ->
+         "Ignoring duplicate attribute " <> attr <> "=" <> tshow val <> "."
 
 messageVerbosity :: LogMessage -> Verbosity
 messageVerbosity msg =
@@ -375,6 +395,7 @@ messageVerbosity msg =
        CouldNotParseCSS{}            -> WARNING
        Fetching{}                    -> INFO
        Extracting{}                  -> INFO
+       LoadedResource{}              -> INFO
        NoTitleElement{}              -> WARNING
        NoLangSpecified               -> INFO
        InvalidLang{}                 -> WARNING
@@ -391,3 +412,5 @@ messageVerbosity msg =
        FilterCompleted{}             -> INFO
        CiteprocWarning{}             -> WARNING
        ATXHeadingInLHS{}             -> WARNING
+       EnvironmentVariableUndefined{}-> WARNING
+       DuplicateAttribute{}          -> WARNING
diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs
index 39db0074a..f0e9e076b 100644
--- a/src/Text/Pandoc/Lua.hs
+++ b/src/Text/Pandoc/Lua.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017–2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/ErrorConversion.hs b/src/Text/Pandoc/Lua/ErrorConversion.hs
index 59c962723..4e6880722 100644
--- a/src/Text/Pandoc/Lua/ErrorConversion.hs
+++ b/src/Text/Pandoc/Lua/ErrorConversion.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.ErrorConversion
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index 94d7adeb2..01bf90efa 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -1,8 +1,8 @@
 {-# LANGUAGE FlexibleContexts  #-}
 {- |
 Module      : Text.Pandoc.Lua.Filter
-Copyright   : © 2012–2020 John MacFarlane,
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane,
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
@@ -13,7 +13,9 @@ module Text.Pandoc.Lua.Filter ( LuaFilterFunction
                               , LuaFilter
                               , runFilterFile
                               , walkInlines
+                              , walkInlineLists
                               , walkBlocks
+                              , walkBlockLists
                               , module Text.Pandoc.Lua.Walk
                               ) where
 import Control.Applicative ((<|>))
@@ -22,6 +24,7 @@ import Control.Monad.Catch (finally, try)
 import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
                   showConstr, toConstr, tyconUQname)
 import Data.Foldable (foldrM)
+import Data.List (foldl')
 import Data.Map (Map)
 import Data.Maybe (fromMaybe)
 import Foreign.Lua (Lua, Peekable, Pushable, StackIndex)
@@ -204,7 +207,7 @@ walkMeta lf (Pandoc m bs) = do
 
 walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc
 walkPandoc (LuaFilter fnMap) =
-  case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
+  case foldl' mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
     Just fn -> \x -> runFilterFunction fn x *> singleElement x
     Nothing -> return
 
diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs
index 4285be662..29b788f04 100644
--- a/src/Text/Pandoc/Lua/Global.hs
+++ b/src/Text/Pandoc/Lua/Global.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE DeriveDataTypeable #-}
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs
index e89e9d6e0..baa6f0295 100644
--- a/src/Text/Pandoc/Lua/Init.hs
+++ b/src/Text/Pandoc/Lua/Init.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -12,17 +12,18 @@ module Text.Pandoc.Lua.Init
   ( runLua
   ) where
 
+import Control.Monad (when)
 import Control.Monad.Catch (try)
 import Control.Monad.Trans (MonadIO (..))
 import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr)
 import Foreign.Lua (Lua)
 import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8)
+import Text.Pandoc.Class.PandocMonad (readDataFile)
 import Text.Pandoc.Class.PandocIO (PandocIO)
 import Text.Pandoc.Error (PandocError)
 import Text.Pandoc.Lua.Packages (installPandocPackageSearcher)
-import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua,
-                                  loadScriptFromDataDir, runPandocLua)
-
+import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, runPandocLua)
+import Text.Pandoc.Lua.Util (throwTopMessageAsError')
 import qualified Foreign.Lua as Lua
 import qualified Text.Pandoc.Definition as Pandoc
 import qualified Text.Pandoc.Lua.Module.Pandoc as ModulePandoc
@@ -44,7 +45,7 @@ initLuaState = do
   liftPandocLua Lua.openlibs
   installPandocPackageSearcher
   initPandocModule
-  loadScriptFromDataDir "init.lua"
+  loadInitScript "init.lua"
  where
   initPandocModule :: PandocLua ()
   initPandocModule = do
@@ -61,6 +62,15 @@ initLuaState = do
     -- assign module to global variable
     liftPandocLua $ Lua.setglobal "pandoc"
 
+  loadInitScript :: FilePath -> PandocLua ()
+  loadInitScript scriptFile = do
+    script <- readDataFile scriptFile
+    status <- liftPandocLua $ Lua.dostring script
+    when (status /= Lua.OK) . liftPandocLua $
+      throwTopMessageAsError'
+      (("Couldn't load '" ++ scriptFile ++ "'.\n") ++)
+
+
 -- | AST elements are marshaled via normal constructor functions in the
 -- @pandoc@ module. However, accessing Lua globals from Haskell is
 -- expensive (due to error handling). Accessing the Lua registry is much
diff --git a/src/Text/Pandoc/Lua/Marshaling.hs b/src/Text/Pandoc/Lua/Marshaling.hs
index 1254402b6..f517c7c27 100644
--- a/src/Text/Pandoc/Lua/Marshaling.hs
+++ b/src/Text/Pandoc/Lua/Marshaling.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs
index c889618c4..8e12d232c 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AST.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AST.hs
@@ -1,9 +1,10 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE BangPatterns         #-}
 {-# LANGUAGE LambdaCase           #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AST
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -17,6 +18,7 @@ module Text.Pandoc.Lua.Marshaling.AST
   ) where
 
 import Control.Applicative ((<|>))
+import Control.Monad ((<$!>))
 import Foreign.Lua (Lua, Peekable, Pushable, StackIndex)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError)
@@ -32,17 +34,16 @@ instance Pushable Pandoc where
     pushViaConstructor "Pandoc" blocks meta
 
 instance Peekable Pandoc where
-  peek idx = defineHowTo "get Pandoc value" $ do
-    blocks <- LuaUtil.rawField idx "blocks"
-    meta   <- LuaUtil.rawField idx "meta"
-    return $ Pandoc meta blocks
+  peek idx = defineHowTo "get Pandoc value" $! Pandoc
+    <$!> LuaUtil.rawField idx "meta"
+    <*>  LuaUtil.rawField idx "blocks"
 
 instance Pushable Meta where
   push (Meta mmap) =
     pushViaConstructor "Meta" mmap
 instance Peekable Meta where
-  peek idx = defineHowTo "get Meta value" $
-    Meta <$> Lua.peek idx
+  peek idx = defineHowTo "get Meta value" $!
+    Meta <$!> Lua.peek idx
 
 instance Pushable MetaValue where
   push = pushMetaValue
@@ -68,14 +69,13 @@ instance Pushable Citation where
     pushViaConstructor "Citation" cid mode prefix suffix noteNum hash
 
 instance Peekable Citation where
-  peek idx = do
-    id' <- LuaUtil.rawField idx "id"
-    prefix <- LuaUtil.rawField idx "prefix"
-    suffix <- LuaUtil.rawField idx "suffix"
-    mode <- LuaUtil.rawField idx "mode"
-    num <- LuaUtil.rawField idx "note_num"
-    hash <- LuaUtil.rawField idx "hash"
-    return $ Citation id' prefix suffix mode num hash
+  peek idx = Citation
+    <$!> LuaUtil.rawField idx "id"
+    <*> LuaUtil.rawField idx "prefix"
+    <*> LuaUtil.rawField idx "suffix"
+    <*> LuaUtil.rawField idx "mode"
+    <*> LuaUtil.rawField idx "note_num"
+    <*> LuaUtil.rawField idx "hash"
 
 instance Pushable Alignment where
   push = Lua.push . show
@@ -90,7 +90,7 @@ instance Peekable CitationMode where
 instance Pushable Format where
   push (Format f) = Lua.push f
 instance Peekable Format where
-  peek idx = Format <$> Lua.peek idx
+  peek idx = Format <$!> Lua.peek idx
 
 instance Pushable ListNumberDelim where
   push = Lua.push . show
@@ -130,26 +130,26 @@ peekMetaValue idx = defineHowTo "get MetaValue" $ do
       elementContent = Lua.peek idx
   luatype <- Lua.ltype idx
   case luatype of
-    Lua.TypeBoolean -> MetaBool <$> Lua.peek idx
-    Lua.TypeString  -> MetaString <$> Lua.peek idx
+    Lua.TypeBoolean -> MetaBool <$!> Lua.peek idx
+    Lua.TypeString  -> MetaString <$!> Lua.peek idx
     Lua.TypeTable   -> do
       tag <- try $ LuaUtil.getTag idx
       case tag of
-        Right "MetaBlocks"  -> MetaBlocks  <$> elementContent
-        Right "MetaBool"    -> MetaBool    <$> elementContent
-        Right "MetaMap"     -> MetaMap     <$> elementContent
-        Right "MetaInlines" -> MetaInlines <$> elementContent
-        Right "MetaList"    -> MetaList    <$> elementContent
-        Right "MetaString"  -> MetaString  <$> elementContent
+        Right "MetaBlocks"  -> MetaBlocks  <$!> elementContent
+        Right "MetaBool"    -> MetaBool    <$!> elementContent
+        Right "MetaMap"     -> MetaMap     <$!> elementContent
+        Right "MetaInlines" -> MetaInlines <$!> elementContent
+        Right "MetaList"    -> MetaList    <$!> elementContent
+        Right "MetaString"  -> MetaString  <$!> elementContent
         Right t             -> Lua.throwMessage ("Unknown meta tag: " <> t)
         Left _ -> do
           -- no meta value tag given, try to guess.
           len <- Lua.rawlen idx
           if len <= 0
-            then MetaMap <$> Lua.peek idx
-            else  (MetaInlines <$> Lua.peek idx)
-                  <|> (MetaBlocks <$> Lua.peek idx)
-                  <|> (MetaList <$> Lua.peek idx)
+            then MetaMap <$!> Lua.peek idx
+            else  (MetaInlines <$!> Lua.peek idx)
+                  <|> (MetaBlocks <$!> Lua.peek idx)
+                  <|> (MetaList <$!> Lua.peek idx)
     _        -> Lua.throwMessage "could not get meta value"
 
 -- | Push a block element to the top of the Lua stack.
@@ -174,25 +174,25 @@ pushBlock = \case
 
 -- | Return the value at the given index as block if possible.
 peekBlock :: StackIndex -> Lua Block
-peekBlock idx = defineHowTo "get Block value" $ do
+peekBlock idx = defineHowTo "get Block value" $! do
   tag <- LuaUtil.getTag idx
   case tag of
-      "BlockQuote"     -> BlockQuote <$> elementContent
-      "BulletList"     -> BulletList <$> elementContent
-      "CodeBlock"      -> withAttr CodeBlock <$> elementContent
-      "DefinitionList" -> DefinitionList <$> elementContent
-      "Div"            -> withAttr Div <$> elementContent
+      "BlockQuote"     -> BlockQuote <$!> elementContent
+      "BulletList"     -> BulletList <$!> elementContent
+      "CodeBlock"      -> withAttr CodeBlock <$!> elementContent
+      "DefinitionList" -> DefinitionList <$!> elementContent
+      "Div"            -> withAttr Div <$!> elementContent
       "Header"         -> (\(lvl, LuaAttr attr, lst) -> Header lvl attr lst)
-                          <$> elementContent
+                          <$!> elementContent
       "HorizontalRule" -> return HorizontalRule
-      "LineBlock"      -> LineBlock <$> elementContent
+      "LineBlock"      -> LineBlock <$!> elementContent
       "OrderedList"    -> (\(LuaListAttributes lstAttr, lst) ->
                              OrderedList lstAttr lst)
-                          <$> elementContent
+                          <$!> elementContent
       "Null"           -> return Null
-      "Para"           -> Para <$> elementContent
-      "Plain"          -> Plain <$> elementContent
-      "RawBlock"       -> uncurry RawBlock <$> elementContent
+      "Para"           -> Para <$!> elementContent
+      "Plain"          -> Plain <$!> elementContent
+      "RawBlock"       -> uncurry RawBlock <$!> elementContent
       "Table"          -> (\(attr, capt, colSpecs, thead, tbodies, tfoot) ->
                               Table (fromLuaAttr attr)
                                     capt
@@ -200,7 +200,7 @@ peekBlock idx = defineHowTo "get Block value" $ do
                                     thead
                                     tbodies
                                     tfoot)
-                          <$> elementContent
+                          <$!> elementContent
       _ -> Lua.throwMessage ("Unknown block type: " <> tag)
  where
    -- Get the contents of an AST element.
@@ -222,15 +222,14 @@ pushCaption (Caption shortCaption longCaption) = do
 
 -- | Peek Caption element
 peekCaption :: StackIndex -> Lua Caption
-peekCaption idx = do
-  short <- Lua.fromOptional <$> LuaUtil.rawField idx "short"
-  long  <- LuaUtil.rawField idx "long"
-  return $ Caption short long
+peekCaption idx = Caption
+  <$!> (Lua.fromOptional <$!> LuaUtil.rawField idx "short")
+  <*>  LuaUtil.rawField idx "long"
 
 instance Peekable ColWidth where
   peek idx = do
-    width <- Lua.fromOptional <$> Lua.peek idx
-    return $ maybe ColWidthDefault ColWidth width
+    width <- Lua.fromOptional <$!> Lua.peek idx
+    return $! maybe ColWidthDefault ColWidth width
 
 instance Pushable ColWidth where
   push = \case
@@ -252,12 +251,11 @@ instance Pushable TableBody where
     LuaUtil.addField "body" body
 
 instance Peekable TableBody where
-  peek idx = do
-    attr <- LuaUtil.rawField idx "attr"
-    rowHeadColumns <- LuaUtil.rawField idx "row_head_columns"
-    head' <- LuaUtil.rawField idx "head"
-    body <- LuaUtil.rawField idx "body"
-    return $ TableBody attr (RowHeadColumns rowHeadColumns) head' body
+  peek idx = TableBody
+    <$!> LuaUtil.rawField idx "attr"
+    <*>  (RowHeadColumns <$!> LuaUtil.rawField idx "row_head_columns")
+    <*>  LuaUtil.rawField idx "head"
+    <*>  LuaUtil.rawField idx "body"
 
 instance Pushable TableHead where
   push (TableHead attr rows) = Lua.push (attr, rows)
@@ -287,13 +285,12 @@ pushCell (Cell attr align (RowSpan rowSpan) (ColSpan colSpan) contents) = do
   LuaUtil.addField "contents" contents
 
 peekCell :: StackIndex -> Lua Cell
-peekCell idx = do
-  attr <- fromLuaAttr <$> LuaUtil.rawField idx "attr"
-  align <- LuaUtil.rawField idx "alignment"
-  rowSpan <- LuaUtil.rawField idx "row_span"
-  colSpan <- LuaUtil.rawField idx "col_span"
-  contents <- LuaUtil.rawField idx "contents"
-  return $ Cell attr align (RowSpan rowSpan) (ColSpan colSpan) contents
+peekCell idx = Cell
+  <$!> (fromLuaAttr <$!> LuaUtil.rawField idx "attr")
+  <*>  LuaUtil.rawField idx "alignment"
+  <*>  (RowSpan <$!> LuaUtil.rawField idx "row_span")
+  <*>  (ColSpan <$!> LuaUtil.rawField idx "col_span")
+  <*>  LuaUtil.rawField idx "contents"
 
 -- | Push an inline element to the top of the lua stack.
 pushInline :: Inline -> Lua ()
@@ -324,28 +321,29 @@ peekInline :: StackIndex -> Lua Inline
 peekInline idx = defineHowTo "get Inline value" $ do
   tag <- LuaUtil.getTag idx
   case tag of
-    "Cite"       -> uncurry Cite <$> elementContent
-    "Code"       -> withAttr Code <$> elementContent
-    "Emph"       -> Emph <$> elementContent
-    "Underline"  -> Underline <$> elementContent
-    "Image"      -> (\(LuaAttr attr, lst, tgt) -> Image attr lst tgt)
-                    <$> elementContent
-    "Link"       -> (\(LuaAttr attr, lst, tgt) -> Link attr lst tgt)
-                    <$> elementContent
+    "Cite"       -> uncurry Cite <$!> elementContent
+    "Code"       -> withAttr Code <$!> elementContent
+    "Emph"       -> Emph <$!> elementContent
+    "Underline"  -> Underline <$!> elementContent
+    "Image"      -> (\(LuaAttr !attr, !lst, !tgt) -> Image attr lst tgt)
+                    <$!> elementContent
+    "Link"       -> (\(LuaAttr !attr, !lst, !tgt) -> Link attr lst tgt)
+                    <$!> elementContent
     "LineBreak"  -> return LineBreak
-    "Note"       -> Note <$> elementContent
-    "Math"       -> uncurry Math <$> elementContent
-    "Quoted"     -> uncurry Quoted <$> elementContent
-    "RawInline"  -> uncurry RawInline <$> elementContent
-    "SmallCaps"  -> SmallCaps <$> elementContent
+    "Note"       -> Note <$!> elementContent
+    "Math"       -> uncurry Math <$!> elementContent
+    "Quoted"     -> uncurry Quoted <$!> elementContent
+    "RawInline"  -> uncurry RawInline <$!> elementContent
+    "SmallCaps"  -> SmallCaps <$!> elementContent
     "SoftBreak"  -> return SoftBreak
     "Space"      -> return Space
-    "Span"       -> withAttr Span <$> elementContent
-    "Str"        -> Str <$> elementContent
-    "Strikeout"  -> Strikeout <$> elementContent
-    "Strong"     -> Strong <$> elementContent
-    "Subscript"  -> Subscript <$> elementContent
-    "Superscript"-> Superscript <$> elementContent
+    "Span"       -> withAttr Span <$!> elementContent
+    -- strict to Lua string is copied before gc
+    "Str"        -> Str <$!> elementContent
+    "Strikeout"  -> Strikeout <$!> elementContent
+    "Strong"     -> Strong <$!> elementContent
+    "Subscript"  -> Subscript <$!> elementContent
+    "Superscript"-> Superscript <$!> elementContent
     _ -> Lua.throwMessage ("Unknown inline type: " <> tag)
  where
    -- Get the contents of an AST element.
@@ -366,7 +364,7 @@ instance Pushable LuaAttr where
     pushViaConstructor "Attr" id' classes kv
 
 instance Peekable LuaAttr where
-  peek idx = defineHowTo "get Attr value" (LuaAttr <$> Lua.peek idx)
+  peek idx = defineHowTo "get Attr value" $! (LuaAttr <$!> Lua.peek idx)
 
 -- | Wrapper for ListAttributes
 newtype LuaListAttributes = LuaListAttributes  ListAttributes
diff --git a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
index c4720aedf..82e26b963 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AnyValue
-   Copyright   : © 2017-2020 Albert Krewinkel
+   Copyright   : © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
index 636650af3..147197c5d 100644
--- a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE OverloadedStrings    #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.CommonState
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/Context.hs b/src/Text/Pandoc/Lua/Marshaling/Context.hs
index effcc675d..606bdcfb2 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Context.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Context.hs
@@ -1,8 +1,8 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.Context
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -22,6 +22,7 @@ instance (TemplateTarget a, Pushable a) => Pushable (Context a) where
 
 instance (TemplateTarget a, Pushable a) => Pushable (Val a) where
   push NullVal = Lua.push ()
+  push (BoolVal b) = Lua.push b
   push (MapVal ctx) = Lua.push ctx
   push (ListVal xs) = Lua.push xs
   push (SimpleVal d) = Lua.push $ render Nothing d
diff --git a/src/Text/Pandoc/Lua/Marshaling/List.hs b/src/Text/Pandoc/Lua/Marshaling/List.hs
index e6614400d..0446302a1 100644
--- a/src/Text/Pandoc/Lua/Marshaling/List.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/List.hs
@@ -4,8 +4,8 @@
 {-# LANGUAGE UndecidableInstances #-}
 {- |
 Module      : Text.Pandoc.Lua.Marshaling.List
-Copyright   : © 2012-2020 John MacFarlane
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
index 2cf5b8893..70bd010a0 100644
--- a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.MediaBag
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
index 74537a1dd..f698704e0 100644
--- a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables  #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.PandocError
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
index 2e45affe4..dd7bf2e61 100644
--- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
@@ -4,8 +4,8 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.ReaderOptions
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
index 98fa1efa4..6d43039fa 100644
--- a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.SimpleTable
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/Version.hs b/src/Text/Pandoc/Lua/Marshaling/Version.hs
index 9adb1b763..4f4ffac51 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Version.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Version.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables  #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.Version
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs
index e5a10217a..3eed50fca 100644
--- a/src/Text/Pandoc/Lua/Module/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.MediaBag
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -13,6 +13,7 @@ module Text.Pandoc.Lua.Module.MediaBag
   ( pushModule
   ) where
 
+import Prelude hiding (lookup)
 import Control.Monad (zipWithM_)
 import Foreign.Lua (Lua, NumResults, Optional)
 import Text.Pandoc.Class.CommonState (CommonState (..))
@@ -36,10 +37,10 @@ pushModule = do
   liftPandocLua Lua.newtable
   addFunction "delete" delete
   addFunction "empty" empty
-  addFunction "insert" insertMediaFn
+  addFunction "insert" insert
   addFunction "items" items
-  addFunction "lookup" lookupMediaFn
-  addFunction "list" mediaDirectoryFn
+  addFunction "lookup" lookup
+  addFunction "list" list
   addFunction "fetch" fetch
   return 1
 
@@ -53,11 +54,11 @@ empty :: PandocLua NumResults
 empty = 0 <$ modifyCommonState (\st -> st { stMediaBag = mempty })
 
 -- | Insert a new item into the media bag.
-insertMediaFn :: FilePath
-              -> Optional MimeType
-              -> BL.ByteString
-              -> PandocLua NumResults
-insertMediaFn fp optionalMime contents = do
+insert :: FilePath
+       -> Optional MimeType
+       -> BL.ByteString
+       -> PandocLua NumResults
+insert fp optionalMime contents = do
   mb <- getMediaBag
   setMediaBag $ MB.insertMedia fp (Lua.fromOptional optionalMime) contents mb
   return (Lua.NumResults 0)
@@ -66,19 +67,19 @@ insertMediaFn fp optionalMime contents = do
 items :: PandocLua NumResults
 items = getMediaBag >>= liftPandocLua . pushIterator
 
-lookupMediaFn :: FilePath
-              -> PandocLua NumResults
-lookupMediaFn fp = do
+lookup :: FilePath
+       -> PandocLua NumResults
+lookup fp = do
   res <- MB.lookupMedia fp <$> getMediaBag
   liftPandocLua $ case res of
     Nothing -> 1 <$ Lua.pushnil
-    Just (mimeType, contents) -> do
-      Lua.push mimeType
-      Lua.push contents
+    Just item -> do
+      Lua.push $ MB.mediaMimeType item
+      Lua.push $ MB.mediaContents item
       return 2
 
-mediaDirectoryFn :: PandocLua NumResults
-mediaDirectoryFn = do
+list :: PandocLua NumResults
+list = do
   dirContents <- MB.mediaDirectory <$> getMediaBag
   liftPandocLua $ do
     Lua.newtable
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index 3886568b7..5c14b3a30 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.Pandoc
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -14,6 +14,7 @@ module Text.Pandoc.Lua.Module.Pandoc
   ( pushModule
   ) where
 
+import Prelude hiding (read)
 import Control.Monad (when)
 import Control.Monad.Except (throwError)
 import Data.Default (Default (..))
@@ -22,10 +23,12 @@ import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable)
 import System.Exit (ExitCode (..))
 import Text.Pandoc.Class.PandocIO (runIO)
 import Text.Pandoc.Definition (Block, Inline)
-import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter, SingletonsList (..))
+import Text.Pandoc.Lua.Filter (LuaFilter, SingletonsList (..), walkInlines,
+                               walkInlineLists, walkBlocks, walkBlockLists)
 import Text.Pandoc.Lua.Marshaling ()
+import Text.Pandoc.Lua.Marshaling.List (List (..))
 import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua,
-                                  loadScriptFromDataDir)
+                                  loadDefaultModule)
 import Text.Pandoc.Walk (Walkable)
 import Text.Pandoc.Options (ReaderOptions (readerExtensions))
 import Text.Pandoc.Process (pipeProcess)
@@ -38,30 +41,33 @@ import qualified Foreign.Lua as Lua
 import qualified Text.Pandoc.Lua.Util as LuaUtil
 import Text.Pandoc.Error
 
--- | Push the "pandoc" on the lua stack. Requires the `list` module to be
--- loaded.
+-- | Push the "pandoc" package to the Lua stack. Requires the `List`
+-- module to be loadable.
 pushModule :: PandocLua NumResults
 pushModule = do
-  loadScriptFromDataDir "pandoc.lua"
-  addFunction "read" readDoc
-  addFunction "pipe" pipeFn
-  addFunction "walk_block" walkBlock
-  addFunction "walk_inline" walkInline
+  loadDefaultModule "pandoc"
+  addFunction "read" read
+  addFunction "pipe" pipe
+  addFunction "walk_block" walk_block
+  addFunction "walk_inline" walk_inline
   return 1
 
 walkElement :: (Walkable (SingletonsList Inline) a,
-                Walkable (SingletonsList Block) a)
+                Walkable (SingletonsList Block) a,
+                Walkable (List Inline) a,
+                Walkable (List Block) a)
             => a -> LuaFilter -> PandocLua a
-walkElement x f = liftPandocLua $ walkInlines f x >>= walkBlocks f
+walkElement x f = liftPandocLua $
+  walkInlines f x >>= walkInlineLists f >>= walkBlocks f >>= walkBlockLists f
 
-walkInline :: Inline -> LuaFilter -> PandocLua Inline
-walkInline = walkElement
+walk_inline :: Inline -> LuaFilter -> PandocLua Inline
+walk_inline = walkElement
 
-walkBlock :: Block -> LuaFilter -> PandocLua Block
-walkBlock = walkElement
+walk_block :: Block -> LuaFilter -> PandocLua Block
+walk_block = walkElement
 
-readDoc :: T.Text -> Optional T.Text -> PandocLua NumResults
-readDoc content formatSpecOrNil = liftPandocLua $ do
+read :: T.Text -> Optional T.Text -> PandocLua NumResults
+read content formatSpecOrNil = liftPandocLua $ do
   let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil)
   res <- Lua.liftIO . runIO $
            getReader formatSpec >>= \(rdr,es) ->
@@ -79,11 +85,11 @@ readDoc content formatSpecOrNil = liftPandocLua $ do
     Left  e      -> Lua.raiseError $ show e
 
 -- | Pipes input through a command.
-pipeFn :: String
-       -> [String]
-       -> BL.ByteString
-       -> PandocLua NumResults
-pipeFn command args input = liftPandocLua $ do
+pipe :: String           -- ^ path to executable
+     -> [String]         -- ^ list of arguments
+     -> BL.ByteString    -- ^ input passed to process via stdin
+     -> PandocLua NumResults
+pipe command args input = liftPandocLua $ do
   (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input
   case ec of
     ExitSuccess -> 1 <$ Lua.push output
diff --git a/src/Text/Pandoc/Lua/Module/System.hs b/src/Text/Pandoc/Lua/Module/System.hs
index 04508e461..bd35babaf 100644
--- a/src/Text/Pandoc/Lua/Module/System.hs
+++ b/src/Text/Pandoc/Lua/Module/System.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Module.System
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Types.hs b/src/Text/Pandoc/Lua/Module/Types.hs
index 999f2e588..bb4f02c3c 100644
--- a/src/Text/Pandoc/Lua/Module/Types.hs
+++ b/src/Text/Pandoc/Lua/Module/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Module.Types
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs
index 7595b9c0f..3ec3afc26 100644
--- a/src/Text/Pandoc/Lua/Module/Utils.hs
+++ b/src/Text/Pandoc/Lua/Module/Utils.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.Utils
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -146,7 +146,7 @@ from_simple_table (SimpleTable capt aligns widths head' body) = do
     nullAttr
     (Caption Nothing [Plain capt])
     (zipWith (\a w -> (a, toColWidth w)) aligns widths)
-    (TableHead nullAttr [blockListToRow head'])
+    (TableHead nullAttr [blockListToRow head' | not (null head') ])
     [TableBody nullAttr 0 [] $ map blockListToRow body]
     (TableFoot nullAttr [])
   return (NumResults 1)
diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs
index 4c3b9d79d..2f1c139db 100644
--- a/src/Text/Pandoc/Lua/Packages.hs
+++ b/src/Text/Pandoc/Lua/Packages.hs
@@ -1,9 +1,6 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Packages
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -15,15 +12,12 @@ module Text.Pandoc.Lua.Packages
   ( installPandocPackageSearcher
   ) where
 
-import Control.Monad.Catch (try)
 import Control.Monad (forM_)
-import Data.ByteString (ByteString)
-import Foreign.Lua (Lua, NumResults)
-import Text.Pandoc.Error (PandocError)
-import Text.Pandoc.Class.PandocMonad (readDataFile)
-import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua)
+import Foreign.Lua (NumResults)
+import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, loadDefaultModule)
 
 import qualified Foreign.Lua as Lua
+import qualified Foreign.Lua.Module.Path as Path
 import qualified Foreign.Lua.Module.Text as Text
 import qualified Text.Pandoc.Lua.Module.Pandoc as Pandoc
 import qualified Text.Pandoc.Lua.Module.MediaBag as MediaBag
@@ -50,28 +44,17 @@ pandocPackageSearcher pkgName =
   case pkgName of
     "pandoc"          -> pushWrappedHsFun Pandoc.pushModule
     "pandoc.mediabag" -> pushWrappedHsFun MediaBag.pushModule
+    "pandoc.path"     -> pushWrappedHsFun Path.pushModule
     "pandoc.system"   -> pushWrappedHsFun System.pushModule
     "pandoc.types"    -> pushWrappedHsFun Types.pushModule
     "pandoc.utils"    -> pushWrappedHsFun Utils.pushModule
     "text"            -> pushWrappedHsFun Text.pushModule
-    _                 -> searchPureLuaLoader
+    "pandoc.List"     -> pushWrappedHsFun (loadDefaultModule pkgName)
+    _                 -> reportPandocSearcherFailure
  where
   pushWrappedHsFun f = liftPandocLua $ do
     Lua.pushHaskellFunction f
     return 1
-  searchPureLuaLoader = do
-    let filename = pkgName ++ ".lua"
-    try (readDataFile filename) >>= \case
-      Right script -> pushWrappedHsFun (loadStringAsPackage pkgName script)
-      Left (_ :: PandocError) -> liftPandocLua $ do
-        Lua.push ("\n\tno file '" ++ filename ++ "' in pandoc's datadir")
-        return (1 :: NumResults)
-
-loadStringAsPackage :: String -> ByteString -> Lua NumResults
-loadStringAsPackage pkgName script = do
-  status <- Lua.dostring script
-  if status == Lua.OK
-    then return (1 :: NumResults)
-    else do
-      msg <- Lua.popValue
-      Lua.raiseError ("Error while loading `" <> pkgName <> "`.\n" <> msg)
+  reportPandocSearcherFailure = liftPandocLua $ do
+    Lua.push ("\n\t" <> pkgName <> "is not one of pandoc's default packages")
+    return (1 :: NumResults)
diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs
index 6c3b410dd..750e019b6 100644
--- a/src/Text/Pandoc/Lua/PandocLua.hs
+++ b/src/Text/Pandoc/Lua/PandocLua.hs
@@ -9,7 +9,7 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.PandocLua
-   Copyright   : Copyright © 2020 Albert Krewinkel
+   Copyright   : Copyright © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -23,24 +23,23 @@ module Text.Pandoc.Lua.PandocLua
   , runPandocLua
   , liftPandocLua
   , addFunction
-  , loadScriptFromDataDir
+  , loadDefaultModule
   ) where
 
-import Control.Monad (when)
 import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow)
 import Control.Monad.Except (MonadError (catchError, throwError))
 import Control.Monad.IO.Class (MonadIO (liftIO))
 import Foreign.Lua (Lua (..), NumResults, Pushable, ToHaskellFunction)
 import Text.Pandoc.Class.PandocIO (PandocIO)
-import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDataFile)
-import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDefaultDataFile)
+import Text.Pandoc.Error (PandocError (PandocLuaError))
 import Text.Pandoc.Lua.Global (Global (..), setGlobals)
 import Text.Pandoc.Lua.ErrorConversion (errorConversion)
 
 import qualified Control.Monad.Catch as Catch
+import qualified Data.Text as T
 import qualified Foreign.Lua as Lua
 import qualified Text.Pandoc.Class.IO as IO
-import qualified Text.Pandoc.Lua.Util as LuaUtil
 
 -- | Type providing access to both, pandoc and Lua operations.
 newtype PandocLua a = PandocLua { unPandocLua :: Lua a }
@@ -86,14 +85,22 @@ addFunction name fn = liftPandocLua $ do
   Lua.pushHaskellFunction fn
   Lua.rawset (-3)
 
--- | Load a file from pandoc's data directory.
-loadScriptFromDataDir :: FilePath -> PandocLua ()
-loadScriptFromDataDir scriptFile = do
-  script <- readDataFile scriptFile
+-- | Load a pure Lua module included with pandoc. Leaves the result on
+-- the stack and returns @NumResults 1@.
+--
+-- The script is loaded from the default data directory. We do not load
+-- from data directories supplied via command line, as this could cause
+-- scripts to be executed even though they had not been passed explicitly.
+loadDefaultModule :: String -> PandocLua NumResults
+loadDefaultModule name = do
+  script <- readDefaultDataFile (name <> ".lua")
   status <- liftPandocLua $ Lua.dostring script
-  when (status /= Lua.OK) . liftPandocLua $
-    LuaUtil.throwTopMessageAsError'
-      (("Couldn't load '" ++ scriptFile ++ "'.\n") ++)
+  if status == Lua.OK
+    then return (1 :: NumResults)
+    else do
+      msg <- liftPandocLua Lua.popValue
+      let err = "Error while loading `" <> name <> "`.\n" <> msg
+      throwError $ PandocLuaError (T.pack err)
 
 -- | Global variables which should always be set.
 defaultGlobals :: PandocIO [Global]
diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs
index fbd013801..70a8a6d47 100644
--- a/src/Text/Pandoc/Lua/Util.hs
+++ b/src/Text/Pandoc/Lua/Util.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Util
-   Copyright   : © 2012–2020 John MacFarlane,
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane,
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Walk.hs b/src/Text/Pandoc/Lua/Walk.hs
index 695c7b44e..d6d973496 100644
--- a/src/Text/Pandoc/Lua/Walk.hs
+++ b/src/Text/Pandoc/Lua/Walk.hs
@@ -4,8 +4,8 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
 Module      : Text.Pandoc.Lua.Walk
-Copyright   : © 2012–2020 John MacFarlane,
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane,
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 4fe25ebe1..77c7069e9 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.MIME
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -10,8 +10,13 @@
 
 Mime type lookup.
 -}
-module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef,
-                          extensionFromMimeType, mediaCategory ) where
+module Text.Pandoc.MIME (
+  MimeType,
+  getMimeType,
+  getMimeTypeDef,
+  getCharset,
+  extensionFromMimeType,
+  mediaCategory ) where
 import Data.List (isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
 import qualified Data.Text as T
@@ -38,6 +43,16 @@ getMimeTypeDef :: FilePath -> MimeType
 getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType
 
 extensionFromMimeType :: MimeType -> Maybe T.Text
+-- few special cases, where there are multiple options:
+extensionFromMimeType "text/plain" = Just "txt"
+extensionFromMimeType "video/quicktime" = Just "mov"
+extensionFromMimeType "video/mpeg" = Just "mpeg"
+extensionFromMimeType "video/dv" = Just "dv"
+extensionFromMimeType "image/vnd.djvu" = Just "djvu"
+extensionFromMimeType "image/tiff" = Just "tiff"
+extensionFromMimeType "image/jpeg" = Just "jpg"
+extensionFromMimeType "application/xml" = Just "xml"
+extensionFromMimeType "application/ogg" = Just "ogg"
 extensionFromMimeType mimetype =
   M.lookup (T.takeWhile (/=';') mimetype) reverseMimeTypes
   -- note:  we just look up the basic mime type, dropping the content-encoding etc.
@@ -54,6 +69,14 @@ reverseMimeTypes = M.fromList $ map swap mimeTypesList
 mimeTypes :: M.Map T.Text MimeType
 mimeTypes = M.fromList mimeTypesList
 
+-- | Get the charset from a mime type, if one is present.
+getCharset :: MimeType -> Maybe T.Text
+getCharset mt =
+  let (_,y) = T.breakOn "charset=" mt
+   in if T.null y
+         then Nothing
+         else Just $ T.toUpper $ T.takeWhile (/= ';') $ T.drop 8 y
+
 -- | Collection of common mime types.
 -- Except for first entry, list borrowed from
 -- <https://github.com/Happstack/happstack-server/blob/master/src/Happstack/Server/FileServe/BuildingBlocks.hs happstack-server>
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index 26f44cef0..098e484ee 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -1,9 +1,10 @@
 {-# LANGUAGE CPP                        #-}
+{-# LANGUAGE OverloadedStrings          #-}
 {-# LANGUAGE DeriveDataTypeable         #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {- |
    Module      : Text.Pandoc.MediaBag
-   Copyright   : Copyright (C) 2014-2015, 2017–2020 John MacFarlane
+   Copyright   : Copyright (C) 2014-2015, 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -14,6 +15,7 @@ Definition of a MediaBag object to hold binary resources, and an
 interface for interacting with it.
 -}
 module Text.Pandoc.MediaBag (
+                     MediaItem(..),
                      MediaBag,
                      deleteMedia,
                      lookupMedia,
@@ -24,29 +26,43 @@ module Text.Pandoc.MediaBag (
 import qualified Data.ByteString.Lazy as BL
 import Data.Data (Data)
 import qualified Data.Map as M
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, isNothing)
 import Data.Typeable (Typeable)
 import System.FilePath
-import qualified System.FilePath.Posix as Posix
-import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
+import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Digest.Pure.SHA (sha1, showDigest)
+import Network.URI (URI (..), parseURI)
+
+data MediaItem =
+  MediaItem
+  { mediaMimeType :: MimeType
+  , mediaPath :: FilePath
+  , mediaContents :: BL.ByteString
+  } deriving (Eq, Ord, Show, Data, Typeable)
 
 -- | A container for a collection of binary resources, with names and
 -- mime types.  Note that a 'MediaBag' is a Monoid, so 'mempty'
 -- can be used for an empty 'MediaBag', and '<>' can be used to append
 -- two 'MediaBag's.
-newtype MediaBag = MediaBag (M.Map [FilePath] (MimeType, BL.ByteString))
+newtype MediaBag = MediaBag (M.Map Text MediaItem)
         deriving (Semigroup, Monoid, Data, Typeable)
 
 instance Show MediaBag where
   show bag = "MediaBag " ++ show (mediaDirectory bag)
 
+-- | We represent paths with /, in normalized form.
+canonicalize :: FilePath -> Text
+canonicalize = T.replace "\\" "/" . T.pack . normalise
+
 -- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds
 -- to the given path.
 deleteMedia :: FilePath       -- ^ relative path and canonical name of resource
             -> MediaBag
             -> MediaBag
 deleteMedia fp (MediaBag mediamap) =
-  MediaBag $ M.delete (splitDirectories fp) mediamap
+  MediaBag $ M.delete (canonicalize fp) mediamap
 
 -- | Insert a media item into a 'MediaBag', replacing any existing
 -- value with the same name.
@@ -56,26 +72,41 @@ insertMedia :: FilePath       -- ^ relative path and canonical name of resource
             -> MediaBag
             -> MediaBag
 insertMedia fp mbMime contents (MediaBag mediamap) =
-  MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap)
-  where mime = fromMaybe fallback mbMime
+  MediaBag (M.insert fp' mediaItem mediamap)
+  where mediaItem = MediaItem{ mediaPath = newpath
+                             , mediaContents = contents
+                             , mediaMimeType = mt }
+        fp' = canonicalize fp
+        uri = parseURI fp
+        newpath = if isRelative fp
+                       && isNothing uri
+                       && ".." `notElem` splitPath fp
+                     then T.unpack fp'
+                     else showDigest (sha1 contents) <> "." <> ext
         fallback = case takeExtension fp of
                         ".gz" -> getMimeTypeDef $ dropExtension fp
                         _     -> getMimeTypeDef fp
+        mt = fromMaybe fallback mbMime
+        path = maybe fp uriPath uri
+        ext = case takeExtension path of
+                '.':e -> e
+                _ -> maybe "" T.unpack $ extensionFromMimeType mt
+
 
 -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
 lookupMedia :: FilePath
             -> MediaBag
-            -> Maybe (MimeType, BL.ByteString)
-lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap
+            -> Maybe MediaItem
+lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap
 
 -- | Get a list of the file paths stored in a 'MediaBag', with
 -- their corresponding mime types and the lengths in bytes of the contents.
 mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)]
-mediaDirectory (MediaBag mediamap) =
-  M.foldrWithKey (\fp (mime,contents) ->
-      ((Posix.joinPath fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+mediaDirectory mediabag =
+  map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs)))
+    (mediaItems mediabag)
 
 mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)]
 mediaItems (MediaBag mediamap) =
-  M.foldrWithKey (\fp (mime,contents) ->
-      ((Posix.joinPath fp, mime, contents):)) [] mediamap
+  map (\item -> (mediaPath item, mediaMimeType item, mediaContents item))
+      (M.elems mediamap)
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index c7f1a56fa..85d9aa103 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE TemplateHaskell    #-}
 {- |
    Module      : Text.Pandoc.Options
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -65,6 +65,7 @@ data ReaderOptions = ReaderOptions{
        , readerDefaultImageExtension :: Text -- ^ Default extension for images
        , readerTrackChanges          :: TrackChanges -- ^ Track changes setting for docx
        , readerStripComments         :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML
+                                             -- (only implemented in commonmark)
 } deriving (Show, Read, Data, Typeable, Generic)
 
 instance HasSyntaxExtensions ReaderOptions where
@@ -315,6 +316,23 @@ defaultKaTeXURL :: Text
 defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/"
 
 -- Update documentation in doc/filters.md if this is changed.
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''TrackChanges)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''WrapOption)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated . drop 8
+                           } ''TopLevelDivision)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''ReferenceLocation)
+
+-- Update documentation in doc/filters.md if this is changed.
 $(deriveJSON defaultOptions ''ReaderOptions)
 
 $(deriveJSON defaultOptions{
@@ -337,20 +355,3 @@ $(deriveJSON defaultOptions{ constructorTagModifier =
                            } ''ObfuscationMethod)
 
 $(deriveJSON defaultOptions ''HTMLSlideVariant)
-
--- Update documentation in doc/filters.md if this is changed.
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''TrackChanges)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''WrapOption)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated . drop 8
-                           } ''TopLevelDivision)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''ReferenceLocation)
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index c4080a227..c4e30af34 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.PDF
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -43,6 +43,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocPDFProgramNotFoundError))
 import Text.Pandoc.MIME (getMimeType)
 import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..))
+import Text.Pandoc.Extensions (disableExtension, Extension(Ext_smart))
 import Text.Pandoc.Process (pipeProcess)
 import System.Process (readProcessWithExitCode)
 import Text.Pandoc.Shared (inDirectory, stringify, tshow)
@@ -114,7 +115,10 @@ makePDF program pdfargs writer opts doc =
         runIOorExplode $ do
           putCommonState commonState
           doc' <- handleImages opts tmpdir doc
-          source <- writer opts doc'
+          source <- writer opts{ writerExtensions = -- disable use of quote
+                                    -- ligatures to avoid bad ligatures like ?`
+                                    disableExtension Ext_smart
+                                     (writerExtensions opts) } doc'
           res <- case baseProg of
             "context" -> context2pdf verbosity program pdfargs tmpdir source
             "tectonic" -> tectonic2pdf verbosity program pdfargs tmpdir source
@@ -198,7 +202,7 @@ convertImage opts tmpdir fname = do
     Just "image/svg+xml" -> E.catch (do
       (exit, _) <- pipeProcess Nothing "rsvg-convert"
                      ["-f","pdf","-a","--dpi-x",dpi,"--dpi-y",dpi,
-                      "-o",pdfOut,fname] BL.empty
+                      "-o",pdfOut,svgIn] BL.empty
       if exit == ExitSuccess
          then return $ Right pdfOut
          else return $ Left "conversion from SVG failed")
@@ -211,8 +215,9 @@ convertImage opts tmpdir fname = do
                  E.catch (Right pngOut <$ JP.savePngImage pngOut img) $
                      \(e :: E.SomeException) -> return (Left (tshow e))
   where
-    pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir
-    pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir
+    pngOut = normalise $ replaceDirectory (replaceExtension fname ".png") tmpdir
+    pdfOut = normalise $ replaceDirectory (replaceExtension fname ".pdf") tmpdir
+    svgIn = normalise fname
     mime = getMimeType fname
     doNothing = return (Right fname)
 
@@ -266,7 +271,7 @@ missingCharacterWarnings verbosity log' = do
         | isAscii c   = T.singleton c
         | otherwise   = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")"
   let addCodePoint = T.concatMap toCodePoint
-  let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l))
+  let warnings = [ addCodePoint (utf8ToText (BC.drop 19 l))
                  | l <- ls
                  , isMissingCharacterWarning l
                  ]
@@ -310,7 +315,7 @@ runTectonic verbosity program args' tmpDir' source = do
     env <- liftIO getEnvironment
     when (verbosity >= INFO) $ liftIO $
       showVerboseInfo (Just tmpDir) program programArgs env
-         (utf8ToString sourceBL)
+         (utf8ToText sourceBL)
     (exit, out) <- liftIO $ E.catch
       (pipeProcess (Just env) program programArgs sourceBL)
       (handlePDFProgramNotFound program)
@@ -381,7 +386,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do
             (pipeProcess (Just env'') program programArgs BL.empty)
             (handlePDFProgramNotFound program)
           when (verbosity >= INFO) $ liftIO $ do
-            UTF8.hPutStrLn stderr $ "[makePDF] Run #" ++ show runNumber
+            UTF8.hPutStrLn stderr $ "[makePDF] Run #" <> tshow runNumber
             BL.hPutStr stderr out
             UTF8.hPutStr stderr "\n"
           if runNumber < numRuns
@@ -401,7 +406,7 @@ generic2pdf :: Verbosity
 generic2pdf verbosity program args source = do
   env' <- getEnvironment
   when (verbosity >= INFO) $
-    showVerboseInfo Nothing program args env' (T.unpack source)
+    showVerboseInfo Nothing program args env' source
   (exit, out) <- E.catch
     (pipeProcess (Just env') program args
                      (BL.fromStrict $ UTF8.fromText source))
@@ -490,19 +495,32 @@ showVerboseInfo :: Maybe FilePath
                 -> String
                 -> [String]
                 -> [(String, String)]
-                -> String
+                -> Text
                 -> IO ()
 showVerboseInfo mbTmpDir program programArgs env source = do
   case mbTmpDir of
     Just tmpDir -> do
       UTF8.hPutStrLn stderr "[makePDF] temp dir:"
-      UTF8.hPutStrLn stderr tmpDir
+      UTF8.hPutStrLn stderr (T.pack tmpDir)
     Nothing -> return ()
   UTF8.hPutStrLn stderr "[makePDF] Command line:"
-  UTF8.hPutStrLn stderr $ program ++ " " ++ unwords (map show programArgs)
+  UTF8.hPutStrLn stderr $
+       T.pack program <> " " <> T.pack (unwords (map show programArgs))
   UTF8.hPutStr stderr "\n"
-  UTF8.hPutStrLn stderr "[makePDF] Environment:"
-  mapM_ (UTF8.hPutStrLn stderr . show) env
+  UTF8.hPutStrLn stderr "[makePDF] Relevant environment variables:"
+  -- we filter out irrelevant stuff to avoid leaking passwords and keys!
+  let isRelevant ("PATH",_) = True
+      isRelevant ("TMPDIR",_) = True
+      isRelevant ("PWD",_) = True
+      isRelevant ("LANG",_) = True
+      isRelevant ("HOME",_) = True
+      isRelevant ("LUA_PATH",_) = True
+      isRelevant ("LUA_CPATH",_) = True
+      isRelevant ("SHELL",_) = True
+      isRelevant ("TEXINPUTS",_) = True
+      isRelevant ("TEXMFOUTPUT",_) = True
+      isRelevant _ = False
+  mapM_ (UTF8.hPutStrLn stderr . tshow) (filter isRelevant env)
   UTF8.hPutStr stderr "\n"
   UTF8.hPutStrLn stderr "[makePDF] Source:"
   UTF8.hPutStrLn stderr source
@@ -513,8 +531,8 @@ handlePDFProgramNotFound program e
       E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program
   | otherwise = E.throwIO e
 
-utf8ToString :: ByteString -> String
-utf8ToString lbs =
+utf8ToText :: ByteString -> Text
+utf8ToText lbs =
   case decodeUtf8' lbs of
-    Left _  -> BC.unpack lbs  -- if decoding fails, treat as latin1
-    Right t -> TL.unpack t
+    Left _  -> T.pack $ BC.unpack lbs  -- if decoding fails, treat as latin1
+    Right t -> TL.toStrict t
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4bae8942b..09445622d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,11 +5,10 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE IncoherentInstances        #-}
 {-# LANGUAGE MultiParamTypeClasses      #-}
-{-# LANGUAGE ViewPatterns               #-}
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -19,8 +18,7 @@
 A utility library with parsers used in pandoc readers.
 -}
 
-module Text.Pandoc.Parsing ( take1WhileP,
-                             takeP,
+module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              countChar,
                              textStr,
                              anyLine,
@@ -105,14 +103,14 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              singleQuoteEnd,
                              doubleQuoteStart,
                              doubleQuoteEnd,
-                             ellipses,
                              apostrophe,
+                             doubleCloseQuote,
+                             ellipses,
                              dash,
                              nested,
                              citeKey,
                              Parser,
                              ParserT,
-                             F,
                              Future(..),
                              runF,
                              askF,
@@ -123,7 +121,6 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              (<+?>),
                              extractIdClass,
                              insertIncludedFile,
-                             insertIncludedFileF,
                              -- * Re-exports from Text.Parsec
                              Stream,
                              runParser,
@@ -134,22 +131,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              getInput,
                              setInput,
                              unexpected,
-                             char,
-                             letter,
-                             digit,
-                             alphaNum,
                              skipMany,
                              skipMany1,
-                             spaces,
-                             space,
-                             anyChar,
-                             satisfy,
-                             newline,
-                             string,
                              count,
                              eof,
-                             noneOf,
-                             oneOf,
                              lookAhead,
                              notFollowedBy,
                              many,
@@ -174,6 +159,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              SourcePos,
                              getPosition,
                              setPosition,
+                             sourceName,
+                             setSourceName,
                              sourceColumn,
                              sourceLine,
                              setSourceColumn,
@@ -189,48 +176,141 @@ module Text.Pandoc.Parsing ( take1WhileP,
 where
 
 import Control.Monad.Identity
+    ( guard,
+      join,
+      unless,
+      when,
+      void,
+      liftM2,
+      liftM,
+      Identity(..),
+      MonadPlus(mzero) )
 import Control.Monad.Reader
-import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper,
-                  isPunctuation, isSpace, ord, toLower, toUpper)
-import Data.Default
+    ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
+import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
+                  isSpace, ord, toLower, toUpper)
+import Data.Default ( Default(..) )
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
 import qualified Data.Map as M
-import Data.Maybe (mapMaybe, fromMaybe)
+import Data.Maybe (fromMaybe)
 import qualified Data.Set as Set
-import Data.String
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
-import Text.Pandoc.Asciify (toAsciiChar)
+import Text.Pandoc.Asciify (toAsciiText)
 import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
 import Text.Pandoc.Definition
+    ( Target,
+      nullMeta,
+      nullAttr,
+      Meta,
+      ColWidth(ColWidthDefault, ColWidth),
+      TableFoot(TableFoot),
+      TableBody(TableBody),
+      Attr,
+      TableHead(TableHead),
+      Row(..),
+      Alignment(..),
+      Inline(Str),
+      ListNumberDelim(..),
+      ListAttributes,
+      ListNumberStyle(..) )
 import Text.Pandoc.Logging
+    ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
 import Text.Pandoc.Options
+    ( extensionEnabled,
+      Extension(Ext_old_dashes, Ext_tex_math_dollars,
+                Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
+                Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
+      ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
 import Text.Pandoc.Readers.LaTeX.Types (Macro)
 import Text.Pandoc.Shared
+    ( uniqueIdent,
+      tshow,
+      mapLeft,
+      compactify,
+      trim,
+      trimr,
+      splitTextByIndices,
+      safeRead,
+      trimMath,
+      schemes,
+      escapeURI )
+import Text.Pandoc.Sources
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Pandoc.XML (fromEntities)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (initialPos, newPos, updatePosString)
-
-import Control.Monad.Except
+import Text.Parsec
+    ( between,
+      setSourceName,
+      Parsec,
+      Column,
+      Line,
+      incSourceLine,
+      incSourceColumn,
+      setSourceLine,
+      setSourceColumn,
+      sourceLine,
+      sourceColumn,
+      sourceName,
+      setSourceName,
+      setPosition,
+      getPosition,
+      updateState,
+      setState,
+      getState,
+      optionMaybe,
+      optional,
+      option,
+      endBy1,
+      endBy,
+      sepEndBy1,
+      sepEndBy,
+      sepBy1,
+      sepBy,
+      try,
+      choice,
+      (<?>),
+      (<|>),
+      manyTill,
+      many1,
+      many,
+      notFollowedBy,
+      lookAhead,
+      eof,
+      count,
+      skipMany1,
+      skipMany,
+      unexpected,
+      setInput,
+      getInput,
+      anyToken,
+      tokenPrim,
+      parse,
+      runParserT,
+      runParser,
+      ParseError,
+      ParsecT,
+      SourcePos,
+      Stream(..) )
+import Text.Parsec.Pos (initialPos, newPos)
+import Control.Monad.Except ( MonadError(throwError) )
 import Text.Pandoc.Error
+    ( PandocError(PandocParseError, PandocParsecError) )
 
 type Parser t s = Parsec t s
 
 type ParserT = ParsecT
 
+
 -- | Reader monad wrapping the parser state. This is used to possibly delay
 -- evaluation until all relevant information has been parsed and made available
 -- in the parser state.
 newtype Future s a = Future { runDelayed :: Reader s a }
   deriving (Monad, Applicative, Functor)
 
-type F = Future ParserState
-
 runF :: Future s a -> s -> a
 runF = runReader . runDelayed
 
@@ -253,70 +333,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
   mappend = (<>)
 
 -- | Like @count@, but packs its result
-countChar :: (Stream s m Char, Monad m)
+countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
           => Int
           -> ParsecT s st m Char
           -> ParsecT s st m Text
 countChar n = fmap T.pack . count n
 
 -- | Like @string@, but uses @Text@.
-textStr :: Stream s m Char => Text -> ParsecT s u m Text
+textStr :: (Stream s m Char, UpdateSourcePos s Char)
+        => Text -> ParsecT s u m Text
 textStr t = string (T.unpack t) $> t
 
--- | Parse characters while a predicate is true.
-take1WhileP :: Monad m
-           => (Char -> Bool)
-           -> ParserT Text st m Text
-take1WhileP f = do
-  -- needed to persuade parsec that this won't match an empty string:
-  c <- satisfy f
-  inp <- getInput
-  pos <- getPosition
-  let (t, rest) = T.span f inp
-  setInput rest
-  setPosition $
-    if f '\t' || f '\n'
-       then updatePosString pos $ T.unpack t
-       else incSourceColumn pos (T.length t)
-  return $ T.singleton c <> t
-
--- Parse n characters of input (or the rest of the input if
--- there aren't n characters).
-takeP :: Monad m => Int -> ParserT Text st m Text
-takeP n = do
-  guard (n > 0)
-  -- faster than 'count n anyChar'
-  inp <- getInput
-  pos <- getPosition
-  let (xs, rest) = T.splitAt n inp
-  -- needed to persuade parsec that this won't match an empty string:
-  anyChar
-  setInput rest
-  setPosition $ updatePosString pos $ T.unpack xs
-  return xs
-
--- | Parse any line of text
-anyLine :: Monad m => ParserT Text st m Text
+
+-- | Parse any line of text, returning the contents without the
+-- final newline.
+anyLine :: Monad m => ParserT Sources st m Text
 anyLine = do
   -- This is much faster than:
   -- manyTill anyChar newline
   inp <- getInput
-  pos <- getPosition
-  case T.break (=='\n') inp of
-       (this, T.uncons -> Just ('\n', rest)) -> do
-         -- needed to persuade parsec that this won't match an empty string:
-         anyChar
-         setInput rest
-         setPosition $ incSourceLine (setSourceColumn pos 1) 1
-         return this
-       _ -> mzero
+  case inp of
+    Sources [] -> mzero
+    Sources ((fp,t):inps) ->
+      -- we assume that lines don't span different input files
+      case T.break (=='\n') t of
+           (this, rest)
+             | T.null rest
+             , not (null inps) ->
+                -- line may span different input files, so do it
+                 -- character by character
+                 T.pack <$> manyTill anyChar newline
+             | otherwise -> do --  either end of inputs or newline in rest
+                 setInput $ Sources ((fp, rest):inps)
+                 char '\n' -- needed so parsec knows we won't match empty string
+                           -- and so source pos is updated
+                 return this
 
 -- | Parse any line, include the final newline in the output
-anyLineNewline :: Monad m => ParserT Text st m Text
+anyLineNewline :: Monad m => ParserT Sources st m Text
 anyLineNewline = (<> "\n") <$> anyLine
 
 -- | Parse indent by specified number of spaces (or equiv. tabs)
-indentWith :: Stream s m Char
+indentWith :: (Stream s m Char, UpdateSourcePos s Char)
            => HasReaderOptions st
            => Int -> ParserT s st m Text
 indentWith num = do
@@ -401,11 +459,13 @@ notFollowedBy' p  = try $ join $  do  a <- try p
                                   return (return ())
 -- (This version due to Andrew Pimlott on the Haskell mailing list.)
 
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
+oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
+              => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
 oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
 
 -- TODO: This should be re-implemented in a Text-aware way
-oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
+               => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
 oneOfStrings'' _ []   = Prelude.fail "no strings"
 oneOfStrings'' matches strs = try $ do
   c <- anyChar
@@ -420,14 +480,16 @@ oneOfStrings'' matches strs = try $ do
 -- | Parses one of a list of strings.  If the list contains
 -- two strings one of which is a prefix of the other, the longer
 -- string will be matched if possible.
-oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
+             => [Text] -> ParserT s st m Text
 oneOfStrings = oneOfStrings' (==)
 
 -- | Parses one of a list of strings (tried in order), case insensitive.
 
 -- TODO: This will not be accurate with general Unicode (neither
 -- Text.toLower nor Text.toCaseFold can be implemented with a map)
-oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
+               => [Text] -> ParserT s st m Text
 oneOfStringsCI = oneOfStrings' ciMatch
   where ciMatch x y = toLower' x == toLower' y
         -- this optimizes toLower by checking common ASCII case
@@ -438,29 +500,41 @@ oneOfStringsCI = oneOfStrings' ciMatch
                    | otherwise = toLower c
 
 -- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
+spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 
 -- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
-nonspaceChar = noneOf ['\t', '\n', ' ', '\r']
+nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Char
+nonspaceChar = satisfy (not . isSpaceChar)
+
+isSpaceChar :: Char -> Bool
+isSpaceChar ' '  = True
+isSpaceChar '\t' = True
+isSpaceChar '\n' = True
+isSpaceChar '\r' = True
+isSpaceChar _    = False
 
 -- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
+skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m ()
 skipSpaces = skipMany spaceChar
 
 -- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
+blankline :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 blankline = try $ skipSpaces >> newline
 
 -- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m Text
+blanklines :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m Text
 blanklines = T.pack <$> many1 blankline
 
 -- | Gobble n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleSpaces :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m ()
+             => Int -> ParserT Sources st m ()
 gobbleSpaces 0 = return ()
 gobbleSpaces n
   | n < 0     = error "gobbleSpaces called with negative number"
@@ -468,18 +542,26 @@ gobbleSpaces n
       char ' ' <|> eatOneSpaceOfTab
       gobbleSpaces (n - 1)
 
-eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
 eatOneSpaceOfTab = do
-  char '\t'
+  lookAhead (char '\t')
+  pos <- getPosition
   tabstop <- getOption readerTabStop
+  -- replace the tab on the input stream with spaces
+  let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
   inp <- getInput
-  setInput $ T.replicate (tabstop - 1) " " <> inp
-  return ' '
+  setInput $
+    case inp of
+      Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
+      Sources ((fp,t):rest) ->
+        -- drop the tab and add spaces
+        Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
+  char ' '
 
 -- | Gobble up to n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
-                   => Int -> ParserT Text st m Int
+                   => Int -> ParserT Sources st m Int
 gobbleAtMostSpaces 0 = return 0
 gobbleAtMostSpaces n
   | n < 0     = error "gobbleAtMostSpaces called with negative number"
@@ -488,7 +570,8 @@ gobbleAtMostSpaces n
       (+ 1) <$> gobbleAtMostSpaces (n - 1)
 
 -- | Parses material enclosed between start and end parsers.
-enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser
+enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m t   -- ^ start parser
          -> ParserT s st m end  -- ^ end parser
          -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
          -> ParserT s st m [a]
@@ -496,39 +579,41 @@ enclosed start end parser = try $
   start >> notFollowedBy space >> many1Till parser end
 
 -- | Parse string, case insensitive.
-stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
+stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
+              => Text -> ParserT s st m Text
 stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
 
-stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
+stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
+               => String -> ParserT s st m String
 stringAnyCase' [] = string ""
 stringAnyCase' (x:xs) = do
   firstChar <- char (toUpper x) <|> char (toLower x)
   rest <- stringAnyCase' xs
   return (firstChar:rest)
 
+-- TODO rewrite by just adding to Sources stream?
 -- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: (Stream s m Char, IsString s)
-                => ParserT s st m r
+parseFromString :: Monad m
+                => ParserT Sources st m r
                 -> Text
-                -> ParserT s st m r
+                -> ParserT Sources st m r
 parseFromString parser str = do
   oldPos <- getPosition
   setPosition $ initialPos "chunk"
   oldInput <- getInput
-  setInput $ fromString $ T.unpack str
+  setInput $ toSources str
   result <- parser
   spaces
-  eof
   setInput oldInput
   setPosition oldPos
   return result
 
 -- | Like 'parseFromString' but specialized for 'ParserState'.
 -- This resets 'stateLastStrPos', which is almost always what we want.
-parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
-                 => ParserT s u m a
+parseFromString' :: (Monad m, HasLastStrPosition u)
+                 => ParserT Sources u m a
                  -> Text
-                 -> ParserT s u m a
+                 -> ParserT Sources u m a
 parseFromString' parser str = do
   oldLastStrPos <- getLastStrPos <$> getState
   updateState $ setLastStrPos Nothing
@@ -537,7 +622,7 @@ parseFromString' parser str = do
   return res
 
 -- | Parse raw line block up to and including blank lines.
-lineClump :: Monad m => ParserT Text st m Text
+lineClump :: Monad m => ParserT Sources st m Text
 lineClump = blanklines
           <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
 
@@ -546,7 +631,7 @@ lineClump = blanklines
 -- pairs of open and close, which must be different. For example,
 -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
 -- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
                 -> ParserT s st m Text
 charsInBalanced open close parser = try $ do
   char open
@@ -565,7 +650,7 @@ charsInBalanced open close parser = try $ do
 -- Auxiliary functions for romanNumeral:
 
 -- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Stream s m Char => Bool                  -- ^ Uppercase if true
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool                  -- ^ Uppercase if true
              -> ParserT s st m Int
 romanNumeral upperCase = do
     let rchar uc = char $ if upperCase then uc else toLower uc
@@ -601,20 +686,19 @@ romanNumeral upperCase = do
 
 -- | Parses an email address; returns original and corresponding
 -- escaped mailto: URI.
-emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
+emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
  where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
                            in  (full, escapeURI $ "mailto:" <> full)
        mailbox           = intercalate "." <$> (emailWord `sepBy1'` dot)
        domain            = intercalate "." <$> (subdomain `sepBy1'` dot)
        dot               = char '.'
-       subdomain         = many1 $ alphaNum <|> innerPunct
+       subdomain         = many1 $ alphaNum <|> innerPunct (=='-')
        -- this excludes some valid email addresses, since an
        -- email could contain e.g. '__', but gives better results
        -- for our purposes, when combined with markdown parsing:
-       innerPunct        = try (satisfy (\c -> isEmailPunct c || c == '@')
-                                 <* notFollowedBy space
-                                 <* notFollowedBy (satisfy isPunctuation))
+       innerPunct f      = try (satisfy f
+                                 <* notFollowedBy (satisfy (not . isAlphaNum)))
        -- technically an email address could begin with a symbol,
        -- but allowing this creates too many problems.
        -- See e.g. https://github.com/jgm/pandoc/issues/2940
@@ -625,16 +709,16 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        isEmailPunct c    = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
 
 
-uriScheme :: Stream s m Char => ParserT s st m Text
+uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
 uriScheme = oneOfStringsCI (Set.toList schemes)
 
 -- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream s m Char => ParserT s st m (Text, Text)
+uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 uri = try $ do
   scheme <- uriScheme
   char ':'
   -- Avoid parsing e.g. "**Notes:**" as a raw URI:
-  notFollowedBy (oneOf "*_]")
+  notFollowedBy $ satisfy (\c -> c == '*' || c == '_' || c == ']')
   -- We allow sentence punctuation except at the end, since
   -- we don't want the trailing '.' in 'http://google.com.' We want to allow
   -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
@@ -648,7 +732,20 @@ uri = try $ do
   let uri' = scheme <> ":" <> fromEntities str'
   return (uri', escapeURI uri')
   where
-    wordChar = alphaNum <|> oneOf "#$%+/@\\_-&="
+    isWordChar '#' = True
+    isWordChar '$' = True
+    isWordChar '%' = True
+    isWordChar '+' = True
+    isWordChar '/' = True
+    isWordChar '@' = True
+    isWordChar '\\' = True
+    isWordChar '_' = True
+    isWordChar '-' = True
+    isWordChar '&' = True
+    isWordChar '=' = True
+    isWordChar c   = isAlphaNum c
+
+    wordChar = satisfy isWordChar
     percentEscaped = try $ (:) <$> char '%' <*> many1 hexDigit
     entity = try $ pure <$> characterReference
     punct = try $ many1 (char ',') <|> fmap pure (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
@@ -659,11 +756,13 @@ uri = try $ do
     uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
                                    return (T.pack $ [l] ++ chunk ++ [r])
 
-mathInlineWith :: Stream s m Char  => Text -> Text -> ParserT s st m Text
+mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char)  => Text -> Text -> ParserT s st m Text
 mathInlineWith op cl = try $ do
   textStr op
   when (op == "$") $ notFollowedBy space
-  words' <- many1Till (countChar 1 (noneOf " \t\n\\")
+  words' <- many1Till (
+                       (T.singleton <$>
+                          satisfy (\c -> not (isSpaceChar c || c == '\\')))
                    <|> (char '\\' >>
                            -- This next clause is needed because \text{..} can
                            -- contain $, \(\), etc.
@@ -671,17 +770,17 @@ mathInlineWith op cl = try $ do
                                  (("\\text" <>) <$> inBalancedBraces 0 ""))
                             <|>  (\c -> T.pack ['\\',c]) <$> anyChar))
                    <|> do (blankline <* notFollowedBy' blankline) <|>
-                             (oneOf " \t" <* skipMany (oneOf " \t"))
+                             (spaceChar <* skipMany spaceChar)
                           notFollowedBy (char '$')
                           return " "
                     ) (try $ textStr cl)
   notFollowedBy digit  -- to prevent capture of $5
   return $ trimMath $ T.concat words'
  where
-  inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
+  inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
   inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
 
-  inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
+  inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
   inBalancedBraces' 0 "" = do
     c <- anyChar
     if c == '{'
@@ -698,12 +797,13 @@ mathInlineWith op cl = try $ do
          '{' -> inBalancedBraces' (numOpen + 1) (c:xs)
          _   -> inBalancedBraces' numOpen (c:xs)
 
-mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
+mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
 mathDisplayWith op cl = try $ fmap T.pack $ do
   textStr op
-  many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl)
+  many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
+            (try $ textStr cl)
 
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
+mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m Text
 mathDisplay =
       (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
@@ -712,7 +812,7 @@ mathDisplay =
   <|> (guardEnabled Ext_tex_math_double_backslash >>
        mathDisplayWith "\\\\[" "\\\\]")
 
-mathInline :: (HasReaderOptions st , Stream s m Char)
+mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m Text
 mathInline =
       (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
@@ -725,7 +825,7 @@ mathInline =
 -- displacement (the difference between the source column at the end
 -- and the source column at the beginning). Vertical displacement
 -- (source row) is ignored.
-withHorizDisplacement :: Stream s m Char
+withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
                       => ParserT s st m a  -- ^ Parser to apply
                       -> ParserT s st m (a, Int) -- ^ (result, displacement)
 withHorizDisplacement parser = do
@@ -737,30 +837,37 @@ withHorizDisplacement parser = do
 -- | Applies a parser and returns the raw string that was parsed,
 -- along with the value produced by the parser.
 withRaw :: Monad m
-        => ParsecT Text st m a
-        -> ParsecT Text st m (a, Text)
+        => ParsecT Sources st m a
+        -> ParsecT Sources st m (a, Text)
 withRaw parser = do
-  pos1 <- getPosition
-  inp <- getInput
+  inps1 <- getInput
   result <- parser
-  pos2 <- getPosition
-  let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
-  let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
-  let inplines = take ((l2 - l1) + 1) $ T.lines inp
-  let raw = case inplines of
-                []  -> ""
-                [l] -> T.take (c2 - c1) l
-                ls  -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
-  return (result, raw)
+  inps2 <- getInput
+  -- 'raw' is the difference between inps1 and inps2
+  return (result, sourcesDifference inps1 inps2)
+
+sourcesDifference :: Sources -> Sources -> Text
+sourcesDifference (Sources is1) (Sources is2) = go is1 is2
+ where
+   go inps1 inps2 =
+    case (inps1, inps2) of
+      ([], _) -> mempty
+      (_, []) -> mconcat $ map snd inps1
+      ((p1,t1):rest1, (p2, t2):rest2)
+        | p1 == p2
+        , t1 == t2  -> go rest1 rest2
+        | p1 == p2
+        , t1 /= t2  -> fromMaybe mempty $ T.stripSuffix t2 t1
+        | otherwise -> t1 <> go rest1 inps2
 
 -- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
+escaped :: (Stream s m Char, UpdateSourcePos s Char)
         => ParserT s st m Char  -- ^ Parser for character to escape
         -> ParserT s st m Char
 escaped parser = try $ char '\\' >> parser
 
 -- | Parse character entity.
-characterReference :: Stream s m Char => ParserT s st m Char
+characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 characterReference = try $ do
   char '&'
   ent <- many1Till nonspaceChar (char ';')
@@ -773,19 +880,19 @@ characterReference = try $ do
        _            -> Prelude.fail "entity not found"
 
 -- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperRoman = do
   num <- romanNumeral True
   return (UpperRoman, num)
 
 -- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerRoman = do
   num <- romanNumeral False
   return (LowerRoman, num)
 
 -- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 decimal = do
   num <- many1 digit
   return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
@@ -794,7 +901,7 @@ decimal = do
 -- returns (DefaultStyle, [next example number]).  The next
 -- example number is incremented in parser state, and the label
 -- (if present) is added to the label table.
-exampleNum :: Stream s m Char
+exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s ParserState m (ListNumberStyle, Int)
 exampleNum = do
   char '@'
@@ -813,37 +920,37 @@ exampleNum = do
   return (Example, num)
 
 -- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 defaultNum = do
   char '#'
   return (DefaultStyle, 1)
 
 -- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerAlpha = do
-  ch <- oneOf ['a'..'z']
+  ch <- satisfy isAsciiLower
   return (LowerAlpha, ord ch - ord 'a' + 1)
 
 -- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperAlpha = do
-  ch <- oneOf ['A'..'Z']
+  ch <- satisfy isAsciiUpper
   return (UpperAlpha, ord ch - ord 'A' + 1)
 
 -- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
            (char 'I' >> return (UpperRoman, 1))
 
 -- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
+anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
 anyOrderedListMarker = choice
   [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
                            numParser <- [decimal, exampleNum, defaultNum, romanOne,
                            lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
 
 -- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
+inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m (ListNumberStyle, Int)
          -> ParserT s st m ListAttributes
 inPeriod num = try $ do
@@ -855,7 +962,7 @@ inPeriod num = try $ do
   return (start, style, delim)
 
 -- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
+inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m (ListNumberStyle, Int)
            -> ParserT s st m ListAttributes
 inOneParen num = try $ do
@@ -864,7 +971,7 @@ inOneParen num = try $ do
   return (start, style, OneParen)
 
 -- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
+inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m (ListNumberStyle, Int)
             -> ParserT s st m ListAttributes
 inTwoParens num = try $ do
@@ -875,7 +982,7 @@ inTwoParens num = try $ do
 
 -- | Parses an ordered list marker with a given style and delimiter,
 -- returns number.
-orderedListMarker :: Stream s m Char
+orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
                   => ListNumberStyle
                   -> ListNumberDelim
                   -> ParserT s ParserState m Int
@@ -898,10 +1005,10 @@ orderedListMarker style delim = do
   return start
 
 -- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
+charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
 charRef = Str . T.singleton <$> characterReference
 
-lineBlockLine :: Monad m => ParserT Text st m Text
+lineBlockLine :: Monad m => ParserT Sources st m Text
 lineBlockLine = try $ do
   char '|'
   char ' '
@@ -911,11 +1018,11 @@ lineBlockLine = try $ do
   continuations <- many (try $ char ' ' >> anyLine)
   return $ white <> T.unwords (line : continuations)
 
-blankLineBlockLine :: Stream s m Char => ParserT s st m Char
+blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 blankLineBlockLine = try (char '|' >> blankline)
 
 -- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Monad m => ParserT Text st m [Text]
+lineBlockLines :: Monad m => ParserT Sources st m [Text]
 lineBlockLines = try $ do
   lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
   skipMany blankline
@@ -923,7 +1030,8 @@ lineBlockLines = try $ do
 
 -- | Parse a table using 'headerParser', 'rowParser',
 -- 'lineParser', and 'footerParser'.
-tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith :: (Stream s m Char, UpdateSourcePos s Char,
+              HasReaderOptions st, Monad mf)
           => ParserT s st m (mf [Blocks], [Alignment], [Int])
           -> ([Int] -> ParserT s st m (mf [Blocks]))
           -> ParserT s st m sep
@@ -943,7 +1051,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
 
 type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
 
-tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
+               HasReaderOptions st, Monad mf)
            => ParserT s st m (mf [Blocks], [Alignment], [Int])
            -> ([Int] -> ParserT s st m (mf [Blocks]))
            -> ParserT s st m sep
@@ -992,20 +1101,19 @@ widthsFromIndices numColumns' indices =
 -- (which may be grid), then the rows,
 -- which may be grid, separated by blank lines, and
 -- ending with a footer (dashed line followed by blank line).
-gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                  Monad mf, IsString s)
-              => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
+              => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
               -> Bool                        -- ^ Headerless table
-              -> ParserT s st m (mf Blocks)
+              -> ParserT Sources st m (mf Blocks)
 gridTableWith blocks headless =
   tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
             (gridTableSep '-') gridTableFooter
 
-gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                   Monad mf, IsString s)
-               => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
+                   Monad mf)
+               => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
                -> Bool                        -- ^ Headerless table
-               -> ParserT s st m (TableComponents mf)
+               -> ParserT Sources st m (TableComponents mf)
 gridTableWith' blocks headless =
   tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
              (gridTableSep '-') gridTableFooter
@@ -1014,7 +1122,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
 gridTableSplitLine indices line = map removeFinalBar $ tail $
   splitTextByIndices (init indices) $ trimr line
 
-gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
+gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
 gridPart ch = do
   leftColon <- option False (True <$ char ':')
   dashes <- many1 (char ch)
@@ -1029,7 +1137,7 @@ gridPart ch = do
                        (False, False) -> AlignDefault
   return ((lengthDashes, lengthDashes + 1), alignment)
 
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
+gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
 gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
 
 removeFinalBar :: Text -> Text
@@ -1038,47 +1146,47 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
     go c = T.any (== c) " \t"
 
 -- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
+gridTableSep :: Monad m => Char -> ParserT Sources st m Char
 gridTableSep ch = try $ gridDashedLines ch >> return '\n'
 
 -- | Parse header for a grid table.
-gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
+gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
                 => Bool -- ^ Headerless table
-                -> ParserT s st m (mf Blocks)
-                -> ParserT s st m (mf [Blocks], [Alignment], [Int])
-gridTableHeader headless blocks = try $ do
+                -> ParserT Sources st m (mf Blocks)
+                -> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
+gridTableHeader True _ = do
+  optional blanklines
+  dashes <- gridDashedLines '-'
+  let aligns = map snd dashes
+  let lines'   = map (snd . fst) dashes
+  let indices  = scanl (+) 0 lines'
+  return (return [], aligns, indices)
+gridTableHeader False blocks = try $ do
   optional blanklines
   dashes <- gridDashedLines '-'
-  rawContent  <- if headless
-                    then return $ repeat ""
-                    else many1
-                         (notFollowedBy (gridTableSep '=') >> char '|' >>
+  rawContent  <- many1 (notFollowedBy (gridTableSep '=') >> char '|' >>
                            T.pack <$> many1Till anyChar newline)
-  underDashes <- if headless
-                    then return dashes
-                    else gridDashedLines '='
+  underDashes <- gridDashedLines '='
   guard $ length dashes == length underDashes
   let lines'   = map (snd . fst) underDashes
   let indices  = scanl (+) 0 lines'
   let aligns   = map snd underDashes
-  let rawHeads = if headless
-                    then replicate (length underDashes) ""
-                    else map (T.unlines . map trim) $ transpose
+  let rawHeads = map (T.unlines . map trim) $ transpose
                        $ map (gridTableSplitLine indices) rawContent
   heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
   return (heads, aligns, indices)
 
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
+gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
 gridTableRawLine indices = do
   char '|'
   line <- many1Till anyChar newline
   return (gridTableSplitLine indices $ T.pack line)
 
 -- | Parse row of grid table.
-gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
-             => ParserT s st m (mf Blocks)
+gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
+             => ParserT Sources st m (mf Blocks)
              -> [Int]
-             -> ParserT s st m (mf [Blocks])
+             -> ParserT Sources st m (mf [Blocks])
 gridTableRow blocks indices = do
   colLines <- many1 (gridTableRawLine indices)
   let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
@@ -1099,34 +1207,38 @@ removeOneLeadingSpace xs =
            Just (c, _) -> c == ' '
 
 -- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s st m ()
+gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 gridTableFooter = optional blanklines
 
 ---
 
 -- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: (Stream s m Char, ToText s)
-          => ParserT s st m a    -- ^ parser
-          -> st                  -- ^ initial state
-          -> s                   -- ^ input
+readWithM :: (Monad m, ToSources t)
+          => ParserT Sources st m a  -- ^ parser
+          -> st                      -- ^ initial state
+          -> t                       -- ^ input
           -> m (Either PandocError a)
 readWithM parser state input =
-    mapLeft (PandocParsecError $ toText input) `liftM` runParserT parser state "source" input
+    mapLeft (PandocParsecError sources)
+      <$> runParserT parser state (initialSourceName sources) sources
+ where
+   sources = toSources input
 
 -- | Parse a string with a given parser and state
-readWith :: Parser Text st a
+readWith :: ToSources t
+         => Parser Sources st a
          -> st
-         -> Text
+         -> t
          -> Either PandocError a
 readWith p t inp = runIdentity $ readWithM p t inp
 
 -- | Parse a string with @parser@ (for testing).
 testStringWith :: Show a
-               => ParserT Text ParserState Identity a
+               => ParserT Sources ParserState Identity a
                -> Text
                -> IO ()
-testStringWith parser str = UTF8.putStrLn $ show $
-                            readWith parser defaultParserState str
+testStringWith parser str = UTF8.putStrLn $ tshow $
+                            readWith parser defaultParserState (toSources str)
 
 -- | Parsing options.
 data ParserState = ParserState
@@ -1146,7 +1258,7 @@ data ParserState = ParserState
       stateInNote            :: Bool,          -- ^ True if parsing note contents
       stateNoteNumber        :: Int,           -- ^ Last note number for citations
       stateMeta              :: Meta,          -- ^ Document metadata
-      stateMeta'             :: F Meta,        -- ^ Document metadata
+      stateMeta'             :: Future ParserState Meta, -- ^ Document metadata
       stateCitations         :: M.Map Text Text, -- ^ RST-style citations
       stateHeaderTable       :: [HeaderType],  -- ^ Ordered list of header types used
       stateIdentifiers       :: Set.Set Text, -- ^ Header identifiers used
@@ -1325,7 +1437,7 @@ data QuoteContext
 
 type NoteTable = [(Text, Text)]
 
-type NoteTable' = M.Map Text (SourcePos, F Blocks)
+type NoteTable' = M.Map Text (SourcePos, Future ParserState Blocks)
 -- used in markdown reader
 
 newtype Key = Key Text deriving (Show, Read, Eq, Ord)
@@ -1360,7 +1472,7 @@ registerHeader (ident,classes,kvs) header' = do
      then do
        let id' = uniqueIdent exts (B.toList header') ids
        let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
-                     then T.pack $ mapMaybe toAsciiChar $ T.unpack id'
+                     then toAsciiText id'
                      else id'
        updateState $ updateIdentifierList $ Set.insert id'
        updateState $ updateIdentifierList $ Set.insert id''
@@ -1373,34 +1485,42 @@ registerHeader (ident,classes,kvs) header' = do
           updateState $ updateIdentifierList $ Set.insert ident
         return (ident,classes,kvs)
 
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m Inlines
                  -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   guardEnabled Ext_smart
-  choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-
-apostrophe :: Stream s m Char => ParserT s st m Inlines
-apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
+  choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
 
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+quoted :: (HasLastStrPosition st, HasQuoteContext st m,
+           Stream s m Char, UpdateSourcePos s Char)
        => ParserT s st m Inlines
        -> ParserT s st m Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
-singleQuoted inlineParser = try $ B.singleQuoted . mconcat
-  <$  singleQuoteStart
-  <*> withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)
-
-doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+singleQuoted inlineParser = do
+  singleQuoteStart
+  (B.singleQuoted . mconcat <$>
+    try
+     (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
+   <|> pure "\8217"
+
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
-doubleQuoted inlineParser = try $ B.doubleQuoted . mconcat
-  <$  doubleQuoteStart
-  <*> withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)
+doubleQuoted inlineParser = do
+  doubleQuoteStart
+  (B.doubleQuoted . mconcat <$>
+    try
+     (withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)))
+   <|> pure (B.str "\8220")
 
 failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
                      => QuoteContext
@@ -1409,13 +1529,14 @@ failIfInQuoteContext context = do
   context' <- getQuoteContext
   when (context' == context) $ Prelude.fail "already inside quotes"
 
-charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
+charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
 charOrRef cs =
   oneOf cs <|> try (do c <- characterReference
                        guard (c `elem` cs)
                        return c)
 
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
@@ -1423,30 +1544,39 @@ singleQuoteStart = do
   guard =<< notAfterString
   try $ do
     charOrRef "'\8216\145"
-    notFollowedBy (oneOf [' ', '\t', '\n'])
+    void $ lookAhead (satisfy (not . isSpaceChar))
 
-singleQuoteEnd :: Stream s m Char
+singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
   notFollowedBy alphaNum
 
-doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+doubleQuoteStart :: (HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
+  guard =<< notAfterString
   try $ do charOrRef "\"\8220\147"
-           notFollowedBy (oneOf [' ', '\t', '\n'])
+           void $ lookAhead (satisfy (not . isSpaceChar))
 
-doubleQuoteEnd :: Stream s m Char
+doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
-ellipses :: Stream s m Char
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
+
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
+doubleCloseQuote = B.str "\8221" <$ char '"'
+
+ellipses :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m Inlines
 ellipses = try (string "..." >> return (B.str "\8230"))
 
-dash :: (HasReaderOptions st, Stream s m Char)
+dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
      => ParserT s st m Inlines
 dash = try $ do
   oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
@@ -1473,20 +1603,28 @@ nested p = do
   updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
   return res
 
-citeKey :: (Stream s m Char, HasLastStrPosition st)
-        => ParserT s st m (Bool, Text)
-citeKey = try $ do
+citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
+        => Bool -- ^ If True, allow expanded @{..} syntax.
+        -> ParserT s st m (Bool, Text)
+citeKey allowBraced = try $ do
   guard =<< notAfterString
   suppress_author <- option False (True <$ char '-')
   char '@'
+  key <- simpleCiteIdentifier
+        <|> if allowBraced
+               then charsInBalanced '{' '}' (satisfy (not . isSpace))
+               else mzero
+  return (suppress_author, key)
+
+simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char)
+                      => ParserT s st m Text
+simpleCiteIdentifier = do
   firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
   let regchar = satisfy (\c -> isAlphaNum c || c == '_')
   let internal p = try $ p <* lookAhead regchar
   rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
                  try (oneOf ":/" <* lookAhead (char '/'))
-  let key = firstChar:rest
-  return (suppress_author, T.pack key)
-
+  return $ T.pack $ firstChar:rest
 
 token :: (Stream s m t)
       => (t -> Text)
@@ -1506,12 +1644,15 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
     cls'   = maybe cls T.words $ lookup "class" kvs
     kvs'   = filter (\(k,_) -> k /= "id" || k /= "class") kvs
 
-insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT a st m (mf Blocks)
-                    -> (Text -> a)
-                    -> [FilePath] -> FilePath
-                    -> ParserT a st m (mf Blocks)
-insertIncludedFile' blocks totoks dirs f = do
+insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
+                   => ParserT a st m b -- ^ parser to apply
+                   -> (Text -> a) -- ^ convert Text to stream type
+                   -> [FilePath]  -- ^ search path (directories)
+                   -> FilePath    -- ^ path of file to include
+                   -> Maybe Int   -- ^ start line (negative counts from end)
+                   -> Maybe Int   -- ^ end line (negative counts from end)
+                   -> ParserT a st m b
+insertIncludedFile parser toStream dirs f mbstartline mbendline = do
   oldPos <- getPosition
   oldInput <- getInput
   containers <- getIncludeFiles <$> getState
@@ -1520,32 +1661,32 @@ insertIncludedFile' blocks totoks dirs f = do
   updateState $ addIncludeFile $ T.pack f
   mbcontents <- readFileFromDirs dirs f
   contents <- case mbcontents of
-                   Just s -> return s
+                   Just s -> return $ exciseLines mbstartline mbendline s
                    Nothing -> do
                      report $ CouldNotLoadIncludeFile (T.pack f) oldPos
                      return ""
-  setPosition $ newPos f 1 1
-  setInput $ totoks contents
-  bs <- blocks
+  setInput $ toStream contents
+  setPosition $ newPos f (fromMaybe 1 mbstartline) 1
+  result <- parser
   setInput oldInput
   setPosition oldPos
   updateState dropLatestIncludeFile
-  return bs
+  return result
+
+exciseLines :: Maybe Int -> Maybe Int -> Text -> Text
+exciseLines Nothing Nothing t = t
+exciseLines mbstartline mbendline t =
+  T.unlines $ take (endline' - (startline' - 1))
+            $ drop (startline' - 1) contentLines
+ where
+  contentLines = T.lines t
+  numLines = length contentLines
+  startline' = case mbstartline of
+                 Nothing -> 1
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
+  endline' = case mbendline of
+                 Nothing -> numLines
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
 
--- | Parse content of include file as blocks. Circular includes result in an
--- @PandocParseError@.
-insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
-                   => ParserT [a] st m Blocks
-                   -> (Text -> [a])
-                   -> [FilePath] -> FilePath
-                   -> ParserT [a] st m Blocks
-insertIncludedFile blocks totoks dirs f =
-  runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
-
--- | Parse content of include file as future blocks. Circular includes result in
--- an @PandocParseError@.
-insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT Text st m (Future st Blocks)
-                    -> [FilePath] -> FilePath
-                    -> ParserT Text st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p id
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
index 866972e3f..b896feb7e 100644
--- a/src/Text/Pandoc/Process.hs
+++ b/src/Text/Pandoc/Process.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Process
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 9a069f7d0..5106f8058 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -1,9 +1,10 @@
 {-# LANGUAGE MonoLocalBinds      #-}
+{-# LANGUAGE RankNTypes          #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Readers
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -65,12 +66,14 @@ import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
 import Data.Text (Text)
 import qualified Data.Text as T
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Class
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Extensions
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.CommonMark
+import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.Creole
 import Text.Pandoc.Readers.DocBook
 import Text.Pandoc.Readers.Docx
@@ -83,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.JATS (readJATS)
 import Text.Pandoc.Readers.Jira (readJira)
 import Text.Pandoc.Readers.LaTeX
-import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.MediaWiki
 import Text.Pandoc.Readers.Muse
 import Text.Pandoc.Readers.Native
@@ -101,59 +103,60 @@ import Text.Pandoc.Readers.CSV
 import Text.Pandoc.Readers.CslJson
 import Text.Pandoc.Readers.BibTeX
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Parsec.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
+data Reader m = TextReader (forall a . ToSources a =>
+                                ReaderOptions -> a -> m Pandoc)
               | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
 
 -- | Association list of formats and readers.
 readers :: PandocMonad m => [(Text, Reader m)]
-readers = [ ("native"       , TextReader readNative)
-           ,("json"         , TextReader readJSON)
-           ,("markdown"     , TextReader readMarkdown)
-           ,("markdown_strict" , TextReader readMarkdown)
-           ,("markdown_phpextra" , TextReader readMarkdown)
-           ,("markdown_github" , TextReader readMarkdown)
-           ,("markdown_mmd",  TextReader readMarkdown)
-           ,("commonmark"   , TextReader readCommonMark)
-           ,("commonmark_x" , TextReader readCommonMark)
-           ,("creole"       , TextReader readCreole)
-           ,("dokuwiki"     , TextReader readDokuWiki)
-           ,("gfm"          , TextReader readCommonMark)
-           ,("rst"          , TextReader readRST)
-           ,("mediawiki"    , TextReader readMediaWiki)
-           ,("vimwiki"      , TextReader readVimwiki)
-           ,("docbook"      , TextReader readDocBook)
-           ,("opml"         , TextReader readOPML)
-           ,("org"          , TextReader readOrg)
-           ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
-           ,("html"         , TextReader readHtml)
-           ,("jats"         , TextReader readJATS)
-           ,("jira"         , TextReader readJira)
-           ,("latex"        , TextReader readLaTeX)
-           ,("haddock"      , TextReader readHaddock)
-           ,("twiki"        , TextReader readTWiki)
-           ,("tikiwiki"     , TextReader readTikiWiki)
-           ,("docx"         , ByteStringReader readDocx)
-           ,("odt"          , ByteStringReader readOdt)
-           ,("t2t"          , TextReader readTxt2Tags)
-           ,("epub"         , ByteStringReader readEPUB)
-           ,("muse"         , TextReader readMuse)
-           ,("man"          , TextReader readMan)
-           ,("fb2"          , TextReader readFB2)
-           ,("ipynb"        , TextReader readIpynb)
-           ,("csv"          , TextReader readCSV)
-           ,("csljson"      , TextReader readCslJson)
-           ,("bibtex"       , TextReader readBibTeX)
-           ,("biblatex"     , TextReader readBibLaTeX)
+readers = [("native"       , TextReader readNative)
+          ,("json"         , TextReader readJSON)
+          ,("markdown"     , TextReader readMarkdown)
+          ,("markdown_strict" , TextReader readMarkdown)
+          ,("markdown_phpextra" , TextReader readMarkdown)
+          ,("markdown_github" , TextReader readMarkdown)
+          ,("markdown_mmd",  TextReader readMarkdown)
+          ,("commonmark"   , TextReader readCommonMark)
+          ,("commonmark_x" , TextReader readCommonMark)
+          ,("creole"       , TextReader readCreole)
+          ,("dokuwiki"     , TextReader readDokuWiki)
+          ,("gfm"          , TextReader readCommonMark)
+          ,("rst"          , TextReader readRST)
+          ,("mediawiki"    , TextReader readMediaWiki)
+          ,("vimwiki"      , TextReader readVimwiki)
+          ,("docbook"      , TextReader readDocBook)
+          ,("opml"         , TextReader readOPML)
+          ,("org"          , TextReader readOrg)
+          ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
+          ,("html"         , TextReader readHtml)
+          ,("jats"         , TextReader readJATS)
+          ,("jira"         , TextReader readJira)
+          ,("latex"        , TextReader readLaTeX)
+          ,("haddock"      , TextReader readHaddock)
+          ,("twiki"        , TextReader readTWiki)
+          ,("tikiwiki"     , TextReader readTikiWiki)
+          ,("docx"         , ByteStringReader readDocx)
+          ,("odt"          , ByteStringReader readOdt)
+          ,("t2t"          , TextReader readTxt2Tags)
+          ,("epub"         , ByteStringReader readEPUB)
+          ,("muse"         , TextReader readMuse)
+          ,("man"          , TextReader readMan)
+          ,("fb2"          , TextReader readFB2)
+          ,("ipynb"        , TextReader readIpynb)
+          ,("csv"          , TextReader readCSV)
+          ,("csljson"      , TextReader readCslJson)
+          ,("bibtex"       , TextReader readBibTeX)
+          ,("biblatex"     , TextReader readBibLaTeX)
            ]
 
 -- | Retrieve reader, extensions based on formatSpec (format+extensions).
 getReader :: PandocMonad m => Text -> m (Reader m, Extensions)
 getReader s =
   case parseFormatSpec s of
-       Left e  -> throwError $ PandocAppError
-                    $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e]
+       Left e  -> throwError $ PandocAppError $
+                    "Error parsing reader format " <> tshow s <> ": " <> tshow e
        Right (readerName, extsToEnable, extsToDisable) ->
            case lookup readerName readers of
                    Nothing  -> throwError $ PandocUnknownReaderError
@@ -173,9 +176,13 @@ getReader s =
                      return (r, exts)
 
 -- | Read pandoc document from JSON format.
-readJSON :: PandocMonad m
-         => ReaderOptions -> Text -> m Pandoc
-readJSON _ t =
-  case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
+readJSON :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
+readJSON _ s =
+  case eitherDecode' . BL.fromStrict . UTF8.fromText
+                     . sourcesToText . toSources $ s of
        Right doc -> return doc
-       Left e    -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e)
+       Left e    -> throwError $ PandocParseError ("JSON parse error: "
+                                                   <> T.pack e)
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 6c96ab30a..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.BibTeX
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -23,41 +23,47 @@ where
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
 import Citeproc (Lang(..), parseLang)
 import Citeproc.Locale (getLocale)
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad, lookupEnv)
 import Text.Pandoc.Citeproc.BibTeX as BibTeX
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
 import Control.Monad.Except (throwError)
 
 -- | Read BibTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+           => ReaderOptions -> a -> m Pandoc
 readBibTeX = readBibTeX' BibTeX.Bibtex
 
 -- | Read BibLaTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+             => ReaderOptions -> a -> m Pandoc
 readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+            => Variant -> ReaderOptions -> a -> m Pandoc
 readBibTeX' variant _opts t = do
-  lang <- maybe (Lang "en" (Just "US")) parseLang
-             <$> lookupEnv "LANG"
+  mblangEnv <- lookupEnv "LANG"
+  let defaultLang = Lang "en" Nothing (Just "US") [] [] []
+  let lang = case mblangEnv of
+              Nothing  -> defaultLang
+              Just l   -> either (const defaultLang) id $ parseLang l
   locale <- case getLocale lang of
                Left e  ->
-                 case getLocale (Lang "en" (Just "US")) of
+                 case getLocale (Lang "en" Nothing (Just "US") [] [] []) of
                    Right l -> return l
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
   case BibTeX.readBibtexString variant locale (const True) t of
-    Left e -> throwError $ PandocParsecError t e
+    Left e -> throwError $ PandocParsecError (toSources t) e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
                          . setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index f0edcaa16..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -2,8 +2,8 @@
 
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
-   Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Module      : Text.Pandoc.Readers.CSV
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -13,23 +13,23 @@
 Conversion from CSV to a 'Pandoc' table.
 -}
 module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.Options (ReaderOptions)
 import Control.Monad.Except (throwError)
 
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
-readCSV _opts s =
-  case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+  let txt = sourcesToText $ toSources s
+  case parseCSV defaultCSVOptions txt of
     Right (r:rs) -> return $ B.doc $ B.table capt
                                              (zip aligns widths)
                                              (TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
              aligns = replicate numcols AlignDefault
              widths = replicate numcols ColWidthDefault
     Right []     -> return $ B.doc mempty
-    Left e       -> throwError $ PandocParsecError s e
+    Left e       -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index c1773eaab..411d64278 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,8 +1,9 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -25,17 +26,66 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Options
 import Text.Pandoc.Error
+import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
+import Data.Typeable
+import Text.Pandoc.Parsing (runParserT, getInput,
+                            runF, defaultParserState, option, many1, anyChar,
+                            Sources(..), ToSources(..), ParserT, Future,
+                            sourceName)
+import qualified Data.Text as T
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMark opts s = do
-  let res = runIdentity $
-              commonmarkWith (foldr ($) defaultSyntaxSpec exts) "input" s
-  case res of
-    Left err -> throwError $ PandocParsecError s err
-    Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+readCommonMark :: (PandocMonad m, ToSources a)
+               => ReaderOptions -> a -> m Pandoc
+readCommonMark opts s
+  | isEnabled Ext_yaml_metadata_block opts = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+                          rest <- getInput
+                          return (meta, rest))
+                      defaultParserState "YAML metadata" (toSources s)
+    case res of
+      Left _ -> readCommonMarkBody opts sources toks
+      Right (meta, rest) -> do
+        -- strip off metadata section and parse body
+        let body = concatMap sourceToToks (unSources rest)
+        Pandoc _ bs <- readCommonMarkBody opts sources body
+        return $ Pandoc (runF meta defaultParserState) bs
+  | otherwise = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    readCommonMarkBody opts sources toks
+
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+                => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+  inp <- option "" $ T.pack <$> many1 anyChar
+  let toks = concatMap sourceToToks (unSources (toSources inp))
+  case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+     Left _ -> mzero
+     Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
+  | isEnabled Ext_sourcepos opts =
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+      Left err -> throwError $ PandocParsecError s err
+      Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
+  | otherwise =
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+      Left err -> throwError $ PandocParsecError s err
+      Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+
+specFor :: (Monad m, Typeable m, Typeable a,
+            Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))
+        => ReaderOptions -> SyntaxSpec m (Cm a Inlines) (Cm a Blocks)
+specFor opts = foldr ($) defaultSyntaxSpec exts
  where
   exts = [ (hardLineBreaksSpec <>) | isEnabled Ext_hard_line_breaks opts ] ++
          [ (smartPunctuationSpec <>) | isEnabled Ext_smart opts ] ++
@@ -62,5 +112,7 @@ readCommonMark opts s = do
            | isEnabled Ext_implicit_header_references opts ] ++
          [ (footnoteSpec <>) | isEnabled Ext_footnotes opts ] ++
          [ (definitionListSpec <>) | isEnabled Ext_definition_lists opts ] ++
-         [ (taskListSpec <>) | isEnabled Ext_task_lists opts ]
+         [ (taskListSpec <>) | isEnabled Ext_task_lists opts ] ++
+         [ (rebaseRelativePathsSpec <>)
+           | isEnabled Ext_rebase_relative_paths opts ]
 
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
 
 -- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readCreole opts s = do
-  res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseCreole def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
 
 --
 -- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 377186b1e..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
 import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read CSL JSON from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
-  case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+            => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+  case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
     Left e -> throwError $ PandocParseError $ T.pack e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index b0846e345..c49b82ccf 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.DocBook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -12,23 +12,28 @@ Conversion of DocBook XML to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
 import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper)
+import Data.Char (isSpace, isLetter)
 import Data.Default
 import Data.Either (rights)
 import Data.Foldable (asum)
 import Data.Generics
 import Data.List (intersperse,elemIndex)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe,mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Control.Monad.Except (throwError)
 import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
 
 {-
 
@@ -92,7 +97,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] chapterinfo - Meta-information for a Chapter
 [ ] citation - An inline bibliographic reference to another published work
 [ ] citebiblioid - A citation of a bibliographic identifier
-[ ] citerefentry - A citation to a reference page
+[x] citerefentry - A citation to a reference page
 [ ] citetitle - The title of a cited work
 [ ] city - The name of a city in an address
 [x] classname - The name of a class, in the object-oriented programming sense
@@ -129,6 +134,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [ ] corpcredit - A corporation or organization credited in a document
 [ ] corpname - The name of a corporation
 [ ] country - The name of a country
+[x] danger - An admonition set off from the text indicating hazardous situation
 [ ] database - The name of a database, or part of a database
 [x] date - The date of publication or revision of a document
 [ ] dedication - A wrapper for the dedication section of a book
@@ -206,7 +212,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] info - A wrapper for information about a component or other block. (DocBook v5)
 [x] informalequation - A displayed mathematical equation without a title
 [x] informalexample - A displayed example without a title
-[ ] informalfigure - A untitled figure
+[x] informalfigure - An untitled figure
 [ ] informaltable - A table without a title
 [ ] initializer - The initializer for a FieldSynopsis
 [x] inlineequation - A mathematical equation or expression occurring inline
@@ -535,24 +541,32 @@ instance Default DBState where
                , dbContent = [] }
 
 
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readDocBook _ inp = do
-  let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
+  let sources = toSources inp
+  tree <- either (throwError . PandocXMLError "") return $
+            parseXMLContents
+              (TL.fromStrict . handleInstructions . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
--- We treat <?asciidoc-br?> specially (issue #1236), converting it
--- to <br/>, since xml-light doesn't parse the instruction correctly.
--- Other xml instructions are simply removed from the input stream.
+-- We treat certain processing instructions by converting them to tags
+-- beginning "pi-".
 handleInstructions :: Text -> Text
-handleInstructions = T.pack . handleInstructions' . T.unpack
-
-handleInstructions' :: String -> String
-handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
-handleInstructions' xs = case break (=='<') xs of
-                             (ys, [])     -> ys
-                             ([], '<':zs) -> '<' : handleInstructions' zs
-                             (ys, zs)     -> ys ++ handleInstructions' zs
+handleInstructions t =
+  let (x,y) = T.breakOn "<?" t
+   in if T.null y
+         then x
+         else
+           let (w,z) = T.breakOn "?>" y
+            in (if T.takeWhile (\c -> isLetter c || c == '-')
+                    (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"]
+                   then x <> "<pi-" <> T.drop 2 w <> "/>"
+                   else x <> w <> T.take 2 z) <>
+               handleInstructions (T.drop 2 z)
 
 getFigure :: PandocMonad m => Element -> DB m Blocks
 getFigure e = do
@@ -565,32 +579,14 @@ getFigure e = do
   modify $ \st -> st{ dbFigureTitle = mempty, dbFigureId = mempty }
   return res
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- convenience function
 named :: Text -> Element -> Bool
-named s e = qName (elName e) == T.unpack s
+named s e = qName (elName e) == s
 
 --
 
@@ -605,16 +601,24 @@ addMetadataFromElement e = do
          Nothing -> return ()
          Just z  -> addMetaField "author" z
     addMetaField "subtitle" e
-    addMetaField "author" e
+    addAuthor e
     addMetaField "date" e
     addMetaField "release" e
     addMetaField "releaseinfo" e
     return mempty
-  where addMetaField fieldname elt =
-            case filterChildren (named fieldname) elt of
-                   []  -> return ()
-                   [z] -> getInlines z >>= addMeta fieldname
-                   zs  -> mapM getInlines zs >>= addMeta fieldname
+  where
+   addAuthor elt =
+     case filterChildren (named "author") elt of
+       [] -> return ()
+       [z] -> fromAuthor z >>= addMeta "author"
+       zs  -> mapM fromAuthor zs >>= addMeta "author"
+   fromAuthor elt =
+     mconcat . intersperse space <$> mapM getInlines (elChildren elt)
+   addMetaField fieldname elt =
+     case filterChildren (named fieldname) elt of
+       []  -> return ()
+       [z] -> getInlines z >>= addMeta fieldname
+       zs  -> mapM getInlines zs >>= addMeta fieldname
 
 addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m ()
 addMeta field val = modify (setMeta field val)
@@ -627,7 +631,7 @@ isBlockElement :: Content -> Bool
 isBlockElement (Elem e) = qName (elName e) `elem` blockTags
 isBlockElement _ = False
 
-blockTags :: [String]
+blockTags :: [Text]
 blockTags =
   [ "abstract"
   , "ackno"
@@ -669,6 +673,7 @@ blockTags =
   , "index"
   , "info"
   , "informalexample"
+  , "informalfigure"
   , "informaltable"
   , "itemizedlist"
   , "linegroup"
@@ -713,8 +718,8 @@ blockTags =
   , "variablelist"
   ] ++ admonitionTags
 
-admonitionTags :: [String]
-admonitionTags = ["important","caution","note","tip","warning"]
+admonitionTags :: [Text]
+admonitionTags = ["caution","danger","important","note","tip","warning"]
 
 -- Trim leading and trailing newline characters
 trimNl :: Text -> Text
@@ -736,9 +741,9 @@ getMediaobject e = do
   figTitle <- gets dbFigureTitle
   ident <- gets dbFigureId
   (imageUrl, attr) <-
-    case filterChild (named "imageobject") e of
-      Nothing  -> return (mempty, nullAttr)
-      Just z   -> case filterChild (named "imagedata") z of
+    case filterElements (named "imageobject") e of
+      []  -> return (mempty, nullAttr)
+      (z:_) -> case filterChild (named "imagedata") z of
                     Nothing -> return (mempty, nullAttr)
                     Just i  -> let atVal a = attrValue a i
                                    w = case atVal "width" of
@@ -771,10 +776,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> DB m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "toc"   -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -829,7 +834,7 @@ parseBlock (Elem e) =
         "refsect2" -> sect 2
         "refsect3" -> sect 3
         "refsection" -> gets dbSectionLevel >>= sect . (+1)
-        l | l `elem` admonitionTags -> parseAdmonition $ T.pack l
+        l | l `elem` admonitionTags -> parseAdmonition l
         "area" -> skip
         "areaset" -> skip
         "areaspec" -> skip
@@ -855,6 +860,7 @@ parseBlock (Elem e) =
         "variablelist" -> definitionList <$> deflistitems
         "procedure" -> bulletList <$> steps
         "figure" -> getFigure e
+        "informalfigure" -> getFigure e
         "mediaobject" -> para <$> getMediaobject e
         "caption" -> skip
         "info" -> addMetadataFromElement e
@@ -890,7 +896,11 @@ parseBlock (Elem e) =
         "subtitle" -> return mempty  -- handled in parent element
         _       -> skip >> getBlocks e
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          codeBlockWithLang = do
@@ -898,7 +908,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ T.pack $ strContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -952,17 +962,16 @@ parseBlock (Elem e) =
                             w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x ->
                                                      (x >= '0' && x <= '9')
-                                                      || x == '.') (T.pack w)
+                                                      || x == '.') w
                             if n > 0 then Just n else Nothing
-                      let numrows = case bodyrows of
-                                         [] -> 0
-                                         xs -> maximum $ map length xs
+                      let numrows = maybe 0 maximum $ nonEmpty
+                                                    $ map length bodyrows
                       let aligns = case colspecs of
                                      [] -> replicate numrows AlignDefault
                                      cs -> map toAlignment cs
                       let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9')
                                                                    || x == '.') s)
-                      let textWidth = case filterChild (named "?dbfo") e of
+                      let textWidth = case filterChild (named "pi-dbfo") e of
                                         Just d  -> case attrValue "table-width" d of
                                                      "" -> 1.0
                                                      w  -> fromMaybe 100.0 (parseWidth w) / 100.0
@@ -1035,12 +1044,12 @@ parseMixed container conts = do
       x <- parseMixed container rs
       return $ p <> b <> x
 
-parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell]
+parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell]
 parseRow cn = do
   let isEntry x  = named "entry" x || named "td" x || named "th" x
   mapM (parseEntry cn) . filterChildren isEntry
 
-parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell
+parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell
 parseEntry cn el = do
   let colDistance sa ea = do
         let iStrt = elemIndex sa cn
@@ -1062,7 +1071,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines
 getInlines e' = trimInlines . mconcat <$>
                  mapM parseInline (elContent e')
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -1071,16 +1080,16 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
+parseInline (Text (CData _ s _)) = return $ text s
 parseInline (CRef ref) =
-  return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+  return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "anchor" -> do
            return $ spanWith (attrValue "id" e, [], []) mempty
         "phrase" -> do
           let ident = attrValue "id" e
-          let classes = T.words $ attrValue "class" e
+          let classes = T.words $ attrValue "role" e
           if ident /= "" || classes /= []
             then innerInlines (spanWith (ident,classes,[]))
             else innerInlines id
@@ -1103,6 +1112,10 @@ parseInline (Elem e) =
         "segmentedlist" -> segmentedList
         "classname" -> codeWithLang
         "code" -> codeWithLang
+        "citerefentry" -> do
+             let title = maybe mempty strContent $ filterChild (named "refentrytitle") e
+             let manvolnum = maybe mempty (\el -> "(" <> strContent el <> ")") $ filterChild (named "manvolnum") e
+             return $ codeWith ("",["citerefentry"],[]) (title <> manvolnum)
         "filename" -> codeWithLang
         "envar" -> codeWithLang
         "literal" -> codeWithLang
@@ -1125,7 +1138,7 @@ parseInline (Elem e) =
         "userinput" -> codeWithLang
         "systemitem" -> codeWithLang
         "varargs" -> return $ code "(...)"
-        "keycap" -> return (str $ T.pack $ strContent e)
+        "keycap" -> return (str $ strContent e)
         "keycombo" -> keycombo <$>
                          mapM parseInline (elContent e)
         "menuchoice" -> menuchoice <$>
@@ -1137,17 +1150,17 @@ parseInline (Elem e) =
             let title = case attrValue "endterm" e of
                             ""      -> maybe "???" xrefTitleByElem
                                          (findElementById linkend content)
-                            endterm -> maybe "???" (T.pack . strContent)
+                            endterm -> maybe "???" strContent
                                          (findElementById endterm content)
             return $ link ("#" <> linkend) "" (text title)
-        "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
-                          $ str $ T.pack $ strContent e
-        "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
+        "email" -> return $ link ("mailto:" <> strContent e) ""
+                          $ str $ strContent e
+        "uri" -> return $ link (strContent e) "" $ str $ strContent e
         "ulink" -> innerInlines (link (attrValue "url" e) "")
         "link" -> do
              ils <- innerInlines id
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "linkend" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
@@ -1163,12 +1176,15 @@ parseInline (Elem e) =
         "title" -> return mempty
         "affiliation" -> skip
         -- Note: this isn't a real docbook tag; it's what we convert
-        -- <?asciidor-br?> to in handleInstructions, above.  A kludge to
-        -- work around xml-light's inability to parse an instruction.
-        "br" -> return linebreak
+        -- <?asciidor-br?> to in handleInstructions, above.
+        "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          innerInlines f = extractSpaces f . mconcat <$>
@@ -1177,7 +1193,7 @@ parseInline (Elem e) =
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
          simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines
                          (filterChildren (named "member") e)
          segmentedList = do
@@ -1218,10 +1234,10 @@ parseInline (Elem e) =
                   "sect5"        -> descendantContent "title" el
                   "cmdsynopsis"  -> descendantContent "command" el
                   "funcsynopsis" -> descendantContent "function" el
-                  _              -> T.pack $ qName (elName el) ++ "_title"
+                  _              -> qName (elName el) <> "_title"
           where
             xrefLabel = attrValue "xreflabel" el
-            descendantContent name = maybe "???" (T.pack . strContent)
+            descendantContent name = maybe "???" strContent
                                    . filterElementName (\n -> qName n == name)
 
 -- | Extract a math equation from an element
@@ -1241,8 +1257,9 @@ equation e constructor =
   where
     mathMLEquations :: [Text]
     mathMLEquations = map writeTeX $ rights $ readMath
-      (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
-      (readMathML . T.pack . showElement)
+      (\x -> qName (elName x) == "math" &&
+             qURI (elName x) == Just "http://www.w3.org/1998/Math/MathML")
+      (readMathML . showElement)
 
     latexEquations :: [Text]
     latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
@@ -1256,8 +1273,8 @@ equation e constructor =
 -- | Get the actual text stored in a CData block. 'showContent'
 -- returns the text still surrounded by the [[CDATA]] tags.
 showVerbatimCData :: Content -> Text
-showVerbatimCData (Text (CData _ d _)) = T.pack d
-showVerbatimCData c = T.pack $ showContent c
+showVerbatimCData (Text (CData _ d _)) = d
+showVerbatimCData c = showContent c
 
 
 -- | Set the prefix of a name to 'Nothing'
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 00de6a0cd..c06adf7e3 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -61,13 +61,14 @@ module Text.Pandoc.Readers.Docx
 import Codec.Archive.Zip
 import Control.Monad.Reader
 import Control.Monad.State.Strict
+import Data.Bifunctor (bimap, first)
 import qualified Data.ByteString.Lazy as B
 import Data.Default (Default)
-import Data.List (delete, intersect)
+import Data.List (delete, intersect, foldl')
 import Data.Char (isSpace)
 import qualified Data.Map as M
 import qualified Data.Text as T
-import Data.Maybe (isJust, fromMaybe)
+import Data.Maybe (catMaybes, isJust, fromMaybe)
 import Data.Sequence (ViewL (..), viewl)
 import qualified Data.Sequence as Seq
 import qualified Data.Set as Set
@@ -85,6 +86,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
+import Data.List.NonEmpty (nonEmpty)
 
 readDocx :: PandocMonad m
          => ReaderOptions
@@ -112,6 +114,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
                      -- restarting
                      , docxListState :: M.Map (T.Text, T.Text) Integer
                      , docxPrevPara  :: Inlines
+                     , docxTableCaptions :: [Blocks]
                      }
 
 instance Default DState where
@@ -122,6 +125,7 @@ instance Default DState where
                , docxDropCap   = mempty
                , docxListState = M.empty
                , docxPrevPara  = mempty
+               , docxTableCaptions = []
                }
 
 data DEnv = DEnv { docxOptions       :: ReaderOptions
@@ -490,15 +494,32 @@ singleParaToPlain blks
       singleton $ Plain ils
 singleParaToPlain blks = blks
 
-cellToBlocks :: PandocMonad m => Docx.Cell -> DocxContext m Blocks
-cellToBlocks (Docx.Cell bps) = do
+cellToCell :: PandocMonad m => RowSpan -> Docx.Cell -> DocxContext m Pandoc.Cell
+cellToCell rowSpan (Docx.Cell gridSpan _ bps) = do
   blks <- smushBlocks <$> mapM bodyPartToBlocks bps
-  return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+  let blks' = singleParaToPlain $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+  return (cell AlignDefault rowSpan (ColSpan (fromIntegral gridSpan)) blks')
+
+rowsToRows :: PandocMonad m => [Docx.Row] -> DocxContext m [Pandoc.Row]
+rowsToRows rows = do
+  let rowspans = (fmap . fmap) (first RowSpan) (Docx.rowsToRowspans rows)
+  cells <- traverse (traverse (uncurry cellToCell)) rowspans
+  return (fmap (Pandoc.Row nullAttr) cells)
+
+splitHeaderRows :: Bool -> [Docx.Row] -> ([Docx.Row], [Docx.Row])
+splitHeaderRows hasFirstRowFormatting rs = bimap reverse reverse $ fst
+  $ if hasFirstRowFormatting
+    then foldl' f ((take 1 rs, []), True) (drop 1 rs)
+    else foldl' f (([], []), False) rs
+  where
+    f ((headerRows, bodyRows), previousRowWasHeader) r@(Docx.Row h cs)
+      | h == HasTblHeader || (previousRowWasHeader && any isContinuationCell cs)
+        = ((r : headerRows, bodyRows), True)
+      | otherwise
+        = ((headerRows, r : bodyRows), False)
+
+    isContinuationCell (Docx.Cell _ vm _) = vm == Docx.Continue
 
-rowToBlocksList :: PandocMonad m => Docx.Row -> DocxContext m [Blocks]
-rowToBlocksList (Docx.Row cells) = do
-  blksList <- mapM cellToBlocks cells
-  return $ map singleParaToPlain blksList
 
 -- like trimInlines, but also take out linebreaks
 trimSps :: Inlines -> Inlines
@@ -545,6 +566,11 @@ normalizeToClassName = T.map go . fromStyleName
   where go c | isSpace c = '-'
              | otherwise = c
 
+bodyPartToTableCaption :: PandocMonad m => BodyPart -> DocxContext m (Maybe Blocks)
+bodyPartToTableCaption (TblCaption pPr parparts) =
+  Just <$> bodyPartToBlocks (Paragraph pPr parparts)
+bodyPartToTableCaption _ = pure Nothing
+
 bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
 bodyPartToBlocks (Paragraph pPr parparts)
   | Just True <- pBidi pPr = do
@@ -636,54 +662,43 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
   let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
   in
     bodyPartToBlocks $ Paragraph pPr' parparts
+bodyPartToBlocks (TblCaption _ _) =
+  return $ para mempty -- collected separately
 bodyPartToBlocks (Tbl _ _ _ []) =
   return $ para mempty
-bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
-  let cap' = simpleCaption $ plain $ text cap
-      (hdr, rows) = case firstRowFormatting look of
-        True | null rs -> (Nothing, [r])
-             | otherwise -> (Just r, rs)
-        False -> (Nothing, r:rs)
-
-  cells <- mapM rowToBlocksList rows
+bodyPartToBlocks (Tbl cap grid look parts) = do
+  captions <- gets docxTableCaptions
+  fullCaption <- case captions of
+    c : cs -> do
+      modify (\s -> s { docxTableCaptions = cs })
+      return c
+    [] -> return $ if T.null cap then mempty else plain (text cap)
+  let shortCaption = if T.null cap then Nothing else Just (toList (text cap))
+      cap' = caption shortCaption fullCaption
+      (hdr, rows) = splitHeaderRows (firstRowFormatting look) parts
 
   let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
-      -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out
-      -- our own, see
-      -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155
-      nonEmpty [] = Nothing
-      nonEmpty l  = Just l
       rowLength :: Docx.Row -> Int
-      rowLength (Docx.Row c) = length c
+      rowLength (Docx.Row _ c) = sum (fmap (\(Docx.Cell gridSpan _ _) -> fromIntegral gridSpan) c)
 
-  let toRow = Pandoc.Row nullAttr . map simpleCell
-      toHeaderRow l = [toRow l | not (null l)]
+  headerCells <- rowsToRows hdr
+  bodyCells <- rowsToRows rows
 
-  -- pad cells.  New Text.Pandoc.Builder will do that for us,
-  -- so this is for compatibility while we switch over.
-  let cells' = map (\row -> toRow $ take width (row ++ repeat mempty)) cells
-
-  hdrCells <- case hdr of
-    Just r' -> toHeaderRow <$> rowToBlocksList r'
-    Nothing -> return []
-
-      -- The two following variables (horizontal column alignment and
-      -- relative column widths) go to the default at the
-      -- moment. Width information is in the TblGrid field of the Tbl,
-      -- so should be possible. Alignment might be more difficult,
-      -- since there doesn't seem to be a column entity in docx.
+      -- Horizontal column alignment goes to the default at the moment. Getting
+      -- it might be difficult, since there doesn't seem to be a column entity
+      -- in docx.
   let alignments = replicate width AlignDefault
-      widths = replicate width ColWidthDefault
+      totalWidth = sum grid
+      widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
 
   return $ table cap'
                  (zip alignments widths)
-                 (TableHead nullAttr hdrCells)
-                 [TableBody nullAttr 0 [] cells']
+                 (TableHead nullAttr headerCells)
+                 [TableBody nullAttr 0 [] bodyCells]
                  (TableFoot nullAttr [])
 bodyPartToBlocks (OMathPara e) =
   return $ para $ displayMath (writeTeX e)
 
-
 -- replace targets with generated anchors.
 rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
 rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
@@ -719,6 +734,8 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
 bodyToOutput (Body bps) = do
   let (metabps, blkbps) = sepBodyParts bps
   meta <- bodyPartsToMeta metabps
+  captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps
+  modify (\s -> s { docxTableCaptions = captions })
   blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
   blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
   blks'' <- removeOrphanAnchors blks'
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 46112af19..6e4faa639 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -2,7 +2,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Docx.Combine
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2020 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
@@ -61,7 +61,7 @@ import Data.List
 import Data.Bifunctor
 import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
                      , (><), (|>) )
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder as B
 
 data Modifier a = Modifier (a -> a)
                 | AttrModifier (Attr -> a -> a) Attr
@@ -116,12 +116,12 @@ ilModifierAndInnards ils = case viewl $ unMany ils of
 
 inlinesL :: Inlines -> (Inlines, Inlines)
 inlinesL ils = case viewl $ unMany ils of
-  (s :< sq) -> (singleton s, Many sq)
+  (s :< sq) -> (B.singleton s, Many sq)
   _         -> (mempty, ils)
 
 inlinesR :: Inlines -> (Inlines, Inlines)
 inlinesR ils = case viewr $ unMany ils of
-  (sq :> s) -> (Many sq, singleton s)
+  (sq :> s) -> (Many sq, B.singleton s)
   _         -> (ils, mempty)
 
 combineInlines :: Inlines -> Inlines -> Inlines
@@ -182,7 +182,7 @@ isAttrModifier _                  = False
 
 smushInlines :: [Inlines] -> Inlines
 smushInlines xs = combineInlines xs' mempty
-  where xs' = foldl combineInlines mempty xs
+  where xs' = foldl' combineInlines mempty xs
 
 smushBlocks :: [Blocks] -> Blocks
-smushBlocks xs = foldl combineBlocks mempty xs
+smushBlocks xs = foldl' combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index fdcffcc3f..dbb16a821 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -33,7 +33,9 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , ParStyle
                                       , CharStyle(cStyleData)
                                       , Row(..)
+                                      , TblHeader(..)
                                       , Cell(..)
+                                      , VMerge(..)
                                       , TrackedChange(..)
                                       , ChangeType(..)
                                       , ChangeInfo(..)
@@ -50,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , pHeading
                                       , constructBogusParStyleData
                                       , leftBiasedMergeRunStyle
+                                      , rowsToRowspans
                                       ) where
 import Text.Pandoc.Readers.Docx.Parse.Styles
 import Codec.Archive.Zip
@@ -63,6 +66,7 @@ import Data.Char (chr, ord, readLitChar)
 import Data.List
 import qualified Data.Map as M
 import qualified Data.Text as T
+import Data.Text (Text)
 import Data.Maybe
 import System.FilePath
 import Text.Pandoc.Readers.Docx.Util
@@ -72,8 +76,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import Text.TeXMath (Exp)
 import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
-import Text.XML.Light
-import qualified Text.XML.Light.Cursor as XMLC
+import Text.Pandoc.XML.Light
 
 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            , envComments      :: Comments
@@ -127,37 +130,23 @@ mapD f xs =
   in
    concatMapM handler xs
 
-unwrap :: NameSpaces -> Content -> [Content]
-unwrap ns (Elem element)
+unwrapElement :: NameSpaces -> Element -> [Element]
+unwrapElement ns element
   | isElem ns "w" "sdt" element
   , Just sdtContent <- findChildByName ns "w" "sdtContent" element
-  = concatMap (unwrap ns . Elem) (elChildren sdtContent)
+  = concatMap (unwrapElement ns) (elChildren sdtContent)
   | isElem ns "w" "smartTag" element
-  = concatMap (unwrap ns . Elem) (elChildren element)
-unwrap _ content = [content]
+  = concatMap (unwrapElement ns) (elChildren element)
+  | otherwise
+  = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }]
 
-unwrapChild :: NameSpaces -> Content -> Content
-unwrapChild ns (Elem element) =
-  Elem $ element { elContent = concatMap (unwrap ns) (elContent element) }
-unwrapChild _ content = content
+unwrapContent :: NameSpaces -> Content -> [Content]
+unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element
+unwrapContent _ content = [content]
 
-walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
-walkDocument' ns cur =
-  let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur
-  in
-    case XMLC.nextDF modifiedCur of
-      Just cur' -> walkDocument' ns cur'
-      Nothing   -> XMLC.root modifiedCur
-
-walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument :: NameSpaces -> Element -> Element
 walkDocument ns element =
-  let cur = XMLC.fromContent (Elem element)
-      cur' = walkDocument' ns cur
-  in
-    case XMLC.toTree cur' of
-      Elem element' -> Just element'
-      _             -> Nothing
-
+  element{ elContent = concatMap (unwrapContent ns) (elContent element) }
 
 newtype Docx = Docx Document
           deriving Show
@@ -239,6 +228,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
 data BodyPart = Paragraph ParagraphStyle [ParPart]
               | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
               | Tbl T.Text TblGrid TblLook [Row]
+              | TblCaption ParagraphStyle [ParPart]
               | OMathPara [Exp]
               deriving Show
 
@@ -250,12 +240,61 @@ newtype TblLook = TblLook {firstRowFormatting::Bool}
 defaultTblLook :: TblLook
 defaultTblLook = TblLook{firstRowFormatting = False}
 
-newtype Row = Row [Cell]
-           deriving Show
+data Row = Row TblHeader [Cell] deriving Show
 
-newtype Cell = Cell [BodyPart]
+data TblHeader = HasTblHeader | NoTblHeader deriving (Show, Eq)
+
+data Cell = Cell GridSpan VMerge [BodyPart]
             deriving Show
 
+type GridSpan = Integer
+
+data VMerge = Continue
+            -- ^ This cell should be merged with the one above it
+            | Restart
+            -- ^ This cell should not be merged with the one above it
+            deriving (Show, Eq)
+
+rowsToRowspans :: [Row] -> [[(Int, Cell)]]
+rowsToRowspans rows = let
+  removeMergedCells = fmap (filter (\(_, Cell _ vmerge _) -> vmerge == Restart))
+  in removeMergedCells (foldr f [] rows)
+  where
+    f :: Row -> [[(Int, Cell)]] -> [[(Int, Cell)]]
+    f (Row _ cells) acc = let
+      spans = g cells Nothing (listToMaybe acc)
+      in spans : acc
+
+    g ::
+      -- | The current row
+      [Cell] ->
+      -- | Number of columns left below
+      Maybe Integer ->
+      -- | (rowspan so far, cell) for the row below this one
+      Maybe [(Int, Cell)] ->
+      -- | (rowspan so far, cell) for this row
+      [(Int, Cell)]
+    g cells _ Nothing = zip (repeat 1) cells
+    g cells columnsLeftBelow (Just rowBelow) =
+        case cells of
+          [] -> []
+          thisCell@(Cell thisGridSpan _ _) : restOfRow -> case rowBelow of
+            [] -> zip (repeat 1) cells
+            (spanSoFarBelow, Cell gridSpanBelow vmerge _) : _ ->
+              let spanSoFar = case vmerge of
+                    Restart -> 1
+                    Continue -> 1 + spanSoFarBelow
+                  columnsToDrop = thisGridSpan + (gridSpanBelow - fromMaybe gridSpanBelow columnsLeftBelow)
+                  (newColumnsLeftBelow, restOfRowBelow) = dropColumns columnsToDrop rowBelow
+              in (spanSoFar, thisCell) : g restOfRow (Just newColumnsLeftBelow) (Just restOfRowBelow)
+
+    dropColumns :: Integer -> [(a, Cell)] -> (Integer, [(a, Cell)])
+    dropColumns n [] = (n, [])
+    dropColumns n cells@((_, Cell gridSpan _ _) : otherCells) =
+      if n < gridSpan
+      then (gridSpan - n, cells)
+      else dropColumns (n - gridSpan) otherCells
+
 leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
 leftBiasedMergeRunStyle a b = RunStyle
     { isBold = isBold a <|> isBold b
@@ -343,10 +382,16 @@ archiveToDocxWithWarnings archive = do
     Right doc -> Right (Docx doc, stateWarnings st)
     Left e    -> Left e
 
+parseXMLFromEntry :: Entry -> Maybe Element
+parseXMLFromEntry entry =
+  case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
+    Left _   -> Nothing
+    Right el -> Just el
+
 getDocumentXmlPath :: Archive -> Maybe FilePath
 getDocumentXmlPath zf = do
   entry <- findEntryByPath "_rels/.rels" zf
-  relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  relsElem <- parseXMLFromEntry entry
   let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem
   rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e ==
                        Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
@@ -354,18 +399,18 @@ getDocumentXmlPath zf = do
   fp <- findAttr (QName "Target" Nothing Nothing) rel
   -- sometimes there will be a leading slash, which windows seems to
   -- have trouble with.
-  return $ case fp of
+  return $ case T.unpack fp of
     '/' : fp' -> fp'
-    _         -> fp
+    fp'       -> fp'
 
 archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
   docPath <- asks envDocXmlPath
   entry <- maybeToD $ findEntryByPath docPath zf
-  docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  docElem <- maybeToD $ parseXMLFromEntry entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
-  let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+  let bodyElem' = walkDocument namespaces bodyElem
   body <- elemToBody namespaces bodyElem'
   return $ Document namespaces body
 
@@ -401,23 +446,24 @@ constructBogusParStyleData stName = ParStyle
 archiveToNotes :: Archive -> Notes
 archiveToNotes zf =
   let fnElem = findEntryByPath "word/footnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       enElem = findEntryByPath "word/endnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-      fn_namespaces = maybe [] elemToNameSpaces fnElem
-      en_namespaces = maybe [] elemToNameSpaces enElem
-      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
-      fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote"
-      en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote"
+               >>= parseXMLFromEntry
+      fn_namespaces = maybe mempty elemToNameSpaces fnElem
+      en_namespaces = maybe mempty elemToNameSpaces enElem
+      ns = M.union fn_namespaces en_namespaces
+      fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
+      en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
   in
    Notes ns fn en
 
 archiveToComments :: Archive -> Comments
 archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-      cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
-      cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
+               >>= parseXMLFromEntry
+      cmts_namespaces = maybe mempty elemToNameSpaces cmtsElem
+      cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
+               cmtsElem
   in
     case cmts of
       Just c  -> Comments cmts_namespaces c
@@ -433,20 +479,26 @@ filePathToRelType path docXmlPath =
   then Just InDocument
   else Nothing
 
-relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
-relElemToRelationship relType element | qName (elName element) == "Relationship" =
+relElemToRelationship :: FilePath -> DocumentLocation -> Element
+                      -> Maybe Relationship
+relElemToRelationship fp relType element | qName (elName element) == "Relationship" =
   do
-    relId <- findAttrText (QName "Id" Nothing Nothing) element
-    target <- findAttrText (QName "Target" Nothing Nothing) element
-    return $ Relationship relType relId target
-relElemToRelationship _ _ = Nothing
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
+    -- target may be relative (media/image1.jpeg) or absolute
+    -- (/word/media/image1.jpeg); we need to relativize it (see #7374)
+    let frontOfFp = T.pack $ takeWhile (/= '_') fp
+    let target' = fromMaybe target $
+           T.stripPrefix frontOfFp $ T.dropWhile (== '/') target
+    return $ Relationship relType relId target'
+relElemToRelationship _ _ _ = Nothing
 
 filePathToRelationships :: Archive -> FilePath -> FilePath ->  [Relationship]
 filePathToRelationships ar docXmlPath fp
   | Just relType <- filePathToRelType fp docXmlPath
   , Just entry <- findEntryByPath fp ar
-  , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
-  mapMaybe (relElemToRelationship relType) $ elChildren relElems
+  , Just relElems <- parseXMLFromEntry entry =
+  mapMaybe (relElemToRelationship fp relType) $ elChildren relElems
 filePathToRelationships _ _ _ = []
 
 archiveToRelationships :: Archive -> FilePath -> [Relationship]
@@ -478,10 +530,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
 loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
 loElemToLevelOverride ns element
   | isElem ns "w" "lvlOverride" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       let startOverride = findChildByName ns "w" "startOverride" element
                           >>= findAttrByName ns "w" "val"
-                          >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                          >>= stringToInteger
           lvl = findChildByName ns "w" "lvl" element
                 >>= levelElemToLevel ns
       return $ LevelOverride ilvl startOverride lvl
@@ -490,9 +542,9 @@ loElemToLevelOverride _ _ = Nothing
 numElemToNum :: NameSpaces -> Element -> Maybe Numb
 numElemToNum ns element
   | isElem ns "w" "num" element = do
-      numId <- findAttrTextByName ns "w" "numId" element
+      numId <- findAttrByName ns "w" "numId" element
       absNumId <- findChildByName ns "w" "abstractNumId" element
-                  >>= findAttrTextByName ns "w" "val"
+                  >>= findAttrByName ns "w" "val"
       let lvlOverrides = mapMaybe
                          (loElemToLevelOverride ns)
                          (findChildrenByName ns "w" "lvlOverride" element)
@@ -502,7 +554,7 @@ numElemToNum _ _ = Nothing
 absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
 absNumElemToAbsNum ns element
   | isElem ns "w" "abstractNum" element = do
-      absNumId <- findAttrTextByName ns "w" "abstractNumId" element
+      absNumId <- findAttrByName ns "w" "abstractNumId" element
       let levelElems = findChildrenByName ns "w" "lvl" element
           levels = mapMaybe (levelElemToLevel ns) levelElems
       return $ AbstractNumb absNumId levels
@@ -511,23 +563,23 @@ absNumElemToAbsNum _ _ = Nothing
 levelElemToLevel :: NameSpaces -> Element -> Maybe Level
 levelElemToLevel ns element
   | isElem ns "w" "lvl" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       fmt <- findChildByName ns "w" "numFmt" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       txt <- findChildByName ns "w" "lvlText" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       let start = findChildByName ns "w" "start" element
                   >>= findAttrByName ns "w" "val"
-                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                  >>= stringToInteger
       return (Level ilvl fmt txt start)
 levelElemToLevel _ _ = Nothing
 
 archiveToNumbering' :: Archive -> Maybe Numbering
 archiveToNumbering' zf =
   case findEntryByPath "word/numbering.xml" zf of
-    Nothing -> Just $ Numbering [] [] []
+    Nothing -> Just $ Numbering mempty [] []
     Just entry -> do
-      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      numberingElem <- parseXMLFromEntry entry
       let namespaces = elemToNameSpaces numberingElem
           numElems = findChildrenByName namespaces "w" "num" numberingElem
           absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
@@ -537,13 +589,13 @@ archiveToNumbering' zf =
 
 archiveToNumbering :: Archive -> Numbering
 archiveToNumbering archive =
-  fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
+  fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive)
 
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
+elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
 elemToNotes ns notetype element
   | isElem ns "w" (notetype <> "s") element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" notetype element)
       in
@@ -555,7 +607,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
 elemToComments ns element
   | isElem ns "w" "comments" element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" "comment" element)
       in
@@ -570,7 +622,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid
 elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
   let cols = findChildrenByName ns "w" "gridCol" element
   in
-   mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
+   mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger))
    cols
 elemToTblGrid _ _ = throwError WrongElem
 
@@ -594,14 +646,31 @@ elemToRow ns element | isElem ns "w" "tr" element =
   do
     let cellElems = findChildrenByName ns "w" "tc" element
     cells <- mapD (elemToCell ns) cellElems
-    return $ Row cells
+    let hasTblHeader = maybe NoTblHeader (const HasTblHeader)
+          (findChildByName ns "w" "trPr" element
+           >>= findChildByName ns "w" "tblHeader")
+    return $ Row hasTblHeader cells
 elemToRow _ _ = throwError WrongElem
 
 elemToCell :: NameSpaces -> Element -> D Cell
 elemToCell ns element | isElem ns "w" "tc" element =
   do
+    let properties = findChildByName ns "w" "tcPr" element
+    let gridSpan = properties
+                     >>= findChildByName ns "w" "gridSpan"
+                     >>= findAttrByName ns "w" "val"
+                     >>= stringToInteger
+    let vMerge = case properties >>= findChildByName ns "w" "vMerge" of
+                   Nothing -> Restart
+                   Just e ->
+                     fromMaybe Continue $ do
+                       s <- findAttrByName ns "w" "val" e
+                       case s of
+                         "continue" -> Just Continue
+                         "restart" -> Just Restart
+                         _ -> Nothing
     cellContents <- mapD (elemToBodyPart ns) (elChildren element)
-    return $ Cell cellContents
+    return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents
 elemToCell _ _ = throwError WrongElem
 
 elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
@@ -615,12 +684,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
       stringToInteger
     , hangingParIndent =
       findAttrByName ns "w" "hanging" element >>=
-      stringToInteger}
+      stringToInteger }
 elemToParIndentation _ _ = Nothing
 
-testBitMask :: String -> Int -> Bool
+testBitMask :: Text -> Int -> Bool
 testBitMask bitMaskS n =
-  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+  case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
     []            -> False
     ((n', _) : _) -> (n' .|. n) /= 0
 
@@ -633,10 +702,9 @@ pNumInfo = getParStyleField numInfo . pStyle
 elemToBodyPart :: NameSpaces -> Element -> D BodyPart
 elemToBodyPart ns element
   | isElem ns "w" "p" element
-  , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
-      do
-        expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
-        return $ OMathPara expsLst
+  , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do
+      expsLst <- eitherToD $ readOMML $ showElement c
+      return $ OMathPara expsLst
 elemToBodyPart ns element
   | isElem ns "w" "p" element
   , Just (numId, lvl) <- getNumInfo ns element = do
@@ -654,13 +722,31 @@ elemToBodyPart ns element
         Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
                     levelInfo <- lookupLevel numId lvl <$> asks envNumbering
                     return $ ListItem parstyle numId lvl levelInfo parparts
-        _ -> return $ Paragraph parstyle parparts
+        _ -> let
+          hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+          hasSimpleTableField = fromMaybe False $ do
+            fldSimple <- findChildByName ns "w" "fldSimple" element
+            instr <- findAttrByName ns "w" "instr" fldSimple
+            pure ("Table" `elem` T.words instr)
+
+          hasComplexTableField = fromMaybe False $ do
+            instrText <- findElementByName ns "w" "instrText" element
+            pure ("Table" `elem` T.words (strContent instrText))
+
+          in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
+             then return $ TblCaption parstyle parparts
+             else return $ Paragraph parstyle parparts
+
 elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
-    let caption' = findChildByName ns "w" "tblPr" element
+    let tblProperties = findChildByName ns "w" "tblPr" element
+        caption = fromMaybe "" $ tblProperties
                    >>= findChildByName ns "w" "tblCaption"
-                   >>= findAttrTextByName ns "w" "val"
-        caption = fromMaybe "" caption'
+                   >>= findAttrByName ns "w" "val"
+        description = fromMaybe "" $ tblProperties
+                       >>= findChildByName ns "w" "tblDescription"
+                       >>= findAttrByName ns "w" "val"
         grid' = case findChildByName ns "w" "tblGrid" element of
           Just g  -> elemToTblGrid ns g
           Nothing -> return []
@@ -673,7 +759,7 @@ elemToBodyPart ns element
     grid <- grid'
     tblLook <- tblLook'
     rows <- mapD (elemToRow ns) (elChildren element)
-    return $ Tbl caption grid tblLook rows
+    return $ Tbl (caption <> description) grid tblLook rows
 elemToBodyPart _ _ = throwError WrongElem
 
 lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
@@ -698,8 +784,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
 getTitleAndAlt ns element =
   let mbDocPr = findChildByName ns "wp" "inline" element >>=
                 findChildByName ns "wp" "docPr"
-      title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
-      alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
+      title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+      alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
   in (title, alt)
 
 elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -711,22 +797,29 @@ elemToParPart ns element
   = let (title, alt) = getTitleAndAlt ns drawingElem
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrTextByName ns "r" "embed"
+                  >>= findAttrByName ns "r" "embed"
     in
      case drawing of
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
        Nothing -> throwError WrongElem
--- The below is an attempt to deal with images in deprecated vml format.
+-- The two cases below are an attempt to deal with images in deprecated vml format.
+-- Todo: check out title and attr for deprecated format.
 elemToParPart ns element
   | isElem ns "w" "r" element
   , Just _ <- findChildByName ns "w" "pict" element =
     let drawing = findElement (elemName ns "v" "imagedata") element
-                  >>= findAttrTextByName ns "r" "id"
+                  >>= findAttrByName ns "r" "id"
     in
      case drawing of
-       -- Todo: check out title and attr for deprecated format.
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
        Nothing -> throwError WrongElem
+elemToParPart ns element
+  | isElem ns "w" "r" element
+  , Just objectElem <- findChildByName ns "w" "object" element
+  , Just shapeElem <- findChildByName ns "v" "shape" objectElem
+  , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
+  , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
+  = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
 -- Chart
 elemToParPart ns element
   | isElem ns "w" "r" element
@@ -790,7 +883,7 @@ elemToParPart ns element
       fldCharState <- gets stateFldCharState
       case fldCharState of
         FldCharOpen -> do
-          info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
+          info <- eitherToD $ parseFieldInfo $ strContent instrText
           modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
           return NullParPart
         _ -> return NullParPart
@@ -811,48 +904,48 @@ elemToParPart ns element
       return $ ChangedRuns change runs
 elemToParPart ns element
   | isElem ns "w" "bookmarkStart" element
-  , Just bmId <- findAttrTextByName ns "w" "id" element
-  , Just bmName <- findAttrTextByName ns "w" "name" element =
+  , Just bmId <- findAttrByName ns "w" "id" element
+  , Just bmName <- findAttrByName ns "w" "name" element =
     return $ BookMark bmId bmName
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just relId <- findAttrTextByName ns "r" "id" element = do
+  , Just relId <- findAttrByName ns "r" "id" element = do
     location <- asks envLocation
     runs <- mapD (elemToRun ns) (elChildren element)
     rels <- asks envRelationships
     case lookupRelationship location relId rels of
       Just target ->
-         case findAttrTextByName ns "w" "anchor" element of
+         case findAttrByName ns "w" "anchor" element of
              Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
              Nothing -> return $ ExternalHyperLink target runs
       Nothing     -> return $ ExternalHyperLink "" runs
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
+  , Just anchor <- findAttrByName ns "w" "anchor" element = do
     runs <- mapD (elemToRun ns) (elChildren element)
     return $ InternalHyperLink anchor runs
 elemToParPart ns element
   | isElem ns "w" "commentRangeStart" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element = do
       (Comments _ commentMap) <- asks envComments
       case M.lookup cmtId commentMap of
         Just cmtElem -> elemToCommentStart ns cmtElem
         Nothing      -> throwError WrongElem
 elemToParPart ns element
   | isElem ns "w" "commentRangeEnd" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element =
+  , Just cmtId <- findAttrByName ns "w" "id" element =
     return $ CommentEnd cmtId
 elemToParPart ns element
   | isElem ns "m" "oMath" element =
-    fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
+    fmap PlainOMath (eitherToD $ readOMML $ showElement element)
 elemToParPart _ _ = throwError WrongElem
 
 elemToCommentStart :: NameSpaces -> Element -> D ParPart
 elemToCommentStart ns element
   | isElem ns "w" "comment" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element
-  , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
-  , cmtDate <- findAttrTextByName ns "w" "date" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element
+  , Just cmtAuthor <- findAttrByName ns "w" "author" element
+  , cmtDate <- findAttrByName ns "w" "date" element = do
       bps <- mapD (elemToBodyPart ns) (elChildren element)
       return $ CommentStart cmtId cmtAuthor cmtDate bps
 elemToCommentStart _ _ = throwError WrongElem
@@ -871,7 +964,7 @@ elemToExtent drawingElem =
     where
       wp_ns  = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
       getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
-                    >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
+                    >>= findAttr (QName at Nothing Nothing) >>= safeRead
 
 
 childElemToRun :: NameSpaces -> Element -> D Run
@@ -882,7 +975,7 @@ childElemToRun ns element
   = let (title, alt) = getTitleAndAlt ns element
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
+                  >>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r"))
     in
      case drawing of
        Just s -> expandDrawingId s >>=
@@ -895,7 +988,7 @@ childElemToRun ns element
   = return InlineChart
 childElemToRun ns element
   | isElem ns "w" "footnoteReference" element
-  , Just fnId <- findAttrTextByName ns "w" "id" element = do
+  , Just fnId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupFootnote fnId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -903,7 +996,7 @@ childElemToRun ns element
       Nothing  -> return $ Footnote []
 childElemToRun ns element
   | isElem ns "w" "endnoteReference" element
-  , Just enId <- findAttrTextByName ns "w" "id" element = do
+  , Just enId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupEndnote enId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -956,15 +1049,15 @@ getParStyleField _ _ = Nothing
 getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
 getTrackedChange ns element
   | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange ns element
   | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange _ _ = Nothing
 
@@ -973,7 +1066,7 @@ elemToParagraphStyle ns element sty
   | Just pPr <- findChildByName ns "w" "pPr" element =
     let style =
           mapMaybe
-          (fmap ParaStyleId . findAttrTextByName ns "w" "val")
+          (fmap ParaStyleId . findAttrByName ns "w" "val")
           (findChildrenByName ns "w" "pStyle" pPr)
     in ParagraphStyle
       {pStyle = mapMaybe (`M.lookup` sty) style
@@ -1005,7 +1098,7 @@ elemToRunStyleD ns element
     charStyles <- asks envCharStyles
     let parentSty =
           findChildByName ns "w" "rStyle" rPr >>=
-          findAttrTextByName ns "w" "val" >>=
+          findAttrByName ns "w" "val" >>=
           flip M.lookup charStyles . CharStyleId
     return $ elemToRunStyle ns element parentSty
 elemToRunStyleD _ _ = return defaultRunStyle
@@ -1015,7 +1108,7 @@ elemToRunElem ns element
   | isElem ns "w" "t" element
     || isElem ns "w" "delText" element
     || isElem ns "m" "t" element = do
-    let str = T.pack $ strContent element
+    let str = strContent element
     font <- asks envFont
     case font of
       Nothing -> return $ TextRun str
@@ -1037,14 +1130,14 @@ getSymChar :: NameSpaces -> Element -> RunElem
 getSymChar ns element
   | Just s <- lowerFromPrivate <$> getCodepoint
   , Just font <- getFont =
-    case readLitChar ("\\x" ++ s) of
+    case readLitChar ("\\x" ++ T.unpack s) of
          [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
          _           -> TextRun ""
   where
     getCodepoint = findAttrByName ns "w" "char" element
-    getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
-    lowerFromPrivate ('F':xs) = '0':xs
-    lowerFromPrivate xs       = xs
+    getFont = textToFont =<< findAttrByName ns "w" "font" element
+    lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t
+                       | otherwise             = t
 getSymChar _ _ = TextRun ""
 
 elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1054,8 +1147,9 @@ elemToRunElems ns element
        let qualName = elemName ns "w"
        let font = do
                     fontElem <- findElement (qualName "rFonts") element
-                    textToFont . T.pack =<<
-                       foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
+                    textToFont =<<
+                       foldr ((<|>) . (flip findAttr fontElem . qualName))
+                         Nothing ["ascii", "hAnsi"]
        local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
 elemToRunElems _ _ = throwError WrongElem
 
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index 236167187..0d7271d6a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -48,11 +48,13 @@ import Data.Function (on)
 import Data.String (IsString(..))
 import qualified Data.Map as M
 import qualified Data.Text as T
+import qualified Data.Text.Read
+import Data.Text (Text)
 import Data.Maybe
 import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light
+import Text.Pandoc.XML.Light
 
 newtype CharStyleId   = CharStyleId T.Text
   deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -108,7 +110,7 @@ data RunStyle = RunStyle { isBold       :: Maybe Bool
                          , isRTL        :: Maybe Bool
                          , isForceCTL   :: Maybe Bool
                          , rVertAlign   :: Maybe VertAlign
-                         , rUnderline   :: Maybe String
+                         , rUnderline   :: Maybe Text
                          , rParentStyle :: Maybe CharStyle
                          }
                 deriving Show
@@ -135,19 +137,22 @@ defaultRunStyle = RunStyle { isBold = Nothing
                            , rParentStyle = Nothing
                            }
 
-archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
-                    (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles'
+  :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2)
+  => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
 archiveToStyles' conv1 conv2 zf =
-  let stylesElem = findEntryByPath "word/styles.xml" zf >>=
-                   (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-  in
-   case stylesElem of
-     Nothing -> (M.empty, M.empty)
-     Just styElem ->
-       let namespaces = elemToNameSpaces styElem
-       in
-        ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
-          M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+  case findEntryByPath "word/styles.xml" zf of
+    Nothing -> (M.empty, M.empty)
+    Just entry ->
+      case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+        Left _ -> (M.empty, M.empty)
+        Right styElem ->
+          let namespaces = elemToNameSpaces styElem
+          in
+           ( M.fromList $ map (\r -> (conv1 r, r)) $
+               buildBasedOnList namespaces styElem Nothing,
+             M.fromList $ map (\p -> (conv2 p, p)) $
+               buildBasedOnList namespaces styElem Nothing)
 
 isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
 isBasedOnStyle ns element parentStyle
@@ -155,7 +160,7 @@ isBasedOnStyle ns element parentStyle
   , Just styleType <- findAttrByName ns "w" "type" element
   , styleType == cStyleType parentStyle
   , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
-                       findAttrTextByName ns "w" "val"
+                       findAttrByName ns "w" "val"
   , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
   | isElem ns "w" "style" element
   , Just styleType <- findAttrByName ns "w" "type" element
@@ -165,7 +170,7 @@ isBasedOnStyle ns element parentStyle
   | otherwise = False
 
 class HasStyleId a => ElemToStyle a where
-  cStyleType  :: Maybe a -> String
+  cStyleType  :: Maybe a -> Text
   elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
 
 class FromStyleId (StyleId a) => HasStyleId a where
@@ -222,8 +227,10 @@ buildBasedOnList ns element rootStyle =
     stys -> stys ++
             concatMap (buildBasedOnList ns element . Just) stys
 
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+stringToInteger :: Text -> Maybe Integer
+stringToInteger s = case Data.Text.Read.decimal s of
+                      Right (x,_) -> Just x
+                      Left _      -> Nothing
 
 checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
 checkOnOff ns rPr tag
@@ -243,7 +250,7 @@ checkOnOff _ _ _ = Nothing
 elemToCharStyle :: NameSpaces
                 -> Element -> Maybe CharStyle -> Maybe CharStyle
 elemToCharStyle ns element parentStyle
-  = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
+  = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
               <*> getElementStyleName ns element
               <*> Just (elemToRunStyle ns element parentStyle)
 
@@ -277,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle
 getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
 getHeaderLevel ns element
   | Just styleName <- getElementStyleName ns element
-  , Just n <- stringToInteger . T.unpack =<<
+  , Just n <- stringToInteger =<<
               (T.stripPrefix "heading " . T.toLower $
                 fromStyleName styleName)
   , n > 0 = Just (styleName, fromInteger n)
@@ -285,8 +292,8 @@ getHeaderLevel _ _ = Nothing
 
 getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
 getElementStyleName ns el = coerce <$>
-  ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
-  <|> findAttrTextByName ns "w" "styleId" el)
+  ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+  <|> findAttrByName ns "w" "styleId" el)
 
 getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
 getNumInfo ns element = do
@@ -294,15 +301,15 @@ getNumInfo ns element = do
               findChildByName ns "w" "numPr"
       lvl = fromMaybe "0" (numPr >>=
                            findChildByName ns "w" "ilvl" >>=
-                           findAttrTextByName ns "w" "val")
+                           findAttrByName ns "w" "val")
   numId <- numPr >>=
            findChildByName ns "w" "numId" >>=
-           findAttrTextByName ns "w" "val"
+           findAttrByName ns "w" "val"
   return (numId, lvl)
 
 elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
 elemToParStyleData ns element parentStyle
-  | Just styleId <- findAttrTextByName ns "w" "styleId" element
+  | Just styleId <- findAttrByName ns "w" "styleId" element
   , Just styleName <- getElementStyleName ns element
   = Just $ ParStyle
       {
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index a573344ff..970697a2d 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,7 +1,8 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Docx.StyleMaps
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
@@ -18,51 +19,52 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , elemToNameSpaces
                                       , findChildByName
                                       , findChildrenByName
-                                      , findAttrText
+                                      , findElementByName
                                       , findAttrByName
-                                      , findAttrTextByName
                                       ) where
 
-import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
-import Text.XML.Light
+import Data.Text (Text)
+import Text.Pandoc.XML.Light
+import qualified Data.Map as M
 
-type NameSpaces = [(String, String)]
+type NameSpaces = M.Map Text Text
 
 elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
+elemToNameSpaces = foldr (\(Attr qn val) ->
+                             case qn of
+                               QName s _ (Just "xmlns") -> M.insert s val
+                               _ -> id) mempty . elAttribs
 
-attrToNSPair :: Attr -> Maybe (String, String)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _                                     = Nothing
-
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (M.lookup prefix ns)
+             (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
-  let ns' = ns ++ elemToNameSpaces element
+  let ns' = ns <> elemToNameSpaces element
   in qName (elName element) == name &&
-     qURI (elName element) == lookup prefix ns'
+     qURI (elName element) == M.lookup prefix ns'
 
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
 findChildByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findChild (elemName ns' pref name) el
 
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
 findChildrenByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText x = fmap T.pack . findAttr x
+-- | Like 'findChildrenByName', but searches descendants.
+findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
+findElementByName ns pref name el =
+  let ns' = ns <> elemToNameSpaces el
+  in  findElement (elemName ns' pref name) el
 
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findAttr (elemName ns' pref name) el
 
-findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
-findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index 336be09e5..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
 
 -- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readDokuWiki opts s = do
-  let input = crFilter s
-  res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+  let sources = toSources s
+  res <- runParserT parseDokuWiki def {stateOptions = opts }
+           (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
 
 -- * Utility functions
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 nested :: PandocMonad m => DWParser m a -> DWParser m a
@@ -317,7 +318,7 @@ interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page
 interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page
 interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page
 interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page
-interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky"
+interwikiToUrl unknown page = unknown <> ">" <> page
 
 linkText :: PandocMonad m => DWParser m B.Inlines
 linkText = parseLink fromRaw "[[" "]]"
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 5e3326e6d..eb8d2405d 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -17,14 +17,14 @@ module Text.Pandoc.Readers.EPUB
   (readEPUB)
   where
 
-import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
+import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                           toArchiveOrFail)
 import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
 import Control.Monad.Except (throwError)
 import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf)
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Map as M (Map, elems, fromList, lookup)
 import Data.Maybe (mapMaybe)
 import qualified Data.Text.Lazy as TL
@@ -40,12 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
-import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow)
+import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
 
-type Items = M.Map String (FilePath, MimeType)
+type Items = M.Map Text (FilePath, MimeType)
 
 readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
 readEPUB opts bytes = case toArchiveOrFail bytes of
@@ -125,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
 imageMimes :: [MimeType]
 imageMimes = ["image/gif", "image/jpeg", "image/png"]
 
-type CoverId = String
+type CoverId = Text
 
 type CoverImage = FilePath
 
-parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
+parseManifest :: (PandocMonad m)
+              => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
 parseManifest content coverId = do
   manifest <- findElementE (dfName "manifest") content
   let items = findChildren (dfName "item") manifest
   r <- mapM parseItem items
   let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
-  return (cover `mplus` coverId, M.fromList r)
+  return (T.unpack <$> (cover `mplus` coverId), M.fromList r)
   where
-    findCover e = maybe False (isInfixOf "cover-image")
+    findCover e = maybe False (T.isInfixOf "cover-image")
                   (findAttr (emptyName "properties") e)
                || Just True == liftM2 (==) coverId (findAttr (emptyName "id") e)
     parseItem e = do
       uid <- findAttrE (emptyName "id") e
       href <- findAttrE (emptyName "href") e
       mime <- findAttrE (emptyName "media-type") e
-      return (uid, (href, T.pack mime))
+      return (uid, (T.unpack href, mime))
 
 parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
 parseSpine is e = do
@@ -172,25 +173,25 @@ parseMeta content = do
 -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
 parseMetaItem :: Element -> Meta -> Meta
 parseMetaItem e@(stripNamespace . elName -> field) meta =
-  addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
+  addMetaField (renameMeta field) (B.str $ strContent e) meta
 
-renameMeta :: String -> T.Text
+renameMeta :: Text -> Text
 renameMeta "creator" = "author"
-renameMeta s         = T.pack s
+renameMeta s         = s
 
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
   metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
-  docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+  docElem <- parseXMLDocE metaEntry
   let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
   ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
   as <- fmap (map attrToPair . elAttribs)
     (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
-  manifestFile <- mkE "Root not found" (lookup "full-path" as)
+  manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
   let rootdir = dropFileName manifestFile
   --mime <- lookup "media-type" as
   manifest <- findEntryByPathE manifestFile archive
-  (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+  (rootdir,) <$> parseXMLDocE manifest
 
 -- Fixup
 
@@ -200,7 +201,8 @@ fixInternalReferences pathToFile =
   . walk (fixBlockIRs filename)
   . walk (fixInlineIRs filename)
   where
-    (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
+    (root, T.unpack . escapeURI . T.pack -> filename) =
+      splitFileName pathToFile
 
 fixInlineIRs :: String -> Inline -> Inline
 fixInlineIRs s (Span as v) =
@@ -213,7 +215,7 @@ fixInlineIRs s (Link as is t) =
   Link (fixAttrs s as) is t
 fixInlineIRs _ v = v
 
-prependHash :: [T.Text] -> Inline -> Inline
+prependHash :: [Text] -> Inline -> Inline
 prependHash ps l@(Link attr is (url, tit))
   | or [s `T.isPrefixOf` url | s <- ps] =
     Link attr is ("#" <> url, tit)
@@ -230,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) =
 fixBlockIRs _ b = b
 
 fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) =
+  (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
 
-addHash :: String -> T.Text -> T.Text
+addHash :: FilePath -> Text -> Text
 addHash _ ""    = ""
 addHash s ident = T.pack (takeFileName s) <> "#" <> ident
 
-removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
 removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
 
-isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr :: (Text, a) -> Bool
 isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
 
 -- Library
@@ -256,39 +259,44 @@ uncurry3 f (a, b, c) = f a b c
 
 -- Utility
 
-stripNamespace :: QName -> String
+stripNamespace :: QName -> Text
 stripNamespace (QName v _ _) = v
 
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
 attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
 attrToNSPair _                              = Nothing
 
-attrToPair :: Attr -> (String, String)
+attrToPair :: Attr -> (Text, Text)
 attrToPair (Attr (QName name _ _) val) = (name, val)
 
-defaultNameSpace :: Maybe String
+defaultNameSpace :: Maybe Text
 defaultNameSpace = Just "http://www.idpf.org/2007/opf"
 
-dfName :: String -> QName
+dfName :: Text -> QName
 dfName s = QName s defaultNameSpace Nothing
 
-emptyName :: String -> QName
+emptyName :: Text -> QName
 emptyName s = QName s Nothing Nothing
 
 -- Convert Maybe interface to Either
 
-findAttrE :: PandocMonad m => QName -> Element -> m String
+findAttrE :: PandocMonad m => QName -> Element -> m Text
 findAttrE q e = mkE "findAttr" $ findAttr q e
 
 findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
-  mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+  mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a
 
-parseXMLDocE :: PandocMonad m => String -> m Element
-parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+parseXMLDocE :: PandocMonad m => Entry -> m Element
+parseXMLDocE entry =
+  either (throwError . PandocXMLError fp) return $ parseXMLElement doc
+ where
+  doc = UTF8.toTextLazy . fromEntry $ entry
+  fp  = T.pack $ eRelativePath entry
 
 findElementE :: PandocMonad m => QName -> Element -> m Element
-findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+findElementE e x =
+  mkE ("Unable to find element: " <> tshow e) $ findElement e x
 
-mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ T.pack s) return
+mkE :: PandocMonad m => Text -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b0d2f092b..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -25,13 +25,13 @@ TODO:
 module Text.Pandoc.Readers.FB2 ( readFB2 ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
-import Data.ByteString.Lazy.Char8 ( pack )
 import Data.ByteString.Base64.Lazy
 import Data.Functor
 import Data.List (intersperse)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Data.Default
 import Data.Maybe
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -40,8 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type FB2 m = StateT FB2State m
 
@@ -62,12 +63,15 @@ instance HasMeta FB2State where
   setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
   deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
 
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
 readFB2 _ inp =
-  case parseXMLDoc $ crFilter inp of
-    Nothing -> throwError $ PandocParseError "Not an XML document"
-    Just e ->  do
-      (bs, st) <- runStateT (parseRootElement e) def
+  case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
+    Left msg -> throwError $ PandocXMLError "" msg
+    Right el ->  do
+      (bs, st) <- runStateT (parseRootElement el) def
       let authors = if null $ fb2Authors st
                     then id
                     else setMeta "author" (map text $ reverse $ fb2Authors st)
@@ -83,12 +87,12 @@ removeHash t = case T.uncons t of
   Just ('#', xs) -> xs
   _              -> t
 
-convertEntity :: String -> Text
-convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e)
 
 parseInline :: PandocMonad m => Content -> FB2 m Inlines
 parseInline (Elem e) =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -96,12 +100,12 @@ parseInline (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement name
       pure mempty
-parseInline (Text x) = pure $ text $ T.pack $ cdData x
+parseInline (Text x) = pure $ text $ cdData x
 parseInline (CRef r) = pure $ str $ convertEntity r
 
 parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -111,7 +115,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
 
 parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
 parseRootElement e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "FictionBook" -> do
       -- Parse notes before parsing the rest of the content.
       case filterChild isNotesBody e of
@@ -144,7 +148,7 @@ parseNote e =
     Just sectionId -> do
       content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
       oldNotes <- gets fb2Notes
-      modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
+      modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes }
       pure ()
   where
     isTitle x = qName (elName x) == "title"
@@ -156,7 +160,7 @@ parseNote e =
 -- | Parse a child of @\<FictionBook>@ element.
 parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
 parseFictionBookChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "stylesheet" -> pure mempty -- stylesheet is ignored
     "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
     "body" -> if isNotesBody e
@@ -168,7 +172,7 @@ parseFictionBookChild e =
 -- | Parse a child of @\<description>@ element.
 parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
 parseDescriptionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title-info" -> mapM_ parseTitleInfoChild (elChildren e)
     "src-title-info" -> pure () -- ignore
     "document-info" -> pure ()
@@ -182,7 +186,7 @@ parseDescriptionChild e =
 -- | Parse a child of @\<body>@ element.
 parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
 parseBodyChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "image" -> parseImageElement e
     "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
     "epigraph" -> parseEpigraph e
@@ -196,7 +200,10 @@ parseBinaryElement e =
     (Nothing, _) -> report $ IgnoredElement "binary without id attribute"
     (Just _, Nothing) ->
       report $ IgnoredElement "binary without content-type attribute"
-    (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
+    (Just filename, contentType) ->
+      insertMedia (T.unpack filename) contentType
+                    (decodeLenient
+                      (UTF8.fromTextLazy . TL.fromStrict . strContent $ e))
 
 -- * Type parsers
 
@@ -206,13 +213,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
 
 parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
 parseAuthorChild e =
-  case T.pack $ qName $ elName e of
-    "first-name" -> pure $ Just $ T.pack $ strContent e
-    "middle-name" -> pure $ Just $ T.pack $ strContent e
-    "last-name" -> pure $ Just $ T.pack $ strContent e
-    "nickname" -> pure $ Just $ T.pack $ strContent e
-    "home-page" -> pure $ Just $ T.pack $ strContent e
-    "email" -> pure $ Just $ T.pack $ strContent e
+  case qName $ elName e of
+    "first-name" -> pure $ Just $ strContent e
+    "middle-name" -> pure $ Just $ strContent e
+    "last-name" -> pure $ Just $ strContent e
+    "nickname" -> pure $ Just $ strContent e
+    "home-page" -> pure $ Just $ strContent e
+    "email" -> pure $ Just $ strContent e
     name -> do
       report $ IgnoredElement $ name <> " in author"
       pure Nothing
@@ -236,13 +243,13 @@ parseTitleContent _ = pure Nothing
 parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
 parseImageElement e =
   case href of
-    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
+    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
     Nothing -> do
       report $ IgnoredElement " image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        title = maybe "" T.pack $ findAttr (unqual "title") e
-        imgId = maybe "" T.pack $ findAttr (unqual "id") e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        title = fromMaybe "" $ findAttr (unqual "title") e
+        imgId = fromMaybe "" $ findAttr (unqual "id") e
         href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
 
 -- | Parse @pType@
@@ -256,7 +263,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
 -- | Parse @citeType@ child
 parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
 parseCiteChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "empty-line" -> pure horizontalRule
@@ -271,13 +278,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
 
 parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
 parsePoemChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "epigraph" -> parseEpigraph e
     "stanza" -> parseStanza e
     "text-author" -> para <$> parsePType e
-    "date" -> pure $ para $ text $ T.pack $ strContent e
+    "date" -> pure $ para $ text $ strContent e
     name -> report (UnexpectedXmlElement name "poem") $> mempty
 
 parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -290,7 +297,7 @@ joinLineBlocks [] = []
 
 parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
 parseStanzaChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "v" -> lineBlock . (:[]) <$> parsePType e
@@ -300,11 +307,11 @@ parseStanzaChild e =
 parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraph e =
   divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
-  where divId = maybe "" T.pack $ findAttr (unqual "id") e
+  where divId = fromMaybe "" $ findAttr (unqual "id") e
 
 parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraphChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -318,7 +325,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
 
 parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
 parseAnnotationChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -332,14 +339,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
 parseSection e = do
   n <- gets fb2SectionLevel
   modify $ \st -> st{ fb2SectionLevel = n + 1 }
-  let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
+  let sectionId = fromMaybe "" $ findAttr (unqual "id") e
   bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
   modify $ \st -> st{ fb2SectionLevel = n }
   pure bs
 
 parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
 parseSectionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseBodyChild e
     "epigraph" -> parseEpigraph e
     "image" -> parseImageElement e
@@ -361,16 +368,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
 parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
 parseNamedStyle e = do
   content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
-  let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
+  let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
   case findAttr (unqual "name") e of
-    Just name -> pure $ spanWith ("", [T.pack name], lang) content
+    Just name -> pure $ spanWith ("", [name], lang) content
     Nothing -> do
       report $ IgnoredElement "link without required name"
       pure mempty
 
 parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
 parseNamedStyleChild (Elem e) =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -378,7 +385,7 @@ parseNamedStyleChild (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement $ name <> " in style"
@@ -390,7 +397,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
 parseLinkType e = do
   content <- mconcat <$> mapM parseStyleLinkType (elContent e)
   notes <- gets fb2Notes
-  case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+  case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
     Just href -> case findAttr (QName "type" Nothing Nothing) e of
                    Just "note" -> case M.lookup href notes of
                                     Nothing -> pure $ link href "" content
@@ -417,15 +424,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
 -- | Parse @title-infoType@
 parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
 parseTitleInfoChild e =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "genre" -> pure ()
     "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
-    "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
+    "book-title" -> modify (setMeta "title" (text $ strContent e))
     "annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
     "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
-                                                                      $ T.pack
                                                                       $ strContent e))
-    "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
+    "date" -> modify (setMeta "date" (text $ strContent e))
     "coverpage" -> parseCoverPage e
     "lang" -> pure ()
     "src-lang" -> pure ()
@@ -439,7 +445,7 @@ parseCoverPage e =
     Just img -> case href of
                   Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
                   Nothing -> pure ()
-                where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+                where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
     Nothing -> pure ()
 
 -- | Parse @inlineImageType@ element
@@ -452,5 +458,5 @@ parseInlineImageElement e =
     Nothing -> do
       report $ IgnoredElement "inline image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index eb78979a3..fdf4f28e0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -19,21 +19,20 @@ module Text.Pandoc.Readers.HTML ( readHtml
                                 , htmlInBalanced
                                 , isInlineTag
                                 , isBlockTag
-                                , NamedTag(..)
                                 , isTextTag
                                 , isCommentTag
                                 ) where
 
 import Control.Applicative ((<|>))
-import Control.Arrow (first)
 import Control.Monad (guard, msum, mzero, unless, void)
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAlphaNum, isLetter)
 import Data.Default (Default (..), def)
 import Data.Foldable (for_)
 import Data.List.Split (splitWhen)
+import Data.List (foldl')
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Monoid (First (..))
@@ -62,21 +61,22 @@ import Text.Pandoc.Options (
     ReaderOptions (readerExtensions, readerStripComments),
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
-import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
-                           extractSpaces, htmlSpanLikeElements, safeRead, tshow)
+import Text.Pandoc.Shared (
+    addMetaField, blocksToInlines', escapeURI, extractSpaces,
+    htmlSpanLikeElements, renderTags', safeRead, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
          => ReaderOptions -- ^ Reader options
-         -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+         -> a             -- ^ Input to parse
          -> m Pandoc
 readHtml opts inp = do
-  let tags = stripPrefixes . canonicalizeTags $
+  let tags = stripPrefixes $ canonicalizeTags $
              parseTagsOptions parseOptions{ optTagPosition = True }
-             (crFilter inp)
+             (sourcesToText $ toSources inp)
       parseDoc = do
         blocks <- fixPlains False . mconcat <$> manyTill block eof
         meta <- stateMeta . parserState <$> getState
@@ -95,6 +95,15 @@ readHtml opts inp = do
     Right doc -> return doc
     Left  err -> throwError $ PandocParseError $ T.pack $ getError err
 
+-- Strip namespace prefixes on tags (not attributes)
+stripPrefixes :: [Tag Text] -> [Tag Text]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag Text -> Tag Text
+stripPrefix (TagOpen s as) = TagOpen (T.takeWhileEnd (/=':') s) as
+stripPrefix (TagClose s)   = TagClose (T.takeWhileEnd (/=':') s)
+stripPrefix x = x
+
 replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
 replaceNotes bs = do
   st <- getState
@@ -112,14 +121,18 @@ setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
 setInPlain = local (\s -> s {inPlain = True})
 
 pHtml :: PandocMonad m => TagParser m Blocks
-pHtml = try $ do
+pHtml = do
   (TagOpen "html" attr) <- lookAhead pAny
-  for_ (lookup "lang" attr) $
+  for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
     updateState . B.setMeta "lang" . B.text
   pInTags "html" block
 
 pBody :: PandocMonad m => TagParser m Blocks
-pBody = pInTags "body" block
+pBody = do
+  (TagOpen "body" attr) <- lookAhead pAny
+  for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
+    updateState . B.setMeta "lang" . B.text
+  pInTags "body" block
 
 pHead :: PandocMonad m => TagParser m Blocks
 pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
@@ -145,32 +158,65 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
           return mempty
 
 block :: PandocMonad m => TagParser m Blocks
-block = do
-  res <- choice
-            [ eSection
-            , eSwitch B.para block
-            , mempty <$ eFootnote
-            , mempty <$ eTOC
-            , mempty <$ eTitlePage
-            , pPara
-            , pHeader
-            , pBlockQuote
-            , pCodeBlock
-            , pList
-            , pHrule
-            , pTable block
-            , pHtml
-            , pHead
-            , pBody
-            , pLineBlock
-            , pDiv
-            , pPlain
-            , pFigure
-            , pIframe
-            , pRawHtmlBlock
-            ]
-  trace (T.take 60 $ tshow $ B.toList res)
-  return res
+block = ((do
+  tag <- lookAhead (pSatisfy isBlockTag)
+  exts <- getOption readerExtensions
+  case tag of
+    TagOpen name attr ->
+      let type' = fromMaybe "" $
+                     lookup "type" attr <|> lookup "epub:type" attr
+          epubExts = extensionEnabled Ext_epub_html_exts exts
+      in
+      case name of
+        _ | name `elem` sectioningContent
+          , epubExts
+          , "chapter" `T.isInfixOf` type'
+          -> eSection
+        _ | epubExts
+          , type' `elem` ["footnote", "rearnote"]
+          -> mempty <$ eFootnote
+        _ | epubExts
+          , type' == "toc"
+          -> mempty <$ eTOC
+        _ | "titlepage" `T.isInfixOf` type'
+          , name `elem` ("section" : groupingContent)
+          -> mempty <$ eTitlePage
+        "p" -> pPara
+        "h1" -> pHeader
+        "h2" -> pHeader
+        "h3" -> pHeader
+        "h4" -> pHeader
+        "h5" -> pHeader
+        "h6" -> pHeader
+        "blockquote" -> pBlockQuote
+        "pre" -> pCodeBlock
+        "ul" -> pBulletList
+        "ol" -> pOrderedList
+        "dl" -> pDefinitionList
+        "table" -> pTable block
+        "hr" -> pHrule
+        "html" -> pHtml
+        "head" -> pHead
+        "body" -> pBody
+        "div"
+          | extensionEnabled Ext_line_blocks exts
+          , Just "line-block" <- lookup "class" attr
+          -> pLineBlock
+          | otherwise
+          -> pDiv
+        "section" -> pDiv
+        "header" -> pDiv
+        "main" -> pDiv
+        "figure" -> pFigure
+        "iframe" -> pIframe
+        "style" -> pRawHtmlBlock
+        "textarea" -> pRawHtmlBlock
+        "switch"
+          | epubExts
+          -> eSwitch B.para block
+        _ -> mzero
+    _ -> mzero) <|> pPlain <|> pRawHtmlBlock) >>= \res ->
+        res <$ trace (T.take 60 $ tshow $ B.toList res)
 
 namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
 namespaces = [(mathMLNamespace, pMath True)]
@@ -243,9 +289,6 @@ eTOC = try $ do
   guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc"
   void (pInTags tag block)
 
-pList :: PandocMonad m => TagParser m Blocks
-pList = pBulletList <|> pOrderedList <|> pDefinitionList
-
 pBulletList :: PandocMonad m => TagParser m Blocks
 pBulletList = try $ do
   pSatisfy (matchTagOpen "ul" [])
@@ -319,7 +362,10 @@ pDefListItem = try $ do
   terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
   defs  <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
   skipMany nonItem
-  let term = foldl1 (\x y -> x <> B.linebreak <> y) $ map trimInlines terms
+  let term = foldl' (\x y -> if null x
+                                then trimInlines y
+                                else x <> B.linebreak <> trimInlines y)
+                    mempty terms
   return (term, map (fixPlains True) defs)
 
 fixPlains :: Bool -> Blocks -> Blocks
@@ -356,13 +402,16 @@ pLineBlock = try $ do
             B.toList ils
   return $ B.lineBlock lns
 
+isDivLike :: Text -> Bool
+isDivLike "div"     = True
+isDivLike "section" = True
+isDivLike "header"  = True
+isDivLike "main"    = True
+isDivLike _         = False
+
 pDiv :: PandocMonad m => TagParser m Blocks
 pDiv = try $ do
   guardEnabled Ext_native_divs
-  let isDivLike "div"     = True
-      isDivLike "section" = True
-      isDivLike "main"    = True
-      isDivLike _         = False
   TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
   let (ident, classes, kvs) = toAttr attr'
   contents <- pInTags tag block
@@ -380,11 +429,17 @@ pIframe = try $ do
   tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
   pCloses "iframe" <|> eof
   url <- canonicalizeUrl $ fromAttrib "src" tag
-  (bs, _) <- openURL url
-  let inp = UTF8.toText bs
-  opts <- readerOpts <$> getState
-  Pandoc _ contents <- readHtml opts inp
-  return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+  if T.null url
+     then ignore $ renderTags' [tag, TagClose "iframe"]
+     else catchError
+       (do (bs, _) <- openURL url
+           let inp = UTF8.toText bs
+           opts <- readerOpts <$> getState
+           Pandoc _ contents <- readHtml opts inp
+           return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
+       (\e -> do
+         logMessage $ CouldNotFetchResource url (renderError e)
+         ignore $ renderTags' [tag, TagClose "iframe"])
 
 pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
 pRawHtmlBlock = do
@@ -446,17 +501,13 @@ pHeader = try $ do
                            tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
                            (const True)
   let attr = toStringAttr attr'
-  let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text)
-                                               [("class","title")]
   level <- headerLevel tagtype
   contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
   let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] T.words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   attr'' <- registerHeader (ident, classes, keyvals) contents
-  return $ if bodyTitle
-              then mempty  -- skip a representation of the title in the body
-              else B.headerWith attr'' level contents
+  return $ B.headerWith attr'' level contents
 
 pHrule :: PandocMonad m => TagParser m Blocks
 pHrule = do
@@ -506,7 +557,18 @@ pFigure = try $ do
 pCodeBlock :: PandocMonad m => TagParser m Blocks
 pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
-  let attr = toAttr attr'
+  -- if the `pre` has no attributes, try if it is followed by a `code`
+  -- element and use those attributes if possible.
+  attr <- case attr' of
+    _:_ -> pure (toAttr attr')
+    []  -> option nullAttr $ do
+      TagOpen _ codeAttr <- pSatisfy (matchTagOpen "code" [])
+      pure $ toAttr
+        [ (k, v') | (k, v) <- codeAttr
+                    -- strip language from class
+                  , let v' = if k == "class"
+                             then fromMaybe v (T.stripPrefix "language-" v)
+                             else v ]
   contents <- manyTill pAny (pCloses "pre" <|> eof)
   let rawText = T.concat $ map tagToText contents
   -- drop leading newline if any
@@ -525,31 +587,47 @@ tagToText (TagOpen "br" _) = "\n"
 tagToText _                = ""
 
 inline :: PandocMonad m => TagParser m Inlines
-inline = choice
-           [ eNoteref
-           , eSwitch id inline
-           , pTagText
-           , pQ
-           , pEmph
-           , pStrong
-           , pSuperscript
-           , pSubscript
-           , pSpanLike
-           , pSmall
-           , pStrikeout
-           , pUnderline
-           , pLineBreak
-           , pLink
-           , pImage
-           , pSvg
-           , pBdo
-           , pCode
-           , pCodeWithClass [("samp","sample"),("var","variable")]
-           , pSpan
-           , pMath False
-           , pScriptMath
-           , pRawHtmlInline
-           ]
+inline = pTagText <|> do
+  tag <- lookAhead (pSatisfy isInlineTag)
+  exts <- getOption readerExtensions
+  case tag of
+    TagOpen name attr ->
+      case name of
+        "a" | extensionEnabled Ext_epub_html_exts exts
+          , Just "noteref" <- lookup "type" attr <|> lookup "epub:type" attr
+          , Just ('#',_) <- lookup "href" attr >>= T.uncons
+            -> eNoteref
+            | otherwise -> pLink
+        "switch" -> eSwitch id inline
+        "q" -> pQ
+        "em" -> pEmph
+        "i"  -> pEmph
+        "strong" -> pStrong
+        "b" -> pStrong
+        "sup" -> pSuperscript
+        "sub" -> pSubscript
+        "small" -> pSmall
+        "s" -> pStrikeout
+        "strike" -> pStrikeout
+        "del" -> pStrikeout
+        "u" -> pUnderline
+        "ins" -> pUnderline
+        "br" -> pLineBreak
+        "img" -> pImage
+        "svg" -> pSvg
+        "bdo" -> pBdo
+        "code" -> pCode
+        "samp" -> pCodeWithClass "samp" "sample"
+        "var" -> pCodeWithClass "var" "variable"
+        "span" -> pSpan
+        "math" -> pMath False
+        "script"
+          | Just x <- lookup "type" attr
+          , "math/tex" `T.isPrefixOf` x -> pScriptMath
+        _ | name `elem` htmlSpanLikeElements -> pSpanLike
+        _ -> pRawHtmlInline
+    TagText _ -> pTagText
+    _ -> pRawHtmlInline
 
 pSelfClosing :: PandocMonad m
              => (Text -> Bool) -> ([Attribute Text] -> Bool)
@@ -560,27 +638,25 @@ pSelfClosing f g = do
   return open
 
 pQ :: PandocMonad m => TagParser m Inlines
-pQ = choice $ map try [citedQuote, normalQuote]
-  where citedQuote = do
-          tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
-
-          url <- canonicalizeUrl $ fromAttrib "cite" tag
-          let uid = fromMaybe (fromAttrib "name" tag) $
-                       maybeFromAttrib "id" tag
-          let cls = T.words $ fromAttrib "class" tag
-
-          makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
-        normalQuote = do
-          pSatisfy $ tagOpenLit "q" (const True)
-          makeQuote id
-        makeQuote wrapper = do
-          ctx <- asks quoteContext
-          let (constructor, innerContext) = case ctx of
-                        InDoubleQuote -> (B.singleQuoted, InSingleQuote)
-                        _             -> (B.doubleQuoted, InDoubleQuote)
-
-          content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q"))
-          return $ extractSpaces (constructor . wrapper) content
+pQ = do
+  TagOpen _ attrs <- pSatisfy $ tagOpenLit "q" (const True)
+  case lookup "cite" attrs of
+    Just url -> do
+      let uid = fromMaybe mempty $
+                   lookup "name" attrs <> lookup "id" attrs
+      let cls = maybe [] T.words $ lookup "class" attrs
+      url' <- canonicalizeUrl url
+      makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
+    Nothing -> makeQuote id
+ where
+  makeQuote wrapper = do
+    ctx <- asks quoteContext
+    let (constructor, innerContext) = case ctx of
+                  InDoubleQuote -> (B.singleQuoted, InSingleQuote)
+                  _             -> (B.doubleQuoted, InDoubleQuote)
+    content <- withQuoteContext innerContext
+                  (mconcat <$> manyTill inline (pCloses "q"))
+    return $ extractSpaces (constructor . wrapper) content
 
 pEmph :: PandocMonad m => TagParser m Inlines
 pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
@@ -646,17 +722,12 @@ pLink = try $ do
 
 pImage :: PandocMonad m => TagParser m Inlines
 pImage = do
-  tag <- pSelfClosing (=="img") (isJust . lookup "src")
+  tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src")
   url <- canonicalizeUrl $ fromAttrib "src" tag
   let title = fromAttrib "title" tag
   let alt = fromAttrib "alt" tag
-  let uid = fromAttrib "id" tag
-  let cls = T.words $ fromAttrib "class" tag
-  let getAtt k = case fromAttrib k tag of
-                   "" -> []
-                   v  -> [(k, v)]
-  let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
-  return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
+  let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr'
+  return $ B.imageWith attr (escapeURI url) title (B.text alt)
 
 pSvg :: PandocMonad m => TagParser m Inlines
 pSvg = do
@@ -671,13 +742,12 @@ pSvg = do
                    UTF8.toText (encode $ UTF8.fromText rawText)
   return $ B.imageWith (ident,cls,[]) svgData mempty mempty
 
-pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
-pCodeWithClass elemToClass = try $ do
-  let tagTest = flip elem . fmap fst $ elemToClass
-  TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
+pCodeWithClass :: PandocMonad m => Text -> Text -> TagParser m Inlines
+pCodeWithClass name class' = try $ do
+  TagOpen open attr' <- pSatisfy $ tagOpen (== name) (const True)
   result <- manyTill pAny (pCloses open)
   let (ids,cs,kvs) = toAttr attr'
-      cs'          = maybe cs (:cs) . lookup open $ elemToClass
+      cs'          = class' : cs
   return . B.codeWith (ids,cs',kvs) .
     T.unwords . T.lines . innerText $ result
 
@@ -764,17 +834,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
 
 pTagText :: PandocMonad m => TagParser m Inlines
 pTagText = try $ do
+  pos <- getPosition
   (TagText str) <- pSatisfy isTagText
   st <- getState
   qu <- ask
   parsed <- lift $ lift $
-            flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+            flip runReaderT qu $ runParserT (many pTagContents) st "text"
+               (Sources [(pos, str)])
   case parsed of
        Left _        -> throwError $ PandocParseError $
                         "Could not parse `" <> str <> "'"
        Right result  -> return $ mconcat result
 
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
 
 pTagContents :: PandocMonad m => InlinesParser m Inlines
 pTagContents =
@@ -868,27 +940,23 @@ pSpace = many1 (satisfy isSpace) >>= \xs ->
                then return B.softbreak
                else return B.space
 
-class NamedTag a where
-  getTagName :: a -> Maybe Text
-
-instance NamedTag (Tag Text) where
-  getTagName (TagOpen t _) = Just t
-  getTagName (TagClose t)  = Just t
-  getTagName _             = Nothing
-
-instance NamedTag (Tag String) where
-  getTagName (TagOpen t _) = Just (T.pack t)
-  getTagName (TagClose t)  = Just (T.pack t)
-  getTagName _             = Nothing
-
-isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
-isInlineTag t =
-  isCommentTag t || case getTagName t of
-                           Nothing  -> False
-                           Just x   -> x `Set.notMember` blockTags ||
-                                       T.take 1 x == "?" -- processing instr.
-
-isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
+getTagName :: Tag Text -> Maybe Text
+getTagName (TagOpen t _) = Just t
+getTagName (TagClose t)  = Just t
+getTagName _             = Nothing
+
+isInlineTag :: Tag Text -> Bool
+isInlineTag t = isCommentTag t || case t of
+  TagOpen "script" _ -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+  TagClose "script"  -> True
+  TagOpen name _     -> isInlineTagName name
+  TagClose name      -> isInlineTagName name
+  _                  -> False
+ where isInlineTagName x =
+         x `Set.notMember` blockTags ||
+         T.take 1 x == "?" -- processing instr.
+
+isBlockTag :: Tag Text -> Bool
 isBlockTag t = isBlockTagName || isTagComment t
                  where isBlockTagName =
                          case getTagName t of
@@ -899,10 +967,10 @@ isBlockTag t = isBlockTagName || isTagComment t
                                     || x `Set.member` eitherBlockOrInline
                               Nothing -> False
 
-isTextTag :: Tag a -> Bool
+isTextTag :: Tag Text -> Bool
 isTextTag = tagText (const True)
 
-isCommentTag :: Tag a -> Bool
+isCommentTag :: Tag Text -> Bool
 isCommentTag = tagComment (const True)
 
 --- parsers for use in markdown, textile readers
@@ -910,13 +978,14 @@ isCommentTag = tagComment (const True)
 -- | Matches a stretch of HTML in balanced tags.
 htmlInBalanced :: Monad m
                => (Tag Text -> Bool)
-               -> ParserT Text st m Text
+               -> ParserT Sources st m Text
 htmlInBalanced f = try $ do
   lookAhead (char '<')
-  inp <- getInput
-  let ts = canonicalizeTags $
-        parseTagsOptions parseOptions{ optTagWarning = True,
-                                       optTagPosition = True } inp
+  sources <- getInput
+  let ts = canonicalizeTags
+        $ parseTagsOptions parseOptions{ optTagWarning = True,
+                                         optTagPosition = True }
+        $ sourcesToText sources
   case ts of
     (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
        guard $ f t
@@ -951,22 +1020,24 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
         go n (t:ts') = (t :) <$> go n ts'
         go _ [] = mzero
 
-hasTagWarning :: [Tag a] -> Bool
+hasTagWarning :: [Tag Text] -> Bool
 hasTagWarning (TagWarning _:_) = True
 hasTagWarning _                = False
 
 -- | Matches a tag meeting a certain condition.
 htmlTag :: (HasReaderOptions st, Monad m)
         => (Tag Text -> Bool)
-        -> ParserT Text st m (Tag Text, Text)
+        -> ParserT Sources st m (Tag Text, Text)
 htmlTag f = try $ do
   lookAhead (char '<')
   startpos <- getPosition
-  inp <- getInput
+  sources <- getInput
+  let inp = sourcesToText sources
   let ts = canonicalizeTags $ parseTagsOptions
                                parseOptions{ optTagWarning = False
                                            , optTagPosition = True }
-                               (inp <> " ") -- add space to ensure that
+                               (inp <> " ")
+                               -- add space to ensure that
                                -- we get a TagPosition after the tag
   (next, ln, col) <- case ts of
                       (TagPosition{} : next : TagPosition ln col : _)
@@ -1024,21 +1095,6 @@ htmlTag f = try $ do
          handleTag tagname
        _ -> mzero
 
--- Strip namespace prefixes
-stripPrefixes :: [Tag Text] -> [Tag Text]
-stripPrefixes = map stripPrefix
-
-stripPrefix :: Tag Text -> Tag Text
-stripPrefix (TagOpen s as) =
-    TagOpen (stripPrefix' s) (map (first stripPrefix') as)
-stripPrefix (TagClose s) = TagClose (stripPrefix' s)
-stripPrefix x = x
-
-stripPrefix' :: Text -> Text
-stripPrefix' s =
-  if T.null t then s else T.drop 1 t
-  where (_, t) = T.span (/= ':') s
-
 -- Utilities
 
 -- | Adjusts a url according to the document's base URL.
@@ -1048,26 +1104,3 @@ canonicalizeUrl url = do
   return $ case (parseURIReference (T.unpack url), mbBaseHref) of
                 (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
                 _                   -> url
-
--- For now we need a special version here; the one in Shared has String type
-renderTags' :: [Tag Text] -> Text
-renderTags' = renderTagsOptions
-               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
-                                                       "meta", "link"]
-                            , optRawTag   = matchTags ["script", "style"] }
-              where matchTags tags = flip elem tags . T.toLower
-
-
--- EPUB Specific
---
---
-{-
-
-types :: [(String, ([String], Int))]
-types =  -- Document divisions
-   map (\s -> (s, (["section", "body"], 0)))
-    ["volume", "part", "chapter", "division"]
-  <> -- Document section and components
-  [
-    ("abstract",  ([], 0))]
--}
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index 2d58319da..bd8d7c96c 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -161,10 +161,12 @@ _ `closes` "html" = False
 "li" `closes` "li" = True
 "th" `closes` t | t `elem` ["th","td"] = True
 "td" `closes` t | t `elem` ["th","td"] = True
-"tr" `closes` t | t `elem` ["th","td","tr"] = True
+"tr" `closes` t | t `elem` ["th","td","tr","colgroup"] = True
 "dd" `closes` t | t `elem` ["dt", "dd"] = True
 "dt" `closes` t | t `elem` ["dt","dd"] = True
 "rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True
+"col" `closes` "col" = True
+"colgroup" `closes` "col" = True
 "optgroup" `closes` "optgroup" = True
 "optgroup" `closes` "option" = True
 "option" `closes` "option" = True
@@ -193,14 +195,20 @@ t1 `closes` t2 |
 _ `closes` _ = False
 
 toStringAttr :: [(Text, Text)] -> [(Text, Text)]
-toStringAttr = map go
+toStringAttr = foldr go []
   where
-   go (x,y) =
-     case T.stripPrefix "data-" x of
-       Just x' | x' `Set.notMember` (html5Attributes <>
-                                     html4Attributes <> rdfaAttributes)
-         -> (x',y)
-       _ -> (x,y)
+   go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
+   -- treat xml:lang as lang
+   go ("xml:lang",y) ats = go ("lang",y) ats
+   -- prevent duplicate attributes
+   go (x,y) ats
+     | any (\(x',_) -> x == x') ats = ats
+     | otherwise      =
+        case T.stripPrefix "data-" x of
+          Just x' | x' `Set.notMember` (html5Attributes <>
+                                        html4Attributes <> rdfaAttributes)
+            -> go (x',y) ats
+          _ -> (x,y):ats
 
 -- Unlike fromAttrib from tagsoup, this distinguishes
 -- between a missing attribute and an attribute with empty content.
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 5a783988f..6e62e12f5 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
-   Copyright   : © 2006-2020 John MacFarlane,
-                   2020 Albert Krewinkel
+   Copyright   : © 2006-2021 John MacFarlane,
+                   2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
@@ -17,6 +17,8 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where
 
 import Control.Applicative ((<|>))
 import Data.Maybe (fromMaybe)
+import Data.Either (lefts, rights)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import Text.HTML.TagSoup
 import Text.Pandoc.Builder (Blocks)
@@ -32,34 +34,51 @@ import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 
--- | Parses a @<col>@ element, returning the column's width. Defaults to
--- @'ColWidthDefault'@ if the width is not set or cannot be determined.
-pCol :: PandocMonad m => TagParser m ColWidth
+-- | Parses a @<col>@ element, returning the column's width.
+-- An Either value is used:  Left i means a "relative length" with
+-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6);
+-- Right w means a regular width.  Defaults to @'Right ColWidthDefault'@
+-- if the width is not set or cannot be determined.
+pCol :: PandocMonad m => TagParser m (Either Int ColWidth)
 pCol = try $ do
   TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
   let attribs = toStringAttr attribs'
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let width = case lookup "width" attribs of
+  return $ case lookup "width" attribs of
                 Nothing -> case lookup "style" attribs of
                   Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    fromMaybe 0.0 $ safeRead (T.filter
-                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
-                  _ -> 0.0
+                    maybe (Right ColWidthDefault) (Right . ColWidth . (/ 100.0))
+                      $ safeRead (T.filter
+                                   (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+                  _ -> Right ColWidthDefault
+                Just (T.unsnoc -> Just (xs, '*')) ->
+                  maybe (Left 1) Left $ safeRead xs
                 Just (T.unsnoc -> Just (xs, '%')) ->
-                  fromMaybe 0.0 $ safeRead xs
-                _ -> 0.0
-  if width > 0.0
-    then return $ ColWidth $ width / 100.0
-    else return ColWidthDefault
+                  maybe (Right ColWidthDefault)
+                        (Right . ColWidth . (/ 100.0)) $ safeRead xs
+                _ -> Right ColWidthDefault
 
-pColgroup :: PandocMonad m => TagParser m [ColWidth]
+pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth]
 pColgroup = try $ do
   pSatisfy (matchTagOpen "colgroup" [])
   skipMany pBlank
   manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
 
+resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
+resolveRelativeLengths ws =
+  let remaining = 1 - sum (map getColWidth $ rights ws)
+      relatives = sum $ lefts ws
+      relUnit = remaining / fromIntegral relatives
+      toColWidth (Right x) = x
+      toColWidth (Left i) = ColWidth (fromIntegral i * relUnit)
+  in  map toColWidth ws
+
+getColWidth :: ColWidth -> Double
+getColWidth ColWidthDefault = 0
+getColWidth (ColWidth w) = w
+
 data CellType
   = HeaderCell
   | BodyCell
@@ -181,7 +200,8 @@ pTable :: PandocMonad m
 pTable block = try $ do
   TagOpen _ attribs <- pSatisfy (matchTagOpen "table" [])  <* skipMany pBlank
   caption <- option mempty $ pInTags "caption" block       <* skipMany pBlank
-  widths  <- ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
+  widths <- resolveRelativeLengths <$>
+              ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
   thead   <- pTableHead block               <* skipMany pBlank
   topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank
   tbodies <- many (pTableBody block)        <* skipMany pBlank
@@ -214,8 +234,9 @@ normalize :: [ColWidth] -> TableHead -> [TableBody] -> TableFoot
           -> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
 normalize widths head' bodies foot = do
   let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot
-  let rowLength = length . rowCells
-  let ncols = maximum (map rowLength rows)
+  let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs
+  let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells
+  let ncols = maybe 0 maximum $ nonEmpty $ map rowLength rows
   let tblType = tableType (map rowCells rows)
   -- fail on empty table
   if null rows
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index 4f82a1831..b7bd40fee 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.TagCategories
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs
index a94eeb828..12c519ad6 100644
--- a/src/Text/Pandoc/Readers/HTML/Types.hs
+++ b/src/Text/Pandoc/Readers/HTML/Types.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Types
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 25d69f040..67b3af2d3 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -17,8 +17,9 @@ module Text.Pandoc.Readers.Haddock
 
 import Control.Monad.Except (throwError)
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
 import qualified Data.Text as T
 import Documentation.Haddock.Parser
 import Documentation.Haddock.Types as H
@@ -28,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
 
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
             => ReaderOptions
-            -> Text
+            -> a
             -> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+                           (unpack . sourcesToText . toSources $ s) of
   Right result -> return result
   Left e       -> throwError e
 
@@ -92,7 +95,7 @@ docHToBlocks d' =
                   then ([], map toCells bodyRows)
                   else (toCells (head headerRows),
                         map toCells (tail headerRows ++ bodyRows))
-             colspecs = replicate (maximum (map length body))
+             colspecs = replicate (maybe 0 maximum (nonEmpty (map length body)))
                              (AlignDefault, ColWidthDefault)
          in  B.table B.emptyCaption
                      colspecs
@@ -128,7 +131,8 @@ docHToInlines isCode d' =
           DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
           _               -> mempty
     DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
-    DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s
+    DocModule s -> B.codeWith ("",["haskell","module"],[]) $
+                   T.pack (modLinkName s)
     DocWarning _ -> mempty -- TODO
     DocEmphasis d -> B.emph (docHToInlines isCode d)
     DocMonospaced (DocString s) -> B.code $ T.pack s
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index a866e6ec3..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
-  let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+          => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+  let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
   case eitherDecode src of
     Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
     Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c638da519..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -14,7 +14,9 @@ Conversion of JATS XML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
-import Data.Char (isDigit, isSpace, toUpper)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(..))
+import Data.Char (isDigit, isSpace)
 import Data.Default
 import Data.Generics
 import Data.List (foldl', intersperse)
@@ -22,15 +24,17 @@ import qualified Data.Map as Map
 import Data.Maybe (maybeToList, fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
+import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type JATS m = StateT JATSState m
 
@@ -49,42 +53,28 @@ instance Default JATSState where
                  , jatsContent = [] }
 
 
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readJATS _ inp = do
-  let tree = normalizeTree . parseXML
-               $ T.unpack $ crFilter inp
+  let sources = toSources inp
+  tree <- either (throwError . PandocXMLError "") return $
+            parseXMLContents (TL.fromStrict . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr =
   fromMaybe "" . maybeAttrValue attr
 
-maybeAttrValue :: String -> Element -> Maybe Text
+maybeAttrValue :: Text -> Element -> Maybe Text
 maybeAttrValue attr elt =
-  T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+  lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
 
 -- convenience function
-named :: String -> Element -> Bool
+named :: Text -> Element -> Bool
 named s e = qName (elName e) == s
 
 --
@@ -150,10 +140,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> JATS m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "p" -> parseMixed para (elContent e)
@@ -202,7 +192,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ textContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -266,7 +256,7 @@ parseBlock (Elem e) =
                                                 Just "center" -> AlignCenter
                                                 _             -> AlignDefault
                       let toWidth c = do
-                            w <- findAttrText (unqual "colwidth") c
+                            w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
                             if n > 0 then Just n else Nothing
                       let numrows = foldl' max 0 $ map length bodyrows
@@ -437,16 +427,10 @@ parseRef e = do
        Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
        -- TODO handle mixed-citation
 
-findAttrText :: QName -> Element -> Maybe Text
-findAttrText x = fmap T.pack . findAttr x
-
 textContent :: Element -> Text
-textContent = T.pack . strContent
-
-textContentRecursive :: Element -> Text
-textContentRecursive = T.pack . strContentRecursive
+textContent = strContent
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -455,9 +439,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
-parseInline (CRef ref) =
-  return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) = return $ maybe (text $ T.toUpper ref) (text . T.pack)
+                                $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "italic" -> innerInlines emph
@@ -502,9 +486,9 @@ parseInline (Elem e) =
                         else linkWith attr ("#" <> rid) "" ils
         "ext-link" -> do
              ils <- innerInlines id
-             let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+             let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "rid" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, [], [])
@@ -512,7 +496,8 @@ parseInline (Elem e) =
 
         "disp-formula" -> formula displayMath
         "inline-formula" -> formula math
-        "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
+        "math" | qURI (elName e) == Just "http://www.w3.org/1998/Math/MathML"
+                   -> return . math $ mathML e
         "tex-math" -> return . math $ textContent e
 
         "email" -> return $ link ("mailto:" <> textContent e) ""
@@ -524,7 +509,7 @@ parseInline (Elem e) =
    where innerInlines f = extractSpaces f . mconcat <$>
                           mapM parseInline (elContent e)
          mathML x =
-            case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
+            case readMathML . showElement $ everywhere (mkT removePrefix) x of
                 Left _ -> mempty
                 Right m -> writeTeX m
          formula constructor = do
@@ -535,11 +520,12 @@ parseInline (Elem e) =
                             filterChildren isMathML whereToLook
             return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs
 
-         isMathML x = qName   (elName x) == "math" &&
-                      qPrefix (elName x) == Just "mml"
+         isMathML x = qName (elName x) == "math" &&
+                      qURI  (elName x) ==
+                                      Just "http://www.w3.org/1998/Math/MathML"
          removePrefix elname = elname { qPrefix = Nothing }
          codeWithLang = do
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 9266ce10d..cf111f173 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
 import Text.Pandoc.Error (PandocError (PandocParseError))
 import Text.Pandoc.Options (ReaderOptions)
 import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import qualified Text.Jira.Markup as Jira
 
 -- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
-readJira _opts s = case parse s of
-  Right d -> return $ jiraToPandoc d
-  Left e  -> throwError . PandocParseError $
-             "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+  let sources = toSources inp
+  case parse (sourcesToText sources) of
+    Right d -> return $ jiraToPandoc d
+    Left e  -> throwError . PandocParseError $
+               "Jira parse error" `append` pack (show e)
 
 jiraToPandoc :: Jira.Doc -> Pandoc
 jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
@@ -71,10 +73,10 @@ toPandocCodeBlocks langMay params txt =
                   Nothing                   -> []
   in codeBlockWith ("", classes, map paramToPair params) txt
 
--- | Create a pandoc @'Div'@
+-- | Create a pandoc @'Div'@ from a panel.
 toPandocDiv :: [Jira.Parameter] -> [Jira.Block] -> Blocks
 toPandocDiv params =
-  divWith ("", [], map paramToPair params) . foldMap jiraToPandocBlocks
+  divWith ("", ["panel"], map paramToPair params) . foldMap jiraToPandocBlocks
 
 paramToPair :: Jira.Parameter -> (Text, Text)
 paramToPair (Jira.Parameter key value) = (key, value)
@@ -170,6 +172,8 @@ jiraLinkToPandoc linkType alias url =
     Jira.Email      -> link ("mailto:" <> url') "" alias'
     Jira.Attachment -> linkWith ("", ["attachment"], []) url' "" alias'
     Jira.User       -> linkWith ("", ["user-account"], []) url' "" alias'
+    Jira.SmartCard  -> linkWith ("", ["smart-card"], []) url' "" alias'
+    Jira.SmartLink  -> linkWith ("", ["smart-link"], []) url' "" alias'
 
 -- | Get unicode representation of a Jira icon.
 iconUnicode :: Jira.Icon -> Text
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index cdccaa535..27c018e73 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,14 +1,10 @@
-{-# LANGUAGE BangPatterns          #-}
-{-# LANGUAGE CPP                   #-}
-{-# LANGUAGE FlexibleInstances     #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE PatternGuards         #-}
 {-# LANGUAGE ScopedTypeVariables   #-}
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -22,50 +18,58 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    applyMacros,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
-                                   inlineCommand,
-                                   tokenize,
-                                   untokenize
+                                   inlineCommand
                                  ) where
 
 import Control.Applicative (many, optional, (<|>))
 import Control.Monad
 import Control.Monad.Except (throwError)
-import Data.Char (isDigit, isLetter, toUpper, chr)
+import Data.Char (isDigit, isLetter, isAlphaNum, toUpper, chr)
 import Data.Default
-import Data.Functor (($>))
 import Data.List (intercalate)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, maybeToList)
 import qualified Data.Set as Set
-import qualified Data.Sequence as Seq
 import Data.Text (Text)
 import qualified Data.Text as T
+import Skylighting (defaultSyntaxMap)
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (Lang (..), renderLang)
-import Text.Pandoc.Builder
+import Text.Collate.Lang (renderLang)
+import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      readFileFromDirs, report, setResourcePath,
-                                      setTranslations, translateTerm)
+                                      readFileFromDirs, report,
+                                      setResourcePath)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
-import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
+import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
-import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
-                                        ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
-                                       babelLangToBCP47)
-import Text.Pandoc.Readers.LaTeX.SIunitx
+import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
+import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
+                                       inlineEnvironment,
+                                       mathDisplay, mathInline,
+                                       newtheorem, theoremstyle, proof,
+                                       theoremEnvironment)
+import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
+import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
+import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
+                                       enquoteCommands,
+                                       babelLangToBCP47, setDefaultLanguage)
+import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
+                                         nameCommands, charCommands,
+                                         accentCommands,
+                                         biblatexInlineCommands,
+                                         verbCommands, rawInlineOr,
+                                         listingsLanguage)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
-import qualified Text.Pandoc.Builder as B
-import qualified Data.Text.Normalize as Normalize
-import Safe
+import Data.List.NonEmpty (nonEmpty)
 
 -- for debugging:
 -- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -73,16 +77,17 @@ import Safe
 -- import Debug.Trace (traceShowId)
 
 -- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
           => ReaderOptions -- ^ Reader options
-          -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+          -> a             -- ^ Input to parse
           -> m Pandoc
 readLaTeX opts ltx = do
+  let sources = toSources ltx
   parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
-               (tokenize "source" (crFilter ltx))
+               (tokenizeSources sources)
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError ltx e
+    Left e       -> throwError $ PandocParsecError sources e
 
 parseLaTeX :: PandocMonad m => LP m Pandoc
 parseLaTeX = do
@@ -93,11 +98,7 @@ parseLaTeX = do
   let doc' = doc bs
   let headerLevel (Header n _ _) = [n]
       headerLevel _              = []
-#if MIN_VERSION_safe(0,3,18)
-  let bottomLevel = minimumBound 1 $ query headerLevel doc'
-#else
-  let bottomLevel = minimumDef 1 $ query headerLevel doc'
-#endif
+  let bottomLevel = maybe 1 minimum $ nonEmpty $ query headerLevel doc'
   let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
       adjustHeaders _ x                   = x
   let (Pandoc _ bs') =
@@ -132,11 +133,10 @@ resolveRefs _ x = x
 
 
 rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-              => ParserT Text s m Text
+              => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenize "source" inp
+  toks <- getInputTokens
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -163,14 +163,13 @@ beginOrEndCommand = try $ do
                     (txt <> untokenize rawargs)
 
 rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-               => ParserT Text s m Text
+               => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenize "source" inp
+  toks <- getInputTokens
   raw <- snd <$>
           (   rawLaTeXParser toks True
-              (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+              (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
               inlines
           <|> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
               inlines
@@ -178,11 +177,10 @@ rawLaTeXInline = do
   finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
   return $ raw <> T.pack finalbraces
 
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenize "source" inp
+  toks <- getInputTokens
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
@@ -191,12 +189,6 @@ inlineCommand = do
 word :: PandocMonad m => LP m Inlines
 word = str . untoken <$> satisfyTok isWordTok
 
-regularSymbol :: PandocMonad m => LP m Inlines
-regularSymbol = str . untoken <$> satisfyTok isRegularSymbol
-  where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
-        isRegularSymbol _                = False
-        isSpecial c = c `Set.member` specialChars
-
 inlineGroup :: PandocMonad m => LP m Inlines
 inlineGroup = do
   ils <- grouped inline
@@ -237,19 +229,6 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
-
-lit :: Text -> LP m Inlines
-lit = pure . str
-
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
   Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -296,23 +275,14 @@ quoted' f starter ender = do
                               cs   -> cs)
      else lit startchs
 
-enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
-enquote starred mblang = do
-  skipopts
-  let lang = mblang >>= babelLangToBCP47
-  let langspan = case lang of
-                      Nothing -> id
-                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
-  quoteContext <- sQuoteContext <$> getState
-  if starred || quoteContext == InDoubleQuote
-     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
-     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+lit :: Text -> LP m Inlines
+lit = pure . str
 
 blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
 blockquote cvariant mblang = do
   citepar <- if cvariant
                 then (\xs -> para (cite xs mempty))
-                       <$> cites NormalCitation False
+                       <$> cites inline NormalCitation False
                 else option mempty $ para <$> bracketed inline
   let lang = mblang >>= babelLangToBCP47
   let langdiv = case lang of
@@ -323,224 +293,13 @@ blockquote cvariant mblang = do
   optional $ symbolIn (".:;?!" :: [Char])  -- currently ignored
   return $ blockQuote . langdiv $ (bs <> citepar)
 
-doAcronym :: PandocMonad m => Text -> LP m Inlines
-doAcronym form = do
-  acro <- braced
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "singular+" <> form)])
-    $ str $ untokenize acro]
-
-doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
-doAcronymPlural form = do
-  acro <- braced
-  plural <- lit "s"
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "plural+" <> form)]) $
-   mconcat [str $ untokenize acro, plural]]
-
-doverb :: PandocMonad m => LP m Inlines
-doverb = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  withVerbatimMode $
-    code . untokenize <$>
-      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
-
-verbTok :: PandocMonad m => Char -> LP m Tok
-verbTok stopchar = do
-  t@(Tok pos toktype txt) <- anyTok
-  case T.findIndex (== stopchar) txt of
-       Nothing -> return t
-       Just i  -> do
-         let (t1, t2) = T.splitAt i txt
-         inp <- getInput
-         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
-                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
-         return $ Tok pos toktype t1
-
-listingsLanguage :: [(Text, Text)] -> Maybe Text
-listingsLanguage opts =
-  case lookup "language" opts of
-    Nothing  -> Nothing
-    Just l   -> fromListingsLanguage l `mplus` Just l
-
-dolstinline :: PandocMonad m => LP m Inlines
-dolstinline = do
-  options <- option [] keyvals
-  let classes = maybeToList $ listingsLanguage options
-  doinlinecode classes
-
-domintinline :: PandocMonad m => LP m Inlines
-domintinline = do
-  skipopts
-  cls <- untokenize <$> braced
-  doinlinecode [cls]
-
-doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
-doinlinecode classes = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  let stopchar = if marker == '{' then '}' else marker
-  withVerbatimMode $
-    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
-      manyTill (verbTok stopchar) (symbol stopchar)
-
-nlToSpace :: Char -> Char
-nlToSpace '\n' = ' '
-nlToSpace x    = x
-
-mathDisplay :: Text -> Inlines
-mathDisplay = displayMath . trimMath
-
-mathInline :: Text -> Inlines
-mathInline = math . trimMath
-
-dollarsMath :: PandocMonad m => LP m Inlines
-dollarsMath = do
-  symbol '$'
-  display <- option False (True <$ symbol '$')
-  (do contents <- try $ untokenize <$> pDollarsMath 0
-      if display
-         then mathDisplay contents <$ symbol '$'
-         else return $ mathInline contents)
-   <|> (guard display >> return (mathInline ""))
-
--- Int is number of embedded groupings
-pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
-pDollarsMath n = do
-  tk@(Tok _ toktype t) <- anyTok
-  case toktype of
-       Symbol | t == "$"
-              , n == 0 -> return []
-              | t == "\\" -> do
-                  tk' <- anyTok
-                  (tk :) . (tk' :) <$> pDollarsMath n
-              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
-              | t == "}" ->
-                if n > 0
-                then (tk :) <$> pDollarsMath (n-1)
-                else mzero
-       _ -> (tk :) <$> pDollarsMath n
-
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _      = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
-  let k = last ks
-  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
-  first  <- optionMaybe $ toList <$> opt
-  second <- optionMaybe $ toList <$> opt
-  keys <- try $ bgroup *> manyTill citationLabel egroup
-  let (pre, suf) = case (first  , second ) of
-        (Just s , Nothing) -> (mempty, s )
-        (Just s , Just t ) -> (s , t )
-        _                  -> (mempty, mempty)
-      conv k = Citation { citationId      = k
-                        , citationPrefix  = []
-                        , citationSuffix  = []
-                        , citationMode    = NormalCitation
-                        , citationHash    = 0
-                        , citationNoteNum = 0
-                        }
-  return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m Text
-citationLabel  = do
-  sp
-  untokenize <$>
-    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
-          <* sp
-          <* optional (symbol ',')
-          <* sp)
-  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
-
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
-  cits <- if multi
-             then do
-               multiprenote <- optionMaybe $ toList <$> paropt
-               multipostnote <- optionMaybe $ toList <$> paropt
-               let (pre, suf) = case (multiprenote, multipostnote) of
-                     (Just s , Nothing) -> (mempty, s)
-                     (Nothing , Just t) -> (mempty, t)
-                     (Just s , Just t ) -> (s, t)
-                     _                  -> (mempty, mempty)
-               tempCits <- many1 simpleCiteArgs
-               case tempCits of
-                 (k:ks) -> case ks of
-                             (_:_) -> return $ (addMprenote pre k : init ks) ++
-                                                 [addMpostnote suf (last ks)]
-                             _ -> return [addMprenote pre (addMpostnote suf k)]
-                 _ -> return [[]]
-             else count 1 simpleCiteArgs
-  let cs = concat cits
-  return $ case mode of
-        AuthorInText -> case cs of
-                             (c:rest) -> c {citationMode = mode} : rest
-                             []       -> []
-        _            -> map (\a -> a {citationMode = mode}) cs
-  where mprenote (k:ks) = (k:ks) ++ [Space]
-        mprenote _ = mempty
-        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
-        mpostnote _ = mempty
-        addMprenote mpn (k:ks) =
-          let mpnfinal = case citationPrefix k of
-                           (_:_) -> mprenote mpn
-                           _ -> mpn
-          in addPrefix mpnfinal (k:ks)
-        addMprenote _ _ = []
-        addMpostnote = addSuffix . mpostnote
-
-citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
-  (c,raw) <- withRaw $ cites mode multi
-  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
-
-handleCitationPart :: Inlines -> [Citation]
-handleCitationPart ils =
-  let isCite Cite{} = True
-      isCite _      = False
-      (pref, rest) = break isCite (toList ils)
-  in case rest of
-          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
-          _                -> []
-
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
-  (cs, raw) <-
-    withRaw $ concat <$> do
-      bgroup
-      items <- mconcat <$>
-                many1 (notFollowedBy (symbol ';') >> inline)
-                  `sepBy1` symbol ';'
-      egroup
-      return $ map handleCitationPart items
-  case cs of
-       []       -> mzero
-       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
-                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
-
-inNote :: Inlines -> Inlines
-inNote ils =
-  note $ para $ ils <> str "."
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
   guard $ name /= "begin" && name /= "end" && name /= "and"
-  star <- option "" ("*" <$ symbol '*' <* sp)
+  star <- if T.all isAlphaNum name
+             then option "" ("*" <$ symbol '*' <* sp)
+             else pure ""
   overlay <- option "" overlaySpecification
   let name' = name <> star <> overlay
   let names = ordNub [name', name] -- check non-starred as fallback
@@ -551,28 +310,8 @@ inlineCommand' = try $ do
          <|> ignore rawcommand
   lookupListDefault raw names inlineCommands
 
-
 tok :: PandocMonad m => LP m Inlines
-tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
-  where singleChar' = do
-          Tok _ _ t <- singleChar
-          return $ str t
-
-opt :: PandocMonad m => LP m Inlines
-opt = do
-  toks <- try (sp *> bracketedToks <* sp)
-  -- now parse the toks as inlines
-  st <- getState
-  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
-  case parsed of
-    Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
-
-paropt :: PandocMonad m => LP m Inlines
-paropt = parenWrapped inline
-
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
+tok = tokWith inline
 
 unescapeURL :: Text -> Text
 unescapeURL = T.concat . go . T.splitOn "\\"
@@ -585,381 +324,109 @@ unescapeURL = T.concat . go . T.splitOn "\\"
       , isEscapable c = t
       | otherwise = "\\" <> t
 
-mathEnvWith :: PandocMonad m
-            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
-mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
-   where inner x = case innerEnv of
-                        Nothing -> x
-                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
-                                   "\\end{" <> y <> "}"
-
-mathEnv :: PandocMonad m => Text -> LP m Text
-mathEnv name = do
-  skipopts
-  optional blankline
-  res <- manyTill anyTok (end_ name)
-  return $ stripTrailingNewlines $ untokenize res
-
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
-  controlSeq "begin"
-  name <- untokenize <$> braced
-  M.findWithDefault mzero name inlineEnvironments
-
-inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineEnvironments = M.fromList [
-    ("displaymath", mathEnvWith id Nothing "displaymath")
-  , ("math", math <$> mathEnv "math")
-  , ("equation", mathEnvWith id Nothing "equation")
-  , ("equation*", mathEnvWith id Nothing "equation*")
-  , ("gather", mathEnvWith id (Just "gathered") "gather")
-  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
-  , ("multline", mathEnvWith id (Just "gathered") "multline")
-  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
-  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
-  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
-  , ("align", mathEnvWith id (Just "aligned") "align")
-  , ("align*", mathEnvWith id (Just "aligned") "align*")
-  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
-  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
-  , ("dmath", mathEnvWith id Nothing "dmath")
-  , ("dmath*", mathEnvWith id Nothing "dmath*")
-  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
-  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
-  , ("darray", mathEnvWith id (Just "aligned") "darray")
-  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
-  ]
-
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands = M.union inlineLanguageCommands $ M.fromList
-  [ ("emph", extractSpaces emph <$> tok)
-  , ("textit", extractSpaces emph <$> tok)
-  , ("textsl", extractSpaces emph <$> tok)
-  , ("textsc", extractSpaces smallcaps <$> tok)
-  , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
-  , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
-  , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
-  , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
-  , ("texttt", ttfamily)
-  , ("sout", extractSpaces strikeout <$> tok)
-  , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
-  , ("lq", return (str "‘"))
-  , ("rq", return (str "’"))
-  , ("textquoteleft", return (str "‘"))
-  , ("textquoteright", return (str "’"))
-  , ("textquotedblleft", return (str "“"))
-  , ("textquotedblright", return (str "”"))
-  , ("textsuperscript", extractSpaces superscript <$> tok)
-  , ("textsubscript", extractSpaces subscript <$> tok)
-  , ("textbackslash", lit "\\")
-  , ("backslash", lit "\\")
-  , ("slash", lit "/")
-  , ("textbf", extractSpaces strong <$> tok)
-  , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
-  , ("underline", underline <$> tok)
-  , ("ldots", lit "…")
-  , ("vdots", lit "\8942")
-  , ("dots", lit "…")
-  , ("mdots", lit "…")
-  , ("sim", lit "~")
-  , ("sep", lit ",")
-  , ("label", rawInlineOr "label" dolabel)
-  , ("ref", rawInlineOr "ref" $ doref "ref")
-  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
-  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
-  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
-  , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
-  , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
-  , ("lettrine", rawInlineOr "lettrine" lettrine)
-  , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
-  , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
-  , ("ensuremath", mathInline . untokenize <$> braced)
-  , ("texorpdfstring", const <$> tok <*> tok)
-  , ("P", lit "¶")
-  , ("S", lit "§")
-  , ("$", lit "$")
-  , ("%", lit "%")
-  , ("&", lit "&")
-  , ("#", lit "#")
-  , ("_", lit "_")
-  , ("{", lit "{")
-  , ("}", lit "}")
-  , ("qed", lit "\a0\x25FB")
-  -- old TeX commands
-  , ("em", extractSpaces emph <$> inlines)
-  , ("it", extractSpaces emph <$> inlines)
-  , ("sl", extractSpaces emph <$> inlines)
-  , ("bf", extractSpaces strong <$> inlines)
-  , ("tt", code . stringify . toList <$> inlines)
-  , ("rm", inlines)
-  , ("itshape", extractSpaces emph <$> inlines)
-  , ("slshape", extractSpaces emph <$> inlines)
-  , ("scshape", extractSpaces smallcaps <$> inlines)
-  , ("bfseries", extractSpaces strong <$> inlines)
-  , ("MakeUppercase", makeUppercase <$> tok)
-  , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
-  , ("uppercase", makeUppercase <$> tok)
-  , ("MakeLowercase", makeLowercase <$> tok)
-  , ("MakeTextLowercase", makeLowercase <$> tok)
-  , ("lowercase", makeLowercase <$> tok)
-  , ("/", pure mempty) -- italic correction
-  , ("aa", lit "å")
-  , ("AA", lit "Å")
-  , ("ss", lit "ß")
-  , ("o", lit "ø")
-  , ("O", lit "Ø")
-  , ("L", lit "Ł")
-  , ("l", lit "ł")
-  , ("ae", lit "æ")
-  , ("AE", lit "Æ")
-  , ("oe", lit "œ")
-  , ("OE", lit "Œ")
-  , ("pounds", lit "£")
-  , ("euro", lit "€")
-  , ("copyright", lit "©")
-  , ("textasciicircum", lit "^")
-  , ("textasciitilde", lit "~")
-  , ("H", accent '\779' Nothing) -- hungarumlaut
-  , ("`", accent '\768' (Just '`')) -- grave
-  , ("'", accent '\769' (Just '\'')) -- acute
-  , ("^", accent '\770' (Just '^')) -- circ
-  , ("~", accent '\771' (Just '~')) -- tilde
-  , ("\"", accent '\776' Nothing) -- umlaut
-  , (".", accent '\775' Nothing) -- dot
-  , ("=", accent '\772' Nothing) -- macron
-  , ("|", accent '\781' Nothing) -- vertical line above
-  , ("b", accent '\817' Nothing) -- macron below
-  , ("c", accent '\807' Nothing) -- cedilla
-  , ("G", accent '\783' Nothing) -- doublegrave
-  , ("h", accent '\777' Nothing) -- hookabove
-  , ("d", accent '\803' Nothing) -- dotbelow
-  , ("f", accent '\785' Nothing)  -- inverted breve
-  , ("r", accent '\778' Nothing)  -- ringabove
-  , ("t", accent '\865' Nothing)  -- double inverted breve
-  , ("U", accent '\782' Nothing)  -- double vertical line above
-  , ("v", accent '\780' Nothing) -- hacek
-  , ("u", accent '\774' Nothing) -- breve
-  , ("k", accent '\808' Nothing) -- ogonek
-  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
-  , ("i", lit "ı")  -- dotless i
-  , ("j", lit "ȷ")  -- dotless j
-  , ("newtie", accent '\785' Nothing) -- inverted breve
-  , ("textcircled", accent '\8413' Nothing) -- combining circle
-  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
-                            guard $ not inTableCell
-                            optional opt
-                            spaces))
-  , (",", lit "\8198")
-  , ("@", pure mempty)
-  , (" ", lit "\160")
-  , ("ps", pure $ str "PS." <> space)
-  , ("TeX", lit "TeX")
-  , ("LaTeX", lit "LaTeX")
-  , ("bar", lit "|")
-  , ("textless", lit "<")
-  , ("textgreater", lit ">")
-  , ("thanks", skipopts >> note <$> grouped block)
-  , ("footnote", skipopts >> note <$> grouped block)
-  , ("passthrough", tok) -- \passthrough macro used by latex writer
-                         -- for listings
-  , ("verb", doverb)
-  , ("lstinline", dolstinline)
-  , ("mintinline", domintinline)
-  , ("Verb", doverb)
-  , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
-                  bracedUrl)
-  , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
-  , ("href", do url <- bracedUrl
-                sp
-                link (unescapeURL $ untokenize url) "" <$> tok)
-  , ("includegraphics", do options <- option [] keyvals
-                           src <- braced
-                           mkImage options . unescapeURL . removeDoubleQuotes $
-                               untokenize src)
-  , ("enquote*", enquote True Nothing)
-  , ("enquote", enquote False Nothing)
-  -- foreignquote is supposed to use native quote marks
-  , ("foreignquote*", braced >>= enquote True . Just . untokenize)
-  , ("foreignquote", braced >>= enquote False . Just . untokenize)
-  -- hypehnquote uses regular quotes
-  , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
-  , ("hyphenquote", braced >>= enquote False . Just . untokenize)
-  , ("figurename", doTerm Translations.Figure)
-  , ("prefacename", doTerm Translations.Preface)
-  , ("refname", doTerm Translations.References)
-  , ("bibname", doTerm Translations.Bibliography)
-  , ("chaptername", doTerm Translations.Chapter)
-  , ("partname", doTerm Translations.Part)
-  , ("contentsname", doTerm Translations.Contents)
-  , ("listfigurename", doTerm Translations.ListOfFigures)
-  , ("listtablename", doTerm Translations.ListOfTables)
-  , ("indexname", doTerm Translations.Index)
-  , ("abstractname", doTerm Translations.Abstract)
-  , ("tablename", doTerm Translations.Table)
-  , ("enclname", doTerm Translations.Encl)
-  , ("ccname", doTerm Translations.Cc)
-  , ("headtoname", doTerm Translations.To)
-  , ("pagename", doTerm Translations.Page)
-  , ("seename", doTerm Translations.See)
-  , ("seealsoname", doTerm Translations.SeeAlso)
-  , ("proofname", doTerm Translations.Proof)
-  , ("glossaryname", doTerm Translations.Glossary)
-  , ("lstlistingname", doTerm Translations.Listing)
-  , ("cite", citation "cite" NormalCitation False)
-  , ("Cite", citation "Cite" NormalCitation False)
-  , ("citep", citation "citep" NormalCitation False)
-  , ("citep*", citation "citep*" NormalCitation False)
-  , ("citeal", citation "citeal" NormalCitation False)
-  , ("citealp", citation "citealp" NormalCitation False)
-  , ("citealp*", citation "citealp*" NormalCitation False)
-  , ("autocite", citation "autocite" NormalCitation False)
-  , ("smartcite", citation "smartcite" NormalCitation False)
-  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
-  , ("parencite", citation "parencite" NormalCitation False)
-  , ("supercite", citation "supercite" NormalCitation False)
-  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
-  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
-  , ("citeyear", citation "citeyear" SuppressAuthor False)
-  , ("autocite*", citation "autocite*" SuppressAuthor False)
-  , ("cite*", citation "cite*" SuppressAuthor False)
-  , ("parencite*", citation "parencite*" SuppressAuthor False)
-  , ("textcite", citation "textcite" AuthorInText False)
-  , ("citet", citation "citet" AuthorInText False)
-  , ("citet*", citation "citet*" AuthorInText False)
-  , ("citealt", citation "citealt" AuthorInText False)
-  , ("citealt*", citation "citealt*" AuthorInText False)
-  , ("textcites", citation "textcites" AuthorInText True)
-  , ("cites", citation "cites" NormalCitation True)
-  , ("autocites", citation "autocites" NormalCitation True)
-  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
-  , ("parencites", citation "parencites" NormalCitation True)
-  , ("supercites", citation "supercites" NormalCitation True)
-  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
-  , ("Autocite", citation "Autocite" NormalCitation False)
-  , ("Smartcite", citation "Smartcite" NormalCitation False)
-  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
-  , ("Parencite", citation "Parencite" NormalCitation False)
-  , ("Supercite", citation "Supercite" NormalCitation False)
-  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
-  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
-  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
-  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
-  , ("Cite*", citation "Cite*" SuppressAuthor False)
-  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
-  , ("Textcite", citation "Textcite" AuthorInText False)
-  , ("Textcites", citation "Textcites" AuthorInText True)
-  , ("Cites", citation "Cites" NormalCitation True)
-  , ("Autocites", citation "Autocites" NormalCitation True)
-  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
-  , ("Parencites", citation "Parencites" NormalCitation True)
-  , ("Supercites", citation "Supercites" NormalCitation True)
-  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
-  , ("citetext", complexNatbibCitation NormalCitation)
-  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
-                        complexNatbibCitation AuthorInText)
-                   <|> citation "citeauthor" AuthorInText False)
-  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
-                          addMeta "nocite"))
-  , ("hyperlink", hyperlink)
-  , ("hypertarget", hypertargetInline)
-  -- glossaries package
-  , ("gls", doAcronym "short")
-  , ("Gls", doAcronym "short")
-  , ("glsdesc", doAcronym "long")
-  , ("Glsdesc", doAcronym "long")
-  , ("GLSdesc", doAcronym "long")
-  , ("acrlong", doAcronym "long")
-  , ("Acrlong", doAcronym "long")
-  , ("acrfull", doAcronym "full")
-  , ("Acrfull", doAcronym "full")
-  , ("acrshort", doAcronym "abbrv")
-  , ("Acrshort", doAcronym "abbrv")
-  , ("glspl", doAcronymPlural "short")
-  , ("Glspl", doAcronymPlural "short")
-  , ("glsdescplural", doAcronymPlural "long")
-  , ("Glsdescplural", doAcronymPlural "long")
-  , ("GLSdescplural", doAcronymPlural "long")
-  -- acronyms package
-  , ("ac", doAcronym "short")
-  , ("acf", doAcronym "full")
-  , ("acs", doAcronym "abbrv")
-  , ("acl", doAcronym "long")
-  , ("acp", doAcronymPlural "short")
-  , ("acfp", doAcronymPlural "full")
-  , ("acsp", doAcronymPlural "abbrv")
-  , ("aclp", doAcronymPlural "long")
-  , ("Ac", doAcronym "short")
-  , ("Acf", doAcronym "full")
-  , ("Acs", doAcronym "abbrv")
-  , ("Acl", doAcronym "long")
-  , ("Acp", doAcronymPlural "short")
-  , ("Acfp", doAcronymPlural "full")
-  , ("Acsp", doAcronymPlural "abbrv")
-  , ("Aclp", doAcronymPlural "long")
-  -- siuntix
-  , ("si", skipopts *> dosi tok)
-  , ("SI", doSI tok)
-  , ("SIrange", doSIrange True tok)
-  , ("numrange", doSIrange False tok)
-  , ("numlist", doSInumlist)
-  , ("num", doSInum)
-  , ("ang", doSIang)
-  -- hyphenat
-  , ("bshyp", lit "\\\173")
-  , ("fshyp", lit "/\173")
-  , ("dothyp", lit ".\173")
-  , ("colonhyp", lit ":\173")
-  , ("hyp", lit "-")
-  , ("nohyphens", tok)
-  , ("textnhtt", ttfamily)
-  , ("nhttfamily", ttfamily)
-  -- LaTeX colors
-  , ("textcolor", coloredInline "color")
-  , ("colorbox", coloredInline "background-color")
-  -- fontawesome
-  , ("faCheck", lit "\10003")
-  , ("faClose", lit "\10007")
-  -- xspace
-  , ("xspace", doxspace)
-  -- etoolbox
-  , ("ifstrequal", ifstrequal)
-  , ("newtoggle", braced >>= newToggle)
-  , ("toggletrue", braced >>= setToggle True)
-  , ("togglefalse", braced >>= setToggle False)
-  , ("iftoggle", try $ ifToggle >> inline)
-  -- biblatex misc
-  , ("RN", romanNumeralUpper)
-  , ("Rn", romanNumeralLower)
-  -- babel
-  , ("foreignlanguage", foreignlanguage)
-  -- include
-  , ("input", rawInlineOr "input" $ include "input")
-  -- soul package
-  , ("ul", underline <$> tok)
-  -- ulem package
-  , ("uline", underline <$> tok)
-  -- plain tex stuff that should just be passed through as raw tex
-  , ("ifdim", ifdim)
-   -- stackengine
-   , ("addstackgap", skipopts *> tok)
-  ]
-
-accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
-accent combiningAccent fallBack = try $ do
-  ils <- tok
-  case toList ils of
-       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-         -- try to normalize to the combined character:
-         Str (Normalize.normalize Normalize.NFC
-               (T.pack [x, combiningAccent]) <> xs) : ys
-       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       _                 -> return ils
-
+inlineCommands = M.unions
+  [ accentCommands tok
+  , citationCommands inline
+  , siunitxCommands tok
+  , acronymCommands
+  , refCommands
+  , nameCommands
+  , verbCommands
+  , charCommands
+  , enquoteCommands tok
+  , inlineLanguageCommands tok
+  , biblatexInlineCommands tok
+  , rest ]
+ where
+  rest = M.fromList
+    [ ("emph", extractSpaces emph <$> tok)
+    , ("textit", extractSpaces emph <$> tok)
+    , ("textsl", extractSpaces emph <$> tok)
+    , ("textsc", extractSpaces smallcaps <$> tok)
+    , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+    , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+    , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+    , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+    , ("texttt", ttfamily)
+    , ("sout", extractSpaces strikeout <$> tok)
+    , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+    , ("textsuperscript", extractSpaces superscript <$> tok)
+    , ("textsubscript", extractSpaces subscript <$> tok)
+    , ("textbf", extractSpaces strong <$> tok)
+    , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+    , ("underline", underline <$> tok)
+    , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
+    , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
+    , ("lettrine", rawInlineOr "lettrine" lettrine)
+    , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
+    , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
+    , ("ensuremath", mathInline . untokenize <$> braced)
+    , ("texorpdfstring", const <$> tok <*> tok)
+    -- old TeX commands
+    , ("em", extractSpaces emph <$> inlines)
+    , ("it", extractSpaces emph <$> inlines)
+    , ("sl", extractSpaces emph <$> inlines)
+    , ("bf", extractSpaces strong <$> inlines)
+    , ("tt", code . stringify . toList <$> inlines)
+    , ("rm", inlines)
+    , ("itshape", extractSpaces emph <$> inlines)
+    , ("slshape", extractSpaces emph <$> inlines)
+    , ("scshape", extractSpaces smallcaps <$> inlines)
+    , ("bfseries", extractSpaces strong <$> inlines)
+    , ("MakeUppercase", makeUppercase <$> tok)
+    , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
+    , ("uppercase", makeUppercase <$> tok)
+    , ("MakeLowercase", makeLowercase <$> tok)
+    , ("MakeTextLowercase", makeLowercase <$> tok)
+    , ("lowercase", makeLowercase <$> tok)
+    , ("thanks", skipopts >> note <$> grouped block)
+    , ("footnote", skipopts >> note <$> grouped block)
+    , ("passthrough", tok) -- \passthrough macro used by latex writer
+                           -- for listings
+    , ("includegraphics", do options <- option [] keyvals
+                             src <- braced
+                             mkImage options .
+                               unescapeURL .
+                               removeDoubleQuotes $ untokenize src)
+    -- hyperref
+    , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
+                    bracedUrl)
+    , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
+    , ("href", do url <- bracedUrl
+                  sp
+                  link (unescapeURL $ untokenize url) "" <$> tok)
+    , ("hyperlink", hyperlink)
+    , ("hyperref", hyperref)
+    , ("hypertarget", hypertargetInline)
+    -- hyphenat
+    , ("nohyphens", tok)
+    , ("textnhtt", ttfamily)
+    , ("nhttfamily", ttfamily)
+    -- LaTeX colors
+    , ("textcolor", coloredInline "color")
+    , ("colorbox", coloredInline "background-color")
+    -- etoolbox
+    , ("ifstrequal", ifstrequal)
+    , ("newtoggle", braced >>= newToggle)
+    , ("toggletrue", braced >>= setToggle True)
+    , ("togglefalse", braced >>= setToggle False)
+    , ("iftoggle", try $ ifToggle >> inline)
+    -- include
+    , ("input", rawInlineOr "input" $ include "input")
+    -- soul package
+    , ("ul", underline <$> tok)
+    -- ulem package
+    , ("uline", underline <$> tok)
+    -- plain tex stuff that should just be passed through as raw tex
+    , ("ifdim", ifdim)
+    -- stackengine
+    , ("addstackgap", skipopts *> tok)
+    ]
 
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
-  optional opt
+  optional rawopt
   x <- tok
   y <- tok
   return $ extractSpaces (spanWith ("",["lettrine"],[])) x <> smallcaps y
@@ -979,32 +446,18 @@ alterStr :: (Text -> Text) -> Inline -> Inline
 alterStr f (Str xs) = Str (f xs)
 alterStr _ x = x
 
-foreignlanguage :: PandocMonad m => LP m Inlines
-foreignlanguage = do
-  babelLang <- untokenize <$> braced
-  case babelLangToBCP47 babelLang of
-       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
-       _ -> tok
-
-inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
-  where
-    mk (polyglossia, bcp47Func) =
-      ("text" <> polyglossia, inlineLanguage bcp47Func)
-
-inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines
-inlineLanguage bcp47Func = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  let lang = renderLang $ bcp47Func o
-  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
-
 hyperlink :: PandocMonad m => LP m Inlines
 hyperlink = try $ do
   src <- untokenize <$> braced
   lab <- tok
   return $ link ("#" <> src) "" lab
 
+hyperref :: PandocMonad m => LP m Inlines
+hyperref = try $ do
+  url <- (("#" <>) . untokenize <$> try (sp *> bracketedToks <* sp))
+       <|> untokenize <$> (bracedUrl <* bracedUrl <* bracedUrl)
+  link url "" <$> tok
+
 hypertargetBlock :: PandocMonad m => LP m Blocks
 hypertargetBlock = try $ do
   ref <- untokenize <$> braced
@@ -1019,31 +472,6 @@ hypertargetInline = try $ do
   ils <- grouped inline
   return $ spanWith (ref, [], []) ils
 
-romanNumeralUpper :: (PandocMonad m) => LP m Inlines
-romanNumeralUpper =
-  str . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralLower :: (PandocMonad m) => LP m Inlines
-romanNumeralLower =
-  str . T.toLower . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralArg :: (PandocMonad m) => LP m Int
-romanNumeralArg = spaces *> (parser <|> inBraces)
-  where
-    inBraces = do
-      symbol '{'
-      spaces
-      res <- parser
-      spaces
-      symbol '}'
-      return res
-    parser = do
-      Tok _ Word s <- satisfyTok isWordTok
-      let (digits, rest) = T.span isDigit s
-      unless (T.null rest) $
-        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
-      safeRead digits
-
 newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
 newToggle name = do
   updateState $ \st ->
@@ -1074,9 +502,6 @@ ifToggle = do
                   report $ UndefinedToggle name' pos
   return ()
 
-doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
-doTerm term = str <$> translateTerm term
-
 ifstrequal :: (PandocMonad m, Monoid a) => LP m a
 ifstrequal = do
   str1 <- tok
@@ -1097,13 +522,6 @@ coloredInline stylename = do
 ttfamily :: PandocMonad m => LP m Inlines
 ttfamily = code . stringify . toList <$> tok
 
-rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
-  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
-  if parseRaw
-     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
-     else fallback
-
 processHBox :: Inlines -> Inlines
 processHBox = walk convert
   where
@@ -1154,79 +572,90 @@ treatAsInline = Set.fromList
   , "pagebreak"
   ]
 
-label :: PandocMonad m => LP m ()
-label = do
-  controlSeq "label"
-  t <- braced
-  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
-
-dolabel :: PandocMonad m => LP m Inlines
-dolabel = do
-  v <- braced
-  let refstr = untokenize v
-  updateState $ \st ->
-    st{ sLastLabel = Just refstr }
-  return $ spanWith (refstr,[],[("label", refstr)])
-    $ inBrackets $ str $ untokenize v
-
-doref :: PandocMonad m => Text -> LP m Inlines
-doref cls = do
-  v <- braced
-  let refstr = untokenize v
-  return $ linkWith ("",[],[ ("reference-type", cls)
-                           , ("reference", refstr)])
-                    ("#" <> refstr)
-                    ""
-                    (inBrackets $ str refstr)
-
 lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
 lookupListDefault d = (fromMaybe d .) . lookupList
   where lookupList l m = msum $ map (`M.lookup` m) l
 
 inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
-     <|> (space  <$ whitespace)
-     <|> (softbreak <$ endline)
-     <|> word
-     <|> macroDef (rawInline "latex")
-     <|> inlineCommand'
-     <|> inlineEnvironment
-     <|> inlineGroup
-     <|> (symbol '-' *>
-           option (str "-") (symbol '-' *>
-             option (str "–") (str "—" <$ symbol '-')))
-     <|> doubleQuote
-     <|> singleQuote
-     <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
-     <|> (str "”" <$ symbol '”')
-     <|> (str "’" <$ symbol '\'')
-     <|> (str "’" <$ symbol '’')
-     <|> (str "\160" <$ symbol '~')
-     <|> dollarsMath
-     <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
-     <|> (str . T.singleton <$> primEscape)
-     <|> regularSymbol
-     <|> (do res <- symbolIn "#^'`\"[]&"
-             pos <- getPosition
-             let s = untoken res
-             report $ ParsingUnescaped s pos
-             return $ str s)
+inline = do
+  Tok pos toktype t <- lookAhead anyTok
+  let symbolAsString = str . untoken <$> anySymbol
+  let unescapedSymbolAsString =
+        do s <- untoken <$> anySymbol
+           report $ ParsingUnescaped s pos
+           return $ str s
+  case toktype of
+    Comment     -> mempty <$ comment
+    Spaces      -> space <$ whitespace
+    Newline     -> softbreak <$ endline
+    Word        -> word
+    Esc1        -> str . T.singleton <$> primEscape
+    Esc2        -> str . T.singleton <$> primEscape
+    Symbol      ->
+      case t of
+        "-"     -> symbol '-' *>
+                    option (str "-") (symbol '-' *>
+                      option (str "–") (str "—" <$ symbol '-'))
+        "'"     -> symbol '\'' *>
+                  option (str "’") (str  "”" <$ symbol '\'')
+        "~"     -> str "\160" <$ symbol '~'
+        "`"     -> doubleQuote <|> singleQuote <|> symbolAsString
+        "\""    -> doubleQuote <|> singleQuote <|> symbolAsString
+        "“"     -> doubleQuote <|> symbolAsString
+        "‘"     -> singleQuote <|> symbolAsString
+        "$"     -> dollarsMath <|> unescapedSymbolAsString
+        "|"     -> (guardEnabled Ext_literate_haskell *>
+                    symbol '|' *> doLHSverb) <|> symbolAsString
+        "{"     -> inlineGroup
+        "#"     -> unescapedSymbolAsString
+        "&"     -> unescapedSymbolAsString
+        "_"     -> unescapedSymbolAsString
+        "^"     -> unescapedSymbolAsString
+        "\\"    -> mzero
+        "}"     -> mzero
+        _       -> symbolAsString
+    CtrlSeq _   -> macroDef (rawInline "latex")
+                  <|> inlineCommand'
+                  <|> inlineEnvironment
+                  <|> inlineGroup
+    _           -> mzero
 
 inlines :: PandocMonad m => LP m Inlines
 inlines = mconcat <$> many inline
 
+opt :: PandocMonad m => LP m Inlines
+opt = do
+  toks <- try (sp *> bracketedToks <* sp)
+  -- now parse the toks as inlines
+  st <- getState
+  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
+  case parsed of
+    Right result -> return result
+    Left e       -> throwError $ PandocParsecError (toSources toks) e
+
 -- block elements:
 
 preamble :: PandocMonad m => LP m Blocks
 preamble = mconcat <$> many preambleBlock
   where preambleBlock =  (mempty <$ spaces1)
                      <|> macroDef (rawBlock "latex")
+                     <|> filecontents
                      <|> (mempty <$ blockCommand)
                      <|> (mempty <$ braced)
                      <|> (do notFollowedBy (begin_ "document")
                              anyTok
                              return mempty)
 
+rule :: PandocMonad m => LP m Blocks
+rule = do
+  skipopts
+  width <- T.takeWhile (\c -> isDigit c || c == '.') . stringify <$> tok
+  _thickness <- tok
+  -- 0-width rules are used to fix spacing issues:
+  case safeRead width of
+    Just (0 :: Double) -> return mempty
+    _ -> return horizontalRule
+
 paragraph :: PandocMonad m => LP m Blocks
 paragraph = do
   x <- trimInlines . mconcat <$> many1 inline
@@ -1264,6 +693,16 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
 insertIncluded :: PandocMonad m
                => FilePath
                -> FilePath
@@ -1273,13 +712,12 @@ insertIncluded defaultExtension f' = do
                 ".tex" -> f'
                 ".sty" -> f'
                 _      -> addExtension f' defaultExtension
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
   pos <- getPosition
   containers <- getIncludeFiles <$> getState
   when (T.pack f `elem` containers) $
     throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
   updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromDirs dirs f
+  mbcontents <- readFileFromTexinputs f
   contents <- case mbcontents of
                    Just s -> return s
                    Nothing -> do
@@ -1288,10 +726,6 @@ insertIncluded defaultExtension f' = do
   getInput >>= setInput . (tokenize f contents ++)
   updateState dropLatestIncludeFile
 
-addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
-addMeta field val = updateState $ \st ->
-   st{ sMeta = addMetaField field val $ sMeta st }
-
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
@@ -1300,150 +734,6 @@ authors = try $ do
   egroup
   addMeta "author" (map trimInlines auths)
 
-macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
-macroDef constructor = do
-    (_, s) <- withRaw (commandDef <|> environmentDef)
-    (constructor (untokenize s) <$
-      guardDisabled Ext_latex_macros)
-     <|> return mempty
-  where commandDef = do
-          (name, macro') <- newcommand <|> letmacro <|> defmacro
-          guardDisabled Ext_latex_macros <|>
-           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
-        environmentDef = do
-          mbenv <- newenvironment
-          case mbenv of
-            Nothing -> return ()
-            Just (name, macro1, macro2) ->
-              guardDisabled Ext_latex_macros <|>
-                do updateState $ \s -> s{ sMacros =
-                    M.insert name macro1 (sMacros s) }
-                   updateState $ \s -> s{ sMacros =
-                    M.insert ("end" <> name) macro2 (sMacros s) }
-        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
-        -- is equivalent to
-        -- @\newcommand{\envname}[n-args][default]{begin}@
-        -- @\newcommand{\endenvname}@
-
-letmacro :: PandocMonad m => LP m (Text, Macro)
-letmacro = do
-  controlSeq "let"
-  (name, contents) <- withVerbatimMode $ do
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    optional $ symbol '='
-    spaces
-    -- we first parse in verbatim mode, and then expand macros,
-    -- because we don't want \let\foo\bar to turn into
-    -- \let\foo hello if we have previously \def\bar{hello}
-    contents <- bracedOrToken
-    return (name, contents)
-  contents' <- doMacros' 0 contents
-  return (name, Macro ExpandWhenDefined [] Nothing contents')
-
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
-  -- we use withVerbatimMode, because macros are to be expanded
-  -- at point of use, not point of definition
-  withVerbatimMode $ do
-    controlSeq "def"
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    argspecs <- many (argspecArg <|> argspecPattern)
-    contents <- bracedOrToken
-    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
-
-argspecArg :: PandocMonad m => LP m ArgSpec
-argspecArg = do
-  Tok _ (Arg i) _ <- satisfyTok isArgTok
-  return $ ArgNum i
-
-argspecPattern :: PandocMonad m => LP m ArgSpec
-argspecPattern =
-  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
-                              (toktype' == Symbol || toktype' == Word) &&
-                              (txt /= "{" && txt /= "\\" && txt /= "}")))
-
-newcommand :: PandocMonad m => LP m (Text, Macro)
-newcommand = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
-                             controlSeq "renewcommand" <|>
-                             controlSeq "providecommand" <|>
-                             controlSeq "DeclareMathOperator" <|>
-                             controlSeq "DeclareRobustCommand"
-  withVerbatimMode $ do
-    Tok _ (CtrlSeq name) txt <- do
-      optional (symbol '*')
-      anyControlSeq <|>
-        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    let argspecs = map ArgNum [1..numargs]
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    spaces
-    contents' <- bracedOrToken
-    let contents =
-         case mtype of
-              "DeclareMathOperator" ->
-                 Tok pos (CtrlSeq "mathop") "\\mathop"
-                 : Tok pos Symbol "{"
-                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
-                 : Tok pos Symbol "{"
-                 : (contents' ++
-                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
-              _                     -> contents'
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-        Just macro
-          | mtype == "newcommand" -> do
-              report $ MacroAlreadyDefined txt pos
-              return (name, macro)
-          | mtype == "providecommand" -> return (name, macro)
-        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
-
-newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
-newenvironment = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
-                             controlSeq "renewenvironment" <|>
-                             controlSeq "provideenvironment"
-  withVerbatimMode $ do
-    optional $ symbol '*'
-    spaces
-    name <- untokenize <$> braced
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    let argspecs = map (\i -> ArgNum i) [1..numargs]
-    startcontents <- spaces >> bracedOrToken
-    endcontents <- spaces >> bracedOrToken
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-         Just _
-           | mtype == "newenvironment" -> do
-               report $ MacroAlreadyDefined name pos
-               return Nothing
-           | mtype == "provideenvironment" ->
-               return Nothing
-         _ -> return $ Just (name,
-                      Macro ExpandWhenUsed argspecs optarg startcontents,
-                      Macro ExpandWhenUsed [] Nothing endcontents)
-
-bracketedNum :: PandocMonad m => LP m Int
-bracketedNum = do
-  ds <- untokenize <$> bracketedToks
-  case safeRead ds of
-       Just i -> return i
-       _      -> return 0
-
-setCaption :: PandocMonad m => LP m ()
-setCaption = try $ do
-  skipopts
-  ils <- tok
-  optional $ try $ spaces *> label
-  updateState $ \st -> st{ sCaption = Just ils }
-
 looseItem :: PandocMonad m => LP m Blocks
 looseItem = do
   inListItem <- sInListItem <$> getState
@@ -1457,10 +747,6 @@ epigraph = do
   p2 <- grouped block
   return $ divWith ("", ["epigraph"], []) (p1 <> p2)
 
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption   = Nothing
-                                      , sLastLabel = Nothing }
-
 section :: PandocMonad m => Attr -> Int -> LP m Blocks
 section (ident, classes, kvs) lvl = do
   skipopts
@@ -1554,7 +840,7 @@ blockCommands = M.fromList
    , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
    , ("signature", mempty <$ (skipopts *> authors))
    , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-   , ("newtheorem", newtheorem)
+   , ("newtheorem", newtheorem inline)
    , ("theoremstyle", theoremstyle)
    -- KOMA-Script metadata commands
    , ("extratitle", mempty <$ (skipopts *> tok >>= addMeta "extratitle"))
@@ -1598,11 +884,11 @@ blockCommands = M.fromList
    --
    , ("hrule", pure horizontalRule)
    , ("strut", pure mempty)
-   , ("rule", skipopts *> tok *> tok $> horizontalRule)
+   , ("rule", rule)
    , ("item", looseItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
-   , ("caption", mempty <$ setCaption)
+   , ("caption", mempty <$ setCaption inline)
    , ("bibliography", mempty <$ (skipopts *> braced >>=
          addMeta "bibliography" . splitBibs . untokenize))
    , ("addbibresource", mempty <$ (skipopts *> braced >>=
@@ -1640,7 +926,8 @@ blockCommands = M.fromList
 
 
 environments :: PandocMonad m => M.Map Text (LP m Blocks)
-environments = M.fromList
+environments = M.union (tableEnvironments blocks inline) $
+   M.fromList
    [ ("document", env "document" blocks <* skipMany anyTok)
    , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
    , ("sloppypar", env "sloppypar" blocks)
@@ -1654,13 +941,6 @@ environments = M.fromList
    , ("flushright", divWith ("", ["flushright"], []) <$> env "flushright" blocks)
    , ("flushleft", divWith ("", ["flushleft"], []) <$> env "flushleft" blocks)
    , ("landscape", env "landscape" blocks)
-   , ("longtable",  env "longtable" $
-          resetCaption *> simpTable "longtable" False >>= addTableCaption)
-   , ("table",  env "table" $
-          skipopts *> resetCaption *> blocks >>= addTableCaption)
-   , ("tabular*", env "tabular*" $ simpTable "tabular*" True)
-   , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
-   , ("tabular", env "tabular"  $ simpTable "tabular" False)
    , ("quote", blockQuote <$> env "quote" blocks)
    , ("quotation", blockQuote <$> env "quotation" blocks)
    , ("verse", blockQuote <$> env "verse" blocks)
@@ -1683,7 +963,7 @@ environments = M.fromList
    , ("lilypond", rawVerbEnv "lilypond")
    , ("ly", rawVerbEnv "ly")
    -- amsthm
-   , ("proof", proof)
+   , ("proof", proof blocks opt)
    -- etoolbox
    , ("ifstrequal", ifstrequal)
    , ("newtoggle", braced >>= newToggle)
@@ -1692,130 +972,29 @@ environments = M.fromList
    , ("iftoggle", try $ ifToggle >> block)
    ]
 
-theoremstyle :: PandocMonad m => LP m Blocks
-theoremstyle = do
-  stylename <- untokenize <$> braced
-  let mbstyle = case stylename of
-                  "plain"      -> Just PlainStyle
-                  "definition" -> Just DefinitionStyle
-                  "remark"     -> Just RemarkStyle
-                  _            -> Nothing
-  case mbstyle of
-    Nothing  -> return ()
-    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
-  return mempty
-
-newtheorem :: PandocMonad m => LP m Blocks
-newtheorem = do
-  number <- option True (False <$ symbol '*' <* sp)
+filecontents :: PandocMonad m => LP m Blocks
+filecontents = try $ do
+  controlSeq "begin"
   name <- untokenize <$> braced
-  sp
-  series <- option Nothing $ Just . untokenize <$> bracketedToks
-  sp
-  showName <- tok
-  sp
-  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
-  sty <- sLastTheoremStyle <$> getState
-  let spec = TheoremSpec { theoremName = showName
-                         , theoremStyle = sty
-                         , theoremSeries = series
-                         , theoremSyncTo = syncTo
-                         , theoremNumber = number
-                         , theoremLastNum = DottedNum [0] }
-  tmap <- sTheoremMap <$> getState
-  updateState $ \s -> s{ sTheoremMap =
-                            M.insert name spec tmap }
+  guard $ name == "filecontents" || name == "filecontents*"
+  skipopts
+  fp <- untokenize <$> braced
+  txt <- verbEnv name
+  updateState $ \st ->
+    st{ sFileContents = M.insert fp txt (sFileContents st) }
   return mempty
 
-proof :: PandocMonad m => LP m Blocks
-proof = do
-  title <- option (B.text "Proof") opt
-  bs <- env "proof" blocks
-  return $
-    B.divWith ("", ["proof"], []) $
-      addQed $ addTitle (B.emph (title <> ".")) bs
-
-addTitle :: Inlines -> Blocks -> Blocks
-addTitle ils bs =
-  case B.toList bs of
-    (Para xs : rest)
-      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
-    _ -> B.para ils <> bs
-
-addQed :: Blocks -> Blocks
-addQed bs =
-  case Seq.viewr (B.unMany bs) of
-    s Seq.:> Para ils
-      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
-    _ -> bs <> B.para qedSign
- where
-  qedSign = B.str "\xa0\x25FB"
-
 environment :: PandocMonad m => LP m Blocks
 environment = try $ do
   controlSeq "begin"
   name <- untokenize <$> braced
   M.findWithDefault mzero name environments <|>
-    theoremEnvironment name <|>
+    theoremEnvironment blocks opt name <|>
     if M.member name (inlineEnvironments
                        :: M.Map Text (LP PandocPure Inlines))
        then mzero
        else try (rawEnv name) <|> rawVerbEnv name
 
-theoremEnvironment :: PandocMonad m => Text -> LP m Blocks
-theoremEnvironment name = do
-  tmap <- sTheoremMap <$> getState
-  case M.lookup name tmap of
-    Nothing -> mzero
-    Just tspec -> do
-       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
-       mblabel <- option Nothing $ Just . untokenize <$>
-                   try (spaces >> controlSeq "label" >> spaces >> braced)
-       bs <- env name blocks
-       number <-
-         if theoremNumber tspec
-            then do
-               let name' = fromMaybe name $ theoremSeries tspec
-               num <- getNextNumber
-                   (maybe (DottedNum [0]) theoremLastNum .
-                    M.lookup name' . sTheoremMap)
-               updateState $ \s ->
-                 s{ sTheoremMap =
-                       M.adjust
-                       (\spec -> spec{ theoremLastNum = num })
-                       name'
-                       (sTheoremMap s)
-                  }
-
-               case mblabel of
-                 Just ident ->
-                   updateState $ \s ->
-                     s{ sLabels = M.insert ident
-                         (B.toList $
-                           theoremName tspec <> "\160" <>
-                           str (renderDottedNum num)) (sLabels s) }
-                 Nothing -> return ()
-               return $ space <> B.text (renderDottedNum num)
-            else return mempty
-       let titleEmph = case theoremStyle tspec of
-                         PlainStyle      -> B.strong
-                         DefinitionStyle -> B.strong
-                         RemarkStyle     -> B.emph
-       let title = titleEmph (theoremName tspec <> number)
-                      <> optTitle <> "." <> space
-       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
-              $ case theoremStyle tspec of
-                  PlainStyle -> walk italicize bs
-                  _          -> bs
-
-italicize :: Block -> Block
-italicize (Para ils) = Para [Emph ils]
-italicize (Plain ils) = Plain [Emph ils]
-italicize x = x
-
-env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
-
 rawEnv :: PandocMonad m => Text -> LP m Blocks
 rawEnv name = do
   exts <- getOption readerExtensions
@@ -1823,15 +1002,17 @@ rawEnv name = do
   rawOptions <- mconcat <$> many rawopt
   let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
   pos1 <- getPosition
-  (bs, raw) <- withRaw $ env name blocks
   if parseRaw
-     then return $ rawBlock "latex"
+     then do
+       (_, raw) <- withRaw $ env name blocks
+       return $ rawBlock "latex"
                  $ beginCommand <> untokenize raw
      else do
+       bs <- env name blocks
        report $ SkippedContent beginCommand pos1
        pos2 <- getPosition
        report $ SkippedContent ("\\end{" <> name <> "}") pos2
-       return bs
+       return $ divWith ("",[name],[]) bs
 
 rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
 rawVerbEnv name = do
@@ -1890,8 +1071,7 @@ inputMinted = do
   pos <- getPosition
   attr <- mintedAttr
   f <- T.filter (/='"') . untokenize <$> braced
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
-  mbCode <- readFileFromDirs dirs (T.unpack f)
+  mbCode <- readFileFromTexinputs (T.unpack f)
   rawcode <- case mbCode of
                   Just s -> return s
                   Nothing -> do
@@ -1989,8 +1169,7 @@ inputListing = do
   pos <- getPosition
   options <- option [] keyvals
   f <- T.filter (/='"') . untokenize <$> braced
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
-  mbCode <- readFileFromDirs dirs (T.unpack f)
+  mbCode <- readFileFromTexinputs (T.unpack f)
   codeLines <- case mbCode of
                       Just s -> return $ T.lines s
                       Nothing -> do
@@ -1999,7 +1178,8 @@ inputListing = do
   let (ident,classes,kvs) = parseListingsOptions options
   let classes' =
         (case listingsLanguage options of
-           Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>)
+           Nothing -> (take 1 (languagesByExtension defaultSyntaxMap
+                                (T.pack $ takeExtension $ T.unpack f)) <>)
            Just _  -> id) classes
   let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
   let lastline = fromMaybe (length codeLines) $
@@ -2065,358 +1245,23 @@ orderedList' = try $ do
   bs <- listenv "enumerate" (many item)
   return $ orderedListWith (start, style, delim) bs
 
--- tables
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
-  spaces
-  controlSeq "hline" <|>
-    -- booktabs rules:
-    controlSeq "toprule" <|>
-    controlSeq "bottomrule" <|>
-    controlSeq "midrule" <|>
-    controlSeq "endhead" <|>
-    controlSeq "endfirsthead"
-  spaces
-  optional opt
-  return ()
-
-lbreak :: PandocMonad m => LP m Tok
-lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
-         <* skipopts <* spaces
-
-amp :: PandocMonad m => LP m Tok
-amp = symbol '&'
-
--- Split a Word into individual Symbols (for parseAligns)
-splitWordTok :: PandocMonad m => LP m ()
-splitWordTok = do
-  inp <- getInput
-  case inp of
-       (Tok spos Word t : rest) ->
-         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
-       _ -> return ()
-
-parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
-parseAligns = try $ do
-  let maybeBar = skipMany
-        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
-  let cAlign = AlignCenter <$ symbol 'c'
-  let lAlign = AlignLeft <$ symbol 'l'
-  let rAlign = AlignRight <$ symbol 'r'
-  let parAlign = AlignLeft <$ symbol 'p'
-  -- aligns from tabularx
-  let xAlign = AlignLeft <$ symbol 'X'
-  let mAlign = AlignLeft <$ symbol 'm'
-  let bAlign = AlignLeft <$ symbol 'b'
-  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
-                                 <|> xAlign <|> mAlign <|> bAlign )
-  let alignPrefix = symbol '>' >> braced
-  let alignSuffix = symbol '<' >> braced
-  let colWidth = try $ do
-        symbol '{'
-        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
-        spaces
-        symbol '}'
-        return $ safeRead ds
-  let alignSpec = do
-        pref <- option [] alignPrefix
-        spaces
-        al <- alignChar
-        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
-                                                 pos <- getPosition
-                                                 report $ SkippedContent s pos
-                                                 return Nothing)
-        spaces
-        suff <- option [] alignSuffix
-        return (al, width, (pref, suff))
-  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
-        symbol '*'
-        spaces
-        ds <- trim . untokenize <$> braced
-        spaces
-        spec <- braced
-        case safeRead ds of
-             Just n  ->
-               getInput >>= setInput . (mconcat (replicate n spec) ++)
-             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
-  bgroup
-  spaces
-  maybeBar
-  aligns' <- many $ try $ spaces >> optional starAlign >>
-                            (alignSpec <* maybeBar)
-  spaces
-  egroup
-  spaces
-  return $ map toSpec aligns'
-  where
-    toColWidth (Just w) | w > 0 = ColWidth w
-    toColWidth _                = ColWidthDefault
-    toSpec (x, y, z) = (x, toColWidth y, z)
-
--- N.B. this parser returns a Row that may have erroneous empty cells
--- in it. See the note above fixTableHead for details.
-parseTableRow :: PandocMonad m
-              => Text   -- ^ table environment name
-              -> [([Tok], [Tok])] -- ^ pref/suffixes
-              -> LP m Row
-parseTableRow envname prefsufs = do
-  notFollowedBy (spaces *> end_ envname)
-  -- add prefixes and suffixes in token stream:
-  let celltoks (pref, suff) = do
-        prefpos <- getPosition
-        contents <- mconcat <$>
-            many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711
-                  <|>
-                   snd <$> withRaw (inlineEnvironment <|> dollarsMath)
-                  <|>
-                   (do notFollowedBy
-                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
-                       count 1 anyTok) )
-
-        suffpos <- getPosition
-        option [] (count 1 amp)
-        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
-  rawcells <- mapM celltoks prefsufs
-  oldInput <- getInput
-  cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
-  setInput oldInput
-  spaces
-  return $ Row nullAttr cells
-
-parseTableCell :: PandocMonad m => LP m Cell
-parseTableCell = do
-  spaces
-  updateState $ \st -> st{ sInTableCell = True }
-  cell' <-   multicolumnCell
-         <|> multirowCell
-         <|> parseSimpleCell
-         <|> parseEmptyCell
-  updateState $ \st -> st{ sInTableCell = False }
-  spaces
-  return cell'
-  where
-    -- The parsing of empty cells is important in LaTeX, especially when dealing
-    -- with multirow/multicolumn. See #6603.
-    parseEmptyCell = spaces $> emptyCell
-
-cellAlignment :: PandocMonad m => LP m Alignment
-cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
-  where
-    alignment = do
-      c <- untoken <$> singleChar
-      return $ case c of
-        "l" -> AlignLeft
-        "r" -> AlignRight
-        "c" -> AlignCenter
-        "*" -> AlignDefault
-        _   -> AlignDefault
-
-plainify :: Blocks -> Blocks
-plainify bs = case toList bs of
-                [Para ils] -> plain (fromList ils)
-                _          -> bs
-
-multirowCell :: PandocMonad m => LP m Cell
-multirowCell = controlSeq "multirow" >> do
-  -- Full prototype for \multirow macro is:
-  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
-  -- However, everything except `nrows` and `text` make
-  -- sense in the context of the Pandoc AST
-  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
-  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
-  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
-  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
-  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
-
-multicolumnCell :: PandocMonad m => LP m Cell
-multicolumnCell = controlSeq "multicolumn" >> do
-  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  alignment <- symbol '{' *> cellAlignment <* symbol '}'
-
-  let singleCell = do
-        content <- plainify <$> blocks
-        return $ cell alignment (RowSpan 1) (ColSpan span') content
-
-  -- Two possible contents: either a \multirow cell, or content.
-  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
-  -- Note that a \multirow cell can be nested in a \multicolumn,
-  -- but not the other way around. See #6603
-  let nestedCell = do
-        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell
-        return $ cell
-                  alignment
-                  (RowSpan rs)
-                  (ColSpan span')
-                  (fromList bs)
-
-  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
-
--- Parse a simple cell, i.e. not multirow/multicol
-parseSimpleCell :: PandocMonad m => LP m Cell
-parseSimpleCell = simpleCell <$> (plainify <$> blocks)
-
--- LaTeX tables are stored with empty cells underneath multirow cells
--- denoting the grid spaces taken up by them. More specifically, if a
--- cell spans m rows, then it will overwrite all the cells in the
--- columns it spans for (m-1) rows underneath it, requiring padding
--- cells in these places. These padding cells need to be removed for
--- proper table reading. See #6603.
---
--- These fixTable functions do not otherwise fix up malformed
--- input tables: that is left to the table builder.
-fixTableHead :: TableHead -> TableHead
-fixTableHead (TableHead attr rows) = TableHead attr rows'
-  where
-    rows' = fixTableRows rows
-
-fixTableBody :: TableBody -> TableBody
-fixTableBody (TableBody attr rhc th tb)
-  = TableBody attr rhc th' tb'
-  where
-    th' = fixTableRows th
-    tb' = fixTableRows tb
-
-fixTableRows :: [Row] -> [Row]
-fixTableRows = fixTableRows' $ repeat Nothing
-  where
-    fixTableRows' oldHang (Row attr cells : rs)
-      = let (newHang, cells') = fixTableRow oldHang cells
-            rs'               = fixTableRows' newHang rs
-        in Row attr cells' : rs'
-    fixTableRows' _ [] = []
-
--- The overhang is represented as Just (relative cell dimensions) or
--- Nothing for an empty grid space.
-fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
-fixTableRow oldHang cells
-  -- If there's overhang, drop cells until their total width meets the
-  -- width of the occupied grid spaces (or we run out)
-  | (n, prefHang, restHang) <- splitHang oldHang
-  , n > 0
-  = let cells' = dropToWidth getCellW n cells
-        (restHang', cells'') = fixTableRow restHang cells'
-    in (prefHang restHang', cells'')
-  -- Otherwise record the overhang of a pending cell and fix the rest
-  -- of the row
-  | c@(Cell _ _ h w _):cells' <- cells
-  = let h' = max 1 h
-        w' = max 1 w
-        oldHang' = dropToWidth getHangW w' oldHang
-        (newHang, cells'') = fixTableRow oldHang' cells'
-    in (toHang w' h' <> newHang, c : cells'')
-  | otherwise
-  = (oldHang, [])
-  where
-    getCellW (Cell _ _ _ w _) = w
-    getHangW = maybe 1 fst
-    getCS (ColSpan n) = n
-
-    toHang c r
-      | r > 1     = [Just (c, r)]
-      | otherwise = replicate (getCS c) Nothing
-
-    -- Take the prefix of the overhang list representing filled grid
-    -- spaces. Also return the remainder and the length of this prefix.
-    splitHang = splitHang' 0 id
-
-    splitHang' !n l (Just (c, r):xs)
-      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
-    splitHang' n l xs = (n, l, xs)
-
-    -- Drop list items until the total width of the dropped items
-    -- exceeds the passed width.
-    dropToWidth _     n l | n < 1 = l
-    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
-    dropToWidth _     _ []        = []
-
-simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
-simpTable envname hasWidthParameter = try $ do
-  when hasWidthParameter $ () <$ (spaces >> tok)
-  skipopts
-  colspecs <- parseAligns
-  let (aligns, widths, prefsufs) = unzip3 colspecs
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  skipMany hline
-  spaces
-  header' <- option [] . try . fmap (:[]) $
-             parseTableRow envname prefsufs <* lbreak <* many1 hline
-  spaces
-  rows <- sepEndBy (parseTableRow envname prefsufs)
-                    (lbreak <* optional (skipMany hline))
-  spaces
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  lookAhead $ controlSeq "end" -- make sure we're at end
-  let th  = fixTableHead $ TableHead nullAttr header'
-  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
-  let tf  = TableFoot nullAttr []
-  return $ table emptyCaption (zip aligns widths) th tbs tf
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
-  where go (Table attr c spec th tb tf) = do
-          st <- getState
-          let mblabel = sLastLabel st
-          capt <- case (sCaption st, mblabel) of
-                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
-                   (Just ils, Just lab) -> do
-                     num <- getNextNumber sLastTableNum
-                     setState
-                       st{ sLastTableNum = num
-                         , sLabels = M.insert lab
-                                    [Str (renderDottedNum num)]
-                                    (sLabels st) }
-                     return $ caption Nothing (plain ils) -- add number??
-                   (Nothing, _)  -> return c
-          let attr' = case (attr, mblabel) of
-                        ((_,classes,kvs), Just ident) ->
-                           (ident,classes,kvs)
-                        _ -> attr
-          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
-        go x = return x
-
--- TODO: For now we add a Div to contain table attributes, since
--- most writers don't do anything yet with attributes on Table.
--- This can be removed when that changes.
-addAttrDiv :: Attr -> Block -> Block
-addAttrDiv ("",[],[]) b = b
-addAttrDiv attr b       = Div attr [b]
-
 block :: PandocMonad m => LP m Blocks
 block = do
-  res <- (mempty <$ spaces1)
-    <|> environment
-    <|> macroDef (rawBlock "latex")
-    <|> blockCommand
-    <|> paragraph
-    <|> grouped block
+  Tok _ toktype _ <- lookAhead anyTok
+  res <- (case toktype of
+            Newline           -> mempty <$ spaces1
+            Spaces            -> mempty <$ spaces1
+            Comment           -> mempty <$ spaces1
+            Word              -> paragraph
+            CtrlSeq "begin"   -> environment
+            CtrlSeq _         -> macroDef (rawBlock "latex")
+                               <|> blockCommand
+            _                 -> mzero)
+          <|> paragraph
+          <|> grouped block
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
 blocks :: PandocMonad m => LP m Blocks
 blocks = mconcat <$> many block
 
-setDefaultLanguage :: PandocMonad m => LP m Blocks
-setDefaultLanguage = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  polylang <- untokenize <$> braced
-  case M.lookup polylang polyglossiaLangToBCP47 of
-       Nothing -> return mempty -- TODO mzero? warning?
-       Just langFunc -> do
-         let l = langFunc o
-         setTranslations l
-         updateState $ setMeta "lang" $ str (renderLang l)
-         return mempty
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
new file mode 100644
index 000000000..af97125c6
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Citation
+  ( citationCommands
+  , cites
+  )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (mzero)
+import Control.Monad.Trans (lift)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParsecError))
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+citationCommands inline =
+  let citation = citationWith inline
+      tok = spaces *> grouped inline
+   in M.fromList
+  [ ("cite", citation "cite" NormalCitation False)
+  , ("Cite", citation "Cite" NormalCitation False)
+  , ("citep", citation "citep" NormalCitation False)
+  , ("citep*", citation "citep*" NormalCitation False)
+  , ("citeal", citation "citeal" NormalCitation False)
+  , ("citealp", citation "citealp" NormalCitation False)
+  , ("citealp*", citation "citealp*" NormalCitation False)
+  , ("autocite", citation "autocite" NormalCitation False)
+  , ("smartcite", citation "smartcite" NormalCitation False)
+  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+  , ("parencite", citation "parencite" NormalCitation False)
+  , ("supercite", citation "supercite" NormalCitation False)
+  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+  , ("citeyear", citation "citeyear" SuppressAuthor False)
+  , ("autocite*", citation "autocite*" SuppressAuthor False)
+  , ("cite*", citation "cite*" SuppressAuthor False)
+  , ("parencite*", citation "parencite*" SuppressAuthor False)
+  , ("textcite", citation "textcite" AuthorInText False)
+  , ("citet", citation "citet" AuthorInText False)
+  , ("citet*", citation "citet*" AuthorInText False)
+  , ("citealt", citation "citealt" AuthorInText False)
+  , ("citealt*", citation "citealt*" AuthorInText False)
+  , ("textcites", citation "textcites" AuthorInText True)
+  , ("cites", citation "cites" NormalCitation True)
+  , ("autocites", citation "autocites" NormalCitation True)
+  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+  , ("parencites", citation "parencites" NormalCitation True)
+  , ("supercites", citation "supercites" NormalCitation True)
+  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+  , ("Autocite", citation "Autocite" NormalCitation False)
+  , ("Smartcite", citation "Smartcite" NormalCitation False)
+  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
+  , ("Parencite", citation "Parencite" NormalCitation False)
+  , ("Supercite", citation "Supercite" NormalCitation False)
+  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+  , ("Cite*", citation "Cite*" SuppressAuthor False)
+  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+  , ("Textcite", citation "Textcite" AuthorInText False)
+  , ("Textcites", citation "Textcites" AuthorInText True)
+  , ("Cites", citation "Cites" NormalCitation True)
+  , ("Autocites", citation "Autocites" NormalCitation True)
+  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
+  , ("Parencites", citation "Parencites" NormalCitation True)
+  , ("Supercites", citation "Supercites" NormalCitation True)
+  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+  , ("citetext", complexNatbibCitation inline NormalCitation)
+  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
+                        complexNatbibCitation inline AuthorInText)
+                   <|> citation "citeauthor" AuthorInText False)
+  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+                          addMeta "nocite"))
+  ]
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _      = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+  let k = last ks
+  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation]
+simpleCiteArgs inline = try $ do
+  first  <- optionMaybe $ toList <$> opt
+  second <- optionMaybe $ toList <$> opt
+  keys <- try $ bgroup *> manyTill citationLabel egroup
+  let (pre, suf) = case (first  , second ) of
+        (Just s , Nothing) -> (mempty, s )
+        (Just s , Just t ) -> (s , t )
+        _                  -> (mempty, mempty)
+      conv k = Citation { citationId      = k
+                        , citationPrefix  = []
+                        , citationSuffix  = []
+                        , citationMode    = NormalCitation
+                        , citationHash    = 0
+                        , citationNoteNum = 0
+                        }
+  return $ addPrefix pre $ addSuffix suf $ map conv keys
+ where
+  opt :: PandocMonad m => LP m Inlines
+  opt = do
+    toks <- try (sp *> bracketedToks <* sp)
+    -- now parse the toks as inlines
+    st <- getState
+    parsed <- lift $
+      runParserT (mconcat <$> many inline) st "bracketed option" toks
+    case parsed of
+      Right result -> return result
+      Left e       -> throwError $ PandocParsecError (toSources toks) e
+
+
+
+citationLabel :: PandocMonad m => LP m Text
+citationLabel  = do
+  sp
+  untokenize <$>
+    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+          <* sp
+          <* optional (symbol ',')
+          <* sp)
+  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
+
+cites :: PandocMonad m
+      => LP m Inlines -> CitationMode -> Bool -> LP m [Citation]
+cites inline mode multi = try $ do
+  let paropt = parenWrapped inline
+  cits <- if multi
+             then do
+               multiprenote <- optionMaybe $ toList <$> paropt
+               multipostnote <- optionMaybe $ toList <$> paropt
+               let (pre, suf) = case (multiprenote, multipostnote) of
+                     (Just s , Nothing) -> (mempty, s)
+                     (Nothing , Just t) -> (mempty, t)
+                     (Just s , Just t ) -> (s, t)
+                     _                  -> (mempty, mempty)
+               tempCits <- many1 $ simpleCiteArgs inline
+               case tempCits of
+                 (k:ks) -> case ks of
+                             (_:_) -> return $ (addMprenote pre k : init ks) ++
+                                                 [addMpostnote suf (last ks)]
+                             _ -> return [addMprenote pre (addMpostnote suf k)]
+                 _ -> return [[]]
+             else count 1 $ simpleCiteArgs inline
+  let cs = concat cits
+  return $ case mode of
+        AuthorInText -> case cs of
+                             (c:rest) -> c {citationMode = mode} : rest
+                             []       -> []
+        _            -> map (\a -> a {citationMode = mode}) cs
+  where mprenote (k:ks) = (k:ks) ++ [Space]
+        mprenote _ = mempty
+        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+        mpostnote _ = mempty
+        addMprenote mpn (k:ks) =
+          let mpnfinal = case citationPrefix k of
+                           (_:_) -> mprenote mpn
+                           _ -> mpn
+          in addPrefix mpnfinal (k:ks)
+        addMprenote _ _ = []
+        addMpostnote = addSuffix . mpostnote
+
+citationWith :: PandocMonad m
+             => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines
+citationWith inline name mode multi = do
+  (c,raw) <- withRaw $ cites inline mode multi
+  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+  let isCite Cite{} = True
+      isCite _      = False
+      (pref, rest) = break isCite (toList ils)
+  in case rest of
+          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+          _                -> []
+
+complexNatbibCitation :: PandocMonad m
+                      => LP m Inlines -> CitationMode -> LP m Inlines
+complexNatbibCitation inline mode = try $ do
+  (cs, raw) <-
+    withRaw $ concat <$> do
+      bgroup
+      items <- mconcat <$>
+                many1 (notFollowedBy (symbol ';') >> inline)
+                  `sepBy1` symbol ';'
+      egroup
+      return $ map handleCitationPart items
+  case cs of
+       []       -> mzero
+       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+  note $ para $ ils <> str "."
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
new file mode 100644
index 000000000..7b8bca4af
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -0,0 +1,397 @@
+{-# LANGUAGE OverloadedStrings     #-}
+{-# LANGUAGE ViewPatterns          #-}
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Inline
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Inline
+  ( acronymCommands
+  , verbCommands
+  , charCommands
+  , accentCommands
+  , nameCommands
+  , biblatexInlineCommands
+  , refCommands
+  , rawInlineOr
+  , listingsLanguage
+  )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Shared (toRomanNumeral, safeRead)
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
+import Control.Applicative (optional, (<|>))
+import Control.Monad (guard, mzero, mplus, unless)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
+import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
+                            manyTill, getInput, setInput, incSourceColumn,
+                            option, many1, try)
+import Data.Char (isDigit)
+import Text.Pandoc.Highlighting (fromListingsLanguage,)
+import Data.Maybe (maybeToList, fromMaybe)
+import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Data.Text.Normalize as Normalize
+import qualified Text.Pandoc.Translations as Translations
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+  if parseRaw
+     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
+     else fallback
+
+dolabel :: PandocMonad m => LP m Inlines
+dolabel = do
+  v <- braced
+  let refstr = untokenize v
+  updateState $ \st ->
+    st{ sLastLabel = Just refstr }
+  return $ spanWith (refstr,[],[("label", refstr)])
+    $ inBrackets $ str $ untokenize v
+
+doref :: PandocMonad m => Text -> LP m Inlines
+doref cls = do
+  v <- braced
+  let refstr = untokenize v
+  return $ linkWith ("",[],[ ("reference-type", cls)
+                           , ("reference", refstr)])
+                    ("#" <> refstr)
+                    ""
+                    (inBrackets $ str refstr)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = str <$> translateTerm term
+
+lit :: Text -> LP m Inlines
+lit = pure . str
+
+doverb :: PandocMonad m => LP m Inlines
+doverb = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  withVerbatimMode $
+    code . untokenize <$>
+      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+  t@(Tok pos toktype txt) <- anyTok
+  case T.findIndex (== stopchar) txt of
+       Nothing -> return t
+       Just i  -> do
+         let (t1, t2) = T.splitAt i txt
+         inp <- getInput
+         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
+         return $ Tok pos toktype t1
+
+listingsLanguage :: [(Text, Text)] -> Maybe Text
+listingsLanguage opts =
+  case lookup "language" opts of
+    Nothing  -> Nothing
+    Just l   -> fromListingsLanguage l `mplus` Just l
+
+dolstinline :: PandocMonad m => LP m Inlines
+dolstinline = do
+  options <- option [] keyvals
+  let classes = maybeToList $ listingsLanguage options
+  doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+  skipopts
+  cls <- untokenize <$> braced
+  doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
+doinlinecode classes = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  let stopchar = if marker == '{' then '}' else marker
+  withVerbatimMode $
+    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
+      manyTill (verbTok stopchar) (symbol stopchar)
+
+nlToSpace :: Char -> Char
+nlToSpace '\n' = ' '
+nlToSpace x    = x
+
+romanNumeralUpper :: (PandocMonad m) => LP m Inlines
+romanNumeralUpper =
+  str . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralLower :: (PandocMonad m) => LP m Inlines
+romanNumeralLower =
+  str . T.toLower . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralArg :: (PandocMonad m) => LP m Int
+romanNumeralArg = spaces *> (parser <|> inBraces)
+  where
+    inBraces = do
+      symbol '{'
+      spaces
+      res <- parser
+      spaces
+      symbol '}'
+      return res
+    parser = do
+      s <- untokenize <$> many1 (satisfyTok isWordTok)
+      let (digits, rest) = T.span isDigit s
+      unless (T.null rest) $
+        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
+      safeRead digits
+
+accentWith :: PandocMonad m
+           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+  ils <- tok
+  case toList ils of
+       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (Normalize.normalize Normalize.NFC
+               (T.pack [x, combiningAccent]) <> xs) : ys
+       [Space] -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       []      -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       _       -> return ils
+
+
+verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+verbCommands = M.fromList
+  [ ("verb", doverb)
+  , ("lstinline", dolstinline)
+  , ("mintinline", domintinline)
+  , ("Verb", doverb)
+  ]
+
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+  let accent = accentWith tok
+  in  M.fromList
+  [ ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("textasciicircum", lit "^")
+  , ("textasciitilde", lit "~")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
+  ]
+
+charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+charCommands = M.fromList
+  [ ("ldots", lit "…")
+  , ("vdots", lit "\8942")
+  , ("dots", lit "…")
+  , ("mdots", lit "…")
+  , ("sim", lit "~")
+  , ("sep", lit ",")
+  , ("P", lit "¶")
+  , ("S", lit "§")
+  , ("$", lit "$")
+  , ("%", lit "%")
+  , ("&", lit "&")
+  , ("#", lit "#")
+  , ("_", lit "_")
+  , ("{", lit "{")
+  , ("}", lit "}")
+  , ("qed", lit "\a0\x25FB")
+  , ("lq", return (str "‘"))
+  , ("rq", return (str "’"))
+  , ("textquoteleft", return (str "‘"))
+  , ("textquoteright", return (str "’"))
+  , ("textquotedblleft", return (str "“"))
+  , ("textquotedblright", return (str "”"))
+  , ("/", pure mempty) -- italic correction
+  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+                            guard $ not inTableCell
+                            optional rawopt
+                            spaces))
+  , (",", lit "\8198")
+  , ("@", pure mempty)
+  , (" ", lit "\160")
+  , ("ps", pure $ str "PS." <> space)
+  , ("TeX", lit "TeX")
+  , ("LaTeX", lit "LaTeX")
+  , ("bar", lit "|")
+  , ("textless", lit "<")
+  , ("textgreater", lit ">")
+  , ("textbackslash", lit "\\")
+  , ("backslash", lit "\\")
+  , ("slash", lit "/")
+  -- fontawesome
+  , ("faCheck", lit "\10003")
+  , ("faClose", lit "\10007")
+  -- hyphenat
+  , ("bshyp", lit "\\\173")
+  , ("fshyp", lit "/\173")
+  , ("dothyp", lit ".\173")
+  , ("colonhyp", lit ":\173")
+  , ("hyp", lit "-")
+  ]
+
+biblatexInlineCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+biblatexInlineCommands tok = M.fromList
+  -- biblatex misc
+  [ ("RN", romanNumeralUpper)
+  , ("Rn", romanNumeralLower)
+  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+  , ("mkbibparens",
+       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+  , ("mkbibbrackets",
+       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+  , ("autocap", spanWith nullAttr <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+  , ("bibstring",
+       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+         <$> braced)
+  , ("adddot", pure (str "."))
+  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+  , ("addabbrvspace", pure space)
+  , ("hyphen", pure (str "-"))
+  ]
+
+nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+nameCommands = M.fromList
+  [ ("figurename", doTerm Translations.Figure)
+  , ("prefacename", doTerm Translations.Preface)
+  , ("refname", doTerm Translations.References)
+  , ("bibname", doTerm Translations.Bibliography)
+  , ("chaptername", doTerm Translations.Chapter)
+  , ("partname", doTerm Translations.Part)
+  , ("contentsname", doTerm Translations.Contents)
+  , ("listfigurename", doTerm Translations.ListOfFigures)
+  , ("listtablename", doTerm Translations.ListOfTables)
+  , ("indexname", doTerm Translations.Index)
+  , ("abstractname", doTerm Translations.Abstract)
+  , ("tablename", doTerm Translations.Table)
+  , ("enclname", doTerm Translations.Encl)
+  , ("ccname", doTerm Translations.Cc)
+  , ("headtoname", doTerm Translations.To)
+  , ("pagename", doTerm Translations.Page)
+  , ("seename", doTerm Translations.See)
+  , ("seealsoname", doTerm Translations.SeeAlso)
+  , ("proofname", doTerm Translations.Proof)
+  , ("glossaryname", doTerm Translations.Glossary)
+  , ("lstlistingname", doTerm Translations.Listing)
+  ]
+
+refCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+refCommands = M.fromList
+  [ ("label", rawInlineOr "label" dolabel)
+  , ("ref", rawInlineOr "ref" $ doref "ref")
+  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
+  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
+  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
+  ]
+
+acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+acronymCommands = M.fromList
+  -- glossaries package
+  [ ("gls", doAcronym "short")
+  , ("Gls", doAcronym "short")
+  , ("glsdesc", doAcronym "long")
+  , ("Glsdesc", doAcronym "long")
+  , ("GLSdesc", doAcronym "long")
+  , ("acrlong", doAcronym "long")
+  , ("Acrlong", doAcronym "long")
+  , ("acrfull", doAcronym "full")
+  , ("Acrfull", doAcronym "full")
+  , ("acrshort", doAcronym "abbrv")
+  , ("Acrshort", doAcronym "abbrv")
+  , ("glspl", doAcronymPlural "short")
+  , ("Glspl", doAcronymPlural "short")
+  , ("glsdescplural", doAcronymPlural "long")
+  , ("Glsdescplural", doAcronymPlural "long")
+  , ("GLSdescplural", doAcronymPlural "long")
+  -- acronyms package
+  , ("ac", doAcronym "short")
+  , ("acf", doAcronym "full")
+  , ("acs", doAcronym "abbrv")
+  , ("acl", doAcronym "long")
+  , ("acp", doAcronymPlural "short")
+  , ("acfp", doAcronymPlural "full")
+  , ("acsp", doAcronymPlural "abbrv")
+  , ("aclp", doAcronymPlural "long")
+  , ("Ac", doAcronym "short")
+  , ("Acf", doAcronym "full")
+  , ("Acs", doAcronym "abbrv")
+  , ("Acl", doAcronym "long")
+  , ("Acp", doAcronymPlural "short")
+  , ("Acfp", doAcronymPlural "full")
+  , ("Acsp", doAcronymPlural "abbrv")
+  , ("Aclp", doAcronymPlural "long")
+  ]
+
+doAcronym :: PandocMonad m => Text -> LP m Inlines
+doAcronym form = do
+  acro <- braced
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "singular+" <> form)])
+    $ str $ untokenize acro]
+
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
+doAcronymPlural form = do
+  acro <- braced
+  let plural = str "s"
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "plural+" <> form)]) $
+   mconcat [str $ untokenize acro, plural]]
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 814b2fe79..6a8327904 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Lang
-   Copyright   : Copyright (C) 2018-2020 John MacFarlane
+   Copyright   : Copyright (C) 2018-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -12,144 +12,223 @@ Functions for parsing polyglossia and babel language specifiers to
 BCP47 'Lang'.
 -}
 module Text.Pandoc.Readers.LaTeX.Lang
-  ( polyglossiaLangToBCP47
+  ( setDefaultLanguage
+  , polyglossiaLangToBCP47
   , babelLangToBCP47
+  , enquoteCommands
+  , inlineLanguageCommands
   )
 where
 import qualified Data.Map as M
+import Data.Text (Text)
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang(..))
+import Text.Pandoc.Shared (extractSpaces)
+import Text.Collate.Lang (Lang(..), renderLang)
+import Text.Pandoc.Class (PandocMonad(..), setTranslations)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
+                            withQuoteContext)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith,
+                            singleQuoted, doubleQuoted)
+
+enquote :: PandocMonad m
+        => LP m Inlines
+        -> Bool -> Maybe Text -> LP m Inlines
+enquote tok starred mblang = do
+  skipopts
+  let lang = mblang >>= babelLangToBCP47
+  let langspan = case lang of
+                      Nothing -> id
+                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
+  quoteContext <- sQuoteContext <$> getState
+  if starred || quoteContext == InDoubleQuote
+     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+enquoteCommands :: PandocMonad m
+                => LP m Inlines -> M.Map Text (LP m Inlines)
+enquoteCommands tok = M.fromList
+  [ ("enquote*", enquote tok True Nothing)
+  , ("enquote", enquote tok False Nothing)
+  -- foreignquote is supposed to use native quote marks
+  , ("foreignquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("foreignquote", braced >>= enquote tok False . Just . untokenize)
+  -- hypehnquote uses regular quotes
+  , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("hyphenquote", braced >>= enquote tok False . Just . untokenize)
+  ]
+
+foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
+foreignlanguage tok = do
+  babelLang <- untokenize <$> braced
+  case babelLangToBCP47 babelLang of
+       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
+       _ -> tok
+
+inlineLanguageCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+inlineLanguageCommands tok =
+  M.fromList $
+    ("foreignlanguage", foreignlanguage tok) :
+    (mk <$> M.toList polyglossiaLangToBCP47)
+  where
+    mk (polyglossia, bcp47Func) =
+      ("text" <> polyglossia, inlineLanguage tok bcp47Func)
+
+inlineLanguage :: PandocMonad m
+               => LP m Inlines -> (Text -> Lang) -> LP m Inlines
+inlineLanguage tok bcp47Func = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  let lang = renderLang $ bcp47Func o
+  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  polylang <- untokenize <$> braced
+  case M.lookup polylang polyglossiaLangToBCP47 of
+       Nothing -> return mempty -- TODO mzero? warning?
+       Just langFunc -> do
+         let l = langFunc o
+         setTranslations l
+         updateState $ setMeta "lang" $ str (renderLang l)
+         return mempty
 
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
   [ ("arabic", \o -> case T.filter (/=' ') o of
-       "locale=algeria"    -> Lang "ar" "" "DZ" []
-       "locale=mashriq"    -> Lang "ar" "" "SY" []
-       "locale=libya"      -> Lang "ar" "" "LY" []
-       "locale=morocco"    -> Lang "ar" "" "MA" []
-       "locale=mauritania" -> Lang "ar" "" "MR" []
-       "locale=tunisia"    -> Lang "ar" "" "TN" []
-       _                   -> Lang "ar" "" "" [])
+       "locale=algeria"    -> Lang "ar" Nothing (Just "DZ") [] [] []
+       "locale=mashriq"    -> Lang "ar" Nothing (Just "SY") [] [] []
+       "locale=libya"      -> Lang "ar" Nothing (Just "LY") [] [] []
+       "locale=morocco"    -> Lang "ar" Nothing (Just "MA") [] [] []
+       "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+       "locale=tunisia"    -> Lang "ar" Nothing (Just "TN") [] [] []
+       _                   -> Lang "ar" Nothing (Just "")   [] [] [])
   , ("german", \o -> case T.filter (/=' ') o of
-       "spelling=old" -> Lang "de" "" "DE" ["1901"]
+       "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
        "variant=austrian,spelling=old"
-                       -> Lang "de" "" "AT" ["1901"]
-       "variant=austrian" -> Lang "de" "" "AT" []
+                       -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
        "variant=swiss,spelling=old"
-                       -> Lang "de" "" "CH" ["1901"]
-       "variant=swiss" -> Lang "de" "" "CH" []
-       _ -> Lang "de" "" "" [])
-  , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+                       -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+       _ -> Lang "de" Nothing Nothing [] [] [])
+  , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
   , ("greek", \o -> case T.filter (/=' ') o of
-       "variant=poly"    -> Lang "el" "" "polyton" []
-       "variant=ancient" -> Lang "grc" "" "" []
-       _                 -> Lang "el" "" "" [])
+       "variant=poly"    -> Lang "el" Nothing (Just "polyton") [] [] []
+       "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+       _                 -> Lang "el" Nothing Nothing [] [] [])
   , ("english", \o -> case T.filter (/=' ') o of
-       "variant=australian" -> Lang "en" "" "AU" []
-       "variant=canadian"   -> Lang "en" "" "CA" []
-       "variant=british"    -> Lang "en" "" "GB" []
-       "variant=newzealand" -> Lang "en" "" "NZ" []
-       "variant=american"   -> Lang "en" "" "US" []
-       _                    -> Lang "en" "" "" [])
-  , ("usorbian", \_ -> Lang "hsb" "" "" [])
+       "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+       "variant=canadian"   -> Lang "en" Nothing (Just "CA") [] [] []
+       "variant=british"    -> Lang "en" Nothing (Just "GB") [] [] []
+       "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+       "variant=american"   -> Lang "en" Nothing (Just "US") [] [] []
+       _                    -> Lang "en" Nothing (Just "")   [] [] [])
+  , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
   , ("latin", \o -> case T.filter (/=' ') o of
-       "variant=classic" -> Lang "la" "" "" ["x-classic"]
-       _                 -> Lang "la" "" "" [])
-  , ("slovenian", \_ -> Lang "sl" "" "" [])
-  , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
-  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
-  , ("afrikaans", \_ -> Lang "af" "" "" [])
-  , ("amharic", \_ -> Lang "am" "" "" [])
-  , ("assamese", \_ -> Lang "as" "" "" [])
-  , ("asturian", \_ -> Lang "ast" "" "" [])
-  , ("bulgarian", \_ -> Lang "bg" "" "" [])
-  , ("bengali", \_ -> Lang "bn" "" "" [])
-  , ("tibetan", \_ -> Lang "bo" "" "" [])
-  , ("breton", \_ -> Lang "br" "" "" [])
-  , ("catalan", \_ -> Lang "ca" "" "" [])
-  , ("welsh", \_ -> Lang "cy" "" "" [])
-  , ("czech", \_ -> Lang "cs" "" "" [])
-  , ("coptic", \_ -> Lang "cop" "" "" [])
-  , ("danish", \_ -> Lang "da" "" "" [])
-  , ("divehi", \_ -> Lang "dv" "" "" [])
-  , ("esperanto", \_ -> Lang "eo" "" "" [])
-  , ("spanish", \_ -> Lang "es" "" "" [])
-  , ("estonian", \_ -> Lang "et" "" "" [])
-  , ("basque", \_ -> Lang "eu" "" "" [])
-  , ("farsi", \_ -> Lang "fa" "" "" [])
-  , ("finnish", \_ -> Lang "fi" "" "" [])
-  , ("french", \_ -> Lang "fr" "" "" [])
-  , ("friulan", \_ -> Lang "fur" "" "" [])
-  , ("irish", \_ -> Lang "ga" "" "" [])
-  , ("scottish", \_ -> Lang "gd" "" "" [])
-  , ("ethiopic", \_ -> Lang "gez" "" "" [])
-  , ("galician", \_ -> Lang "gl" "" "" [])
-  , ("hebrew", \_ -> Lang "he" "" "" [])
-  , ("hindi", \_ -> Lang "hi" "" "" [])
-  , ("croatian", \_ -> Lang "hr" "" "" [])
-  , ("magyar", \_ -> Lang "hu" "" "" [])
-  , ("armenian", \_ -> Lang "hy" "" "" [])
-  , ("interlingua", \_ -> Lang "ia" "" "" [])
-  , ("indonesian", \_ -> Lang "id" "" "" [])
-  , ("icelandic", \_ -> Lang "is" "" "" [])
-  , ("italian", \_ -> Lang "it" "" "" [])
-  , ("japanese", \_ -> Lang "jp" "" "" [])
-  , ("khmer", \_ -> Lang "km" "" "" [])
-  , ("kurmanji", \_ -> Lang "kmr" "" "" [])
-  , ("kannada", \_ -> Lang "kn" "" "" [])
-  , ("korean", \_ -> Lang "ko" "" "" [])
-  , ("lao", \_ -> Lang "lo" "" "" [])
-  , ("lithuanian", \_ -> Lang "lt" "" "" [])
-  , ("latvian", \_ -> Lang "lv" "" "" [])
-  , ("malayalam", \_ -> Lang "ml" "" "" [])
-  , ("mongolian", \_ -> Lang "mn" "" "" [])
-  , ("marathi", \_ -> Lang "mr" "" "" [])
-  , ("dutch", \_ -> Lang "nl" "" "" [])
-  , ("nynorsk", \_ -> Lang "nn" "" "" [])
-  , ("norsk", \_ -> Lang "no" "" "" [])
-  , ("nko", \_ -> Lang "nqo" "" "" [])
-  , ("occitan", \_ -> Lang "oc" "" "" [])
-  , ("panjabi", \_ -> Lang "pa" "" "" [])
-  , ("polish", \_ -> Lang "pl" "" "" [])
-  , ("piedmontese", \_ -> Lang "pms" "" "" [])
-  , ("portuguese", \_ -> Lang "pt" "" "" [])
-  , ("romansh", \_ -> Lang "rm" "" "" [])
-  , ("romanian", \_ -> Lang "ro" "" "" [])
-  , ("russian", \_ -> Lang "ru" "" "" [])
-  , ("sanskrit", \_ -> Lang "sa" "" "" [])
-  , ("samin", \_ -> Lang "se" "" "" [])
-  , ("slovak", \_ -> Lang "sk" "" "" [])
-  , ("albanian", \_ -> Lang "sq" "" "" [])
-  , ("serbian", \_ -> Lang "sr" "" "" [])
-  , ("swedish", \_ -> Lang "sv" "" "" [])
-  , ("syriac", \_ -> Lang "syr" "" "" [])
-  , ("tamil", \_ -> Lang "ta" "" "" [])
-  , ("telugu", \_ -> Lang "te" "" "" [])
-  , ("thai", \_ -> Lang "th" "" "" [])
-  , ("turkmen", \_ -> Lang "tk" "" "" [])
-  , ("turkish", \_ -> Lang "tr" "" "" [])
-  , ("ukrainian", \_ -> Lang "uk" "" "" [])
-  , ("urdu", \_ -> Lang "ur" "" "" [])
-  , ("vietnamese", \_ -> Lang "vi" "" "" [])
+       "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+       _                 -> Lang "la" Nothing Nothing [] [] [])
+  , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+  , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+  , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+  , ("afrikaans", \_ -> simpleLang "af")
+  , ("amharic", \_ -> simpleLang "am")
+  , ("assamese", \_ -> simpleLang "as")
+  , ("asturian", \_ -> simpleLang "ast")
+  , ("bulgarian", \_ -> simpleLang "bg")
+  , ("bengali", \_ -> simpleLang "bn")
+  , ("tibetan", \_ -> simpleLang "bo")
+  , ("breton", \_ -> simpleLang "br")
+  , ("catalan", \_ -> simpleLang "ca")
+  , ("welsh", \_ -> simpleLang "cy")
+  , ("czech", \_ -> simpleLang "cs")
+  , ("coptic", \_ -> simpleLang "cop")
+  , ("danish", \_ -> simpleLang "da")
+  , ("divehi", \_ -> simpleLang "dv")
+  , ("esperanto", \_ -> simpleLang "eo")
+  , ("spanish", \_ -> simpleLang "es")
+  , ("estonian", \_ -> simpleLang "et")
+  , ("basque", \_ -> simpleLang "eu")
+  , ("farsi", \_ -> simpleLang "fa")
+  , ("finnish", \_ -> simpleLang "fi")
+  , ("french", \_ -> simpleLang "fr")
+  , ("friulan", \_ -> simpleLang "fur")
+  , ("irish", \_ -> simpleLang "ga")
+  , ("scottish", \_ -> simpleLang "gd")
+  , ("ethiopic", \_ -> simpleLang "gez")
+  , ("galician", \_ -> simpleLang "gl")
+  , ("hebrew", \_ -> simpleLang "he")
+  , ("hindi", \_ -> simpleLang "hi")
+  , ("croatian", \_ -> simpleLang "hr")
+  , ("magyar", \_ -> simpleLang "hu")
+  , ("armenian", \_ -> simpleLang "hy")
+  , ("interlingua", \_ -> simpleLang "ia")
+  , ("indonesian", \_ -> simpleLang "id")
+  , ("icelandic", \_ -> simpleLang "is")
+  , ("italian", \_ -> simpleLang "it")
+  , ("japanese", \_ -> simpleLang "jp")
+  , ("khmer", \_ -> simpleLang "km")
+  , ("kurmanji", \_ -> simpleLang "kmr")
+  , ("kannada", \_ -> simpleLang "kn")
+  , ("korean", \_ -> simpleLang "ko")
+  , ("lao", \_ -> simpleLang "lo")
+  , ("lithuanian", \_ -> simpleLang "lt")
+  , ("latvian", \_ -> simpleLang "lv")
+  , ("malayalam", \_ -> simpleLang "ml")
+  , ("mongolian", \_ -> simpleLang "mn")
+  , ("marathi", \_ -> simpleLang "mr")
+  , ("dutch", \_ -> simpleLang "nl")
+  , ("nynorsk", \_ -> simpleLang "nn")
+  , ("norsk", \_ -> simpleLang "no")
+  , ("nko", \_ -> simpleLang "nqo")
+  , ("occitan", \_ -> simpleLang "oc")
+  , ("panjabi", \_ -> simpleLang "pa")
+  , ("polish", \_ -> simpleLang "pl")
+  , ("piedmontese", \_ -> simpleLang "pms")
+  , ("portuguese", \_ -> simpleLang "pt")
+  , ("romansh", \_ -> simpleLang "rm")
+  , ("romanian", \_ -> simpleLang "ro")
+  , ("russian", \_ -> simpleLang "ru")
+  , ("sanskrit", \_ -> simpleLang "sa")
+  , ("samin", \_ -> simpleLang "se")
+  , ("slovak", \_ -> simpleLang "sk")
+  , ("albanian", \_ -> simpleLang "sq")
+  , ("serbian", \_ -> simpleLang "sr")
+  , ("swedish", \_ -> simpleLang "sv")
+  , ("syriac", \_ -> simpleLang "syr")
+  , ("tamil", \_ -> simpleLang "ta")
+  , ("telugu", \_ -> simpleLang "te")
+  , ("thai", \_ -> simpleLang "th")
+  , ("turkmen", \_ -> simpleLang "tk")
+  , ("turkish", \_ -> simpleLang "tr")
+  , ("ukrainian", \_ -> simpleLang "uk")
+  , ("urdu", \_ -> simpleLang "ur")
+  , ("vietnamese", \_ -> simpleLang "vi")
   ]
 
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
 babelLangToBCP47 :: T.Text -> Maybe Lang
 babelLangToBCP47 s =
   case s of
-       "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
-       "naustrian" -> Just $ Lang "de" "" "AT" []
-       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
-       "nswissgerman" -> Just $ Lang "de" "" "CH" []
-       "german" -> Just $ Lang "de" "" "DE" ["1901"]
-       "ngerman" -> Just $ Lang "de" "" "DE" []
-       "lowersorbian" -> Just $ Lang "dsb" "" "" []
-       "uppersorbian" -> Just $ Lang "hsb" "" "" []
-       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
-       "slovene" -> Just $ Lang "sl" "" "" []
-       "australian" -> Just $ Lang "en" "" "AU" []
-       "canadian" -> Just $ Lang "en" "" "CA" []
-       "british" -> Just $ Lang "en" "" "GB" []
-       "newzealand" -> Just $ Lang "en" "" "NZ" []
-       "american" -> Just $ Lang "en" "" "US" []
-       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+       "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+       "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+       "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+       "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+       "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+       "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+       "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+       "slovene" -> Just $ simpleLang "sl"
+       "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+       "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+       "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+       "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+       "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+       "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
        _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
new file mode 100644
index 000000000..5495a8e74
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -0,0 +1,184 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Macro
+  ( macroDef
+  )
+where
+import Text.Pandoc.Extensions (Extension(..))
+import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
+macroDef constructor = do
+    (_, s) <- withRaw (commandDef <|> environmentDef)
+    (constructor (untokenize s) <$
+      guardDisabled Ext_latex_macros)
+     <|> return mempty
+  where commandDef = do
+          nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
+          guardDisabled Ext_latex_macros <|>
+           mapM_ (\(name, macro') ->
+                   updateState (\s -> s{ sMacros = M.insert name macro'
+                                          (sMacros s) })) nameMacroPairs
+        environmentDef = do
+          mbenv <- newenvironment
+          case mbenv of
+            Nothing -> return ()
+            Just (name, macro1, macro2) ->
+              guardDisabled Ext_latex_macros <|>
+                do updateState $ \s -> s{ sMacros =
+                    M.insert name macro1 (sMacros s) }
+                   updateState $ \s -> s{ sMacros =
+                    M.insert ("end" <> name) macro2 (sMacros s) }
+        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+        -- is equivalent to
+        -- @\newcommand{\envname}[n-args][default]{begin}@
+        -- @\newcommand{\endenvname}@
+
+letmacro :: PandocMonad m => LP m [(Text, Macro)]
+letmacro = do
+  controlSeq "let"
+  (name, contents) <- withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    optional $ symbol '='
+    spaces
+    -- we first parse in verbatim mode, and then expand macros,
+    -- because we don't want \let\foo\bar to turn into
+    -- \let\foo hello if we have previously \def\bar{hello}
+    contents <- bracedOrToken
+    return (name, contents)
+  contents' <- doMacros' 0 contents
+  return [(name, Macro ExpandWhenDefined [] Nothing contents')]
+
+defmacro :: PandocMonad m => LP m [(Text, Macro)]
+defmacro = do
+  -- we use withVerbatimMode, because macros are to be expanded
+  -- at point of use, not point of definition
+  controlSeq "def"
+  withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    argspecs <- many (argspecArg <|> argspecPattern)
+    contents <- bracedOrToken
+    return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+
+-- \newif\iffoo' defines:
+-- \iffoo to be \iffalse
+-- \footrue to be a command that defines \iffoo to be \iftrue
+-- \foofalse to be a command that defines \iffoo to be \iffalse
+newif :: PandocMonad m => LP m [(Text, Macro)]
+newif = do
+  controlSeq "newif"
+  withVerbatimMode $ do
+    Tok pos (CtrlSeq name) _ <- anyControlSeq
+    -- \def\iffoo\iffalse
+    -- \def\footrue{\def\iffoo\iftrue}
+    -- \def\foofalse{\def\iffoo\iffalse}
+    let base = T.drop 2 name
+    return [ (name, Macro ExpandWhenUsed [] Nothing
+                    [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
+           , (base <> "true",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iftrue") "\\iftrue"
+                   ])
+           , (base <> "false",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iffalse") "\\iffalse"
+                   ])
+           ]
+
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
+  Tok _ (Arg i) _ <- satisfyTok isArgTok
+  return $ ArgNum i
+
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+                              (toktype' == Symbol || toktype' == Word) &&
+                              (txt /= "{" && txt /= "\\" && txt /= "}")))
+
+newcommand :: PandocMonad m => LP m [(Text, Macro)]
+newcommand = do
+  Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+                             controlSeq "renewcommand" <|>
+                             controlSeq "providecommand" <|>
+                             controlSeq "DeclareMathOperator" <|>
+                             controlSeq "DeclareRobustCommand"
+  withVerbatimMode $ do
+    Tok _ (CtrlSeq name) txt <- do
+      optional (symbol '*')
+      anyControlSeq <|>
+        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+    spaces
+    numargs <- option 0 $ try bracketedNum
+    let argspecs = map ArgNum [1..numargs]
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks
+    spaces
+    contents' <- bracedOrToken
+    let contents =
+         case mtype of
+              "DeclareMathOperator" ->
+                 Tok pos (CtrlSeq "mathop") "\\mathop"
+                 : Tok pos Symbol "{"
+                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
+                 : Tok pos Symbol "{"
+                 : (contents' ++
+                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
+              _                     -> contents'
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+        Just macro
+          | mtype == "newcommand" -> do
+              report $ MacroAlreadyDefined txt pos
+              return [(name, macro)]
+          | mtype == "providecommand" -> return [(name, macro)]
+        _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
+
+newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
+newenvironment = do
+  pos <- getPosition
+  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+                             controlSeq "renewenvironment" <|>
+                             controlSeq "provideenvironment"
+  withVerbatimMode $ do
+    optional $ symbol '*'
+    spaces
+    name <- untokenize <$> braced
+    spaces
+    numargs <- option 0 $ try bracketedNum
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks
+    let argspecs = map (\i -> ArgNum i) [1..numargs]
+    startcontents <- spaces >> bracedOrToken
+    endcontents <- spaces >> bracedOrToken
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+         Just _
+           | mtype == "newenvironment" -> do
+               report $ MacroAlreadyDefined name pos
+               return Nothing
+           | mtype == "provideenvironment" ->
+               return Nothing
+         _ -> return $ Just (name,
+                      Macro ExpandWhenUsed argspecs optarg startcontents,
+                      Macro ExpandWhenUsed [] Nothing endcontents)
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+  ds <- untokenize <$> bracketedToks
+  case safeRead ds of
+       Just i -> return i
+       _      -> return 0
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
new file mode 100644
index 000000000..5b49a0376
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -0,0 +1,221 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Math
+  ( dollarsMath
+  , inlineEnvironments
+  , inlineEnvironment
+  , mathInline
+  , mathDisplay
+  , theoremstyle
+  , theoremEnvironment
+  , newtheorem
+  , proof
+  )
+where
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Builder as B
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (trimMath, stripTrailingNewlines)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import Control.Monad (guard, mzero)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+  symbol '$'
+  display <- option False (True <$ symbol '$')
+  (do contents <- try $ untokenize <$> pDollarsMath 0
+      if display
+         then mathDisplay contents <$ symbol '$'
+         else return $ mathInline contents)
+   <|> (guard display >> return (mathInline ""))
+
+-- Int is number of embedded groupings
+pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
+pDollarsMath n = do
+  tk@(Tok _ toktype t) <- anyTok
+  case toktype of
+       Symbol | t == "$"
+              , n == 0 -> return []
+              | t == "\\" -> do
+                  tk' <- anyTok
+                  (tk :) . (tk' :) <$> pDollarsMath n
+              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
+              | t == "}" ->
+                if n > 0
+                then (tk :) <$> pDollarsMath (n-1)
+                else mzero
+       _ -> (tk :) <$> pDollarsMath n
+
+mathDisplay :: Text -> Inlines
+mathDisplay = displayMath . trimMath
+
+mathInline :: Text -> Inlines
+mathInline = math . trimMath
+
+mathEnvWith :: PandocMonad m
+            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+   where inner x = case innerEnv of
+                        Nothing -> x
+                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
+                                   "\\end{" <> y <> "}"
+
+mathEnv :: PandocMonad m => Text -> LP m Text
+mathEnv name = do
+  skipopts
+  optional blankline
+  res <- manyTill anyTok (end_ name)
+  return $ stripTrailingNewlines $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+  controlSeq "begin"
+  name <- untokenize <$> braced
+  M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+    ("displaymath", mathEnvWith id Nothing "displaymath")
+  , ("math", math <$> mathEnv "math")
+  , ("equation", mathEnvWith id Nothing "equation")
+  , ("equation*", mathEnvWith id Nothing "equation*")
+  , ("gather", mathEnvWith id (Just "gathered") "gather")
+  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+  , ("multline", mathEnvWith id (Just "gathered") "multline")
+  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+  , ("align", mathEnvWith id (Just "aligned") "align")
+  , ("align*", mathEnvWith id (Just "aligned") "align*")
+  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+  , ("dmath", mathEnvWith id Nothing "dmath")
+  , ("dmath*", mathEnvWith id Nothing "dmath*")
+  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+  , ("darray", mathEnvWith id (Just "aligned") "darray")
+  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
+  ]
+
+theoremstyle :: PandocMonad m => LP m Blocks
+theoremstyle = do
+  stylename <- untokenize <$> braced
+  let mbstyle = case stylename of
+                  "plain"      -> Just PlainStyle
+                  "definition" -> Just DefinitionStyle
+                  "remark"     -> Just RemarkStyle
+                  _            -> Nothing
+  case mbstyle of
+    Nothing  -> return ()
+    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
+  return mempty
+
+newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks
+newtheorem inline = do
+  number <- option True (False <$ symbol '*' <* sp)
+  name <- untokenize <$> braced
+  sp
+  series <- option Nothing $ Just . untokenize <$> bracketedToks
+  sp
+  showName <- tokWith inline
+  sp
+  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
+  sty <- sLastTheoremStyle <$> getState
+  let spec = TheoremSpec { theoremName = showName
+                         , theoremStyle = sty
+                         , theoremSeries = series
+                         , theoremSyncTo = syncTo
+                         , theoremNumber = number
+                         , theoremLastNum = DottedNum [0] }
+  tmap <- sTheoremMap <$> getState
+  updateState $ \s -> s{ sTheoremMap =
+                            M.insert name spec tmap }
+  return mempty
+
+theoremEnvironment :: PandocMonad m
+                   => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
+theoremEnvironment blocks opt name = do
+  tmap <- sTheoremMap <$> getState
+  case M.lookup name tmap of
+    Nothing -> mzero
+    Just tspec -> do
+       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
+       mblabel <- option Nothing $ Just . untokenize <$>
+                   try (spaces >> controlSeq "label" >> spaces >> braced)
+       bs <- env name blocks
+       number <-
+         if theoremNumber tspec
+            then do
+               let name' = fromMaybe name $ theoremSeries tspec
+               num <- getNextNumber
+                   (maybe (DottedNum [0]) theoremLastNum .
+                    M.lookup name' . sTheoremMap)
+               updateState $ \s ->
+                 s{ sTheoremMap =
+                       M.adjust
+                       (\spec -> spec{ theoremLastNum = num })
+                       name'
+                       (sTheoremMap s)
+                  }
+
+               case mblabel of
+                 Just ident ->
+                   updateState $ \s ->
+                     s{ sLabels = M.insert ident
+                         (B.toList $
+                           theoremName tspec <> "\160" <>
+                           str (renderDottedNum num)) (sLabels s) }
+                 Nothing -> return ()
+               return $ space <> B.text (renderDottedNum num)
+            else return mempty
+       let titleEmph = case theoremStyle tspec of
+                         PlainStyle      -> B.strong
+                         DefinitionStyle -> B.strong
+                         RemarkStyle     -> B.emph
+       let title = titleEmph (theoremName tspec <> number)
+                      <> optTitle <> "." <> space
+       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+              $ case theoremStyle tspec of
+                  PlainStyle -> walk italicize bs
+                  _          -> bs
+
+
+
+proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
+proof blocks opt = do
+  title <- option (B.text "Proof") opt
+  bs <- env "proof" blocks
+  return $
+    B.divWith ("", ["proof"], []) $
+      addQed $ addTitle (B.emph (title <> ".")) bs
+
+addTitle :: Inlines -> Blocks -> Blocks
+addTitle ils bs =
+  case B.toList bs of
+    (Para xs : rest)
+      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
+    _ -> B.para ils <> bs
+
+addQed :: Blocks -> Blocks
+addQed bs =
+  case Seq.viewr (B.unMany bs) of
+    s Seq.:> Para ils
+      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
+    _ -> bs <> B.para qedSign
+ where
+  qedSign = B.str "\xa0\x25FB"
+
+italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
+italicize (Para ils) = Para [Emph ils]
+italicize (Plain ils) = Plain [Emph ils]
+italicize x = x
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 563d32883..9dac4d6ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -1,11 +1,12 @@
 {-# LANGUAGE FlexibleInstances     #-}
+{-# LANGUAGE FlexibleContexts      #-}
 {-# LANGUAGE LambdaCase            #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE ScopedTypeVariables   #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -27,11 +28,15 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawLaTeXParser
   , applyMacros
   , tokenize
+  , tokenizeSources
+  , getInputTokens
   , untokenize
   , untoken
   , totoks
   , toksToString
   , satisfyTok
+  , parseFromToks
+  , disablingWithRaw
   , doMacros
   , doMacros'
   , setpos
@@ -52,6 +57,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , comment
   , anyTok
   , singleChar
+  , tokWith
   , specialChars
   , endline
   , blankline
@@ -78,6 +84,11 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawopt
   , overlaySpecification
   , getNextNumber
+  , label
+  , setCaption
+  , resetCaption
+  , env
+  , addMeta
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -87,13 +98,15 @@ import Control.Monad.Trans (lift)
 import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
 import Data.Default
 import Data.List (intercalate)
+import qualified Data.IntMap as IntMap
 import qualified Data.Map as M
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Error
+         (PandocError (PandocMacroLoop,PandocShouldNeverHappenError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
@@ -102,7 +115,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Shared
 import Text.Parsec.Pos
--- import Debug.Trace
 
 newtype DottedNum = DottedNum [Int]
   deriving (Show, Eq)
@@ -151,7 +163,9 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sLabels        :: M.Map Text [Inline]
                             , sHasChapters   :: Bool
                             , sToggles       :: M.Map Text Bool
-                            , sExpanded      :: Bool
+                            , sFileContents  :: M.Map Text Text
+                            , sEnableWithRaw :: Bool
+                            , sRawTokens     :: IntMap.IntMap [Tok]
                             }
      deriving Show
 
@@ -176,7 +190,9 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sLabels        = M.empty
                               , sHasChapters   = False
                               , sToggles       = M.empty
-                              , sExpanded      = False
+                              , sFileContents  = M.empty
+                              , sEnableWithRaw = True
+                              , sRawTokens     = IntMap.empty
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
@@ -232,21 +248,25 @@ withVerbatimMode parser = do
        updateState $ \st -> st{ sVerbatimMode = False }
        return result
 
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
                => [Tok] -> Bool -> LP m a -> LP m a
-               -> ParserT Text s m (a, Text)
+               -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
   let lstate' = lstate { sMacros = extractMacros pstate }
+  let setStartPos = case toks of
+                      Tok pos _ _ : _ -> setPosition pos
+                      _ -> return ()
+  let preparser = setStartPos >> parser
   let rawparser = (,) <$> withRaw valParser <*> getState
-  res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+  res' <- lift $ runParserT (withRaw (preparser >> getPosition))
+                            lstate "chunk" toks
   case res' of
        Left _    -> mzero
-       Right toks' -> do
+       Right (endpos, toks') -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
-                                        doMacros
                                         ts <- many (satisfyTok (const True))
                                         setInput ts
                                       rawparser)
@@ -255,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                _ <- takeP (T.length (untokenize toks'))
+                let skipTilPos stopPos = do
+                      anyChar
+                      pos <- getPosition
+                      if pos >= stopPos
+                         then return ()
+                         else skipTilPos stopPos
+                skipTilPos endpos
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -268,7 +294,7 @@ rawLaTeXParser toks retokenize parser valParser = do
                 return (val, result')
 
 applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-            => Text -> ParserT Text s m Text
+            => Text -> ParserT Sources s m Text
 applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
    do let retokenize = untokenize <$> many (satisfyTok (const True))
       pstate <- getState
@@ -279,6 +305,31 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
            Left e   -> Prelude.fail (show e)
            Right s' -> return s'
 
+{-
+When tokenize or untokenize change, test with this
+QuickCheck property:
+
+> tokUntokRoundtrip :: String -> Bool
+> tokUntokRoundtrip s =
+>   let t = T.pack s in untokenize (tokenize "random" t) == t
+-}
+
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+   tokenizeSource (pos, t) = totoks pos t
+
+-- Return tokens from input sources. Ensure that starting position is
+-- correct.
+getInputTokens :: PandocMonad m => ParserT Sources s m [Tok]
+getInputTokens = do
+  pos <- getPosition
+  ss <- getInput
+  return $
+    case ss of
+      Sources [] -> []
+      Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
@@ -402,41 +453,62 @@ untoken t = untokenAccum t mempty
 toksToString :: [Tok] -> String
 toksToString = T.unpack . untokenize
 
+parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
+parseFromToks parser toks = do
+  oldInput <- getInput
+  setInput toks
+  oldpos <- getPosition
+  case toks of
+     Tok pos _ _ : _ -> setPosition pos
+     _ -> return ()
+  result <- disablingWithRaw parser
+  setInput oldInput
+  setPosition oldpos
+  return result
+
+disablingWithRaw :: PandocMonad m => LP m a -> LP m a
+disablingWithRaw parser = do
+  oldEnableWithRaw <- sEnableWithRaw <$> getState
+  updateState $ \st -> st{ sEnableWithRaw = False }
+  result <- parser
+  updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw }
+  return result
+
 satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sExpanded = False }
-    return res
+    updateState $ \st ->
+      if sEnableWithRaw st
+         then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st }
+         else st
+    return $! res
   where matcher t | f t       = Just t
                   | otherwise = Nothing
         updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
         updatePos _spos _ (Tok pos _ _ : _) = pos
-        updatePos spos _ []                 = incSourceColumn spos 1
+        updatePos spos (Tok _ _ t)  []      = incSourceColumn spos (T.length t)
 
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
-  expanded <- sExpanded <$> getState
-  verbatimMode <- sVerbatimMode <$> getState
-  unless (expanded || verbatimMode) $ do
-      getInput >>= doMacros' 1 >>= setInput
-      updateState $ \st -> st{ sExpanded = True }
+  st <- getState
+  unless (sVerbatimMode st) $
+    getInput >>= doMacros' 1 >>= setInput
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
 doMacros' n inp =
   case inp of
      Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
       Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos name ts
+        -> handleMacros n spos name ts <|> return inp
      Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
       Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos ("end" <> name) ts
+        -> handleMacros n spos ("end" <> name) ts <|> return inp
      Tok _ (CtrlSeq "expandafter") _ : t : ts
         -> combineTok t <$> doMacros' n ts
      Tok spos (CtrlSeq name) _ : ts
-        -> handleMacros n spos name ts
+        -> handleMacros n spos name ts <|> return inp
      _ -> return inp
-   <|> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -482,7 +554,7 @@ doMacros' n inp =
         $ throwError $ PandocMacroLoop name
       macros <- sMacros <$> getState
       case M.lookup name macros of
-           Nothing -> mzero
+           Nothing -> trySpecialMacro name ts
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
              let getargs' = do
                    args <-
@@ -510,6 +582,41 @@ doMacros' n inp =
                    ExpandWhenUsed    -> doMacros' (n' + 1) result
                    ExpandWhenDefined -> return result
 
+-- | Certain macros do low-level tex manipulations that can't
+-- be represented in our Macro type, so we handle them here.
+trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok]
+trySpecialMacro "xspace" ts = do
+  ts' <- doMacros' 1 ts
+  case ts' of
+    Tok pos Word t : _
+      | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
+    _ -> return ts'
+trySpecialMacro "iftrue" ts = handleIf True ts
+trySpecialMacro "iffalse" ts = handleIf False ts
+trySpecialMacro _ _ = mzero
+
+handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok]
+handleIf b ts = do
+  res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts
+  case res' of
+    Left _ -> Prelude.fail "Could not parse conditional"
+    Right ts' -> return ts'
+
+ifParser :: PandocMonad m => Bool -> LP m [Tok]
+ifParser b = do
+  ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi")
+                    *> anyTok)
+  elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi"))
+                 <|> ([] <$ controlSeq "fi")
+  rest <- getInput
+  return $ (if b then ifToks else elseToks) ++ rest
+
+startsWithAlphaNum :: Text -> Bool
+startsWithAlphaNum t =
+  case T.uncons t of
+       Just (c, _) | isAlphaNum c -> True
+       _           -> False
+
 setpos :: SourcePos -> Tok -> Tok
 setpos spos (Tok _ tt txt) = Tok spos tt txt
 
@@ -592,18 +699,22 @@ isCommentTok _                 = False
 anyTok :: PandocMonad m => LP m Tok
 anyTok = satisfyTok (const True)
 
+singleCharTok :: PandocMonad m => LP m Tok
+singleCharTok =
+  satisfyTok $ \case
+     Tok _ Word  t   -> T.length t == 1
+     Tok _ Symbol t  -> not (T.any (`Set.member` specialChars) t)
+     _               -> False
+
 singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
-  Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
-  guard $ not $ toktype == Symbol &&
-                T.any (`Set.member` specialChars) t
-  if T.length t > 1
-     then do
-       let (t1, t2) = (T.take 1 t, T.drop 1 t)
-       inp <- getInput
-       setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
-       return $ Tok pos toktype t1
-     else return $ Tok pos toktype t
+singleChar = singleCharTok <|> singleCharFromWord
+ where
+  singleCharFromWord = do
+    Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok
+    let (t1, t2) = (T.take 1 t, T.drop 1 t)
+    inp <- getInput
+    setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
+    anyTok
 
 specialChars :: Set.Set Char
 specialChars = Set.fromList "#$%&~_^\\{}"
@@ -646,28 +757,25 @@ grouped parser = try $ do
   -- {{a,b}} should be parsed the same as {a,b}
   try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
 
-braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
-braced' getTok n =
-  handleEgroup <|> handleBgroup <|> handleOther
-  where handleEgroup = do
-          t <- symbol '}'
-          if n == 1
-             then return []
-             else (t:) <$> braced' getTok (n - 1)
-        handleBgroup = do
-          t <- symbol '{'
-          (t:) <$> braced' getTok (n + 1)
-        handleOther = do
-          t <- getTok
-          (t:) <$> braced' getTok n
+braced' :: PandocMonad m => LP m Tok -> LP m [Tok]
+braced' getTok = symbol '{' *> go (1 :: Int)
+ where
+  go n = do
+    t <- getTok
+    case t of
+      Tok _ Symbol "}"
+        | n > 1     -> (t:) <$> go (n - 1)
+        | otherwise -> return []
+      Tok _ Symbol "{" -> (t:) <$> go (n + 1)
+      _ -> (t:) <$> go n
 
 braced :: PandocMonad m => LP m [Tok]
-braced = symbol '{' *> braced' anyTok 1
+braced = braced' anyTok
 
 -- URLs require special handling, because they can contain %
 -- characters.  So we retonenize comments as we go...
 bracedUrl :: PandocMonad m => LP m [Tok]
-bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+bracedUrl = braced' (retokenizeComment >> anyTok)
 
 -- For handling URLs, which allow literal % characters...
 retokenizeComment :: PandocMonad m => LP m ()
@@ -723,16 +831,29 @@ ignore raw = do
 
 withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
 withRaw parser = do
-  inp <- getInput
+  rawTokensMap <- sRawTokens <$> getState
+  let key = case IntMap.lookupMax rawTokensMap of
+               Nothing     -> 0
+               Just (n,_)  -> n + 1
+  -- insert empty list at key
+  updateState $ \st -> st{ sRawTokens =
+                             IntMap.insert key [] $ sRawTokens st }
   result <- parser
-  nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok)
-  let raw = takeWhile (\(Tok pos _ _) -> maybe True
-                  (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp
+  mbRevToks <- IntMap.lookup key . sRawTokens <$> getState
+  raw <- case mbRevToks of
+           Just revtoks -> do
+             updateState $ \st -> st{ sRawTokens =
+                                        IntMap.delete key $ sRawTokens st}
+             return $ reverse revtoks
+           Nothing      ->
+             throwError $ PandocShouldNeverHappenError $
+                "sRawTokens has nothing at key " <> T.pack (show key)
   return (result, raw)
 
 keyval :: PandocMonad m => LP m (Text, Text)
 keyval = try $ do
-  Tok _ Word key <- satisfyTok isWordTok
+  key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+                         (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
   sp
   val <- option mempty $ do
            symbol '='
@@ -792,7 +913,7 @@ getRawCommand name txt = do
   (_, rawargs) <- withRaw $
       case name of
            "write" -> do
-             void $ satisfyTok isWordTok -- digits
+             void $ many $ satisfyTok isDigitTok -- digits
              void braced
            "titleformat" -> do
              void braced
@@ -807,6 +928,10 @@ getRawCommand name txt = do
                void $ many braced
   return $ txt <> untokenize rawargs
 
+isDigitTok :: Tok -> Bool
+isDigitTok (Tok _ Word t) = T.all isDigit t
+isDigitTok _              = False
+
 skipopts :: PandocMonad m => LP m ()
 skipopts = skipMany (void overlaySpecification <|> void rawopt)
 
@@ -874,3 +999,35 @@ getNextNumber getCurrentNum = do
                Just n  -> [n, 1]
                Nothing -> [1]
 
+label :: PandocMonad m => LP m ()
+label = do
+  controlSeq "label"
+  t <- braced
+  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
+
+setCaption :: PandocMonad m => LP m Inlines -> LP m ()
+setCaption inline = try $ do
+  skipopts
+  ils <- tokWith inline
+  optional $ try $ spaces *> label
+  updateState $ \st -> st{ sCaption = Just ils }
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption   = Nothing
+                                      , sLastLabel = Nothing }
+
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
+
+tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
+tokWith inlineParser = try $ spaces >>
+                                 grouped inlineParser
+                            <|> (lookAhead anyControlSeq >> inlineParser)
+                            <|> singleChar'
+  where singleChar' = do
+          Tok _ _ t <- singleChar
+          return $ str t
+
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+   st{ sMeta = addMetaField field val $ sMeta st }
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index db9c276e7..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,12 +1,7 @@
+{-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Readers.LaTeX.SIunitx
-  ( dosi
-  , doSI
-  , doSIrange
-  , doSInum
-  , doSInumlist
-  , doSIang
-  )
+  ( siunitxCommands )
 where
 import Text.Pandoc.Builder
 import Text.Pandoc.Readers.LaTeX.Parsing
@@ -15,14 +10,32 @@ import Text.Pandoc.Class
 import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Control.Applicative ((<|>))
+import Control.Monad (void)
 import qualified Data.Map as M
 import Data.Char (isDigit)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
+
+siunitxCommands :: PandocMonad m
+                 => LP m Inlines -> M.Map Text (LP m Inlines)
+siunitxCommands tok = M.fromList
+  [ ("si", dosi tok)
+  , ("SI", doSI tok)
+  , ("SIrange", doSIrange True tok)
+  , ("numrange", doSIrange False tok)
+  , ("numlist", doSInumlist)
+  , ("SIlist", doSIlist tok)
+  , ("num", doSInum)
+  , ("ang", doSIang)
+  ]
 
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+  options <- option [] keyvals
+  grouped (siUnit options tok) <|> siUnit options tok
 
 -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
 doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -57,23 +70,50 @@ doSInumlist = do
              mconcat (intersperse (str "," <> space) (init xs)) <>
              text ", & " <> last xs
 
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+  options <- option [] keyvals
+  nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+  unit <- grouped (siUnit options tok) <|> siUnit options tok
+  let xs = map (<> (str "\xa0" <> unit)) nums
+  case xs of
+    []  -> return mempty
+    [x] -> return x
+    _   -> return $
+             mconcat (intersperse (str "," <> space) (init xs)) <>
+             text ", & " <> last xs
+
 parseNum :: Parser Text () Inlines
 parseNum = (mconcat <$> many parseNumPart) <* eof
 
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
 parseNumPart :: Parser Text () Inlines
 parseNumPart =
   parseDecimalNum <|>
   parseComma <|>
   parsePlusMinus <|>
+  parsePM <|>
   parseI <|>
   parseExp <|>
   parseX <|>
   parseSpace
  where
-  parseDecimalNum = do
-    pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
-    basenum <- (pref <>) . T.pack
-                <$> many1 (satisfy (\c -> isDigit c || c == '.'))
+  parseDecimalNum, parsePlusMinus, parsePM,
+    parseComma, parseI, parseX,
+    parseExp, parseSpace :: Parser Text () Inlines
+  parseDecimalNum = try $ do
+    pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
+    basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
+    let basenum = pref <> T.pack
+                    (case basenum' of
+                      '.':_ -> '0':basenum'
+                      _ -> basenum')
     uncertainty <- option mempty $ T.pack <$> parseParens
     if T.null uncertainty
        then return $ str basenum
@@ -91,6 +131,7 @@ parseNumPart =
                                                | otherwise -> "." <> t
   parseComma = str "." <$ char ','
   parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-")
+  parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm")
   parseParens =
     char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')'
   parseI = str "i" <$ char 'i'
@@ -103,11 +144,14 @@ doSIang :: PandocMonad m => LP m Inlines
 doSIang = do
   skipopts
   ps <- T.splitOn ";" . untokenize <$> braced
+  let dropPlus t = case T.uncons t of
+                     Just ('+',t') -> t'
+                     _ -> t
   case ps ++ repeat "" of
     (d:m:s:_) -> return $
-      (if T.null d then mempty else str d <> str "\xb0") <>
-      (if T.null m then mempty else str m <> str "\x2032") <>
-      (if T.null s then mempty else str s <> str "\x2033")
+      (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <>
+      (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <>
+      (if T.null s then mempty else str (dropPlus s) <> str "\x2033")
     _ -> return mempty
 
 -- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms"
@@ -136,40 +180,99 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = try (do
-  Tok _ (CtrlSeq name) _ <- anyControlSeq
-  case name of
-    "square" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "2"
-    "cubic" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "3"
-    "raisetothe" -> do
-       n <- tok
-       unit <- siUnit tok
-       return $ unit <> superscript n
-    _ ->
-       case M.lookup name siUnitMap of
-            Just il ->
-              option il $
-                choice
-                 [ (il <> superscript "2") <$ controlSeq "squared"
-                 , (il <> superscript "3") <$ controlSeq "cubed"
-                 , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok)
-                 ]
-            Nothing -> fail "not an siunit unit command")
- <|> (lookAhead anyControlSeq >> tok)
- <|> (do Tok _ Word t <- satisfyTok isWordTok
-         return $ str t)
- <|> (symbol '^' *> (superscript <$> tok))
- <|> (symbol '_' *> (subscript <$> tok))
- <|> ("\xa0" <$ symbol '.')
- <|> ("\xa0" <$ symbol '~')
- <|> tok
- <|> (do Tok _ _ t <- anyTok
-         return (str t))
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
+ where
+  siUnitPart :: LP m Inlines
+  siUnitPart = try $ do
+    skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+    x <- ((siPrefix <*> siBase)
+            <|> (do u <- siBase <|> tok
+                    option u $ siSuffix <*> pure u))
+    option x (siInfix x)
+  siInfix :: Inlines -> LP m Inlines
+  siInfix u1 = try $
+       (do _ <- controlSeq "per"
+           u2 <- siUnitPart
+           let useSlash = lookup "per-mode" options == Just "symbol"
+           if useSlash
+              then return (u1 <> str "/" <> u2)
+              else return (u1 <> str "\xa0" <> negateExponent u2))
+   <|> (do _ <- symbol '/'
+           u2 <- siUnitPart
+           return (u1 <> str "/" <> u2))
+  siPrefix :: LP m (Inlines -> Inlines)
+  siPrefix =
+       (do _ <- controlSeq "square"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubic"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "raisetothe"
+           skipopts
+           n <- walk hyphenToMinus <$> tok
+           return (<> superscript n))
+  siSuffix :: LP m (Inlines -> Inlines)
+  siSuffix =
+       (do _ <- controlSeq "squared"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubed"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "tothe"
+           skipopts
+           n <- walk hyphenToMinus <$> tok
+           return (<> superscript n))
+   <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> superscript n)))
+   <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> subscript n)))
+  negateExponent :: Inlines -> Inlines
+  negateExponent ils =
+    case Seq.viewr (unMany ils) of
+      xs Seq.:> Superscript ss -> (Many xs) <>
+                                     superscript (str minus <> fromList ss)
+      _ -> ils <> superscript (str (minus <> "1"))
+  siBase :: LP m Inlines
+  siBase =
+    ((try
+       (do Tok _ (CtrlSeq name) _ <- anyControlSeq
+           case M.lookup name siUnitModifierMap of
+              Just il -> (il <>) <$> siBase
+              Nothing ->
+                case M.lookup name siUnitMap of
+                   Just il -> pure il
+                   Nothing -> fail "not a unit command"))
+    <|> (do Tok _ Word t <- satisfyTok isWordTok
+            return $ str t)
+     )
+
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+  [ ("atto", str "a")
+  , ("centi", str "c")
+  , ("deca", str "d")
+  , ("deci", str "d")
+  , ("deka", str "d")
+  , ("exa", str "E")
+  , ("femto", str "f")
+  , ("giga", str "G")
+  , ("hecto", str "h")
+  , ("kilo", str "k")
+  , ("mega", str "M")
+  , ("micro", str "μ")
+  , ("milli", str "m")
+  , ("nano", str "n")
+  , ("peta", str "P")
+  , ("pico", str "p")
+  , ("tera", str "T")
+  , ("yocto", str "y")
+  , ("yotta", str "Y")
+  , ("zepto", str "z")
+  , ("zetta", str "Z")
+  ]
 
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
@@ -269,7 +372,6 @@ siUnitMap = M.fromList
   , ("arcsecond", str "″")
   , ("astronomicalunit", str "ua")
   , ("atomicmassunit", str "u")
-  , ("atto", str "a")
   , ("bar", str "bar")
   , ("barn", str "b")
   , ("becquerel", str "Bq")
@@ -277,51 +379,38 @@ siUnitMap = M.fromList
   , ("bohr", emph (str "a") <> subscript (str "0"))
   , ("candela", str "cd")
   , ("celsius", str "°C")
-  , ("centi", str "c")
   , ("clight", emph (str "c") <> subscript (str "0"))
   , ("coulomb", str "C")
   , ("dalton", str "Da")
   , ("day", str "d")
-  , ("deca", str "d")
-  , ("deci", str "d")
   , ("decibel", str "db")
   , ("degreeCelsius",str "°C")
   , ("degree", str "°")
-  , ("deka", str "d")
   , ("electronmass", emph (str "m") <> subscript (str "e"))
   , ("electronvolt", str "eV")
   , ("elementarycharge", emph (str "e"))
-  , ("exa", str "E")
   , ("farad", str "F")
-  , ("femto", str "f")
-  , ("giga", str "G")
   , ("gram", str "g")
   , ("gray", str "Gy")
   , ("hartree", emph (str "E") <> subscript (str "h"))
   , ("hectare", str "ha")
-  , ("hecto", str "h")
   , ("henry", str "H")
   , ("hertz", str "Hz")
   , ("hour", str "h")
   , ("joule", str "J")
   , ("katal", str "kat")
   , ("kelvin", str "K")
-  , ("kilo", str "k")
   , ("kilogram", str "kg")
   , ("knot", str "kn")
   , ("liter", str "L")
   , ("litre", str "l")
   , ("lumen", str "lm")
   , ("lux", str "lx")
-  , ("mega", str "M")
   , ("meter", str "m")
   , ("metre", str "m")
-  , ("micro", str "μ")
-  , ("milli", str "m")
   , ("minute", str "min")
   , ("mmHg", str "mmHg")
   , ("mole", str "mol")
-  , ("nano", str "n")
   , ("nauticalmile", str "M")
   , ("neper", str "Np")
   , ("newton", str "N")
@@ -329,25 +418,17 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("per", str "/")
-  , ("peta", str "P")
-  , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
   , ("radian", str "rad")
   , ("second", str "s")
   , ("siemens", str "S")
   , ("sievert", str "Sv")
   , ("steradian", str "sr")
-  , ("tera", str "T")
   , ("tesla", str "T")
   , ("tonne", str "t")
   , ("volt", str "V")
   , ("watt", str "W")
   , ("weber", str "Wb")
-  , ("yocto", str "y")
-  , ("yotta", str "Y")
-  , ("zepto", str "z")
-  , ("zetta", str "Z")
   ]
 
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
new file mode 100644
index 000000000..f56728fe1
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -0,0 +1,379 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Table
+  ( tableEnvironments )
+where
+
+import Data.Functor (($>))
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (when, void)
+import Text.Pandoc.Shared (safeRead, trim)
+import Text.Pandoc.Logging (LogMessage(SkippedContent))
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+tableEnvironments :: PandocMonad m
+                  => LP m Blocks
+                  -> LP m Inlines
+                  -> M.Map Text (LP m Blocks)
+tableEnvironments blocks inline =
+  M.fromList
+  [ ("longtable",  env "longtable" $
+          resetCaption *>
+            simpTable blocks inline "longtable" False >>= addTableCaption)
+  , ("table",  env "table" $
+          skipopts *> resetCaption *> blocks >>= addTableCaption)
+  , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
+  , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
+  , ("tabular", env "tabular"  $ simpTable blocks inline "tabular" False)
+  ]
+
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+  spaces
+  controlSeq "hline" <|>
+    (controlSeq "cline" <* braced) <|>
+    -- booktabs rules:
+    controlSeq "toprule" <|>
+    controlSeq "bottomrule" <|>
+    controlSeq "midrule" <|>
+    controlSeq "endhead" <|>
+    controlSeq "endfirsthead"
+  spaces
+  optional rawopt
+  return ()
+
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
+         <* skipopts <* spaces
+
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
+
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+  inp <- getInput
+  case inp of
+       (Tok spos Word t : rest) ->
+         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
+       _ -> return ()
+
+parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
+parseAligns = try $ do
+  let maybeBar = skipMany
+        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
+  let cAlign = AlignCenter <$ symbol 'c'
+  let lAlign = AlignLeft <$ symbol 'l'
+  let rAlign = AlignRight <$ symbol 'r'
+  let parAlign = AlignLeft <$ symbol 'p'
+  -- aligns from tabularx
+  let xAlign = AlignLeft <$ symbol 'X'
+  let mAlign = AlignLeft <$ symbol 'm'
+  let bAlign = AlignLeft <$ symbol 'b'
+  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
+                                 <|> xAlign <|> mAlign <|> bAlign )
+  let alignPrefix = symbol '>' >> braced
+  let alignSuffix = symbol '<' >> braced
+  let colWidth = try $ do
+        symbol '{'
+        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
+        spaces
+        symbol '}'
+        return $ safeRead ds
+  let alignSpec = do
+        pref <- option [] alignPrefix
+        spaces
+        al <- alignChar
+        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
+                                                 pos <- getPosition
+                                                 report $ SkippedContent s pos
+                                                 return Nothing)
+        spaces
+        suff <- option [] alignSuffix
+        return (al, width, (pref, suff))
+  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+        symbol '*'
+        spaces
+        ds <- trim . untokenize <$> braced
+        spaces
+        spec <- braced
+        case safeRead ds of
+             Just n  ->
+               getInput >>= setInput . (mconcat (replicate n spec) ++)
+             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
+  bgroup
+  spaces
+  maybeBar
+  aligns' <- many $ try $ spaces >> optional starAlign >>
+                            (alignSpec <* maybeBar)
+  spaces
+  egroup
+  spaces
+  return $ map toSpec aligns'
+  where
+    toColWidth (Just w) | w > 0 = ColWidth w
+    toColWidth _                = ColWidthDefault
+    toSpec (x, y, z) = (x, toColWidth y, z)
+
+-- N.B. this parser returns a Row that may have erroneous empty cells
+-- in it. See the note above fixTableHead for details.
+parseTableRow :: PandocMonad m
+              => LP m Blocks -- ^ block parser
+              -> LP m Inlines -- ^ inline parser
+              -> Text   -- ^ table environment name
+              -> [([Tok], [Tok])] -- ^ pref/suffixes
+              -> LP m Row
+parseTableRow blocks inline envname prefsufs = do
+  notFollowedBy (spaces *> end_ envname)
+  -- contexts that can contain & that is not colsep:
+  let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
+      canContainAmp (Tok _ (CtrlSeq "verb") _)  = True
+      canContainAmp (Tok _ (CtrlSeq "Verb") _)  = True
+      canContainAmp _       = False
+  -- add prefixes and suffixes in token stream:
+  let celltoks (pref, suff) = do
+        prefpos <- getPosition
+        contents <- mconcat <$>
+            many ( snd <$> withRaw
+                     ((lookAhead (controlSeq "parbox") >>
+                       void blocks) -- #5711
+                      <|>
+                      (lookAhead (satisfyTok canContainAmp) >> void inline)
+                      <|>
+                      (lookAhead (symbol '$') >> void inline))
+                  <|>
+                   (do notFollowedBy
+                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
+                       count 1 anyTok) )
+
+        suffpos <- getPosition
+        option [] (count 1 amp)
+        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+  rawcells <- mapM celltoks prefsufs
+  cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
+  spaces
+  return $ Row nullAttr cells
+
+parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
+parseTableCell blocks = do
+  spaces
+  updateState $ \st -> st{ sInTableCell = True }
+  cell' <-   multicolumnCell blocks
+         <|> multirowCell blocks
+         <|> parseSimpleCell
+         <|> parseEmptyCell
+  updateState $ \st -> st{ sInTableCell = False }
+  spaces
+  return cell'
+  where
+    -- The parsing of empty cells is important in LaTeX, especially when dealing
+    -- with multirow/multicolumn. See #6603.
+    parseEmptyCell = spaces $> emptyCell
+    parseSimpleCell = simpleCell <$> (plainify <$> blocks)
+
+
+cellAlignment :: PandocMonad m => LP m Alignment
+cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
+  where
+    alignment = do
+      c <- untoken <$> singleChar
+      return $ case c of
+        "l" -> AlignLeft
+        "r" -> AlignRight
+        "c" -> AlignCenter
+        "*" -> AlignDefault
+        _   -> AlignDefault
+
+plainify :: Blocks -> Blocks
+plainify bs = case toList bs of
+                [Para ils] -> plain (fromList ils)
+                _          -> bs
+
+multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multirowCell blocks = controlSeq "multirow" >> do
+  -- Full prototype for \multirow macro is:
+  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
+  -- However, everything except `nrows` and `text` make
+  -- sense in the context of the Pandoc AST
+  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
+  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
+  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
+  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
+  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
+
+multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multicolumnCell blocks = controlSeq "multicolumn" >> do
+  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  alignment <- symbol '{' *> cellAlignment <* symbol '}'
+
+  let singleCell = do
+        content <- plainify <$> blocks
+        return $ cell alignment (RowSpan 1) (ColSpan span') content
+
+  -- Two possible contents: either a \multirow cell, or content.
+  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
+  -- Note that a \multirow cell can be nested in a \multicolumn,
+  -- but not the other way around. See #6603
+  let nestedCell = do
+        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
+        return $ cell
+                  alignment
+                  (RowSpan rs)
+                  (ColSpan span')
+                  (fromList bs)
+
+  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
+
+-- LaTeX tables are stored with empty cells underneath multirow cells
+-- denoting the grid spaces taken up by them. More specifically, if a
+-- cell spans m rows, then it will overwrite all the cells in the
+-- columns it spans for (m-1) rows underneath it, requiring padding
+-- cells in these places. These padding cells need to be removed for
+-- proper table reading. See #6603.
+--
+-- These fixTable functions do not otherwise fix up malformed
+-- input tables: that is left to the table builder.
+fixTableHead :: TableHead -> TableHead
+fixTableHead (TableHead attr rows) = TableHead attr rows'
+  where
+    rows' = fixTableRows rows
+
+fixTableBody :: TableBody -> TableBody
+fixTableBody (TableBody attr rhc th tb)
+  = TableBody attr rhc th' tb'
+  where
+    th' = fixTableRows th
+    tb' = fixTableRows tb
+
+fixTableRows :: [Row] -> [Row]
+fixTableRows = fixTableRows' $ repeat Nothing
+  where
+    fixTableRows' oldHang (Row attr cells : rs)
+      = let (newHang, cells') = fixTableRow oldHang cells
+            rs'               = fixTableRows' newHang rs
+        in Row attr cells' : rs'
+    fixTableRows' _ [] = []
+
+-- The overhang is represented as Just (relative cell dimensions) or
+-- Nothing for an empty grid space.
+fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
+fixTableRow oldHang cells
+  -- If there's overhang, drop cells until their total width meets the
+  -- width of the occupied grid spaces (or we run out)
+  | (n, prefHang, restHang) <- splitHang oldHang
+  , n > 0
+  = let cells' = dropToWidth getCellW n cells
+        (restHang', cells'') = fixTableRow restHang cells'
+    in (prefHang restHang', cells'')
+  -- Otherwise record the overhang of a pending cell and fix the rest
+  -- of the row
+  | c@(Cell _ _ h w _):cells' <- cells
+  = let h' = max 1 h
+        w' = max 1 w
+        oldHang' = dropToWidth getHangW w' oldHang
+        (newHang, cells'') = fixTableRow oldHang' cells'
+    in (toHang w' h' <> newHang, c : cells'')
+  | otherwise
+  = (oldHang, [])
+  where
+    getCellW (Cell _ _ _ w _) = w
+    getHangW = maybe 1 fst
+    getCS (ColSpan n) = n
+
+    toHang c r
+      | r > 1     = [Just (c, r)]
+      | otherwise = replicate (getCS c) Nothing
+
+    -- Take the prefix of the overhang list representing filled grid
+    -- spaces. Also return the remainder and the length of this prefix.
+    splitHang = splitHang' 0 id
+
+    splitHang' !n l (Just (c, r):xs)
+      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
+    splitHang' n l xs = (n, l, xs)
+
+    -- Drop list items until the total width of the dropped items
+    -- exceeds the passed width.
+    dropToWidth _     n l | n < 1 = l
+    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
+    dropToWidth _     _ []        = []
+
+simpTable :: PandocMonad m
+          => LP m Blocks
+          -> LP m Inlines
+          -> Text
+          -> Bool
+          -> LP m Blocks
+simpTable blocks inline envname hasWidthParameter = try $ do
+  when hasWidthParameter $ () <$ tokWith inline
+  skipopts
+  colspecs <- parseAligns
+  let (aligns, widths, prefsufs) = unzip3 colspecs
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  skipMany hline
+  spaces
+  header' <- option [] . try . fmap (:[]) $
+             parseTableRow blocks inline envname prefsufs <*
+               lbreak <* many1 hline
+  spaces
+  rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
+                    (lbreak <* optional (skipMany hline))
+  spaces
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  lookAhead $ controlSeq "end" -- make sure we're at end
+  let th  = fixTableHead $ TableHead nullAttr header'
+  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
+  let tf  = TableFoot nullAttr []
+  return $ table emptyCaption (zip aligns widths) th tbs tf
+
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+  where go (Table attr c spec th tb tf) = do
+          st <- getState
+          let mblabel = sLastLabel st
+          capt <- case (sCaption st, mblabel) of
+                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
+                   (Just ils, Just lab) -> do
+                     num <- getNextNumber sLastTableNum
+                     setState
+                       st{ sLastTableNum = num
+                         , sLabels = M.insert lab
+                                    [Str (renderDottedNum num)]
+                                    (sLabels st) }
+                     return $ caption Nothing (plain ils) -- add number??
+                   (Nothing, _)  -> return c
+          let attr' = case (attr, mblabel) of
+                        ((_,classes,kvs), Just ident) ->
+                           (ident,classes,kvs)
+                        _ -> attr
+          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+        go x = return x
+
+-- TODO: For now we add a Div to contain table attributes, since
+-- most writers don't do anything yet with attributes on Table.
+-- This can be removed when that changes.
+addAttrDiv :: Attr -> Block -> Block
+addAttrDiv ("",[],[]) b = b
+addAttrDiv attr b       = Div attr [b]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index a017a2afb..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,6 +1,7 @@
+{-# LANGUAGE FlexibleInstances #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        )
 where
 import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
 
 data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
                Esc1    | Esc2   | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
 data Tok = Tok SourcePos TokType Text
      deriving (Eq, Ord, Show)
 
+instance ToSources [Tok] where
+  toSources = Sources
+    . map (\ts -> case ts of
+                    Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+                    _ -> error "toSources [Tok] encountered empty group")
+    . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
 import Control.Monad.Trans (lift)
 import Control.Monad.Except (throwError)
 import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
 import qualified Data.Text as T
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
 import Text.Pandoc.Readers.Roff  -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
 import qualified Text.Parsec as Parsec
 import Text.Parsec.Pos (updatePosString)
 import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
 
 
 -- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
-  tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
+readMan opts s = do
+  let Sources inps = toSources s
+  tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
   let state = def {readerOptions = opts} :: ManState
+  let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+      fixError e = e
   eitherdoc <- readWithMTokens parseMan state
      (Foldable.toList . unRoffTokens $ tokenz)
-  either throwError return eitherdoc
+  either (throwError . fixError) return eitherdoc
+
 
 readWithMTokens :: PandocMonad m
         => ParserT [RoffToken] ManState m a  -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
         -> [RoffToken]                       -- ^ input
         -> m (Either PandocError a)
 readWithMTokens parser state input =
-  let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+  let leftF = PandocParsecError mempty
   in mapLeft leftF `liftM` runParserT parser state "source" input
 
+
 parseMan :: PandocMonad m => ManParser m Pandoc
 parseMan = do
   bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
 
 parseTable :: PandocMonad m => ManParser m Blocks
 parseTable = do
-  modifyState $ \st -> st { tableCellsPlain = True }
+  updateState $ \st -> st { tableCellsPlain = True }
   let isTbl Tbl{} = True
       isTbl _     = False
   Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
         case res' of
           Left _  -> Prelude.fail "Could not parse table cell"
           Right x -> do
-            modifyState $ \s -> s{ tableCellsPlain = False }
+            updateState $ \s -> s{ tableCellsPlain = False }
             return x
       Right x -> return x
 
@@ -222,7 +229,7 @@ parseTitle = do
                       setMeta "section" (linePartsToInlines y)
          [x]       -> setMeta "title" (linePartsToInlines x)
          []        -> id
-  modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+  updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
   return mempty
 
 linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5888bf095..2dc7ddf52 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -21,15 +21,17 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
-import Data.List (transpose, elemIndex, sortOn)
+import Data.List (transpose, elemIndex, sortOn, foldl')
+import Data.List.NonEmpty (NonEmpty(..))
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
 import qualified Data.ByteString.Lazy as BL
-import System.FilePath (addExtension, takeExtension)
+import System.FilePath (addExtension, takeExtension, takeDirectory)
+import qualified System.FilePath.Windows as Windows
+import qualified System.FilePath.Posix as Posix
 import Text.HTML.TagSoup hiding (Row)
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
@@ -45,20 +47,22 @@ import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
                                  isCommentTag, isInlineTag, isTextTag)
 import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
+import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
 
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
+
+type F = Future ParserState
 
 -- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
              => ReaderOptions -- ^ Reader options
-             -> Text      -- ^ String to parse (assuming @'\n'@ line endings)
+             -> a             -- ^ Input
              -> m Pandoc
 readMarkdown opts s = do
   parsed <- readWithM parseMarkdown def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+                (ensureFinalNewlines 3 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -79,7 +83,7 @@ yamlToMeta opts mbfp bstr = do
         meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
         setPosition oldPos
         return $ runF meta defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -95,14 +99,12 @@ yamlToRefs :: PandocMonad m
            -> m [MetaValue]
 yamlToRefs idpred opts mbfp bstr = do
   let parser = do
-        oldPos <- getPosition
         case mbfp of
           Nothing -> return ()
           Just fp -> setPosition $ initialPos fp
         refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
-        setPosition oldPos
         return $ runF refs defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -145,14 +147,14 @@ inList = do
   ctx <- stateParserContext <$> getState
   guard (ctx == ListItemState)
 
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
 spnl = try $ do
   skipSpaces
   optional newline
   skipSpaces
   notFollowedBy (char '\n')
 
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
 spnl' = try $ do
   xs <- many spaceChar
   ys <- option "" $ try $ (:) <$> newline
@@ -247,51 +249,48 @@ titleBlock :: PandocMonad m => MarkdownParser m ()
 titleBlock = pandocTitleBlock <|> mmdTitleBlock
 
 pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
-pandocTitleBlock = try $ do
+pandocTitleBlock = do
   guardEnabled Ext_pandoc_title_block
   lookAhead (char '%')
-  title <- option mempty titleLine
-  author <- option (return []) authorsLine
-  date <- option mempty dateLine
-  optional blanklines
-  let meta' = do title' <- title
-                 author' <- author
-                 date' <- date
-                 return $
-                     (if null title' then id else B.setMeta "title" title')
-                   . (if null author' then id else B.setMeta "author" author')
-                   . (if null date' then id else B.setMeta "date" date')
-                   $ nullMeta
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
-
-yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-yamlMetaBlock = try $ do
+  try $ do
+    title <- option mempty titleLine
+    author <- option (return []) authorsLine
+    date <- option mempty dateLine
+    optional blanklines
+    let meta' = do title' <- title
+                   author' <- author
+                   date' <- date
+                   return $
+                       (if null title'
+                           then id
+                           else B.setMeta "title" title')
+                     . (if null author'
+                           then id
+                           else B.setMeta "author" author')
+                     . (if null date'
+                           then id
+                           else B.setMeta "date" date')
+                     $ nullMeta
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+
+yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
+yamlMetaBlock' = do
   guardEnabled Ext_yaml_metadata_block
-  string "---"
-  blankline
-  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
-  rawYamlLines <- manyTill anyLine stopLine
-  -- by including --- and ..., we allow yaml blocks with just comments:
-  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
-  optional blanklines
-  newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
-              $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+  newMetaF <- yamlMetaBlock (fmap B.toMetaValue <$> parseBlocks)
   -- Since `<>` is left-biased, existing values are not touched:
   updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
   return mempty
 
-stopLine :: PandocMonad m => MarkdownParser m ()
-stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-
 mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
-mmdTitleBlock = try $ do
+mmdTitleBlock = do
   guardEnabled Ext_mmd_title_block
-  firstPair <- kvPair False
-  restPairs <- many (kvPair True)
-  let kvPairs = firstPair : restPairs
-  blanklines
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <>
-                             return (Meta $ M.fromList kvPairs) }
+  try $ do
+    firstPair <- kvPair False
+    restPairs <- many (kvPair True)
+    let kvPairs = firstPair : restPairs
+    blanklines
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <>
+                               return (Meta $ M.fromList kvPairs) }
 
 kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue)
 kvPair allowEmpty = try $ do
@@ -300,7 +299,7 @@ kvPair allowEmpty = try $ do
           (try $ newline >> lookAhead (blankline <|> nonspaceChar))
   guard $ allowEmpty || not (T.null val)
   let key' = T.concat $ T.words $ T.toLower key
-  let val' = MetaBlocks $ B.toList $ B.plain $ B.text val
+  let val' = MetaInlines $ B.toList $ B.text val
   return (key',val')
 
 parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
@@ -334,10 +333,14 @@ referenceKey = try $ do
                     skipMany spaceChar
                     notFollowedBy' referenceTitle
                     notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
+                    notFollowedBy' $ guardEnabled Ext_mmd_link_attributes >>
+                                     try (spnl <* keyValAttr)
                     notFollowedBy' (() <$ reference)
                     many1Char $ notFollowedBy space >> litChar
   let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
-  src <- try betweenAngles <|> sourceURL
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  src <- (if rebase then rebasePath pos else id) <$>
+             (try betweenAngles <|> sourceURL)
   tit <- option "" referenceTitle
   attr   <- option nullAttr $ try $
               do guardEnabled Ext_link_attributes
@@ -346,7 +349,7 @@ referenceKey = try $ do
   addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
                           >> many (try $ spnl >> keyValAttr)
   blanklines
-  let attr'  = extractIdClass $ foldl (\x f -> f x) attr addKvs
+  let attr'  = extractIdClass $ foldl' (\x f -> f x) attr addKvs
       target = (escapeURI $ trimr src, tit)
   st <- getState
   let oldkeys = stateKeys st
@@ -442,7 +445,7 @@ block :: PandocMonad m => MarkdownParser m (F Blocks)
 block = do
   res <- choice [ mempty <$ blanklines
                , codeBlockFenced
-               , yamlMetaBlock
+               , yamlMetaBlock'
                -- note: bulletList needs to be before header because of
                -- the possibility of empty list items: -
                , bulletList
@@ -568,7 +571,7 @@ registerImplicitHeader raw attr@(ident, _, _)
 -- hrule block
 --
 
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
 hrule = try $ do
   skipSpaces
   start <- satisfy isHruleChar
@@ -588,7 +591,7 @@ indentedLine = indentSpaces >> anyLineNewline
 blockDelimiter :: PandocMonad m
                => (Char -> Bool)
                -> Maybe Int
-               -> ParserT Text ParserState m Int
+               -> ParserT Sources ParserState m Int
 blockDelimiter f len = try $ do
   skipNonindentSpaces
   c <- lookAhead (satisfy f)
@@ -602,7 +605,7 @@ attributes = try $ do
   spnl
   attrs <- many (attribute <* spnl)
   char '}'
-  return $ foldl (\x f -> f x) nullAttr attrs
+  return $ foldl' (\x f -> f x) nullAttr attrs
 
 attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
 attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
@@ -659,15 +662,15 @@ codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
 codeBlockFenced = try $ do
   indentchars <- nonindentSpaces
   let indentLevel = T.length indentchars
-  c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
+  c <- (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
      <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
   size <- blockDelimiter (== c) Nothing
   skipMany spaceChar
   rawattr <-
-     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+     (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
      (Right <$> option ("",[],[])
-         (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+         ((guardEnabled Ext_fenced_code_attributes >> try attributes)
           <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar)))
   blankline
   contents <- T.intercalate "\n" <$>
@@ -732,7 +735,7 @@ lhsCodeBlockBirdWith c = try $ do
   blanklines
   return $ T.intercalate "\n" lns'
 
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
 birdTrackLine c = try $ do
   char c
   -- allow html tags on left margin:
@@ -1025,7 +1028,7 @@ para = try $ do
   option (B.plain <$> result)
     $ try $ do
             newline
-            (blanklines >> return mempty)
+            (mempty <$ blanklines)
               <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
               <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
               <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1118,6 +1121,7 @@ rawTeXBlock = do
 rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
 rawHtmlBlocks = do
   (TagOpen tagtype _, raw) <- htmlTag isBlockTag
+  let selfClosing = "/>" `T.isSuffixOf` raw
   -- we don't want '<td>    text' to be a code block:
   skipMany spaceChar
   indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
@@ -1131,7 +1135,9 @@ rawHtmlBlocks = do
                  gobbleAtMostSpaces indentlevel
                  notFollowedBy' closer
                  block
-  contents <- mconcat <$> many block'
+  contents <- if selfClosing
+                 then return mempty
+                 else mconcat <$> many block'
   result <-
     try
     (do gobbleAtMostSpaces indentlevel
@@ -1155,11 +1161,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
 --
 
 lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-lineBlock = try $ do
+lineBlock = do
   guardEnabled Ext_line_blocks
-  lines' <- lineBlockLines >>=
-            mapM (parseFromString' (trimInlinesF <$> inlines))
-  return $ B.lineBlock <$> sequence lines'
+  try $ do
+    lines' <- lineBlockLines >>=
+              mapM (parseFromString' (trimInlinesF <$> inlines))
+    return $ B.lineBlock <$> sequence lines'
 
 --
 -- Tables
@@ -1169,7 +1176,7 @@ lineBlock = try $ do
 -- and the length including trailing space.
 dashedLine :: PandocMonad m
            => Char
-           -> ParserT Text st m (Int, Int)
+           -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many spaceChar
@@ -1238,7 +1245,7 @@ rawTableLine :: PandocMonad m
              -> MarkdownParser m [Text]
 rawTableLine indices = do
   notFollowedBy' (blanklines' <|> tableFooter)
-  line <- take1WhileP (/='\n') <* newline
+  line <- anyLine
   return $ map trim $ tail $
            splitTextByIndices (init indices) line
 
@@ -1261,11 +1268,12 @@ multilineRow indices = do
 -- Parses a table caption:  inlines beginning with 'Table:'
 -- and followed by blank lines.
 tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
-tableCaption = try $ do
+tableCaption = do
   guardEnabled Ext_table_captions
-  skipNonindentSpaces
-  (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
-  trimInlinesF <$> inlines1 <* blanklines
+  try $ do
+    skipNonindentSpaces
+    (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
+    trimInlinesF <$> inlines1 <* blanklines
 
 -- Parse a simple table with '---' header and one line per row.
 simpleTable :: PandocMonad m
@@ -1351,7 +1359,7 @@ pipeTable = try $ do
   lines' <- many pipeTableRow
   let lines'' = map (take (length aligns) <$>) lines'
   let maxlength = maximum $
-       map (\x -> T.length . stringify $ runF x def) (heads' : lines'')
+       fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
   numColumns <- getOption readerColumns
   let widths = if maxlength > numColumns
                   then map (\len ->
@@ -1388,7 +1396,7 @@ pipeTableCell =
       return $ B.plain <$> result)
     <|> return mempty
 
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
 pipeTableHeaderPart = try $ do
   skipMany spaceChar
   left <- optionMaybe (char ':')
@@ -1404,10 +1412,14 @@ pipeTableHeaderPart = try $ do
       (Just _,Just _)   -> AlignCenter, len)
 
 -- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
 scanForPipe = do
-  inp <- getInput
-  case T.break (\c -> c == '\n' || c == '|') inp of
+  Sources inps <- getInput
+  let ln = case inps of
+             [] -> ""
+             ((_,t):(_,t'):_) | T.null t -> t'
+             ((_,t):_) -> t
+  case T.break (\c -> c == '\n' || c == '|') ln of
        (_, T.uncons -> Just ('|', _)) -> return ()
        _                              -> mzero
 
@@ -1434,15 +1446,14 @@ table :: PandocMonad m => MarkdownParser m (F Blocks)
 table = try $ do
   frontCaption <- option Nothing (Just <$> tableCaption)
   (aligns, widths, heads, lns) <-
-         try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
-         try (guardEnabled Ext_multiline_tables >>
-                multilineTable False) <|>
-         try (guardEnabled Ext_simple_tables >>
-                (simpleTable True <|> simpleTable False)) <|>
-         try (guardEnabled Ext_multiline_tables >>
-                multilineTable True) <|>
-         try (guardEnabled Ext_grid_tables >>
-                (gridTable False <|> gridTable True)) <?> "table"
+         (guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|>
+         (guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|>
+         (guardEnabled Ext_simple_tables >>
+                try (simpleTable True <|> simpleTable False)) <|>
+         (guardEnabled Ext_multiline_tables >>
+                try (multilineTable True)) <|>
+         (guardEnabled Ext_grid_tables >>
+                try (gridTable False <|> gridTable True)) <?> "table"
   optional blanklines
   caption <- case frontCaption of
                   Nothing -> option (return mempty) tableCaption
@@ -1476,35 +1487,37 @@ inlines1 :: PandocMonad m => MarkdownParser m (F Inlines)
 inlines1 = mconcat <$> many1 inline
 
 inline :: PandocMonad m => MarkdownParser m (F Inlines)
-inline = choice [ whitespace
-                , bareURL
-                , str
-                , endline
-                , code
-                , strongOrEmph
-                , note
-                , cite
-                , bracketedSpan
-                , link
-                , image
-                , math
-                , strikeout
-                , subscript
-                , superscript
-                , inlineNote  -- after superscript because of ^[link](/foo)^
-                , autoLink
-                , spanHtml
-                , rawHtmlInline
-                , escapedNewline
-                , escapedChar
-                , rawLaTeXInline'
-                , exampleRef
-                , smart
-                , return . B.singleton <$> charRef
-                , emoji
-                , symbol
-                , ltSign
-                ] <?> "inline"
+inline = do
+  c <- lookAhead anyChar
+  ((case c of
+     ' '     -> whitespace
+     '\t'    -> whitespace
+     '\n'    -> endline
+     '`'     -> code
+     '_'     -> strongOrEmph
+     '*'     -> strongOrEmph
+     '^'     -> superscript <|> inlineNote -- in this order bc ^[link](/foo)^
+     '['     -> note <|> cite <|> bracketedSpan <|> link
+     '!'     -> image
+     '$'     -> math
+     '~'     -> strikeout <|> subscript
+     '<'     -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
+     '\\'    -> math <|> escapedNewline <|> escapedChar <|> rawLaTeXInline'
+     '@'     -> cite <|> exampleRef
+     '"'     -> smart
+     '\''    -> smart
+     '\8216' -> smart
+     '\145'  -> smart
+     '\8220' -> smart
+     '\147'  -> smart
+     '-'     -> smart
+     '.'     -> smart
+     '&'     -> return . B.singleton <$> charRef
+     ':'     -> emoji
+     _       -> mzero)
+   <|> bareURL
+   <|> str
+   <|> symbol) <?> "inline"
 
 escapedChar' :: PandocMonad m => MarkdownParser m Char
 escapedChar' = try $ do
@@ -1515,11 +1528,12 @@ escapedChar' = try $ do
      <|> oneOf "\\`*_{}[]()>#+-.!~\""
 
 escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
-escapedNewline = try $ do
+escapedNewline = do
   guardEnabled Ext_escaped_line_breaks
-  char '\\'
-  lookAhead (char '\n') -- don't consume the newline (see #3730)
-  return $ return B.linebreak
+  try $ do
+    char '\\'
+    lookAhead (char '\n') -- don't consume the newline (see #3730)
+    return $ return B.linebreak
 
 escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
 escapedChar = do
@@ -1541,19 +1555,20 @@ ltSign = do
 -- whole document has been parsed.  But we need this parser
 -- here in case citations is disabled.
 exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
-exampleRef = try $ do
+exampleRef = do
   guardEnabled Ext_example_lists
-  char '@'
-  lab <- mconcat . map T.pack <$>
-                    many (many1 alphaNum <|>
-                          try (do c <- char '_' <|> char '-'
-                                  cs <- many1 alphaNum
-                                  return (c:cs)))
-  return $ do
-    st <- askF
-    return $ case M.lookup lab (stateExamples st) of
-                  Just n  -> B.str $ tshow n
-                  Nothing -> B.str $ "@" <> lab
+  try $ do
+    char '@'
+    lab <- mconcat . map T.pack <$>
+                      many (many1 alphaNum <|>
+                            try (do c <- char '_' <|> char '-'
+                                    cs <- many1 alphaNum
+                                    return (c:cs)))
+    return $ do
+      st <- askF
+      return $ case M.lookup lab (stateExamples st) of
+                    Just n  -> B.str $ tshow n
+                    Nothing -> B.str $ "@" <> lab
 
 symbol :: PandocMonad m => MarkdownParser m (F Inlines)
 symbol = do
@@ -1580,10 +1595,10 @@ code = try $ do
                   >> count (length starts) (char '`')
                   >> notFollowedBy (char '`'))
   rawattr <-
-     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+     (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
      (Right <$> option ("",[],[])
-         (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+         (guardEnabled Ext_inline_code_attributes >> try attributes))
   return $ return $
     case rawattr of
          Left syn   -> B.rawInline syn result
@@ -1676,38 +1691,40 @@ strikeout = fmap B.strikeout <$>
           strikeEnd   = try $ string "~~"
 
 superscript :: PandocMonad m => MarkdownParser m (F Inlines)
-superscript = fmap B.superscript <$> try (do
+superscript = do
   guardEnabled Ext_superscript
-  char '^'
-  mconcat <$> many1Till (do notFollowedBy spaceChar
-                            notFollowedBy newline
-                            inline) (char '^'))
+  fmap B.superscript <$> try (do
+    char '^'
+    mconcat <$> many1Till (do notFollowedBy spaceChar
+                              notFollowedBy newline
+                              inline) (char '^'))
 
 subscript :: PandocMonad m => MarkdownParser m (F Inlines)
-subscript = fmap B.subscript <$> try (do
+subscript = do
   guardEnabled Ext_subscript
-  char '~'
-  mconcat <$> many1Till (do notFollowedBy spaceChar
-                            notFollowedBy newline
-                            inline) (char '~'))
+  fmap B.subscript <$> try (do
+    char '~'
+    mconcat <$> many1Till (do notFollowedBy spaceChar
+                              notFollowedBy newline
+                              inline) (char '~'))
 
 whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
 whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
   where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
         regsp = skipMany spaceChar >> return B.space
 
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
 nonEndline = satisfy (/='\n')
 
 str :: PandocMonad m => MarkdownParser m (F Inlines)
 str = do
   result <- mconcat <$> many1
-             ( take1WhileP isAlphaNum
+             ( T.pack <$> (many1 alphaNum)
               <|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
   updateLastStrPos
   (do guardEnabled Ext_smart
       abbrevs <- getOption readerAbbreviations
-      if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
+      if result `Set.member` abbrevs
          then try (do ils <- whitespace
                       notFollowedBy (() <$ cite <|> () <$ note)
                       -- ?? lookAhead alphaNum
@@ -1790,15 +1807,16 @@ link = try $ do
   regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
 
 bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
-bracketedSpan = try $ do
+bracketedSpan = do
   guardEnabled Ext_bracketed_spans
-  (lab,_) <- reference
-  attr <- attributes
-  return $ if isSmallCaps attr
-              then B.smallcaps <$> lab
-              else if isUnderline attr
-                      then B.underline <$> lab
-                      else B.spanWith attr <$> lab
+  try $ do
+    (lab,_) <- reference
+    attr <- attributes
+    return $ if isSmallCaps attr
+                then B.smallcaps <$> lab
+                else if isUnderline attr
+                        then B.underline <$> lab
+                        else B.spanWith attr <$> lab
 
 -- | We treat a span as SmallCaps if class is "smallcaps" (and
 -- no other attributes are set or if style is "font-variant:small-caps"
@@ -1825,9 +1843,12 @@ regLink :: PandocMonad m
         -> MarkdownParser m (F Inlines)
 regLink constructor lab = try $ do
   (src, tit) <- source
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  pos <- getPosition
+  let src' = if rebase then rebasePath pos src else src
   attr <- option nullAttr $
           guardEnabled Ext_link_attributes >> attributes
-  return $ constructor attr src tit <$> lab
+  return $ constructor attr src' tit <$> lab
 
 -- a link like [this][ref] or [this][] or [this]
 referenceLink :: PandocMonad m
@@ -1867,7 +1888,8 @@ referenceLink constructor (lab, raw) = do
                    Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
                    Nothing              -> makeFallback
             else makeFallback
-       Just ((src,tit), attr) -> constructor attr src tit <$> lab
+       Just ((src,tit), attr) ->
+           constructor attr src tit <$> lab
 
 dropBrackets :: Text -> Text
 dropBrackets = dropRB . dropLB
@@ -1877,12 +1899,13 @@ dropBrackets = dropRB . dropLB
         dropLB xs                          = xs
 
 bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
-bareURL = try $ do
+bareURL = do
   guardEnabled Ext_autolink_bare_uris
   getState >>= guard . stateAllowLinks
-  (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
-  notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
-  return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
+  try $ do
+    (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
+    notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
+    return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
 
 autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
 autoLink = try $ do
@@ -1899,15 +1922,33 @@ autoLink = try $ do
   return $ return $ B.linkWith attr (src <> escapeURI extra) ""
                      (B.str $ orig <> extra)
 
+-- | Rebase a relative path, by adding the (relative) directory
+-- of the containing source position.  Absolute links and URLs
+-- are untouched.
+rebasePath :: SourcePos -> Text -> Text
+rebasePath pos path = do
+  let fp = sourceName pos
+      isFragment = T.take 1 path == "#"
+      path' = T.unpack path
+      isAbsolutePath = Posix.isAbsolute path' || Windows.isAbsolute path'
+   in if T.null path || isFragment || isAbsolutePath || isURI path
+         then path
+         else
+           case takeDirectory fp of
+             ""  -> path
+             "." -> path
+             d   -> T.pack d <> "/" <> path
+
 image :: PandocMonad m => MarkdownParser m (F Inlines)
 image = try $ do
   char '!'
   (lab,raw) <- reference
   defaultExt <- getOption readerDefaultImageExtension
-  let constructor attr' src = case takeExtension (T.unpack src) of
-                          "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
-                                                          $ T.unpack defaultExt)
-                          _  -> B.imageWith attr' src
+  let constructor attr' src =
+         case takeExtension (T.unpack src) of
+            "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
+                                            $ T.unpack defaultExt)
+            _  -> B.imageWith attr' src
   regLink constructor lab <|> referenceLink constructor (lab,raw)
 
 note :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1935,23 +1976,25 @@ note = try $ do
           return $ B.note $ walk adjustCite contents'
 
 inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
-inlineNote = try $ do
+inlineNote = do
   guardEnabled Ext_inline_notes
-  char '^'
-  updateState $ \st -> st{ stateInNote = True
-                         , stateNoteNumber = stateNoteNumber st + 1 }
-  contents <- inlinesInBalancedBrackets
-  updateState $ \st -> st{ stateInNote = False }
-  return $ B.note . B.para <$> contents
+  try $ do
+    char '^'
+    updateState $ \st -> st{ stateInNote = True
+                           , stateNoteNumber = stateNoteNumber st + 1 }
+    contents <- inlinesInBalancedBrackets
+    updateState $ \st -> st{ stateInNote = False }
+    return $ B.note . B.para <$> contents
 
 rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
-rawLaTeXInline' = try $ do
+rawLaTeXInline' = do
   guardEnabled Ext_raw_tex
   notFollowedBy' rawConTeXtEnvironment
-  s <- rawLaTeXInline
-  return $ return $ B.rawInline "tex" s -- "tex" because it might be context
+  try $ do
+    s <- rawLaTeXInline
+    return $ return $ B.rawInline "tex" s -- "tex" because it might be context
 
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
 rawConTeXtEnvironment = try $ do
   string "\\start"
   completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1960,7 +2003,7 @@ rawConTeXtEnvironment = try $ do
                        (try $ string "\\stop" >> textStr completion)
   return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
 
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
 inBrackets parser = do
   char '['
   contents <- manyChar parser
@@ -1968,55 +2011,60 @@ inBrackets parser = do
   return $ "[" <> contents <> "]"
 
 spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
-spanHtml = try $ do
+spanHtml = do
   guardEnabled Ext_native_spans
-  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
-  contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
-  let ident = fromMaybe "" $ lookup "id" attrs
-  let classes = maybe [] T.words $ lookup "class" attrs
-  let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-  return $ if isSmallCaps (ident, classes, keyvals)
-              then B.smallcaps <$> contents
-              else if isUnderline (ident, classes, keyvals)
-                      then B.underline <$> contents
-                      else B.spanWith (ident, classes, keyvals) <$> contents
+  try $ do
+    (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
+    contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
+    let ident = fromMaybe "" $ lookup "id" attrs
+    let classes = maybe [] T.words $ lookup "class" attrs
+    let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+    return $ if isSmallCaps (ident, classes, keyvals)
+                then B.smallcaps <$> contents
+                else if isUnderline (ident, classes, keyvals)
+                        then B.underline <$> contents
+                        else B.spanWith (ident, classes, keyvals) <$> contents
 
 divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
-divHtml = try $ do
+divHtml = do
   guardEnabled Ext_native_divs
-  (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
-  -- we set stateInHtmlBlock so that closing tags that can be either block or
-  -- inline will not be parsed as inline tags
-  oldInHtmlBlock <- stateInHtmlBlock <$> getState
-  updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
-  bls <- option "" (blankline >> option "" blanklines)
-  contents <- mconcat <$>
-              many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
-  closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
-  if closed
-     then do
-       updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
-       let ident = fromMaybe "" $ lookup "id" attrs
-       let classes = maybe [] T.words $ lookup "class" attrs
-       let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-       return $ B.divWith (ident, classes, keyvals) <$> contents
-     else -- avoid backtracing
-       return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+  try $ do
+    (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
+    -- we set stateInHtmlBlock so that closing tags that can be either block
+    -- or inline will not be parsed as inline tags
+    oldInHtmlBlock <- stateInHtmlBlock <$> getState
+    updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
+    bls <- option "" (blankline >> option "" blanklines)
+    contents <- mconcat <$>
+                many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
+    closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
+    if closed
+       then do
+         updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+         let ident = fromMaybe "" $ lookup "id" attrs
+         let classes = maybe [] T.words $ lookup "class" attrs
+         let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+         return $ B.divWith (ident, classes, keyvals) <$> contents
+       else -- avoid backtracing
+         return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
 
 divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
-divFenced = try $ do
+divFenced = do
   guardEnabled Ext_fenced_divs
-  string ":::"
-  skipMany (char ':')
-  skipMany spaceChar
-  attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
-  skipMany spaceChar
-  skipMany (char ':')
-  blankline
-  updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
-  bs <- mconcat <$> manyTill block divFenceEnd
-  updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
-  return $ B.divWith attribs <$> bs
+  try $ do
+    string ":::"
+    skipMany (char ':')
+    skipMany spaceChar
+    attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
+    skipMany spaceChar
+    skipMany (char ':')
+    blankline
+    updateState $ \st ->
+      st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
+    bs <- mconcat <$> manyTill block divFenceEnd
+    updateState $ \st ->
+      st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
+    return $ B.divWith attribs <$> bs
 
 divFenceEnd :: PandocMonad m => MarkdownParser m ()
 divFenceEnd = try $ do
@@ -2048,14 +2096,15 @@ emojiChars :: [Char]
 emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
 
 emoji :: PandocMonad m => MarkdownParser m (F Inlines)
-emoji = try $ do
+emoji = do
   guardEnabled Ext_emoji
-  char ':'
-  emojikey <- many1Char (oneOf emojiChars)
-  char ':'
-  case emojiToInline emojikey of
-    Just i -> return (return $ B.singleton i)
-    Nothing -> mzero
+  try $ do
+    char ':'
+    emojikey <- many1Char (oneOf emojiChars)
+    char ':'
+    case emojiToInline emojikey of
+      Just i -> return (return $ B.singleton i)
+      Nothing -> mzero
 
 -- Citations
 
@@ -2074,7 +2123,7 @@ cite = do
 
 textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
 textualCite = try $ do
-  (suppressAuthor, key) <- citeKey
+  (suppressAuthor, key) <- citeKey True
   -- If this is a reference to an earlier example list item,
   -- then don't parse it as a citation.  If the example list
   -- item comes later, we'll parse it here and figure out in
@@ -2154,7 +2203,7 @@ prefix = trimInlinesF . mconcat <$>
   manyTill inline (char ']'
    <|> lookAhead
          (try $ do optional (try (char ';' >> spnl))
-                   citeKey
+                   citeKey True
                    return ']'))
 
 citeList :: PandocMonad m => MarkdownParser m (F [Citation])
@@ -2163,7 +2212,7 @@ citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
 citation :: PandocMonad m => MarkdownParser m (F Citation)
 citation = try $ do
   pref <- prefix
-  (suppress_author, key) <- citeKey
+  (suppress_author, key) <- citeKey True
   suff <- suffix
   noteNum <- stateNoteNumber <$> getState
   return $ do
@@ -2182,28 +2231,30 @@ citation = try $ do
 smart :: PandocMonad m => MarkdownParser m (F Inlines)
 smart = do
   guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice (map (return <$>) [apostrophe, dash, ellipses])
+  doubleQuoted <|> singleQuoted <|> (return <$> doubleCloseQuote) <|>
+    (return <$> apostrophe) <|> (return <$> dash) <|> (return <$> ellipses)
 
 singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-singleQuoted = try $ do
+singleQuoted = do
   singleQuoteStart
-  withQuoteContext InSingleQuote $
+  (try (withQuoteContext InSingleQuote $
     fmap B.singleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline singleQuoteEnd
+      many1Till inline singleQuoteEnd))
+    <|> (return (return (B.str "\8217")))
 
 -- doubleQuoted will handle regular double-quoted sections, as well
 -- as dialogues with an open double-quote without a close double-quote
 -- in the same paragraph.
 doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-doubleQuoted = try $ do
+doubleQuoted = do
   doubleQuoteStart
-  withQuoteContext InDoubleQuote $
+  (try (withQuoteContext InDoubleQuote $
     fmap B.doubleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline doubleQuoteEnd
+      many1Till inline doubleQuoteEnd))
+    <|> (return (return (B.str "\8220")))
 
 toRow :: [Blocks] -> Row
 toRow = Row nullAttr . map B.simpleCell
 
 toHeaderRow :: [Blocks] -> [Row]
-toHeaderRow l = [toRow l | not (null l)]
+toHeaderRow l = [toRow l | not (null l) && not (all null l)]
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e8985ab2c..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.MediaWiki
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
                            trim, splitTextBy, tshow)
 import Text.Pandoc.Walk (walk)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
-              => ReaderOptions -- ^ Reader options
-              -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+              => ReaderOptions
+              -> a
               -> m Pandoc
 readMediaWiki opts s = do
+  let sources = toSources s
   parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
                                             , mwMaxNestingLevel = 4
                                             , mwNextLinkNumber  = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
                                             , mwLogMessages = []
                                             , mwInTT = False
                                             }
-            (crFilter s <> "\n")
+            sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwInTT            :: Bool
                        }
 
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
 
 instance HasReaderOptions MWState where
   extractReaderOptions = mwOptions
@@ -112,12 +113,14 @@ newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
 isBlockTag' :: Tag Text -> Bool
 isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
   t `notElem` eitherBlockOrInline
+isBlockTag' (TagClose "ref") = True -- needed so 'special' doesn't parse it
 isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
   t `notElem` eitherBlockOrInline
 isBlockTag' tag = isBlockTag tag
 
 isInlineTag' :: Tag Text -> Bool
 isInlineTag' (TagComment _) = True
+isInlineTag' (TagClose "ref") = False -- see below inlineTag
 isInlineTag' t              = not (isBlockTag' t)
 
 eitherBlockOrInline :: [Text]
@@ -554,11 +557,17 @@ variable = try $ do
   contents <- manyTillChar anyChar (try $ string "}}}")
   return $ "{{{" <> contents <> "}}}"
 
+singleParaToPlain :: Blocks -> Blocks
+singleParaToPlain bs =
+  case B.toList bs of
+    [Para ils] -> B.fromList [Plain ils]
+    _ -> bs
+
 inlineTag :: PandocMonad m => MWParser m Inlines
 inlineTag = do
   (tag, _) <- lookAhead $ htmlTag isInlineTag'
   case tag of
-       TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
+       TagOpen "ref" _ -> B.note . singleParaToPlain <$> blocksInTags "ref"
        TagOpen "nowiki" _ -> try $ do
           (_,raw) <- htmlTag (~== tag)
           if T.any (== '/') raw
@@ -678,19 +687,17 @@ url = do
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
 inlinesBetween start end =
-  trimInlines . mconcat <$> try (start >> many1Till inner end)
-    where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
-          innerSpace = try $ whitespace <* notFollowedBy' end
+  trimInlines . mconcat <$> try (start >> many1Till inline end)
 
 emph :: PandocMonad m => MWParser m Inlines
 emph = B.emph <$> nested (inlinesBetween start end)
-    where start = sym "''" >> lookAhead nonspaceChar
+    where start = sym "''"
           end   = try $ notFollowedBy' (() <$ strong) >> sym "''"
 
 strong :: PandocMonad m => MWParser m Inlines
 strong = B.strong <$> nested (inlinesBetween start end)
-    where start = sym "'''" >> lookAhead nonspaceChar
-          end   = try $ sym "'''"
+    where start = sym "'''"
+          end   = sym "'''"
 
 doubleQuotes :: PandocMonad m => MWParser m Inlines
 doubleQuotes = do
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index a64b130e5..cbc523b25 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Metadata
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -14,6 +14,7 @@ Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
 module Text.Pandoc.Readers.Metadata (
   yamlBsToMeta,
   yamlBsToRefs,
+  yamlMetaBlock,
   yamlMap ) where
 
 import Control.Monad
@@ -30,11 +31,13 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Shared
+import qualified Data.Text.Lazy as TL
+import qualified Text.Pandoc.UTF8 as UTF8
 
-yamlBsToMeta :: PandocMonad m
-             => ParserT Text ParserState m (F MetaValue)
+yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
+             => ParserT Sources st m (Future st MetaValue)
              -> BL.ByteString
-             -> ParserT Text ParserState m (F Meta)
+             -> ParserT Sources st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -42,6 +45,9 @@ yamlBsToMeta pMetaValue bstr = do
        Right [] -> return . return $ mempty
        Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
                 -> return . return $ mempty
+       -- the following is what we get from a comment:
+       Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))]
+                -> return . return $ mempty
        Right _  -> Prelude.fail "expected YAML object"
        Left (yamlpos, err')
                 -> do pos <- getPosition
@@ -63,11 +69,11 @@ lookupYAML t (YAML.Mapping _ _ m) =
 lookupYAML _ _ = Nothing
 
 -- Returns filtered list of references.
-yamlBsToRefs :: PandocMonad m
-             => ParserT Text ParserState m (F MetaValue)
+yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
+             => ParserT Sources st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
              -> BL.ByteString
-             -> ParserT Text ParserState m (F [MetaValue])
+             -> ParserT Sources st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -95,8 +101,12 @@ yamlBsToRefs pMetaValue idpred bstr =
        Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
                 -> return . return $ mempty
        Right _  -> Prelude.fail "expecting YAML object"
-       Left (_pos, err')
-                -> Prelude.fail err'
+       Left (yamlpos, err')
+                -> do pos <- getPosition
+                      setPosition $ incSourceLine
+                            (setSourceColumn pos (YE.posColumn yamlpos))
+                            (YE.posLine yamlpos - 1)
+                      Prelude.fail err'
 
 
 nodeToKey :: YAML.Node YE.Pos -> Maybe Text
@@ -104,10 +114,10 @@ nodeToKey (YAML.Scalar _ (YAML.SStr t))       = Just t
 nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
 nodeToKey _                                   = Nothing
 
-normalizeMetaValue :: PandocMonad m
-                   => ParserT Text ParserState m (F MetaValue)
+normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
+                   => ParserT Sources st m (Future st MetaValue)
                    -> Text
-                   -> ParserT Text ParserState m (F MetaValue)
+                   -> ParserT Sources st m (Future st MetaValue)
 normalizeMetaValue pMetaValue x =
    -- Note: a standard quoted or unquoted YAML value will
    -- not end in a newline, but a "block" set off with
@@ -129,10 +139,10 @@ checkBoolean t
   | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False
   | otherwise = Nothing
 
-yamlToMetaValue :: PandocMonad m
-                => ParserT Text ParserState m (F MetaValue)
+yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
+                => ParserT Sources st m (Future st MetaValue)
                 -> YAML.Node YE.Pos
-                -> ParserT Text ParserState m (F MetaValue)
+                -> ParserT Sources st m (Future st MetaValue)
 yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
   case x of
        YAML.SStr t       -> normalizeMetaValue pMetaValue t
@@ -152,10 +162,10 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
   fmap MetaMap <$> yamlMap pMetaValue o
 yamlToMetaValue _ _ = return $ return $ MetaString ""
 
-yamlMap :: PandocMonad m
-        => ParserT Text ParserState m (F MetaValue)
+yamlMap :: (PandocMonad m, HasLastStrPosition st)
+        => ParserT Sources st m (Future st MetaValue)
         -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-        -> ParserT Text ParserState m (F (M.Map Text MetaValue))
+        -> ParserT Sources st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
     kvs <- forM (M.toList o) $ \(key, v) -> do
              k <- maybe (throwError $ PandocParseError
@@ -171,3 +181,20 @@ yamlMap pMetaValue o = do
       return $ do
         v' <- fv
         return (k, v')
+
+-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
+yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
+              => ParserT Sources st m (Future st MetaValue)
+              -> ParserT Sources st m (Future st Meta)
+yamlMetaBlock parser = try $ do
+  string "---"
+  blankline
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+  optional blanklines
+  yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+
+stopLine :: Monad m => ParserT Sources st m ()
+stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index b4eea9d3a..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -36,19 +36,20 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Parsing
+import Text.Pandoc.Shared (trimr, tshow)
 
 -- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
 readMuse opts s = do
-  let input = crFilter s
-  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+  let sources = toSources s
+  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+              (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
 type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
                 , museInPara = False
                 }
 
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
 
 instance HasReaderOptions MuseState where
   extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
 -- * Parsers
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 42843381a..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Native
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
 import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read native formatted text and return a Pandoc document.
 -- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
 --
 -- > Pandoc nullMeta [Plain [Str "hi"]]
 --
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
            => ReaderOptions
-           -> Text       -- ^ String to parse (assuming @'\n'@ line endings)
+           -> a
            -> m Pandoc
 readNative _ s =
-  case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
-    Right doc -> return doc
-    Left _    -> throwError $ PandocParseError "couldn't read native"
+  let t = sourcesToText . toSources $ s
+  in  case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+        Right doc -> return doc
+        Left _    -> throwError $ PandocParseError "couldn't read native"
 
 readBlocks :: Text -> Either PandocError [Block]
 readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 903cdf4a1..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -13,20 +13,21 @@ Conversion of OPML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.OPML ( readOPML ) where
 import Control.Monad.State.Strict
-import Data.Char (toUpper)
 import Data.Default
-import Data.Generics
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import qualified Data.Text.Lazy as TL
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
-import Text.XML.Light
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.XML.Light
+import Control.Monad.Except (throwError)
 
 type OPML m = StateT OPMLState m
 
@@ -46,42 +47,27 @@ instance Default OPMLState where
                  , opmlOptions = def
                  }
 
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readOPML opts inp  = do
-  (bs, st') <- runStateT
-                 (mapM parseBlock $ normalizeTree $
-                    parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
+  let sources = toSources inp
+  (bs, st') <-
+    runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
+                     Left msg -> throwError $ PandocXMLError "" msg
+                     Right ns -> mapM parseBlock ns)
+                 def{ opmlOptions = opts }
   return $
     setTitle (opmlDocTitle st') $
     setAuthors (opmlDocAuthors st') $
     setDate (opmlDocDate st') $
     doc $ mconcat bs
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-
-textContent :: Element -> Text
-textContent = T.pack . strContent
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
 -- exceptT = either throwError return
@@ -105,11 +91,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
 parseBlock (Elem e) =
   case qName (elName e) of
         "ownerName"    -> mempty <$ modify (\st ->
-                              st{opmlDocAuthors = [text $ textContent e]})
+                              st{opmlDocAuthors = [text $ strContent e]})
         "dateModified" -> mempty <$ modify (\st ->
-                              st{opmlDocDate = text $ textContent e})
+                              st{opmlDocDate = text $ strContent e})
         "title"        -> mempty <$ modify (\st ->
-                              st{opmlDocTitle = text $ textContent e})
+                              st{opmlDocTitle = text $ strContent e})
         "outline" -> gets opmlSectionLevel >>= sect . (+1)
         "?xml"  -> return mempty
         _       -> getBlocks e
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 7b8bfd4b5..c274b6fd4 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE PatternGuards     #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt
@@ -15,7 +14,7 @@ Entry point to the odt reader.
 module Text.Pandoc.Readers.Odt ( readOdt ) where
 
 import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
+import Text.Pandoc.XML.Light
 
 import qualified Data.ByteString.Lazy as B
 
@@ -23,6 +22,8 @@ import System.FilePath
 
 import Control.Monad.Except (throwError)
 
+import qualified Data.Text as T
+
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
@@ -60,35 +61,37 @@ readOdt' _ bytes = bytesToOdt bytes-- of
 bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
 bytesToOdt bytes = case toArchiveOrFail bytes of
   Right archive -> archiveToOdt archive
-  Left _        -> Left $ PandocParseError "Couldn't parse odt file."
+  Left err      -> Left $ PandocParseError
+                        $ "Could not unzip ODT: " <> T.pack err
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive
-  | Just contentEntry      <- findEntryByPath "content.xml" archive
-  , Just stylesEntry       <- findEntryByPath "styles.xml"  archive
-  , Just contentElem       <- entryToXmlElem contentEntry
-  , Just stylesElem        <- entryToXmlElem stylesEntry
-  , Right styles           <- chooseMax (readStylesAt stylesElem )
-                                        (readStylesAt contentElem)
-  , media                  <- filteredFilesFromArchive archive filePathIsOdtMedia
-  , startState             <- readerState styles media
-  , Right pandocWithMedia  <- runConverter' read_body
-                                            startState
-                                            contentElem
-
-  = Right pandocWithMedia
-
-  | otherwise
-    -- Not very detailed, but I don't think more information would be helpful
-  = Left $ PandocParseError "Couldn't parse odt file."
-    where
-      filePathIsOdtMedia :: FilePath -> Bool
+archiveToOdt archive = do
+  let onFailure msg Nothing = Left $ PandocParseError msg
+      onFailure _   (Just x) = Right x
+  contentEntry <- onFailure "Could not find content.xml"
+                   (findEntryByPath "content.xml" archive)
+  stylesEntry <- onFailure "Could not find styles.xml"
+                   (findEntryByPath "styles.xml" archive)
+  contentElem <- entryToXmlElem contentEntry
+  stylesElem <- entryToXmlElem stylesEntry
+  styles <- either
+               (\_ -> Left $ PandocParseError "Could not read styles")
+               Right
+               (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
+  let filePathIsOdtMedia :: FilePath -> Bool
       filePathIsOdtMedia fp =
         let (dir, name) = splitFileName fp
         in  (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
+  let media = filteredFilesFromArchive archive filePathIsOdtMedia
+  let startState = readerState styles media
+  either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right
+    (runConverter' read_body startState contentElem)
 
 
 --
-entryToXmlElem :: Entry -> Maybe XML.Element
-entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
+entryToXmlElem :: Entry -> Either PandocError Element
+entryToXmlElem entry =
+  case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+    Right x  -> Right x
+    Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index 93c6b5e79..96515bf56 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where
 import Control.Arrow
 import qualified Control.Category as Cat
 import Control.Monad
-
+import Data.List (foldl')
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 import Text.Pandoc.Readers.Odt.Generic.Fallible
 
@@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
 iterateSL :: (Foldable f, MonadPlus m)
           => ArrowState s    x     y
           -> ArrowState s (f x) (m y)
-iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f
   where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x)
 
 
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 43c44e7e9..5520d039f 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE Arrows            #-}
+{-# LANGUAGE CPP               #-}
 {-# LANGUAGE DeriveFoldable    #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE PatternGuards     #-}
@@ -29,14 +30,14 @@ import Control.Monad ((<=<))
 
 import qualified Data.ByteString.Lazy as B
 import Data.Foldable (fold)
-import Data.List (find, stripPrefix)
+import Data.List (find)
 import qualified Data.Map as M
 import qualified Data.Text as T
 import Data.Maybe
-import Data.Semigroup (First(..), Option(..))
+import Data.Monoid (Alt (..))
 
 import Text.TeXMath (readMathML, writeTeX)
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import Text.Pandoc.Builder hiding (underline)
 import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -505,13 +506,11 @@ type InlineMatcher = ElementMatcher Inlines
 
 type BlockMatcher  = ElementMatcher Blocks
 
-
-newtype FirstMatch a = FirstMatch (Option (First a))
-                     deriving (Foldable, Monoid, Semigroup)
+newtype FirstMatch a = FirstMatch (Alt Maybe a)
+  deriving (Foldable, Monoid, Semigroup)
 
 firstMatch :: a -> FirstMatch a
-firstMatch = FirstMatch . Option . Just . First
-
+firstMatch = FirstMatch . Alt . Just
 
 --
 matchingElement :: (Monoid e)
@@ -557,7 +556,7 @@ read_plain_text =  fst ^&&& read_plain_text' >>% recover
                        >>?% mappend
     --
     extractText     :: XML.Content -> Fallible T.Text
-    extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
+    extractText (XML.Text cData) = succeedWith (XML.cdData cData)
     extractText         _        = failEmpty
 
 read_text_seq :: InlineMatcher
@@ -577,7 +576,10 @@ read_spaces       = matchingElement NsText "s" (
 read_line_break  :: InlineMatcher
 read_line_break   = matchingElement NsText "line-break"
                     $ returnV linebreak
-
+--
+read_tab         :: InlineMatcher
+read_tab          = matchingElement NsText "tab"
+                    $ returnV space
 --
 read_span        :: InlineMatcher
 read_span         = matchingElement NsText "span"
@@ -585,6 +587,7 @@ read_span         = matchingElement NsText "span"
                     $ matchChildContent [ read_span
                                         , read_spaces
                                         , read_line_break
+                                        , read_tab
                                         , read_link
                                         , read_note
                                         , read_citation
@@ -604,6 +607,7 @@ read_paragraph    = matchingElement NsText "p"
                     $ matchChildContent [ read_span
                                         , read_spaces
                                         , read_line_break
+                                        , read_tab
                                         , read_link
                                         , read_note
                                         , read_citation
@@ -630,6 +634,7 @@ read_header       = matchingElement NsText "h"
   children <- ( matchChildContent [ read_span
                                   , read_spaces
                                   , read_line_break
+                                  , read_tab
                                   , read_link
                                   , read_note
                                   , read_citation
@@ -777,14 +782,14 @@ read_frame_img =
       ""   -> returnV mempty -< ()
       src' -> do
         let exts = extensionsFromList [Ext_auto_identifiers]
-        resource   <- lookupResource                          -< src'
+        resource   <- lookupResource                          -< T.unpack src'
         _          <- updateMediaWithResource                 -< resource
         w          <- findAttrText' NsSVG "width"             -< ()
         h          <- findAttrText' NsSVG "height"            -< ()
         titleNodes <- matchChildContent' [ read_frame_title ] -< ()
         alt        <- matchChildContent [] read_plain_text    -< ()
         arr (firstMatch . uncurry4 imageWith)                 -<
-          (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
+          (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
 
 read_frame_title :: InlineMatcher
 read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
@@ -804,7 +809,8 @@ read_frame_mathml =
     case fold src of
       ""   -> returnV mempty -< ()
       src' -> do
-        let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
+        let path = T.unpack $
+                    fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml"
         (_, mathml) <- lookupResource -< path
         case readMathML (UTF8.toText $ B.toStrict mathml) of
           Left _     -> returnV mempty -< ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
 module Text.Pandoc.Readers.Odt.Generic.Namespaces where
 
 import qualified Data.Map as M
+import Data.Text (Text)
 
 --
-type NameSpaceIRI          = String
+type NameSpaceIRI          = Text
 
 --
 type NameSpaceIRIs     nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 , reverseComposition
 , tryToRead
 , Lookupable(..)
-, readLookupables
 , readLookupable
 , readPercent
 , findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 
 import Control.Category (Category, (<<<), (>>>))
 import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
 import qualified Data.Foldable as F (Foldable, foldr)
 import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
 
 -- | Equivalent to
 -- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
 -- (nobody wants that) while the latter returns "to much" for simple purposes.
 -- This function instead applies 'reads' and returns the first match (if any)
 -- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
 
 -- | A version of 'reads' that requires a '%' sign after the number
 readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i   , r ) <- reads s
 -- | Data that can be looked up.
 -- This is mostly a utility to read data with kind *.
 class Lookupable a where
-  lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- >   readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
-                                 a <- maybeToList (lookup word lookupTable)
-                    ]
+  lookupTable :: [(Text, a)]
 
 -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
-                 $ map ((`lookup` lookupTable).fst)
-                 $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+  lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
 
 uncurry3 :: (a->b->c                -> z) -> (a,b,c          ) -> z
 uncurry4 :: (a->b->c->d             -> z) -> (a,b,c,d        ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..341903046 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE TupleSections   #-}
 {-# LANGUAGE GADTs           #-}
 {-# LANGUAGE LambdaCase      #-}
@@ -60,15 +61,15 @@ import           Control.Arrow
 import           Data.Bool ( bool )
 import           Data.Either ( rights )
 import qualified Data.Map             as M
-import qualified Data.Text            as T
+import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
+import           Data.List (foldl')
 
-import qualified Text.XML.Light       as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
-
 import           Text.Pandoc.Readers.Odt.Generic.Namespaces
 import           Text.Pandoc.Readers.Odt.Generic.Utils
 import           Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -78,13 +79,13 @@ import           Text.Pandoc.Readers.Odt.Generic.Fallible
 --------------------------------------------------------------------------------
 
 --
-type ElementName           = String
-type AttributeName         = String
-type AttributeValue        = String
-type TextAttributeValue    = T.Text
+type ElementName           = Text
+type AttributeName         = Text
+type AttributeValue        = Text
+type TextAttributeValue    = Text
 
 --
-type NameSpacePrefix       = String
+type NameSpacePrefix       = Text
 
 --
 type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -292,7 +293,7 @@ readNSattributes         = fromState $ \state -> maybe (state, failEmpty     )
                          => XMLConverterState nsID extraState
                          -> Maybe (XMLConverterState nsID extraState)
     extractNSAttrs startState
-                         = foldl (\state d -> state >>= addNS d)
+                         = foldl' (\state d -> state >>= addNS d)
                                  (Just startState)
                                  nsAttribs
       where nsAttribs    = mapMaybe readNSattr (XML.elAttribs element)
@@ -461,7 +462,7 @@ lookupDefaultingAttr     :: (NameSpaceID nsID, Lookupable a, Default a)
 lookupDefaultingAttr nsID attrName
                          = lookupAttrWithDefault nsID attrName def
 
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
 findAttr'               :: (NameSpaceID nsID)
                         => nsID -> AttributeName
                         -> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
                         =         qualifyName nsID attrName
                               &&& getCurrentElement
                           >>% XML.findAttr
-                          >>^ fmap T.pack
 
 -- | Return value as string or fail
 findAttr               :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText           :: (NameSpaceID nsID)
                        -> FallibleXMLConverter nsID extraState x TextAttributeValue
 findAttrText nsID attrName
                        = findAttr' nsID attrName
-                         >>^ fmap T.pack
                          >>> maybeToChoice
 
 -- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
                         -> XMLConverter nsID extraState x TextAttributeValue
 findAttrTextWithDefault nsID attrName deflt
                        = findAttr' nsID attrName
-                         >>^ maybe deflt T.pack
+                         >>^ fromMaybe deflt
 
 -- | Read and return value or fail
 readAttr               :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
 -- Internals
 --------------------------------------------------------------------------------
 
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
 stringToBool' val | val `elem` trueValues  = Just True
                   | val `elem` falseValues = Just False
                   | otherwise              = Nothing
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index 3a24a1162..70741c28d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt.Namespaces
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -13,10 +14,10 @@ Namespaces used in odt files.
 module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
                                           ) where
 
-import Data.List (isPrefixOf)
 import qualified Data.Map as M (empty, insert)
 import Data.Maybe (fromMaybe, listToMaybe)
-
+import Data.Text (Text)
+import qualified Data.Text as T
 import Text.Pandoc.Readers.Odt.Generic.Namespaces
 
 
@@ -30,7 +31,7 @@ instance NameSpaceID Namespace where
 
 
 findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri]
 
 nsIDmap :: NameSpaceIRIs Namespace
 nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
@@ -54,12 +55,12 @@ data Namespace = -- Open Document core
                  -- Core XML (basically only for the 'id'-attribute)
                | NsXML
                  -- Fallback
-               | NsOther String
+               | NsOther Text
   deriving ( Eq, Ord, Show )
 
 -- | Not the actual iri's, but large prefixes of them - this way there are
 -- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
+nsIDs :: [(Text, Namespace)]
 nsIDs = [
   ("urn:oasis:names:tc:opendocument:xmlns:animation"        , NsAnim         ),
   ("urn:oasis:names:tc:opendocument:xmlns:chart"            , NsChart        ),
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 46a777df1..ca791ad1e 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -2,6 +2,7 @@
 {-# LANGUAGE Arrows          #-}
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE TupleSections   #-}
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Odt.StyleReader
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -43,14 +44,16 @@ import Control.Arrow
 
 import Data.Default
 import qualified Data.Foldable as F
-import Data.List (unfoldr)
+import Data.List (unfoldr, foldl')
 import qualified Data.Map as M
 import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
 import qualified Data.Set as S
 
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, tshow)
 
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 
@@ -90,7 +93,7 @@ instance Default FontPitch where
 --
 -- Thus, we want
 
-type FontFaceName = String
+type FontFaceName = Text
 
 type FontPitches = M.Map FontFaceName FontPitch
 
@@ -117,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" (
                               &&&
                               lookupDefaultingAttr NsStyle "font-pitch"
                             ))
-                    >>?^ ( M.fromList . foldl accumLegalPitches [] )
+                    >>?^ ( M.fromList . foldl' accumLegalPitches [] )
                   ) `ifFailedDo` returnV (Right M.empty)
   where accumLegalPitches ls (Nothing,_) = ls
         accumLegalPitches ls (Just n,p)  = (n,p):ls
@@ -151,7 +154,7 @@ findPitch =     ( lookupAttr NsStyle "font-pitch"
 -- Definitions of main data
 --------------------------------------------------------------------------------
 
-type StyleName        = String
+type StyleName        = Text
 
 -- | There are two types of styles: named styles with a style family and an
 -- optional style parent, and default styles for each style family,
@@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} =
   -- \^ simpler, but in general less efficient
 
 data ListLevelStyle = ListLevelStyle { listLevelType  :: ListLevelType
-                                     , listItemPrefix :: Maybe String
-                                     , listItemSuffix :: Maybe String
+                                     , listItemPrefix :: Maybe Text
+                                     , listItemSuffix :: Maybe Text
                                      , listItemFormat :: ListItemNumberFormat
                                      , listItemStart  :: Int
                                      }
@@ -366,9 +369,9 @@ instance Show ListLevelStyle where
   show ListLevelStyle{..} =    "<LLS|"
                             ++ show listLevelType
                             ++ "|"
-                            ++ maybeToString listItemPrefix
+                            ++ maybeToString (T.unpack <$> listItemPrefix)
                             ++ show listItemFormat
-                            ++ maybeToString listItemSuffix
+                            ++ maybeToString (T.unpack <$> listItemSuffix)
                             ++ ">"
     where maybeToString = fromMaybe ""
 
@@ -471,7 +474,7 @@ readTextProperties =
      )
   where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
         isFontBold = ("normal",False):("bold",True)
-                    :map ((,True).show) ([100,200..900]::[Int])
+                    :map ((,True) . tshow) ([100,200..900]::[Int])
 
 readUnderlineMode     :: StyleReaderSafe _x (Maybe UnderlineMode)
 readUnderlineMode     = readLineMode "text-underline-mode"
@@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
 readStrikeThroughMode = readLineMode "text-line-through-mode"
                                      "text-line-through-style"
 
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode)
 readLineMode modeAttr styleAttr = proc x -> do
   isUL <- searchAttr  NsStyle styleAttr False isLinePresent -< x
   mode <- lookupAttr' NsStyle  modeAttr                     -< x
@@ -560,12 +563,13 @@ readListLevelStyle levelType =      readAttr NsText "level"
 
 --
 chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
-chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
-                                    | otherwise    = Just ( F.foldr1 select ls )
+chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls
   where
-   select ( ListLevelStyle       t1            p1          s1          f1          b1 )
-          ( ListLevelStyle       t2            p2          s2          f2          _ )
-        =   ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
+   select l Nothing = Just l
+   select ( ListLevelStyle t1 p1 s1 f1 b1 )
+          ( Just ( ListLevelStyle t2 p2 s2 f2 _ ))
+        =   Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2)
+                                  (selectLinf f1 f2) b1
    select' LltNumbered _           = LltNumbered
    select' _           LltNumbered = LltNumbered
    select' _           _           = LltBullet
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 851aec103..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
 import Control.Monad.Except (throwError)
 import Control.Monad.Reader (runReaderT)
 
-import Data.Text (Text)
-
 -- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readOrg opts s = do
   parsed <- flip runReaderT def $
             readWithM parseOrg (optionsToParserState opts)
-            (crFilter s <> "\n\n")
+            (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left  e      -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 8f7cac6ea..14233569c 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.BlockStarts
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index f2e8b1ab6..f18d2f9a7 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,9 +1,10 @@
 {-# LANGUAGE FlexibleContexts  #-}
-{-# LANGUAGE RecordWildCards   #-}
+{-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards   #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Blocks
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -38,10 +39,12 @@ import Data.Functor (($>))
 import Data.List (foldl', intersperse)
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Text (Text)
-
+import Data.List.NonEmpty (nonEmpty)
+import System.FilePath
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.Walk as Walk
+import Text.Pandoc.Sources (ToSources(..))
 
 --
 -- parsing blocks
@@ -294,24 +297,22 @@ verseBlock blockType = try $ do
 codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
 codeBlock blockAttrs blockType = do
   skipSpaces
-  (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
-  content           <- rawBlockContent blockType
-  resultsContent    <- option mempty babelResultsBlock
-  let id'            = fromMaybe mempty $ blockAttrName blockAttrs
-  let codeBlck       = B.codeBlockWith ( id', classes, kv ) content
-  let labelledBlck   = maybe (pure codeBlck)
-                             (labelDiv codeBlck)
-                             (blockAttrCaption blockAttrs)
+  (classes, kv)  <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+  content        <- rawBlockContent blockType
+  resultsContent <- option mempty babelResultsBlock
+  let identifier = fromMaybe mempty $ blockAttrName blockAttrs
+  let codeBlk    = B.codeBlockWith (identifier, classes, kv) content
+  let wrap       = maybe pure addCaption (blockAttrCaption blockAttrs)
   return $
-    (if exportsCode kv    then labelledBlck   else mempty) <>
+    (if exportsCode kv    then wrap codeBlk   else mempty) <>
     (if exportsResults kv then resultsContent else mempty)
  where
-   labelDiv :: Blocks -> F Inlines -> F Blocks
-   labelDiv blk value =
-     B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
+   addCaption :: F Inlines -> Blocks -> F Blocks
+   addCaption caption blk = B.divWith ("", ["captioned-content"], [])
+                         <$> (mkCaptionBlock caption <> pure blk)
 
-   labelledBlock :: F Inlines -> F Blocks
-   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+   mkCaptionBlock :: F Inlines -> F Blocks
+   mkCaptionBlock = fmap (B.divWith ("", ["caption"], []) . B.plain)
 
    exportsResults :: [(Text, Text)] -> Bool
    exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
@@ -527,7 +528,9 @@ include = try $ do
                      _ -> nullAttr
         return $ pure . B.codeBlockWith attr <$> parseRaw
       _ -> return $ return . B.fromList . blockFilter params <$> blockList
-  insertIncludedFileF blocksParser ["."] filename
+  currentDir <- takeDirectory . sourceName <$> getPosition
+  insertIncludedFile blocksParser toSources
+                     [currentDir] filename Nothing Nothing
  where
   includeTarget :: PandocMonad m => OrgParser m FilePath
   includeTarget = do
@@ -543,8 +546,7 @@ include = try $ do
     in case (minlvl >>= safeRead :: Maybe Int) of
          Nothing -> blks
          Just lvl -> let levels = Walk.query headerLevel blks
-                         -- CAVE: partial function in else
-                         curMin = if null levels then 0 else minimum levels
+                         curMin = maybe 0 minimum $ nonEmpty levels
                      in Walk.walk (shiftHeader (curMin - lvl)) blks
 
   headerLevel :: Block -> [Int]
@@ -852,16 +854,52 @@ definitionListItem parseIndentedMarker = try $ do
    definitionMarker =
      spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
 
+-- | Checkbox for tasks.
+data Checkbox
+  = UncheckedBox
+  | CheckedBox
+  | SemicheckedBox
+
+-- | Parses a checkbox in a plain list.
+checkbox :: PandocMonad m
+         => OrgParser m Checkbox
+checkbox = do
+  guardEnabled Ext_task_lists
+  try (char '[' *> status <* char ']') <?> "checkbox"
+  where
+    status = choice
+      [ UncheckedBox   <$ char ' '
+      , CheckedBox     <$ char 'X'
+      , SemicheckedBox <$ char '-'
+      ]
+
+checkboxToInlines :: Checkbox -> Inline
+checkboxToInlines = B.Str . \case
+  UncheckedBox   -> "☐"
+  SemicheckedBox -> "☐"
+  CheckedBox     -> "☒"
+
 -- | parse raw text for one list item
 listItem :: PandocMonad m
          => OrgParser m Int
          -> OrgParser m (F Blocks)
 listItem parseIndentedMarker = try . withContext ListItemState $ do
   markerLength <- try parseIndentedMarker
+  box <- optionMaybe checkbox
   firstLine <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
   rest <- T.concat <$> many (listContinuation markerLength)
-  parseFromString blocks $ firstLine <> blank <> rest
+  contents <- parseFromString blocks $ firstLine <> blank <> rest
+  return (maybe id (prependInlines . checkboxToInlines) box <$> contents)
+
+-- | Prepend inlines to blocks, adding them to the first paragraph or
+-- creating a new Plain element if necessary.
+prependInlines :: Inline -> Blocks -> Blocks
+prependInlines inlns = B.fromList . prepend . B.toList
+  where
+    prepend (Plain is : bs) = Plain (inlns : Space : is) : bs
+    prepend (Para  is : bs) = Para  (inlns : Space : is) : bs
+    prepend bs              = Plain [inlns, Space] : bs
 
 -- continuation of a list item - indented and separated by blankline or endline.
 -- Note: nested lists are parsed as continuations.
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 3b363270c..2dcbecb1d 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.DocumentTree
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index 9399ebd54..401e1bd8f 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ExportSettings
-   Copyright   : © 2016–2020 Albert Krewinkel
+   Copyright   : © 2016-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index b234bee58..6862dd71e 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Inlines
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 
 import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -262,7 +263,7 @@ berkeleyCitationList = try $ do
  where
    citationListPart :: PandocMonad m => OrgParser m (F Inlines)
    citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
-     notFollowedBy' citeKey
+     notFollowedBy' $ citeKey False
      notFollowedBy (oneOf ";]")
      inline
 
@@ -277,7 +278,7 @@ berkeleyBareTag' = try $ void (string "cite")
 
 berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
 berkeleyTextualCite = try $ do
-  (suppressAuthor, key) <- citeKey
+  (suppressAuthor, key) <- citeKey False
   returnF . return $ Citation
     { citationId      = key
     , citationPrefix  = mempty
@@ -322,7 +323,7 @@ linkLikeOrgRefCite = try $ do
 -- from the `org-ref-cite-re` variable in `org-ref.el`.
 orgRefCiteKey :: PandocMonad m => OrgParser m Text
 orgRefCiteKey =
-  let citeKeySpecialChars = "-_:\\./," :: String
+  let citeKeySpecialChars = "-_:\\./" :: String
       isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
       isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c
       endOfCitation = try $ do
@@ -350,7 +351,7 @@ citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
 citation :: PandocMonad m => OrgParser m (F Citation)
 citation = try $ do
   pref <- prefix
-  (suppress_author, key) <- citeKey
+  (suppress_author, key) <- citeKey False
   suff <- suffix
   return $ do
     x <- pref
@@ -367,7 +368,7 @@ citation = try $ do
       }
  where
    prefix = trimInlinesF . mconcat <$>
-            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+            manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
    suffix = try $ do
      hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
      skipSpaces
@@ -477,17 +478,17 @@ linkToInlinesF linkStr =
 
 internalLink :: Text -> Inlines -> F Inlines
 internalLink link title = do
-  anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
-  if anchorB
+  ids <- asksF orgStateAnchorIds
+  if link `elem` ids
     then return $ B.link ("#" <> link) "" title
-    else return $ B.emph title
+    else let attr' = ("", ["spurious-link"] , [("target", link)])
+         in return $ B.spanWith attr' (B.emph title)
 
 -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
 -- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
 -- @org-target-regexp@, which is fairly liberal.  Since no link is created if
 -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
 -- an anchor.
-
 anchor :: PandocMonad m => OrgParser m (F Inlines)
 anchor =  try $ do
   anchorId <- parseAnchor
@@ -501,7 +502,6 @@ anchor =  try $ do
 
 -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'.  This mirrors
 -- the org function @org-export-solidify-link-text@.
-
 solidify :: Text -> Text
 solidify = T.map replaceSpecialChar
  where replaceSpecialChar c
@@ -573,7 +573,7 @@ underline :: PandocMonad m => OrgParser m (F Inlines)
 underline = fmap B.underline    <$> emphasisBetween '_'
 
 verbatim  :: PandocMonad m => OrgParser m (F Inlines)
-verbatim  = return . B.code     <$> verbatimBetween '='
+verbatim  = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '='
 
 code      :: PandocMonad m => OrgParser m (F Inlines)
 code      = return . B.code     <$> verbatimBetween '~'
@@ -803,7 +803,7 @@ inlineLaTeX = try $ do
    parseAsInlineLaTeX :: PandocMonad m
                       => Text -> TeXExport -> OrgParser m (Maybe Inlines)
    parseAsInlineLaTeX cs = \case
-     TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+     TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
      TeXIgnore -> return (Just mempty)
      TeXVerbatim -> return (Just $ B.str cs)
 
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 4864d9478..a1b21046a 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Meta
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -239,7 +239,7 @@ lineOfInlines = do
 todoSequence :: Monad m => OrgParser m TodoSequence
 todoSequence = try $ do
   todoKws <- todoKeywords
-  doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
+  doneKws <- optionMaybe $ todoDoneSep *> doneKeywords
   newline
   -- There must be at least one DONE keyword. The last TODO keyword is
   -- taken if necessary.
@@ -250,11 +250,17 @@ todoSequence = try $ do
                     (x:xs) -> return $ keywordsToSequence (reverse xs) [x]
 
  where
+   todoKeyword :: Monad m => OrgParser m Text
+   todoKeyword = many1Char nonspaceChar <* skipSpaces
+
    todoKeywords :: Monad m => OrgParser m [Text]
    todoKeywords = try $
-     let keyword = many1Char nonspaceChar <* skipSpaces
-         endOfKeywords = todoDoneSep <|> void newline
-     in manyTill keyword (lookAhead endOfKeywords)
+     let endOfKeywords = todoDoneSep <|> void newline
+     in manyTill todoKeyword (lookAhead endOfKeywords)
+
+   doneKeywords :: Monad m => OrgParser m [Text]
+   doneKeywords = try $
+     manyTill (todoKeyword <* optional todoDoneSep) (lookAhead newline)
 
    todoDoneSep :: Monad m => OrgParser m ()
    todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 1e4799e7b..abe8a9ebf 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ParserState
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index bce71c24d..f0949e205 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Readers.Org.Parsing
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -63,8 +63,7 @@ module Text.Pandoc.Readers.Org.Parsing
   , ellipses
   , citeKey
   , gridTableWith
-  , insertIncludedFileF
-  -- * Re-exports from Text.Pandoc.Parsec
+  , insertIncludedFile
   , runParser
   , runParserT
   , getInput
@@ -100,21 +99,22 @@ module Text.Pandoc.Readers.Org.Parsing
   , getState
   , updateState
   , SourcePos
+  , sourceName
   , getPosition
   ) where
 
 import Data.Text (Text)
 import Text.Pandoc.Readers.Org.ParserState
 
-import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
-                            parseFromString)
+import Text.Pandoc.Parsing hiding (anyLine, blanklines, newline,
+                                   parseFromString)
 import qualified Text.Pandoc.Parsing as P
 
 import Control.Monad (guard)
 import Control.Monad.Reader (ReaderT)
 
 -- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
 
 --
 -- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 7f72077a4..ad7c65060 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Shared
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index eeb3d1389..3990f0cb5 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -27,8 +27,7 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem,
-                                      readFileFromDirs, getCurrentTime)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, getTimestamp)
 import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -38,25 +37,25 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
 import Data.Time.Format
+import System.FilePath (takeDirectory)
 
 -- TODO:
 -- [ ] .. parsed-literal
 
 -- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readRST opts s = do
   parsed <- readWithM parseRST def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+               (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
 
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
 
 --
 -- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- T.concat <$>
-                  manyTill (referenceKey <|> anchorDef <|>
-                            noteBlock <|> citationBlock <|>
-                            (snd <$> withRaw comment) <|>
-                            headerBlock <|> lineClump) eof
+  let chunk = referenceKey
+              <|> anchorDef
+              <|> noteBlock
+              <|> citationBlock
+              <|> (snd <$> withRaw comment)
+              <|> headerBlock
+              <|> lineClump
+  docMinusKeys <- Sources <$>
+                  manyTill (do pos <- getPosition
+                               t <- chunk
+                               return (pos, t)) eof
+  -- UGLY: we collapse source position information.
+  -- TODO: fix the parser to use the F monad instead of two passes
   setInput docMinusKeys
   setPosition startPos
   st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
 -- hrule block
 --
 
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
 hrule = try $ do
   chr <- oneOf underlineChars
   count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
 
 -- read a line indented by a given string
 indentedLine :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m Text
+             => Int -> ParserT Sources st m Text
 indentedLine indents = try $ do
   lookAhead spaceChar
   gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
 -- one or more indented lines, possibly separated by blank lines.
 -- any amount of indentation will work.
 indentedBlock :: (HasReaderOptions st, Monad m)
-              => ParserT Text st m Text
+              => ParserT Sources st m Text
 indentedBlock = try $ do
   indents <- length <$> lookAhead (many1 spaceChar)
   lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
   optional blanklines
   return $ T.unlines lns
 
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
 quotedBlock = try $ do
     quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
     lns <- many1 $ lookAhead (char quote) >> anyLine
     optional blanklines
     return $ T.unlines lns
 
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
 codeBlockStart = string "::" >> blankline >> blankline
 
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
 codeBlock = try $ codeBlockStart >> codeBlockBody
 
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
 codeBlockBody = do
   lang <- stateRstHighlight <$> getState
   try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
   return $ B.codeBlockWith ("", ["haskell","literate"], [])
          $ T.intercalate "\n" lns
 
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
 latexCodeBlock = try $ do
   try (latexBlockLine "\\begin{code}")
   many1Till anyLine (try $ latexBlockLine "\\end{code}")
  where
   latexBlockLine s = skipMany spaceChar >> string s >> blankline
 
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
 birdCodeBlock = filterSpace <$> many1 birdTrackLine
   where filterSpace lns =
             -- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
                then map (T.drop 1) lns
                else lns
 
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
 birdTrackLine = char '>' >> anyLine
 
 --
@@ -446,64 +453,43 @@ encoding
 -}
 
 includeDirective :: PandocMonad m
-                 => Text -> [(Text, Text)] -> Text
+                 => Text
+                 -> [(Text, Text)]
+                 -> Text
                  -> RSTParser m Blocks
 includeDirective top fields body = do
-  let f = trim top
-  guard $ not (T.null f)
+  let f = T.unpack $ trim top
+  guard $ not $ null f
   guard $ T.null (trim body)
-  -- options
-  let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
-  let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
-  oldPos <- getPosition
-  oldInput <- getInput
-  containers <- stateContainers <$> getState
-  when (f `elem` containers) $
-    throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
-  updateState $ \s -> s{ stateContainers = f : stateContainers s }
-  mbContents <- readFileFromDirs ["."] $ T.unpack f
-  contentLines <- case mbContents of
-                       Just s -> return $ T.lines s
-                       Nothing -> do
-                         logMessage $ CouldNotLoadIncludeFile f oldPos
-                         return []
-  let numLines = length contentLines
-  let startLine' = case startLine of
-                        Nothing            -> 1
-                        Just x | x >= 0    -> x
-                               | otherwise -> numLines + x -- negative from end
-  let endLine' = case endLine of
-                        Nothing            -> numLines + 1
-                        Just x | x >= 0    -> x
-                               | otherwise -> numLines + x -- negative from end
-  let contentLines' =   drop (startLine' - 1)
-                      $ take (endLine' - 1) contentLines
-  let contentLines'' = (case trim <$> lookup "end-before" fields of
-                             Just patt -> takeWhile (not . (patt `T.isInfixOf`))
-                             Nothing   -> id) .
-                       (case trim <$> lookup "start-after" fields of
-                             Just patt -> drop 1 .
-                                            dropWhile (not . (patt `T.isInfixOf`))
-                             Nothing   -> id) $ contentLines'
-  let contents' = T.unlines contentLines''
-  case lookup "code" fields of
-       Just lang -> do
-         let classes =  maybe [] T.words (lookup "class" fields)
-         let ident = maybe "" trimr $ lookup "name" fields
-         codeblock ident classes fields (trimr lang) contents' False
-       Nothing   -> case lookup "literal" fields of
-                         Just _  -> return $ B.rawBlock "rst" contents'
-                         Nothing -> do
-                           setPosition $ newPos (T.unpack f) 1 1
-                           setInput $ contents' <> "\n"
-                           bs <- optional blanklines >>
-                                  (mconcat <$> many block)
-                           setInput oldInput
-                           setPosition oldPos
-                           updateState $ \s -> s{ stateContainers =
-                                         tail $ stateContainers s }
-                           return bs
-
+  let startLine = lookup "start-line" fields >>= safeRead
+  let endLine = lookup "end-line" fields >>= safeRead
+  let classes =  maybe [] T.words (lookup "class" fields)
+  let ident = maybe "" trimr $ lookup "name" fields
+  let parser =
+       case lookup "code" fields of
+         Just lang ->
+           (codeblock ident classes fields (trimr lang) False
+            . sourcesToText) <$> getInput
+         Nothing   ->
+           case lookup "literal" fields of
+             Just _  -> B.rawBlock "rst" . sourcesToText <$> getInput
+             Nothing -> parseBlocks
+  let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields)
+  let selectLines =
+        (case trim <$> lookup "end-before" fields of
+                         Just patt -> takeWhile (not . (patt `T.isInfixOf`))
+                         Nothing   -> id) .
+        (case trim <$> lookup "start-after" fields of
+                         Just patt -> drop 1 .
+                                        dropWhile (not . (patt `T.isInfixOf`))
+                         Nothing   -> id)
+
+  let toStream t =
+        Sources [(initialPos f,
+                   (T.unlines . selectLines . T.lines $ t) <>
+                    if isLiteral then mempty else "\n")]  -- see #7436
+  currentDir <- takeDirectory . sourceName <$> getPosition
+  insertIncludedFile parser toStream [currentDir] f startLine endLine
 
 --
 -- list blocks
@@ -526,7 +512,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
 definitionList = B.definitionList <$> many1 definitionListItem
 
 -- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
 bulletListStart = try $ do
   notFollowedBy' hrule  -- because hrules start out just like lists
   marker <- oneOf bulletListMarkers
@@ -613,8 +599,9 @@ comment = try $ do
   string ".."
   skipMany1 spaceChar <|> (() <$ lookAhead newline)
   -- notFollowedBy' directiveLabel -- comment comes after directive so unnec.
-  manyTill anyChar blanklines
+  _ <- anyLine
   optional indentedBlock
+  optional blanklines
   return mempty
 
 directiveLabel :: Monad m => RSTParser m Text
@@ -685,7 +672,7 @@ directive' = do
         "replace" -> B.para <$>  -- consumed by substKey
                    parseInlineFromText (trim top)
         "date" -> B.para <$> do  -- consumed by substKey
-                     t <- getCurrentTime
+                     t <- getTimestamp
                      let format = case T.unpack (T.strip top) of
                                     [] -> "%Y-%m-%d"
                                     x  -> x
@@ -731,8 +718,8 @@ directive' = do
                                      ""   -> stateRstHighlight def
                                      lang -> Just lang })
         x | x == "code" || x == "code-block" || x == "sourcecode" ->
-          codeblock name classes (map (second trimr) fields)
-             (trim top) body True
+          return $ codeblock name classes (map (second trimr) fields)
+             (trim top) True body
         "aafig" -> do
           let attribs = (name, ["aafig"], map (second trimr) fields)
           return $ B.codeBlockWith attribs $ stripTrailingNewlines body
@@ -877,10 +864,11 @@ csvTableDirective top fields rawcsv = do
                     (bs, _) <- fetchItem u
                     return $ UTF8.toText bs
                   Nothing -> return rawcsv
-  let res = parseCSV opts (case explicitHeader of
-                              Just h  -> h <> "\n" <> rawcsv'
-                              Nothing -> rawcsv')
-  case res of
+  let header' = case explicitHeader of
+                  Just h  -> parseCSV defaultCSVOptions h
+                  Nothing -> Right []
+  let res = parseCSV opts rawcsv'
+  case (<>) <$> header' <*> res of
        Left e  ->
          throwError $ PandocParsecError "csv table" e
        Right rawrows -> do
@@ -1017,10 +1005,10 @@ toChunks = dropWhile T.null
                           then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}"
                           else s
 
-codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Text -> Bool
-          -> RSTParser m Blocks
-codeblock ident classes fields lang body rmTrailingNewlines =
-  return $ B.codeBlockWith attribs $ stripTrailingNewlines' body
+codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Bool -> Text
+          -> Blocks
+codeblock ident classes fields lang rmTrailingNewlines body =
+  B.codeBlockWith attribs $ stripTrailingNewlines' body
     where stripTrailingNewlines' = if rmTrailingNewlines
                                      then stripTrailingNewlines
                                      else id
@@ -1101,7 +1089,7 @@ quotedReferenceName = try $ do
 -- plus isolated (no two adjacent) internal hyphens, underscores,
 -- periods, colons and plus signs; no whitespace or other characters
 -- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
 simpleReferenceName = do
   x <- alphaNum
   xs <- many $  alphaNum
@@ -1120,7 +1108,7 @@ referenceKey = do
   -- return enough blanks to replace key
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
 targetURI = do
   skipSpaces
   optional $ try $ newline >> notFollowedBy blankline
@@ -1158,8 +1146,10 @@ anonymousKey :: Monad m => RSTParser m ()
 anonymousKey = try $ do
   oneOfStrings [".. __:", "__"]
   src <- targetURI
-  pos <- getPosition
-  let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+  -- we need to ensure that the keys are ordered by occurrence in
+  -- the document.
+  numKeys <- M.size . stateKeys <$> getState
+  let key = toKey $ "_" <> T.pack (show numKeys)
   updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
                           stateKeys s }
 
@@ -1248,13 +1238,13 @@ headerBlock = do
 -- Grid tables TODO:
 --  - column spans
 
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many (char ' ')
   return (length dashes, length $ dashes ++ sp)
 
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
 simpleDashedLines ch = try $ many1 (dashedLine ch)
 
 -- Parse a table row separator
@@ -1380,7 +1370,7 @@ hyphens = do
   -- don't want to treat endline after hyphen or dash as a space
   return $ B.str result
 
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
 escapedChar = do c <- escaped anyChar
                  return $ if c == ' ' || c == '\n' || c == '\r'
                              -- '\ ' is null in RST
@@ -1656,21 +1646,4 @@ note = try $ do
       return $ B.note contents
 
 smart :: PandocMonad m => RSTParser m Inlines
-smart = do
-  guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice [apostrophe, dash, ellipses]
-
-singleQuoted :: PandocMonad m => RSTParser m Inlines
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote $
-    B.singleQuoted . trimInlines . mconcat <$>
-      many1Till inline singleQuoteEnd
-
-doubleQuoted :: PandocMonad m => RSTParser m Inlines
-doubleQuoted = try $ do
-  doubleQuoteStart
-  withQuoteContext InDoubleQuote $
-    B.doubleQuoted . trimInlines . mconcat <$>
-      many1Till inline doubleQuoteEnd
+smart = smartPunctuation inline
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
 import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
 import qualified Data.Sequence as Seq
 import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
                   , afterConditional = False
                   }
 
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
 
 --
 -- Lexer: T.Text -> RoffToken
 --
 
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
 eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
 
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
 spacetab = char ' ' <|> char '\t'
 
 characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
   char '*'
   cs <- escapeArg <|> countChar 1 anyChar
   s <- linePartsToText <$> resolveText cs pos
-  getInput >>= setInput . (s <>)
-  return ()
+  addToInput s
 
 -- Parses: '..'
 quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
   font' <- if T.null font || font == "P"
               then prevFont <$> getState
               else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
-  modifyState $ \st -> st{ prevFont = currentFont st
+  updateState $ \st -> st{ prevFont = currentFont st
                          , currentFont = font' }
   return [Font font']
   where
@@ -372,8 +370,8 @@ lexTable pos = do
   spaces
   opts <- try tableOptions <|> [] <$ optional (char ';')
   case lookup "tab" opts of
-    Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
-    _                              -> modifyState $ \st -> st{ tableTabChar = '\t' }
+    Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+    _                              -> updateState $ \st -> st{ tableTabChar = '\t' }
   spaces
   skipMany lexComment
   spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
   ifPart <- do
       optional $ try $ char '\\' >> newline
       lexGroup
-       <|> do modifyState $ \s -> s{ afterConditional = True }
+       <|> do updateState $ \s -> s{ afterConditional = True }
               t <- manToken
-              modifyState $ \s -> s{ afterConditional = False }
+              updateState $ \s -> s{ afterConditional = False }
               return t
   case mbtest of
     Nothing    -> do
-      putState st  -- reset state, so we don't record macros in skipped section
+      setState st  -- reset state, so we don't record macros in skipped section
       report $ SkippedContent (T.cons '.' mname) pos
       return mempty
     Just True  -> return ifPart
     Just False -> do
-      putState st
+      setState st
       return mempty
 
 expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
       _    -> Nothing
   where
     returnValue v = do
-      modifyState $ \st -> st{ lastExpression = v }
+      updateState $ \st -> st{ lastExpression = v }
       return v
 
 lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
       result <- readFileFromDirs dirs $ T.unpack fp
       case result of
         Nothing  -> report $ CouldNotLoadIncludeFile fp pos
-        Just s   -> getInput >>= setInput . (s <>)
+        Just s   -> addToInput s
       return mempty
     []    -> return mempty
 
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
      (x:ys) -> do
        let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
        let stringName = linePartsToText x
-       modifyState $ \st ->
+       updateState $ \st ->
          st{ customMacros = M.insert stringName ts (customMacros st) }
    return mempty
 
 lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
 lexMacroDef args = do -- macro definition
-   modifyState $ \st -> st{ roffMode = CopyMode }
+   updateState $ \st -> st{ roffMode = CopyMode }
    (macroName, stopMacro) <-
      case args of
        (x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
          _ <- lexArgs
          return ()
    ts <- mconcat <$> manyTill manToken stop
-   modifyState $ \st ->
+   updateState $ \st ->
      st{ customMacros = M.insert macroName ts (customMacros st)
        , roffMode = NormalMode }
    return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 484a6c923..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTWiki opts s = do
-  res <- readWithM parseTWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
 
 --
 -- utility functions
@@ -469,27 +469,7 @@ symbol :: PandocMonad m => TWParser m B.Inlines
 symbol = B.str <$> countChar 1 nonspaceChar
 
 smart :: PandocMonad m => TWParser m B.Inlines
-smart = do
-  guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice [ apostrophe
-           , dash
-           , ellipses
-           ]
-
-singleQuoted :: PandocMonad m => TWParser m B.Inlines
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote
-    (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd)
-
-doubleQuoted :: PandocMonad m => TWParser m B.Inlines
-doubleQuoted = try $ do
-  doubleQuoteStart
-  contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
-  withQuoteContext InDoubleQuote (doubleQuoteEnd >>
-   return (B.doubleQuoted $ B.trimInlines contents))
-   <|> return (B.str "\8220" B.<> contents)
+smart = smartPunctuation inline
 
 link :: PandocMonad m => TWParser m B.Inlines
 link = try $ do
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 6691d8381..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -3,7 +3,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Textile
    Copyright   : Copyright (C) 2010-2012 Paul Rivier
-                               2010-2020 John MacFarlane
+                               2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Paul Rivier <paul*rivier#demotera*com>
@@ -11,7 +11,7 @@
    Portability : portable
 
 Conversion from Textile to 'Pandoc' document, based on the spec
-available at http://redcloth.org/textile.
+available at https://www.promptworks.com/textile/.
 
 Implemented and parsed:
  - Paragraphs
@@ -38,7 +38,8 @@ module Text.Pandoc.Readers.Textile ( readTextile) where
 import Control.Monad (guard, liftM)
 import Control.Monad.Except (throwError)
 import Data.Char (digitToInt, isUpper)
-import Data.List (intersperse, transpose)
+import Data.List (intersperse, transpose, foldl')
+import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -52,30 +53,34 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
 
 -- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
             => ReaderOptions -- ^ Reader options
-            -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+            -> a
             -> m Pandoc
 readTextile opts s = do
-  parsed <- readWithM parseTextile def{ stateOptions = opts }
-                (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  parsed <- readWithM parseTextile def{ stateOptions = opts } sources
   case parsed of
      Right result -> return result
      Left e       -> throwError e
 
+type TextileParser = ParserT Sources ParserState
 
 -- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
 parseTextile = do
   many blankline
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys/notes were...
-  let firstPassParser = noteBlock <|> lineClump
-  manyTill firstPassParser eof >>= setInput . T.concat
+  let firstPassParser = do
+        pos <- getPosition
+        t <- noteBlock <|> lineClump
+        return (pos, t)
+  manyTill firstPassParser eof >>= setInput . Sources
   setPosition startPos
   st' <- getState
   let reversedNotes = stateNotes st'
@@ -83,10 +88,10 @@ parseTextile = do
   -- now parse it for real...
   Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
 
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
 noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
 
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
 noteBlock = try $ do
   startPos <- getPosition
   ref <- noteMarker
@@ -101,11 +106,11 @@ noteBlock = try $ do
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
 -- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
 parseBlocks = mconcat <$> manyTill block eof
 
 -- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
 blockParsers = [ codeBlock
                , header
                , blockQuote
@@ -120,22 +125,22 @@ blockParsers = [ codeBlock
                ]
 
 -- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
 block = do
   res <- choice blockParsers <?> "block"
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
 commentBlock = try $ do
   string "###."
   manyTill anyLine blanklines
   return mempty
 
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
 codeBlock = codeBlockTextile <|> codeBlockHtml
 
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
 codeBlockTextile = try $ do
   string "bc." <|> string "pre."
   extended <- option False (True <$ char '.')
@@ -155,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
 trimTrailingNewlines = T.dropWhileEnd (=='\n')
 
 -- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
 codeBlockHtml = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
   result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -173,7 +178,7 @@ codeBlockHtml = try $ do
   return $ B.codeBlockWith (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
 header = try $ do
   char 'h'
   level <- digitToInt <$> oneOf "123456"
@@ -185,14 +190,14 @@ header = try $ do
   return $ B.headerWith attr' level name
 
 -- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
 blockQuote = try $ do
   string "bq" >> attributes >> char '.' >> whitespace
   B.blockQuote <$> para
 
 -- Horizontal rule
 
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
 hrule = try $ do
   skipSpaces
   start <- oneOf "-*"
@@ -207,39 +212,39 @@ hrule = try $ do
 -- | Can be a bullet list or an ordered list. This implementation is
 -- strict in the nesting, sublist must start at exactly "parent depth
 -- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
 anyList = try $ anyListAtDepth 1 <* blanklines
 
 -- | This allow one type of list to be nested into an other type,
 -- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                 orderedListAtDepth depth,
                                 definitionList ]
 
 -- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
 -- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListAtDepth depth = try $ do
   items <- many1 (orderedListItemAtDepth depth)
   return $ B.orderedList items
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
 genericListItemAtDepth c depth = try $ do
   count depth (char c) >> attributes >> whitespace
   contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -249,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
   return $ contents <> sublist
 
 -- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
 definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
 listStart = genericListStart '*'
         <|> () <$ genericListStart '#'
         <|> () <$ definitionListStart
 
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
 genericListStart c = () <$ try (many1 (char c) >> whitespace)
 
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
 basicDLStart = do
   char '-'
   whitespace
   notFollowedBy newline
 
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
 definitionListStart = try $ do
   basicDLStart
   trimInlines . mconcat <$>
@@ -280,15 +285,15 @@ definitionListStart = try $ do
 -- the term defined, then spaces and ":=". The definition follows, on
 -- the same single line, or spaned on multiple line, after a line
 -- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
 definitionListItem = try $ do
   term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
   def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
   return (term, def')
-  where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+  where inlineDef :: PandocMonad m => TextileParser m [Blocks]
         inlineDef = liftM (\d -> [B.plain d])
                     $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
-        multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+        multilineDef :: PandocMonad m => TextileParser m [Blocks]
         multilineDef = try $ do
           optional whitespace >> newline
           s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -299,7 +304,7 @@ definitionListItem = try $ do
 -- raw content
 
 -- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
 rawHtmlBlock = try $ do
   skipMany spaceChar
   (_,b) <- htmlTag isBlockTag
@@ -307,14 +312,14 @@ rawHtmlBlock = try $ do
   return $ B.rawBlock "html" b
 
 -- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
   B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
 para = B.para . trimInlines . mconcat <$> many1 inline
 
 -- Tables
@@ -325,7 +330,7 @@ toAlignment '>' = AlignRight
 toAlignment '=' = AlignCenter
 toAlignment _   = AlignDefault
 
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
 cellAttributes = try $ do
   isHeader <- option False (True <$ char '_')
   -- we just ignore colspan and rowspan markers:
@@ -338,7 +343,7 @@ cellAttributes = try $ do
   return (isHeader, alignment)
 
 -- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
 tableCell = try $ do
   char '|'
   (isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -349,7 +354,7 @@ tableCell = try $ do
   return ((isHeader, alignment), B.plain content)
 
 -- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
 tableRow = try $ do
   -- skip optional row attributes
   optional $ try $ do
@@ -359,7 +364,7 @@ tableRow = try $ do
   many1 tableCell <* char '|' <* blankline
 
 -- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
 table = try $ do
   -- ignore table attributes
   caption <- option mempty $ try $ do
@@ -375,8 +380,9 @@ table = try $ do
                              (toprow:rest) | any (fst . fst) toprow ->
                                 (toprow, rest)
                              _ -> (mempty, rawrows)
-  let nbOfCols = maximum $ map length (headers:rows)
-  let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
+  let nbOfCols = maximum $ fmap length (headers :| rows)
+  let aligns = map (maybe AlignDefault minimum . nonEmpty) $
+                transpose $ map (map (snd . fst)) (headers:rows)
   let toRow = Row nullAttr . map B.simpleCell
       toHeaderRow l = [toRow l | not (null l)]
   return $ B.table (B.simpleCaption $ B.plain caption)
@@ -386,7 +392,7 @@ table = try $ do
     (TableFoot nullAttr [])
 
 -- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
 ignorableRow = try $ do
   char '|'
   oneOf ":^-~"
@@ -395,7 +401,7 @@ ignorableRow = try $ do
   _ <- anyLine
   return ()
 
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
 explicitBlockStart name = try $ do
   string (T.unpack name)
   attributes
@@ -407,8 +413,8 @@ explicitBlockStart name = try $ do
 -- However, they can be used to set HTML/CSS attributes when needed.
 maybeExplicitBlock :: PandocMonad m
                    => Text  -- ^ block tag name
-                   -> ParserT Text ParserState m Blocks -- ^ implicit block
-                   -> ParserT Text ParserState m Blocks
+                   -> TextileParser m Blocks -- ^ implicit block
+                   -> TextileParser m Blocks
 maybeExplicitBlock name blk = try $ do
   optional $ explicitBlockStart name
   blk
@@ -421,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
 
 
 -- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
 inline = choice inlineParsers <?> "inline"
 
 -- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
 inlineParsers = [ str
                 , whitespace
                 , endline
@@ -445,7 +451,7 @@ inlineParsers = [ str
                 ]
 
 -- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
 inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       , simpleInline (string "**") B.strong
                       , simpleInline (string "__") B.emph
@@ -459,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       ]
 
 -- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
 mark = try $ char '(' >> (try tm <|> try reg <|> copy)
 
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
 reg = do
   oneOf "Rr"
   char ')'
   return $ B.str "\174"
 
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
 tm = do
   oneOf "Tt"
   oneOf "Mm"
   char ')'
   return $ B.str "\8482"
 
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
 copy = do
   oneOf "Cc"
   char ')'
   return $ B.str "\169"
 
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
 note = try $ do
   ref <- char '[' *> many1 digit <* char ']'
   notes <- stateNotes <$> getState
@@ -505,13 +511,13 @@ wordBoundaries :: [Char]
 wordBoundaries = markupChars <> stringBreakers
 
 -- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
 hyphenedWords = do
   x <- wordChunk
   xs <-  many (try $ char '-' >> wordChunk)
   return $ T.intercalate "-" (x:xs)
 
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
 wordChunk = try $ do
   hd <- noneOf wordBoundaries
   tl <- many ( noneOf wordBoundaries <|>
@@ -520,7 +526,7 @@ wordChunk = try $ do
   return $ T.pack $ hd:tl
 
 -- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
 str = do
   baseStr <- hyphenedWords
   -- RedCloth compliance : if parsed word is uppercase and immediately
@@ -533,11 +539,11 @@ str = do
   return $ B.str fullStr
 
 -- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
 whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -545,18 +551,18 @@ endline = try $ do
   notFollowedBy rawHtmlBlock
   return B.linebreak
 
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
 rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
 rawLaTeXInline' = try $ do
   guardEnabled Ext_raw_tex
   B.rawInline "latex" <$> rawLaTeXInline
 
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
 link = try $ do
   bracketed <- (True <$ char '[') <|> return False
   char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -576,7 +582,7 @@ link = try $ do
               else B.spanWith attr $ B.link url "" name'
 
 -- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
 image = try $ do
   char '!' >> notFollowedBy space
   (ident, cls, kvs) <- attributes
@@ -588,51 +594,51 @@ image = try $ do
   char '!'
   return $ B.imageWith attr src alt (B.str alt)
 
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
 escapedInline = escapedEqs <|> escapedTag
 
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
 escapedEqs = B.str . T.pack <$>
   try (string "==" *> manyTill anyChar' (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
 escapedTag = B.str . T.pack <$>
   try (string "<notextile>" *>
          manyTill anyChar' (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
 symbol = B.str . T.singleton <$> (notFollowedBy newline *>
                                   notFollowedBy rawHtmlBlock *>
                                   oneOf wordBoundaries)
 
 -- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
 code = code1 <|> code2
 
 -- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
 anyChar' =
   satisfy (/='\n') <|>
   try (char '\n' <* notFollowedBy blankline)
 
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
 code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
 
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
   B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
-attributes = foldl (flip ($)) ("",[],[]) <$>
+attributes :: PandocMonad m => TextileParser m Attr
+attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
 
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 specialAttribute = do
   alignStr <- ("center" <$ char '=') <|>
     ("justify" <$ try (string "<>")) <|>
@@ -641,11 +647,11 @@ specialAttribute = do
   notFollowedBy spaceChar
   return $ addStyle $ T.pack $ "text-align:" ++ alignStr
 
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 attribute = try $
   (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
 
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 classIdAttr = try $ do -- (class class #id)
   char '('
   ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -657,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
        classes'
          -> return $ \(_,_,keyvals) -> ("",classes',keyvals)
 
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 styleAttr = do
   style <- try $ enclosed (char '{') (char '}') anyChar'
   return $ addStyle $ T.pack style
@@ -668,23 +674,23 @@ addStyle style (id',classes,keyvals) =
   where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
         style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
 
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 langAttr = do
   lang <- try $ enclosed (char '[') (char ']') alphaNum
   return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
 
 -- | Parses material surrounded by a parser.
 surrounded :: (PandocMonad m, Show t)
-           => ParserT Text st m t   -- ^ surrounding parser
-           -> ParserT Text st m a   -- ^ content parser (to be used repeatedly)
-           -> ParserT Text st m [a]
+           => ParserT Sources st m t   -- ^ surrounding parser
+           -> ParserT Sources st m a   -- ^ content parser (to be used repeatedly)
+           -> ParserT Sources st m [a]
 surrounded border =
   enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
 simpleInline :: PandocMonad m
-             => ParserT Text ParserState m t        -- ^ surrounding parser
+             => TextileParser m t        -- ^ surrounding parser
              -> (Inlines -> Inlines)                  -- ^ Inline constructor
-             -> ParserT Text ParserState m Inlines  -- ^ content parser (to be used repeatedly)
+             -> TextileParser m Inlines  -- ^ content parser (to be used repeatedly)
 simpleInline border construct = try $ do
   notAfterString
   border *> notFollowedBy (oneOf " \t\n\r")
@@ -698,7 +704,7 @@ simpleInline border construct = try $ do
            then body
            else B.spanWith attr body
 
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
 groupedInlineMarkup = try $ do
     char '['
     sp1 <- option mempty $ B.space <$ whitespace
@@ -707,5 +713,5 @@ groupedInlineMarkup = try $ do
     char ']'
     return $ sp1 <> result <> sp2
 
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
 eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Logging (Verbosity (..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
 import Text.Pandoc.XML (fromEntities)
 import Text.Printf (printf)
 
 -- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTikiWiki opts s = do
-  res <- readWithM parseTikiWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 474e4fac0..b5cf5a0f3 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
 {- |
    Module      : Text.Pandoc.Readers.Txt2Tags
    Copyright   : Copyright (C) 2014 Matthew Pickering
@@ -19,6 +20,7 @@ import Control.Monad.Except (catchError, throwError)
 import Control.Monad.Reader (Reader, asks, runReader)
 import Data.Default
 import Data.List (intercalate, transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -31,9 +33,9 @@ import Data.Time (defaultTimeLocale)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
 
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
 
 -- | An object for the T2T macros meta information
 -- the contents of each field is simply substituted verbatim into the file
@@ -53,25 +55,28 @@ getT2TMeta = do
     inps <- P.getInputFiles
     outp <- fromMaybe "" <$> P.getOutputFile
     curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
-    let getModTime = fmap (formatTime defaultTimeLocale "%T") .
-                       P.getModificationTime
-    curMtime <- case inps of
-                  [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
-                  _ -> catchError
-                        (maximum <$> mapM getModTime inps)
-                        (const (return ""))
-    return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp
+    curMtime <- catchError
+                 (mapM P.getModificationTime inps >>=
+                   (\case
+                       Nothing ->
+                         formatTime defaultTimeLocale "%T" <$> P.getZonedTime
+                       Just ts -> return $
+                         formatTime defaultTimeLocale "%T" $ maximum ts)
+                    . nonEmpty)
+                (const (return ""))
+    return $ T2TMeta (T.pack curDate) (T.pack curMtime)
+                     (intercalate ", " inps) outp
 
 -- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readTxt2Tags opts s = do
+  let sources = ensureFinalNewlines 2 (toSources s)
   meta <- getT2TMeta
   let parsed = flip runReader meta $
-        readWithM parseT2T (def {stateOptions = opts}) $
-        crFilter s <> "\n\n"
+        readWithM parseT2T (def {stateOptions = opts}) sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -261,9 +266,9 @@ table = try $ do
   rows <- many1 (many commentLine *> tableRow)
   let columns = transpose rows
   let ncolumns = length columns
-  let aligns = map (foldr1 findAlign . map fst) columns
+  let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns
   let rows' = map (map snd) rows
-  let size = maximum (map length rows')
+  let size = maybe 0 maximum $ nonEmpty $ map length rows'
   let rowsPadded = map (pad size) rows'
   let headerPadded = if null tableHeader then mempty else pad size tableHeader
   let toRow = Row nullAttr . map B.simpleCell
@@ -278,10 +283,11 @@ pad :: (Monoid a) => Int -> [a] -> [a]
 pad n xs = xs ++ replicate (n - length xs) mempty
 
 
-findAlign :: Alignment -> Alignment -> Alignment
-findAlign x y
-  | x == y = x
-  | otherwise = AlignDefault
+findAlign :: (Alignment, a) -> Maybe Alignment -> Maybe Alignment
+findAlign (x,_) (Just y)
+  | x == y = Just x
+  | otherwise = Just AlignDefault
+findAlign (x,_) Nothing = Just x
 
 headerRow :: T2T [(Alignment, Blocks)]
 headerRow = genericRow (string "||")
@@ -472,9 +478,29 @@ macro = try $ do
 -- raw URLs in text are automatically linked
 url :: T2T Inlines
 url = try $ do
-  (rawUrl, escapedUrl) <- try uri <|> emailAddress
+  (rawUrl, escapedUrl) <- try uri <|> emailAddress'
   return $ B.link rawUrl "" (B.str escapedUrl)
 
+emailAddress' :: T2T (Text, Text)
+emailAddress' = do
+  (base, mailURI) <- emailAddress
+  query <- option "" emailQuery
+  return (base <> query, mailURI <> query)
+
+emailQuery :: T2T Text
+emailQuery = do
+  char '?'
+  parts <- kv `sepBy1` (char '&')
+  return $ "?" <> T.intercalate "&" parts
+
+kv :: T2T Text
+kv = do
+  k <- T.pack <$> many1 alphaNum
+  char '='
+  let vchar = alphaNum <|> try (oneOf "%._/~:,=$@&+-;*" <* lookAhead alphaNum)
+  v <- T.pack <$> many1 vchar
+  return (k <> "=" <> v)
+
 uri :: T2T (Text, Text)
 uri = try $ do
   address <- t2tURI
@@ -564,7 +590,7 @@ getTarget = do
               _                               -> "html"
 
 atStart :: T2T ()
-atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+atStart = getPosition >>= guard . (== 1) . sourceColumn
 
 ignoreSpacesCap :: T2T Text -> T2T Text
 ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
                             many1Till, orderedListMarker, readWithM,
                             registerHeader, spaceChar, stateMeta,
                             stateOptions, uri, manyTillChar, manyChar, textStr,
-                            many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+                            many1Char, countChar, many1TillChar,
+                            alphaNum, anyChar, char, newline, noneOf, oneOf,
+                            space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
                            isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
-                         spaces, string)
 import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
                                manyTill, notFollowedBy, option, skipMany1)
 import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
 
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readVimwiki opts s = do
-  res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+  let sources = toSources s
+  res <- readWithM parseVimwiki def{ stateOptions = opts } sources
   case res of
        Left e       -> throwError e
        Right result -> return result
 
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
 
 
 -- constants
diff --git a/src/Text/Pandoc/RoffChar.hs b/src/Text/Pandoc/RoffChar.hs
index 67e8b9cd5..d1c38204f 100644
--- a/src/Text/Pandoc/RoffChar.hs
+++ b/src/Text/Pandoc/RoffChar.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.RoffChar
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 061361aba..3bbab4bbe 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.SelfContained
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -16,7 +16,6 @@ the HTML using data URIs.
 module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where
 import Codec.Compression.GZip as Gzip
 import Control.Applicative ((<|>))
-import Control.Monad.Except (throwError)
 import Control.Monad.Trans (lift)
 import Data.ByteString (ByteString)
 import Data.ByteString.Base64
@@ -29,7 +28,6 @@ import System.FilePath (takeDirectory, takeExtension, (</>))
 import Text.HTML.TagSoup
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), fetchItem,
                                       getInputFiles, report, setInputFiles)
-import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Shared (isURI, renderTags', trim)
@@ -244,11 +242,10 @@ getData mimetype src
       let raw' = if ext `elem` [".gz", ".svgz"]
                  then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw]
                  else raw
-      mime <- case (mimetype, respMime) of
-                ("",Nothing) -> throwError $ PandocSomeError
-                  $ "Could not determine mime type for `" <> src <> "'"
-                (x, Nothing) -> return x
-                (_, Just x ) -> return x
+      let mime = case (mimetype, respMime) of
+                  ("",Nothing) -> "application/octet-stream"
+                  (x, Nothing) -> x
+                  (_, Just x ) -> x
       result <- if "text/css" `T.isPrefixOf` mime
                 then do
                   oldInputs <- getInputFiles
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 4853621c8..920edca7b 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Shared
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -21,18 +21,11 @@ module Text.Pandoc.Shared (
                      -- * List processing
                      splitBy,
                      splitTextBy,
-                     splitByIndices,
-                     splitStringByIndices,
                      splitTextByIndices,
-                     substitute,
                      ordNub,
                      findM,
                      -- * Text processing
-                     ToString (..),
-                     ToText (..),
                      tshow,
-                     backslashEscapes,
-                     escapeStringUsing,
                      elemText,
                      notElemText,
                      stripTrailingNewlines,
@@ -70,10 +63,10 @@ module Text.Pandoc.Shared (
                      isTightList,
                      taskListItemFromAscii,
                      taskListItemToAscii,
+                     handleTaskListItem,
                      addMetaField,
                      makeMeta,
                      eastAsianLineBreakFilter,
-                     underlineSpan,
                      htmlSpanLikeElements,
                      splitSentences,
                      filterIpynbOutput,
@@ -98,7 +91,7 @@ module Text.Pandoc.Shared (
                      safeRead,
                      safeStrRead,
                      -- * User data directory
-                     defaultUserDataDirs,
+                     defaultUserDataDir,
                      -- * Version
                      pandocVersion
                     ) where
@@ -112,7 +105,7 @@ import qualified Data.Bifunctor as Bifunctor
 import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                   generalCategory, GeneralCategory(NonSpacingMark,
                   SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
-import Data.List (find, intercalate, intersperse, stripPrefix, sortOn)
+import Data.List (find, intercalate, intersperse, sortOn, foldl')
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Monoid (Any (..))
@@ -130,7 +123,7 @@ import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
 import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
 import qualified Text.Pandoc.Builder as B
 import Data.Time
-import Text.Pandoc.Asciify (toAsciiChar)
+import Text.Pandoc.Asciify (toAsciiText)
 import Text.Pandoc.Definition
 import Text.Pandoc.Extensions (Extensions, Extension(..), extensionEnabled)
 import Text.Pandoc.Generic (bottomUp)
@@ -150,46 +143,31 @@ splitBy :: (a -> Bool) -> [a] -> [[a]]
 splitBy _ [] = []
 splitBy isSep lst =
   let (first, rest) = break isSep lst
-      rest'         = dropWhile isSep rest
-  in  first:splitBy isSep rest'
+  in  first:splitBy isSep (dropWhile isSep rest)
 
+-- | Split text by groups of one or more separator.
 splitTextBy :: (Char -> Bool) -> T.Text -> [T.Text]
 splitTextBy isSep t
   | T.null t = []
   | otherwise = let (first, rest) = T.break isSep t
-                    rest'         = T.dropWhile isSep rest
-                in  first : splitTextBy isSep rest'
-
-splitByIndices :: [Int] -> [a] -> [[a]]
-splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x)  xs) rest
-  where (first, rest) = splitAt x lst
-
--- | Split string into chunks divided at specified indices.
-splitStringByIndices :: [Int] -> [Char] -> [[Char]]
-splitStringByIndices [] lst = [lst]
-splitStringByIndices (x:xs) lst =
-  let (first, rest) = splitAt' x lst in
-  first : splitStringByIndices (map (\y -> y - x) xs) rest
+                in  first : splitTextBy isSep (T.dropWhile isSep rest)
 
 splitTextByIndices :: [Int] -> T.Text -> [T.Text]
-splitTextByIndices ns = fmap T.pack . splitStringByIndices ns . T.unpack
+splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
+ where
+  splitTextByRelIndices [] cs = [T.pack cs]
+  splitTextByRelIndices (x:xs) cs =
+    let (first, rest) = splitAt' x cs
+     in T.pack first : splitTextByRelIndices xs rest
 
+-- Note: don't replace this with T.splitAt, which is not sensitive
+-- to character widths!
 splitAt' :: Int -> [Char] -> ([Char],[Char])
 splitAt' _ []          = ([],[])
 splitAt' n xs | n <= 0 = ([],xs)
 splitAt' n (x:xs)      = (x:ys,zs)
   where (ys,zs) = splitAt' (n - charWidth x) xs
 
--- | Replace each occurrence of one sublist in a list with another.
-substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
-substitute _ _ [] = []
-substitute [] _ xs = xs
-substitute target replacement lst@(x:xs) =
-    case stripPrefix target lst of
-      Just lst' -> replacement ++ substitute target replacement lst'
-      Nothing   -> x : substitute target replacement xs
-
 ordNub :: (Ord a) => [a] -> [a]
 ordNub l = go Set.empty l
   where
@@ -209,38 +187,9 @@ findM p = foldr go (pure Nothing)
 -- Text processing
 --
 
-class ToString a where
-  toString :: a -> String
-
-instance ToString String where
-  toString = id
-
-instance ToString T.Text where
-  toString = T.unpack
-
-class ToText a where
-  toText :: a -> T.Text
-
-instance ToText String where
-  toText = T.pack
-
-instance ToText T.Text where
-  toText = id
-
 tshow :: Show a => a -> T.Text
 tshow = T.pack . show
 
--- | Returns an association list of backslash escapes for the
--- designated characters.
-backslashEscapes :: [Char]    -- ^ list of special characters to escape
-                 -> [(Char, T.Text)]
-backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
-
--- | Escape a string of characters, using an association list of
--- characters and strings.
-escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
-escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
-
 -- | @True@ exactly when the @Char@ appears in the @Text@.
 elemText :: Char -> T.Text -> Bool
 elemText c = T.any (== c)
@@ -253,17 +202,24 @@ notElemText c = T.all (/= c)
 stripTrailingNewlines :: T.Text -> T.Text
 stripTrailingNewlines = T.dropWhileEnd (== '\n')
 
+isWS :: Char -> Bool
+isWS ' '  = True
+isWS '\r' = True
+isWS '\n' = True
+isWS '\t' = True
+isWS _    = False
+
 -- | Remove leading and trailing space (including newlines) from string.
 trim :: T.Text -> T.Text
-trim = T.dropAround (`elemText` " \r\n\t")
+trim = T.dropAround isWS
 
 -- | Remove leading space (including newlines) from string.
 triml :: T.Text -> T.Text
-triml = T.dropWhile (`elemText` " \r\n\t")
+triml = T.dropWhile isWS
 
 -- | Remove trailing space (including newlines) from string.
 trimr :: T.Text -> T.Text
-trimr = T.dropWhileEnd (`elemText` " \r\n\t")
+trimr = T.dropWhileEnd isWS
 
 -- | Trim leading space and trailing space unless after \.
 trimMath :: T.Text -> T.Text
@@ -274,7 +230,7 @@ trimMath = triml . T.reverse . stripBeginSpace . T.reverse -- no Text.spanEnd
       | Just ('\\', _) <- T.uncons suff = T.cons (T.last pref) suff
       | otherwise = suff
       where
-        (pref, suff) = T.span (`elemText` " \t\n\r") t
+        (pref, suff) = T.span isWS t
 
 -- | Strip leading and trailing characters from string
 stripFirstAndLast :: T.Text -> T.Text
@@ -342,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines
                        (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
                        <> go (T.drop 1 s2)
 
+{-# DEPRECATED crFilter "readers filter crs automatically" #-}
 -- | Strip out DOS line endings.
 crFilter :: T.Text -> T.Text
 crFilter = T.filter (/= '\r')
@@ -483,22 +440,20 @@ plainToPara :: Block -> Block
 plainToPara (Plain ils) = Para ils
 plainToPara x = x
 
+
 -- | Like @compactify@, but acts on items of definition lists.
 compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
 compactifyDL items =
-  let defs = concatMap snd items
-  in  case reverse (concatMap B.toList defs) of
-           (Para x:xs)
-             | not (any isPara xs) ->
-                   let (t,ds) = last items
-                       lastDef = B.toList $ last ds
-                       ds' = init ds ++
-                             if null lastDef
-                                then [B.fromList lastDef]
-                                else [B.fromList $ init lastDef ++ [Plain x]]
-                    in init items ++ [(t, ds')]
-             | otherwise           -> items
-           _                       -> items
+  case reverse items of
+        ((t,ds):ys) ->
+           case reverse (map (reverse . B.toList) ds) of
+             ((Para x:xs) : zs) | not (any isPara xs) ->
+                  reverse ys ++
+                    [(t, reverse (map B.fromList zs) ++
+                         [B.fromList (reverse (Plain x:xs))])]
+             _     -> items
+        _          -> items
+
 
 -- | Combine a list of lines by adding hard linebreaks.
 combineLines :: [[Inline]] -> [Inline]
@@ -532,7 +487,7 @@ inlineListToIdentifier exts =
       | otherwise = T.dropWhile (not . isAlpha)
     filterAscii
       | extensionEnabled Ext_ascii_identifiers exts
-        = T.pack . mapMaybe toAsciiChar . T.unpack
+        = toAsciiText
       | otherwise = id
     toIdent
       | extensionEnabled Ext_gfm_auto_identifiers exts =
@@ -749,13 +704,6 @@ eastAsianLineBreakFilter = bottomUp go
         go xs
           = xs
 
-{-# DEPRECATED underlineSpan "Use Text.Pandoc.Builder.underline instead" #-}
--- | Builder for underline (deprecated).
--- This probably belongs in Builder.hs in pandoc-types.
--- Will be replaced once Underline is an element.
-underlineSpan :: Inlines -> Inlines
-underlineSpan = B.underline
-
 -- | Set of HTML elements that are represented as Span with a class equal as
 -- the element tag itself.
 htmlSpanLikeElements :: Set.Set T.Text
@@ -868,7 +816,7 @@ mapLeft = Bifunctor.first
 -- > collapseFilePath "parent/foo/.." ==  "parent"
 -- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
 collapseFilePath :: FilePath -> FilePath
-collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
+collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories
   where
     go rs "." = rs
     go r@(p:rs) ".." = case p of
@@ -905,7 +853,6 @@ filteredFilesFromArchive zf f =
     fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
     fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
 
-
 --
 -- IANA URIs
 --
@@ -1038,12 +985,16 @@ safeStrRead s = case reads s of
 --
 
 -- | Return appropriate user data directory for platform.  We use
--- XDG_DATA_HOME (or its default value), but fall back to the
--- legacy user data directory ($HOME/.pandoc on *nix) if this is
--- missing.
-defaultUserDataDirs :: IO [FilePath]
-defaultUserDataDirs = E.catch (do
+-- XDG_DATA_HOME (or its default value), but for backwards compatibility,
+-- we fall back to the legacy user data directory ($HOME/.pandoc on *nix)
+-- if the XDG_DATA_HOME is missing and this exists.  If neither directory
+-- is present, we return the XDG data directory.
+defaultUserDataDir :: IO FilePath
+defaultUserDataDir = do
   xdgDir <- getXdgDirectory XdgData "pandoc"
   legacyDir <- getAppUserDataDirectory "pandoc"
-  return $ ordNub [xdgDir, legacyDir])
- (\(_ :: E.SomeException) -> return [])
+  xdgExists <- doesDirectoryExist xdgDir
+  legacyDirExists <- doesDirectoryExist legacyDir
+  if not xdgExists && legacyDirExists
+     then return legacyDir
+     else return xdgDir
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
index 9ea0389c9..a3e550b1f 100644
--- a/src/Text/Pandoc/Slides.hs
+++ b/src/Text/Pandoc/Slides.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Slides
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs
new file mode 100644
index 000000000..5511ccfb8
--- /dev/null
+++ b/src/Text/Pandoc/Sources.hs
@@ -0,0 +1,195 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Sources
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Defines Sources object to be used as input to pandoc parsers and redefines Char
+parsers so they get source position information from it.
+-}
+
+module Text.Pandoc.Sources
+  ( Sources(..)
+  , ToSources(..)
+  , UpdateSourcePos(..)
+  , sourcesToText
+  , initialSourceName
+  , addToSources
+  , ensureFinalNewlines
+  , addToInput
+  , satisfy
+  , oneOf
+  , noneOf
+  , anyChar
+  , char
+  , string
+  , newline
+  , space
+  , spaces
+  , letter
+  , digit
+  , hexDigit
+  , alphaNum
+  )
+where
+import qualified Text.Parsec as P
+import Text.Parsec (Stream(..), ParsecT)
+import Text.Parsec.Pos as P
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
+import Data.String (IsString(..))
+import qualified Data.List.NonEmpty as NonEmpty
+
+-- | A list of inputs labeled with source positions.  It is assumed
+-- that the 'Text's have @\n@ line endings.
+newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
+  deriving (Show, Semigroup, Monoid)
+
+instance Monad m => Stream Sources m Char where
+  uncons (Sources []) = return Nothing
+  uncons (Sources ((pos,t):rest)) =
+    case T.uncons t of
+      Nothing -> uncons (Sources rest)
+      Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
+
+instance IsString Sources where
+  fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
+
+class ToSources a where
+  toSources :: a -> Sources
+
+instance ToSources Text where
+  toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
+
+instance ToSources [(FilePath, Text)] where
+  toSources = Sources
+            . map (\(fp,t) ->
+                    (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
+
+instance ToSources Sources where
+  toSources = id
+
+sourcesToText :: Sources -> Text
+sourcesToText (Sources xs) = mconcat $ map snd xs
+
+addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
+addToSources pos t = do
+  curpos <- P.getPosition
+  Sources xs <- P.getInput
+  let xs' = case xs of
+               [] -> []
+               ((_,t'):rest) -> (curpos,t'):rest
+  P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
+
+ensureFinalNewlines :: Int -- ^ number of trailing newlines
+                    -> Sources
+                    -> Sources
+ensureFinalNewlines n (Sources xs) =
+  case NonEmpty.nonEmpty xs of
+    Nothing -> Sources [(initialPos "", T.replicate n "\n")]
+    Just lst ->
+      case NonEmpty.last lst of
+        (spos, t) ->
+          case T.length (T.takeWhileEnd (=='\n') t) of
+            len | len >= n -> Sources xs
+                | otherwise -> Sources (NonEmpty.init lst ++
+                                        [(spos,
+                                          t <> T.replicate (n - len) "\n")])
+
+class UpdateSourcePos s c where
+  updateSourcePos :: SourcePos -> c -> s -> SourcePos
+
+instance UpdateSourcePos Text Char where
+   updateSourcePos pos c _ = updatePosChar pos c
+
+instance UpdateSourcePos Sources Char where
+   updateSourcePos pos c sources =
+     case sources of
+       Sources [] -> updatePosChar pos c
+       Sources ((_,t):(pos',_):_)
+         | T.null t  -> pos'
+       Sources _ ->
+           case c of
+             '\n' -> incSourceLine (setSourceColumn pos 1) 1
+             '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
+             _    -> incSourceColumn pos 1
+
+-- | Get name of first source in 'Sources'.
+initialSourceName :: Sources -> FilePath
+initialSourceName (Sources []) = ""
+initialSourceName (Sources ((pos,_):_)) = sourceName pos
+
+-- | Add some text to the beginning of the input sources.
+-- This simplifies code that expands macros.
+addToInput :: Monad m => Text -> ParsecT Sources u m ()
+addToInput t = do
+  Sources xs <- P.getInput
+  case xs of
+    [] -> P.setInput $ Sources [(initialPos "",t)]
+    (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
+
+-- We need to redefine the parsers in Text.Parsec.Char so that they
+-- update source positions properly from the Sources stream.
+
+satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => (Char -> Bool) -> ParsecT s u m Char
+satisfy f = P.tokenPrim show updateSourcePos matcher
+ where
+  matcher c = if f c then Just c else Nothing
+
+oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => [Char] -> ParsecT s u m Char
+oneOf cs = satisfy (`elem` cs)
+
+noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m Char
+noneOf cs = satisfy (`notElem` cs)
+
+anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+anyChar = satisfy (const True)
+
+char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+     => Char -> ParsecT s u m Char
+char c = satisfy (== c)
+
+string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m [Char]
+string = mapM char
+
+newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+newline = satisfy (== '\n')
+
+space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+space = satisfy isSpace
+
+spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m ()
+spaces = P.skipMany space P.<?> "white space"
+
+letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m Char
+letter = satisfy isLetter
+
+alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+alphaNum = satisfy isAlphaNum
+
+digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+digit = satisfy isDigit
+
+hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+hexDigit = satisfy isHexDigit
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 0c10b258d..7fd896641 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Templates
-   Copyright   : Copyright (C) 2009-2020 John MacFarlane
+   Copyright   : Copyright (C) 2009-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -34,6 +34,7 @@ import Control.Monad.Except (catchError, throwError)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Error
+import System.IO.Error (isDoesNotExistError)
 
 -- | Wrap a Monad in this if you want partials to
 -- be taken only from the default data files.
@@ -70,6 +71,9 @@ getTemplate tp = UTF8.toText <$>
              PandocResourceNotFound _ ->
                 -- see #5987 on reason for takeFileName
                 readDataFile ("templates" </> takeFileName tp)
+             PandocIOError _ ioe | isDoesNotExistError ioe ->
+                -- see #5987 on reason for takeFileName
+                readDataFile ("templates" </> takeFileName tp)
              _ -> throwError e))
 
 -- | Get default template for the specified writer.
diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs
index 200d756f6..0c7d7ab23 100644
--- a/src/Text/Pandoc/Translations.hs
+++ b/src/Text/Pandoc/Translations.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Translations
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 4621e1765..4d5921faf 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -1,8 +1,7 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.UTF8
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -40,67 +39,65 @@ where
 
 import qualified Data.ByteString.Char8 as B
 import qualified Data.ByteString.Lazy.Char8 as BL
+import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.IO as TIO
 import qualified Data.Text.Encoding as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Encoding as TL
 import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
 import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
                   putStrLn, readFile, writeFile)
-import qualified System.IO as IO
 
-readFile :: FilePath -> IO String
+readFile :: FilePath -> IO Text
 readFile f = do
   h <- openFile (encodePath f) ReadMode
   hGetContents h
 
-getContents :: IO String
+getContents :: IO Text
 getContents = hGetContents stdin
 
-writeFileWith :: Newline -> FilePath -> String -> IO ()
+writeFileWith :: Newline -> FilePath -> Text -> IO ()
 writeFileWith eol f s =
   withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s
 
-writeFile :: FilePath -> String -> IO ()
+writeFile :: FilePath -> Text -> IO ()
 writeFile = writeFileWith nativeNewline
 
-putStrWith :: Newline -> String -> IO ()
+putStrWith :: Newline -> Text -> IO ()
 putStrWith eol s = hPutStrWith eol stdout s
 
-putStr :: String -> IO ()
+putStr :: Text -> IO ()
 putStr = putStrWith nativeNewline
 
-putStrLnWith :: Newline -> String -> IO ()
+putStrLnWith :: Newline -> Text -> IO ()
 putStrLnWith eol s = hPutStrLnWith eol stdout s
 
-putStrLn :: String -> IO ()
+putStrLn :: Text -> IO ()
 putStrLn = putStrLnWith nativeNewline
 
-hPutStrWith :: Newline -> Handle -> String -> IO ()
+hPutStrWith :: Newline -> Handle -> Text -> IO ()
 hPutStrWith eol h s =
   hSetNewlineMode h (NewlineMode eol eol) >>
-  hSetEncoding h utf8 >> IO.hPutStr h s
+  hSetEncoding h utf8 >> TIO.hPutStr h s
 
-hPutStr :: Handle -> String -> IO ()
+hPutStr :: Handle -> Text -> IO ()
 hPutStr = hPutStrWith nativeNewline
 
-hPutStrLnWith :: Newline -> Handle -> String -> IO ()
+hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
 hPutStrLnWith eol h s =
   hSetNewlineMode h (NewlineMode eol eol) >>
-  hSetEncoding h utf8 >> IO.hPutStrLn h s
+  hSetEncoding h utf8 >> TIO.hPutStrLn h s
 
-hPutStrLn :: Handle -> String -> IO ()
+hPutStrLn :: Handle -> Text -> IO ()
 hPutStrLn = hPutStrLnWith nativeNewline
 
-hGetContents :: Handle -> IO String
-hGetContents = fmap toString . B.hGetContents
--- hGetContents h = hSetEncoding h utf8_bom
---                   >> hSetNewlineMode h universalNewlineMode
---                   >> IO.hGetContents h
+hGetContents :: Handle -> IO Text
+hGetContents = fmap toText . B.hGetContents
 
 -- | Convert UTF8-encoded ByteString to Text, also
 -- removing '\r' characters.
-toText :: B.ByteString -> T.Text
+toText :: B.ByteString -> Text
 toText = T.decodeUtf8 . filterCRs . dropBOM
   where dropBOM bs =
          if "\xEF\xBB\xBF" `B.isPrefixOf` bs
@@ -128,7 +125,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM
 toStringLazy :: BL.ByteString -> String
 toStringLazy = TL.unpack . toTextLazy
 
-fromText :: T.Text -> B.ByteString
+fromText :: Text -> B.ByteString
 fromText = T.encodeUtf8
 
 fromTextLazy :: TL.Text -> BL.ByteString
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
index ca0df2d0b..12579be90 100644
--- a/src/Text/Pandoc/UUID.hs
+++ b/src/Text/Pandoc/UUID.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.UUID
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index 0654c2d85..c348477c2 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -21,6 +21,8 @@ module Text.Pandoc.Writers
     , writeAsciiDoc
     , writeAsciiDoctor
     , writeBeamer
+    , writeBibTeX
+    , writeBibLaTeX
     , writeCommonMark
     , writeConTeXt
     , writeCustom
@@ -79,12 +81,14 @@ import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
 import Data.Text (Text)
 import qualified Data.Text as T
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Class
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Error
 import Text.Pandoc.Writers.AsciiDoc
+import Text.Pandoc.Writers.BibTeX
 import Text.Pandoc.Writers.CommonMark
 import Text.Pandoc.Writers.ConTeXt
 import Text.Pandoc.Writers.CslJson
@@ -119,7 +123,6 @@ import Text.Pandoc.Writers.Texinfo
 import Text.Pandoc.Writers.Textile
 import Text.Pandoc.Writers.XWiki
 import Text.Pandoc.Writers.ZimWiki
-import Text.Parsec.Error
 
 data Writer m = TextWriter (WriterOptions -> Pandoc -> m Text)
               | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString)
@@ -185,14 +188,16 @@ writers = [
   ,("tei"          , TextWriter writeTEI)
   ,("muse"         , TextWriter writeMuse)
   ,("csljson"      , TextWriter writeCslJson)
+  ,("bibtex"       , TextWriter writeBibTeX)
+  ,("biblatex"     , TextWriter writeBibLaTeX)
   ]
 
 -- | Retrieve writer, extensions based on formatSpec (format+extensions).
 getWriter :: PandocMonad m => Text -> m (Writer m, Extensions)
 getWriter s =
   case parseFormatSpec s of
-        Left e  -> throwError $ PandocAppError
-                    $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e]
+        Left e  -> throwError $ PandocAppError $
+                    "Error parsing writer format " <> tshow s <> ": " <> tshow e
         Right (writerName, extsToEnable, extsToDisable) ->
            case lookup writerName writers of
                    Nothing  -> throwError $
diff --git a/src/Text/Pandoc/Writers/AnnotatedTable.hs b/src/Text/Pandoc/Writers/AnnotatedTable.hs
index 48c9d61f2..3f69496a9 100644
--- a/src/Text/Pandoc/Writers/AnnotatedTable.hs
+++ b/src/Text/Pandoc/Writers/AnnotatedTable.hs
@@ -1,8 +1,12 @@
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE DeriveDataTypeable #-}
 {-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE TupleSections #-}
+{-# LANGUAGE UndecidableInstances #-}
 
 {- |
    Module      : Text.Pandoc.Writers.AnnotatedTable
@@ -45,6 +49,7 @@ import           Data.Generics                  ( Data
 import           Data.List.NonEmpty             ( NonEmpty(..) )
 import           GHC.Generics                   ( Generic )
 import qualified Text.Pandoc.Builder           as B
+import           Text.Pandoc.Walk               ( Walkable (..) )
 
 -- | An annotated table type, corresponding to the Pandoc 'B.Table'
 -- constructor and the HTML @\<table\>@ element. It records the data
@@ -298,3 +303,21 @@ fromBodyRow (BodyRow attr _ rh rb) =
 
 fromCell :: Cell -> B.Cell
 fromCell (Cell _ _ c) = c
+
+--
+-- Instances
+--
+instance Walkable a B.Cell => Walkable a Cell where
+  walkM f (Cell colspecs colnum cell) =
+    Cell colspecs colnum <$> walkM f cell
+  query f (Cell _colspecs _colnum cell) = query f cell
+
+instance Walkable a B.Cell => Walkable a HeaderRow where
+  walkM f (HeaderRow attr rownum cells) =
+    HeaderRow attr rownum <$> walkM f cells
+  query f (HeaderRow _attr _rownum cells) = query f cells
+
+instance Walkable a B.Cell => Walkable a TableHead where
+  walkM f (TableHead attr rows) =
+    TableHead attr <$> walkM f rows
+  query f (TableHead _attr rows) = query f rows
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index e742577b6..ab7e5f1a9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.AsciiDoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -22,6 +22,7 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where
 import Control.Monad.State.Strict
 import Data.Char (isPunctuation, isSpace)
 import Data.List (intercalate, intersperse)
+import Data.List.NonEmpty (NonEmpty(..))
 import Data.Maybe (fromMaybe, isJust)
 import qualified Data.Set as Set
 import qualified Data.Text as T
@@ -37,6 +38,7 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Templates (renderTemplate)
 import Text.Pandoc.Writers.Shared
 
+
 data WriterState = WriterState { defListMarker       :: Text
                                , orderedListLevel    :: Int
                                , bulletListLevel     :: Int
@@ -45,6 +47,10 @@ data WriterState = WriterState { defListMarker       :: Text
                                , asciidoctorVariant  :: Bool
                                , inList              :: Bool
                                , hasMath             :: Bool
+                               -- |0 is no table
+                               -- 1 is top level table
+                               -- 2 is a table in a table
+                               , tableNestingLevel   :: Int
                                }
 
 defaultWriterState :: WriterState
@@ -56,6 +62,7 @@ defaultWriterState = WriterState { defListMarker      = "::"
                                  , asciidoctorVariant = False
                                  , inList             = False
                                  , hasMath            = False
+                                 , tableNestingLevel  = 0
                                  }
 
 -- | Convert Pandoc to AsciiDoc.
@@ -98,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
 
 -- | Escape special characters for AsciiDoc.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing escs
-  where escs = backslashEscapes "{"
+escapeString t
+  | T.any (== '{') t = T.concatMap escChar t
+  | otherwise        = t
+  where escChar '{' = "\\{"
+        escChar c   = T.singleton c
 
 -- | Ordered list start parser for use in Para below.
 olMarker :: Parser Text ParserState Char
@@ -194,7 +204,7 @@ blockToAsciiDoc opts (BlockQuote blocks) = do
                      else contents
   let bar = text "____"
   return $ bar $$ chomp contents' $$ bar <> blankline
-blockToAsciiDoc opts (Table _ blkCapt specs thead tbody tfoot) = do
+blockToAsciiDoc opts block@(Table _ blkCapt specs thead tbody tfoot) = do
   let (caption, aligns, widths, headers, rows) =
         toLegacyTable blkCapt specs thead tbody tfoot
   caption' <- inlineListToAsciiDoc opts caption
@@ -236,23 +246,42 @@ blockToAsciiDoc opts (Table _ blkCapt specs thead tbody tfoot) = do
              $ zipWith colspec aligns widths')
          <> text ","
          <> headerspec <> text "]"
+         
+  -- construct cells and recurse in case of nested tables
+  parentTableLevel <- gets tableNestingLevel
+  let currentNestingLevel = parentTableLevel + 1
+  
+  modify $ \st -> st{ tableNestingLevel = currentNestingLevel }
+  
+  let separator = text (if parentTableLevel == 0
+                          then "|"  -- top level separator
+                          else "!") -- nested separator
+
   let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x]
-                              return $ text "|" <> chomp d
+                              return $ separator <> chomp d
       makeCell [Para x]  = makeCell [Plain x]
-      makeCell []        = return $ text "|"
-      makeCell bs        = do d <- blockListToAsciiDoc opts bs
-                              return $ text "a|" $$ d
+      makeCell []        = return separator
+      makeCell bs        = if currentNestingLevel == 2
+                             then do
+                               --asciidoc only supports nesting once
+                               report $ BlockNotRendered block
+                               return separator
+                             else do
+                               d <- blockListToAsciiDoc opts bs
+                               return $ (text "a" <> separator) $$ d
+
   let makeRow cells = hsep `fmap` mapM makeCell cells
   rows' <- mapM makeRow rows
   head' <- makeRow headers
+  modify $ \st -> st{ tableNestingLevel = parentTableLevel }
   let head'' = if all null headers then empty else head'
   let colwidth = if writerWrapText opts == WrapAuto
                     then writerColumns opts
                     else 100000
-  let maxwidth = maximum $ map offset (head':rows')
+  let maxwidth = maximum $ fmap offset (head' :| rows')
   let body = if maxwidth > colwidth then vsep rows' else vcat rows'
-  let border = text "|==="
-  return $
+  let border = separator <> text "==="
+  return $ 
     caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline
 blockToAsciiDoc opts (BulletList items) = do
   inlist <- gets inList
@@ -470,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do
         | otherwise     -> [Str "``"] ++ lst ++ [Str "''"]
 inlineToAsciiDoc _ (Code _ str) = do
   isAsciidoctor <- gets asciidoctorVariant
-  let contents = literal (escapeStringUsing (backslashEscapes "`") str)
+  let escChar '`' = "\\'"
+      escChar c   = T.singleton c
+  let contents = literal (T.concatMap escChar str)
   return $
     if isAsciidoctor
        then text "`+" <> contents <> "+`"
diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs
new file mode 100644
index 000000000..95de6b71f
--- /dev/null
+++ b/src/Text/Pandoc/Writers/BibTeX.hs
@@ -0,0 +1,61 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.BibTeX
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Writes a BibTeX or BibLaTeX bibliographies based on the
+'references' metadata in a Pandoc document.
+-}
+module Text.Pandoc.Writers.BibTeX
+  ( writeBibTeX
+  , writeBibLaTeX
+  )
+where
+
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Data.Text (Text)
+import Data.Maybe (mapMaybe)
+import Citeproc (parseLang)
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Citeproc.BibTeX as BibTeX
+import Text.Pandoc.Citeproc.MetaValue (metaValueToReference)
+import Text.Pandoc.Writers.Shared (lookupMetaString, defField,
+                                    addVariablesToContext)
+import Text.DocLayout (render, vcat)
+import Text.DocTemplates (Context(..))
+import Text.Pandoc.Templates (renderTemplate)
+
+-- | Write BibTeX based on the references metadata from a Pandoc document.
+writeBibTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeBibTeX = writeBibTeX' BibTeX.Bibtex
+
+-- | Write BibLaTeX based on the references metadata from a Pandoc document.
+writeBibLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeBibLaTeX = writeBibTeX' BibTeX.Biblatex
+
+writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text
+writeBibTeX' variant opts (Pandoc meta _) = do
+  let mblang = case lookupMetaString "lang" meta of
+                 "" -> Nothing
+                 t  -> either (const Nothing) Just $ parseLang t
+  let refs = case lookupMeta "references" meta of
+               Just (MetaList xs) -> mapMaybe metaValueToReference xs
+               _ -> []
+  let main = vcat $ map (BibTeX.writeBibtexString opts variant mblang) refs
+  let context  = defField "body" main
+                 $ addVariablesToContext opts (mempty :: Context Text)
+  let colwidth = if writerWrapText opts == WrapAuto
+                    then Just $ writerColumns opts
+                    else Nothing
+  return $ render colwidth $
+    case writerTemplate opts of
+      Nothing  -> main
+      Just tpl -> renderTemplate tpl context
+
+
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 66ded218f..8733b7149 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Writers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 0a6313513..3cafcefba 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.ConTeXt
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -20,7 +20,7 @@ import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import Text.Pandoc.BCP47
+import Text.Collate.Lang (Lang(..))
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -177,8 +177,12 @@ blockToConTeXt (Para lst) = do
   contents <- inlineListToConTeXt lst
   return $ contents <> blankline
 blockToConTeXt (LineBlock lns) = do
-  doclines <- nowrap . vcat <$> mapM inlineListToConTeXt lns
-  return $ "\\startlines" $$ doclines $$ "\\stoplines" <> blankline
+  let emptyToBlankline doc = if isEmpty doc
+                             then blankline
+                             else doc
+  doclines <- mapM inlineListToConTeXt lns
+  let contextLines = vcat . map emptyToBlankline $ doclines
+  return $ "\\startlines" $$ contextLines $$ "\\stoplines" <> blankline
 blockToConTeXt (BlockQuote lst) = do
   contents <- blockListToConTeXt lst
   return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline
@@ -228,13 +232,7 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
                         Period       -> "stopper=."
                         OneParen     -> "stopper=)"
                         TwoParens    -> "left=(,stopper=)"
-    let width = maximum $ map T.length $ take (length contents)
-                          (orderedListMarkers (start, style', delim))
-    let width' = (toEnum width + 1) / 2
-    let width'' = if width' > (1.5 :: Double)
-                     then "width=" <> tshow width' <> "em"
-                     else ""
-    let specs2Items = filter (not . T.null) [start', delim', width'']
+    let specs2Items = filter (not . T.null) [start', delim']
     let specs2 = if null specs2Items
                     then ""
                     else "[" <> T.intercalate "," specs2Items <> "]"
@@ -248,8 +246,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
                           UpperAlpha   -> 'A') :
                        if isTightList lst then ",packed]" else "]"
     let specs = T.pack style'' <> specs2
-    return $ "\\startitemize" <> literal specs $$ vcat contents $$
-             "\\stopitemize" <> blankline
+    return $ "\\startenumerate" <> literal specs $$ vcat contents $$
+             "\\stopenumerate" <> blankline
 blockToConTeXt (DefinitionList lst) =
   liftM vcat $ mapM defListItemToConTeXt lst
 blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
@@ -432,9 +430,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do
   put $ st {stNextRef = next + 1}
   let ref = "url" <> tshow next
   contents <-  inlineListToConTeXt txt
+  let escChar '#' = "\\#"
+      escChar '%' = "\\%"
+      escChar c   = T.singleton c
+  let escContextURL = T.concatMap escChar
   return $ "\\useURL"
            <> brackets (literal ref)
-           <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
+           <> brackets (literal $ escContextURL src)
            <> (if isAutolink
                   then empty
                   else brackets empty <> brackets contents)
@@ -477,7 +479,7 @@ inlineToConTeXt (Note contents) = do
               then literal "\\footnote{" <> nest 2 (chomp contents') <> char '}'
               else literal "\\startbuffer " <> nest 2 (chomp contents') <>
                    literal "\\stopbuffer\\footnote{\\getbuffer}"
-inlineToConTeXt (Span (_,_,kvs) ils) = do
+inlineToConTeXt (Span (ident,_,kvs) ils) = do
   mblang <- fromBCP47 (lookup "lang" kvs)
   let wrapDir txt = case lookup "dir" kvs of
                       Just "rtl" -> braces $ "\\righttoleft " <> txt
@@ -487,7 +489,11 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do
                        Just lng -> braces ("\\language" <>
                                            brackets (literal lng) <> txt)
                        Nothing -> txt
-  wrapLang . wrapDir <$> inlineListToConTeXt ils
+      addReference =
+        if T.null ident
+        then id
+        else (("\\reference" <> brackets (literal ident) <> "{}") <>)
+  addReference . wrapLang . wrapDir <$> inlineListToConTeXt ils
 
 -- | Craft the section header, inserting the section reference, if supplied.
 sectionHeader :: PandocMonad m
@@ -549,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs
 -- https://tools.ietf.org/html/bcp47#section-2.1
 -- http://wiki.contextgarden.net/Language_Codes
 fromBCP47' :: Maybe Lang -> Maybe Text
-fromBCP47' (Just (Lang "ar" _ "SY" _)     ) = Just "ar-sy"
-fromBCP47' (Just (Lang "ar" _ "IQ" _)     ) = Just "ar-iq"
-fromBCP47' (Just (Lang "ar" _ "JO" _)     ) = Just "ar-jo"
-fromBCP47' (Just (Lang "ar" _ "LB" _)     ) = Just "ar-lb"
-fromBCP47' (Just (Lang "ar" _ "DZ" _)     ) = Just "ar-dz"
-fromBCP47' (Just (Lang "ar" _ "MA" _)     ) = Just "ar-ma"
-fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo"
-fromBCP47' (Just (Lang "de" _ "DE" _)     ) = Just "de-de"
-fromBCP47' (Just (Lang "de" _ "AT" _)     ) = Just "de-at"
-fromBCP47' (Just (Lang "de" _ "CH" _)     ) = Just "de-ch"
-fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr"
-fromBCP47' (Just (Lang "en" _ "US" _)     ) = Just "en-us"
-fromBCP47' (Just (Lang "en" _ "GB" _)     ) = Just "en-gb"
-fromBCP47' (Just (Lang "grc"_  _ _)       ) = Just "agr"
-fromBCP47' (Just (Lang "el" _ _ _)        ) = Just "gr"
-fromBCP47' (Just (Lang "eu" _ _ _)        ) = Just "ba"
-fromBCP47' (Just (Lang "he" _ _ _)        ) = Just "il"
-fromBCP47' (Just (Lang "jp" _ _ _)        ) = Just "ja"
-fromBCP47' (Just (Lang "uk" _ _ _)        ) = Just "ua"
-fromBCP47' (Just (Lang "vi" _ _ _)        ) = Just "vn"
-fromBCP47' (Just (Lang "zh" _ _ _)        ) = Just "cn"
-fromBCP47' (Just (Lang l _ _ _)           ) = Just l
-fromBCP47' Nothing                          = Nothing
+fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy"
+fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq"
+fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo"
+fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb"
+fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz"
+fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma"
+fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _))    = Just "deo"
+fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de"
+fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at"
+fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch"
+fromBCP47' (Just (Lang "el" _ _ ["poly"] _ _))    = Just "agr"
+fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us"
+fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb"
+fromBCP47' (Just (Lang "grc"_ _ _ _ _))           = Just "agr"
+fromBCP47' (Just (Lang "el" _ _ _ _ _))           = Just "gr"
+fromBCP47' (Just (Lang "eu" _ _ _ _ _))           = Just "ba"
+fromBCP47' (Just (Lang "he" _ _ _ _ _))           = Just "il"
+fromBCP47' (Just (Lang "jp" _ _ _ _ _))           = Just "ja"
+fromBCP47' (Just (Lang "uk" _ _ _ _ _))           = Just "ua"
+fromBCP47' (Just (Lang "vi" _ _ _ _ _))           = Just "vn"
+fromBCP47' (Just (Lang "zh" _ _ _ _ _))           = Just "cn"
+fromBCP47' (Just (Lang l _ _ _ _ _))              = Just l
+fromBCP47' Nothing                                = Nothing
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index 08310de65..395335667 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -34,23 +34,24 @@ import Control.Monad.Identity
 import Citeproc.Locale (getLocale)
 import Citeproc.CslJson
 import Text.Pandoc.Options (WriterOptions)
-import Data.Maybe (mapMaybe)
+import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Aeson.Encode.Pretty         (Config (..), Indent (Spaces),
                                          NumberFormat (Generic),
                                          defConfig, encodePretty')
 
 writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeCslJson _opts (Pandoc meta _) = do
-  let lang = maybe (Lang "en" (Just "US")) parseLang
-               (lookupMeta "lang" meta >>= metaValueToText)
+  let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] [])
+               (lookupMeta "lang" meta >>= metaValueToText >>=
+                  either (const Nothing) Just . parseLang)
   locale <- case getLocale lang of
                Left e  -> throwError $ PandocCiteprocError e
                Right l -> return l
-  case lookupMeta "references" meta of
-    Just (MetaList rs) -> return $ (UTF8.toText $
-         toCslJson locale (mapMaybe metaValueToReference rs))
-          <> "\n"
-    _ -> throwError $ PandocAppError "No references field"
+  let rs = case lookupMeta "references" meta of
+             Just (MetaList xs) -> xs
+             _ -> []
+  return $ UTF8.toText
+           (toCslJson locale (mapMaybe metaValueToReference rs)) <> "\n"
 
 fromInlines :: [Inline] -> CslJson Text
 fromInlines = foldMap fromInline . B.fromList
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 8da611b61..58c4bb5be 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Writers.Custom
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 408d8cc0c..33a6f5f0c 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -15,6 +15,7 @@ Conversion of 'Pandoc' documents to Docbook XML.
 module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where
 import Control.Monad.Reader
 import Data.Generics (everywhere, mkT)
+import Data.Maybe (isNothing)
 import Data.Monoid (Any (..))
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -40,6 +41,25 @@ data DocBookVersion = DocBook4 | DocBook5
 
 type DB = ReaderT DocBookVersion
 
+-- | Get level of the top-level headers based on the configured top-level division.
+-- The header level can then be used to determine appropriate DocBook element
+-- for each subdivision associated with a header.
+-- The numbering here follows LaTeX's internal numbering
+getStartLvl :: WriterOptions -> Int
+getStartLvl opts =
+  case writerTopLevelDivision opts of
+       TopLevelPart    -> -1
+       TopLevelChapter -> 0
+       TopLevelSection -> 1
+       TopLevelDefault -> 1
+
+-- | Get correct name for the id attribute based on DocBook version.
+-- DocBook 4 used custom id attribute but DocBook 5 adopted the xml:id specification.
+-- https://www.w3.org/TR/xml-id/
+idName :: DocBookVersion -> Text
+idName DocBook5 = "xml:id"
+idName DocBook4 = "id"
+
 -- | Convert list of authors to a docbook <author> section
 authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines
 authorToDocbook opts name' = do
@@ -79,12 +99,7 @@ writeDocbook opts (Pandoc meta blocks) = do
   let colwidth = if writerWrapText opts == WrapAuto
                     then Just $ writerColumns opts
                     else Nothing
-  -- The numbering here follows LaTeX's internal numbering
-  let startLvl = case writerTopLevelDivision opts of
-                   TopLevelPart    -> -1
-                   TopLevelChapter -> 0
-                   TopLevelSection -> 1
-                   TopLevelDefault -> 1
+  let startLvl = getStartLvl opts
   let fromBlocks = blocksToDocbook opts .
                    makeSections False (Just startLvl)
   auths' <- mapM (authorToDocbook opts) $ docAuthors meta
@@ -153,7 +168,7 @@ blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m (Doc Text)
 blockToDocbook _ Null = return empty
 -- Add ids to paragraphs in divs with ids - this is needed for
 -- pandoc-citeproc to get link anchors in bibliographies:
-blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do
+blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) = do
   version <- ask
   -- Docbook doesn't allow sections with no content, so insert some if needed
   let bs = if null xs
@@ -166,28 +181,52 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do
                                               then "section"
                                               else "sect" <> tshow n
                  _                    -> "simplesect"
-      idName = if version == DocBook5
-                 then "xml:id"
-                 else "id"
-      idAttr = [(idName, writerIdentifierPrefix opts <> id') | not (T.null id')]
-      nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
-                                      else []
-      attribs = nsAttr <> idAttr
+      idAttr = [(idName version, writerIdentifierPrefix opts <> id') | not (T.null id')]
+      -- We want to add namespaces to the root (top-level) element.
+      nsAttr = if version == DocBook5 && lvl == getStartLvl opts && isNothing (writerTemplate opts)
+      -- Though, DocBook 4 does not support namespaces and
+      -- standalone documents will include them in the template.
+                 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
+                 else []
+      
+      -- Populate miscAttr with Header.Attr.attributes, filtering out non-valid DocBook section attributes, id, and xml:id
+      miscAttr = filter (isSectionAttr version) attrs
+      attribs = nsAttr <> idAttr <> miscAttr
   title' <- inlinesToDocbook opts ils
   contents <- blocksToDocbook opts bs
   return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents
-blockToDocbook opts (Div (ident,_,_) [Para lst]) =
-  let attribs = [("id", ident) | not (T.null ident)] in
-  if hasLineBreaks lst
-     then flush . nowrap . inTags False "literallayout" attribs
-                         <$> inlinesToDocbook opts lst
-     else inTags True "para" attribs <$> inlinesToDocbook opts lst
-blockToDocbook opts (Div (ident,_,_) bs) = do
-  contents <- blocksToDocbook opts (map plainToPara bs)
-  return $
-    (if T.null ident
-        then mempty
-        else selfClosingTag "anchor" [("id", ident)]) $$ contents
+blockToDocbook opts (Div (ident,classes,_) bs) = do
+  version <- ask
+  let identAttribs = [(idName version, ident) | not (T.null ident)]
+      admonitions = ["caution","danger","important","note","tip","warning"]
+  case classes of
+    (l:_) | l `elem` admonitions -> do
+        let (mTitleBs, bodyBs) =
+                case bs of
+                  -- Matches AST produced by the DocBook reader → Markdown writer → Markdown reader chain.
+                  (Div (_,["title"],_) [Para ts] : rest) -> (Just (inlinesToDocbook opts ts), rest)
+                  -- Matches AST produced by the Docbook reader.
+                  (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocbook opts ts), rest)
+                  _ -> (Nothing, bs)
+        admonitionTitle <- case mTitleBs of
+                              Nothing -> return mempty
+                              -- id will be attached to the admonition so let’s pass empty identAttrs.
+                              Just titleBs -> inTags False "title" [] <$> titleBs
+        admonitionBody <- handleDivBody [] bodyBs
+        return (inTags True l identAttribs (admonitionTitle $$ admonitionBody))
+    _ -> handleDivBody identAttribs bs
+  where
+    handleDivBody identAttribs [Para lst] =
+      if hasLineBreaks lst
+         then flush . nowrap . inTags False "literallayout" identAttribs
+                             <$> inlinesToDocbook opts lst
+         else inTags True "para" identAttribs <$> inlinesToDocbook opts lst
+    handleDivBody identAttribs bodyBs = do
+      contents <- blocksToDocbook opts (map plainToPara bodyBs)
+      return $
+        (if null identAttribs
+            then mempty
+            else selfClosingTag "anchor" identAttribs) $$ contents
 blockToDocbook _ h@Header{} = do
   -- should be handled by Div section above, except inside lists/blockquotes
   report $ BlockNotRendered h
@@ -213,17 +252,18 @@ blockToDocbook opts (LineBlock lns) =
   blockToDocbook opts $ linesToPara lns
 blockToDocbook opts (BlockQuote blocks) =
   inTagsIndented "blockquote" <$> blocksToDocbook opts blocks
-blockToDocbook _ (CodeBlock (_,classes,_) str) = return $
+blockToDocbook opts (CodeBlock (_,classes,_) str) = return $
   literal ("<programlisting" <> lang <> ">") <> cr <>
      flush (literal (escapeStringForXML str) <> cr <> literal "</programlisting>")
     where lang  = if null langs
                      then ""
                      else " language=\"" <> escapeStringForXML (head langs) <>
                           "\""
-          isLang l    = T.toLower l `elem` map T.toLower languages
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 blockToDocbook opts (BulletList lst) = do
   let attribs = [("spacing", "compact") | isTightList lst]
@@ -341,11 +381,12 @@ inlineToDocbook opts (Quoted _ lst) =
   inTagsSimple "quote" <$> inlinesToDocbook opts lst
 inlineToDocbook opts (Cite _ lst) =
   inlinesToDocbook opts lst
-inlineToDocbook opts (Span (ident,_,_) ils) =
+inlineToDocbook opts (Span (ident,_,_) ils) = do
+  version <- ask
   ((if T.null ident
        then mempty
-       else selfClosingTag "anchor" [("id", ident)]) <>) <$>
-  inlinesToDocbook opts ils
+       else selfClosingTag "anchor" [(idName version, ident)]) <>) <$>
+    inlinesToDocbook opts ils
 inlineToDocbook _ (Code _ str) =
   return $ inTagsSimple "literal" $ literal (escapeStringForXML str)
 inlineToDocbook opts (Math t str)
@@ -413,3 +454,43 @@ idAndRole (id',cls,_) = ident <> role
   where
     ident = [("id", id') | not (T.null id')]
     role  = [("role", T.unwords cls) | not (null cls)]
+
+isSectionAttr :: DocBookVersion -> (Text, Text) -> Bool
+isSectionAttr _ ("label",_) = True
+isSectionAttr _ ("status",_) = True
+isSectionAttr DocBook5 ("annotations",_) = True
+isSectionAttr DocBook5 ("dir","ltr") = True
+isSectionAttr DocBook5 ("dir","rtl") = True
+isSectionAttr DocBook5 ("dir","lro") = True
+isSectionAttr DocBook5 ("dir","rlo") = True
+isSectionAttr _ ("remap",_) = True
+isSectionAttr _ ("revisionflag","changed") = True
+isSectionAttr _ ("revisionflag","added") = True
+isSectionAttr _ ("revisionflag","deleted") = True
+isSectionAttr _ ("revisionflag","off") = True
+isSectionAttr _ ("role",_) = True
+isSectionAttr DocBook5 ("version",_) = True
+isSectionAttr DocBook5 ("xml:base",_) = True
+isSectionAttr DocBook5 ("xml:lang",_) = True
+isSectionAttr _ ("xreflabel",_) = True
+isSectionAttr DocBook5 ("linkend",_) = True
+isSectionAttr DocBook5 ("linkends",_) = True
+isSectionAttr DocBook5 ("xlink:actuate",_) = True
+isSectionAttr DocBook5 ("xlink:arcrole",_) = True
+isSectionAttr DocBook5 ("xlink:from",_) = True
+isSectionAttr DocBook5 ("xlink:href",_) = True
+isSectionAttr DocBook5 ("xlink:label",_) = True
+isSectionAttr DocBook5 ("xlink:role",_) = True
+isSectionAttr DocBook5 ("xlink:show",_) = True
+isSectionAttr DocBook5 ("xlink:title",_) = True
+isSectionAttr DocBook5 ("xlink:to",_) = True
+isSectionAttr DocBook5 ("xlink:type",_) = True
+isSectionAttr DocBook4 ("arch",_) = True
+isSectionAttr DocBook4 ("condition",_) = True
+isSectionAttr DocBook4 ("conformance",_) = True
+isSectionAttr DocBook4 ("lang",_) = True
+isSectionAttr DocBook4 ("os",_) = True
+isSectionAttr DocBook4 ("revision",_) = True
+isSectionAttr DocBook4 ("security",_) = True
+isSectionAttr DocBook4 ("vendor",_) = True
+isSectionAttr _ (_,_) = False
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index a380fd4fa..a3c4b6be1 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -7,7 +7,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.Docx
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -22,7 +22,6 @@ import Control.Applicative ((<|>))
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad.Reader
 import Control.Monad.State.Strict
-import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
 import Data.Char (isSpace, isLetter)
 import Data.List (intercalate, isPrefixOf, isSuffixOf)
@@ -31,13 +30,14 @@ import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
 import qualified Data.Set as Set
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import System.Random (randomRs, mkStdGen)
-import Text.Pandoc.BCP47 (getLang, renderLang)
-import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
+import Text.Collate.Lang (renderLang)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang, translateTerm)
+import qualified Text.Pandoc.Translations as Term
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
 import Text.Pandoc.UTF8 (fromTextLazy)
@@ -47,135 +47,37 @@ import Text.Pandoc.Highlighting (highlight)
 import Text.Pandoc.Error
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
-import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType,
-                         getMimeTypeDef)
+import Text.Pandoc.MIME (extensionFromMimeType, getMimeType, getMimeTypeDef)
 import Text.Pandoc.Options
 import Text.Pandoc.Writers.Docx.StyleMap
+import Text.Pandoc.Writers.Docx.Table
+import Text.Pandoc.Writers.Docx.Types
 import Text.Pandoc.Shared
 import Text.Pandoc.Walk
+import qualified Text.Pandoc.Writers.GridTable as Grid
 import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
-import Text.Printf (printf)
 import Text.TeXMath
-import Text.XML.Light as XML
-import Text.XML.Light.Cursor as XMLC
 import Text.Pandoc.Writers.OOXML
-
-data ListMarker = NoMarker
-                | BulletMarker
-                | NumberMarker ListNumberStyle ListNumberDelim Int
-                deriving (Show, Read, Eq, Ord)
-
-listMarkerToId :: ListMarker -> String
-listMarkerToId NoMarker = "990"
-listMarkerToId BulletMarker = "991"
-listMarkerToId (NumberMarker sty delim n) =
-  '9' : '9' : styNum : delimNum : show n
-  where styNum = case sty of
-                      DefaultStyle -> '2'
-                      Example      -> '3'
-                      Decimal      -> '4'
-                      LowerRoman   -> '5'
-                      UpperRoman   -> '6'
-                      LowerAlpha   -> '7'
-                      UpperAlpha   -> '8'
-        delimNum = case delim of
-                      DefaultDelim -> '0'
-                      Period       -> '1'
-                      OneParen     -> '2'
-                      TwoParens    -> '3'
-
-data EnvProps = EnvProps{ styleElement  :: Maybe Element
-                        , otherElements :: [Element]
-                        }
-
-instance Semigroup EnvProps where
-  EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es')
-
-instance Monoid EnvProps where
-  mempty = EnvProps Nothing []
-  mappend = (<>)
+import Text.Pandoc.XML.Light as XML
+import Data.Generics (mkT, everywhere)
 
 squashProps :: EnvProps -> [Element]
 squashProps (EnvProps Nothing es) = es
 squashProps (EnvProps (Just e) es) = e : es
 
-data WriterEnv = WriterEnv{ envTextProperties :: EnvProps
-                          , envParaProperties :: EnvProps
-                          , envRTL            :: Bool
-                          , envListLevel      :: Int
-                          , envListNumId      :: Int
-                          , envInDel          :: Bool
-                          , envChangesAuthor  :: T.Text
-                          , envChangesDate    :: T.Text
-                          , envPrintWidth     :: Integer
-                          }
-
-defaultWriterEnv :: WriterEnv
-defaultWriterEnv = WriterEnv{ envTextProperties = mempty
-                            , envParaProperties = mempty
-                            , envRTL = False
-                            , envListLevel = -1
-                            , envListNumId = 1
-                            , envInDel = False
-                            , envChangesAuthor  = "unknown"
-                            , envChangesDate    = "1969-12-31T19:00:00Z"
-                            , envPrintWidth     = 1
-                            }
-
-data WriterState = WriterState{
-         stFootnotes      :: [Element]
-       , stComments       :: [([(T.Text, T.Text)], [Inline])]
-       , stSectionIds     :: Set.Set T.Text
-       , stExternalLinks  :: M.Map String String
-       , stImages         :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
-       , stLists          :: [ListMarker]
-       , stInsId          :: Int
-       , stDelId          :: Int
-       , stStyleMaps      :: StyleMaps
-       , stFirstPara      :: Bool
-       , stInTable        :: Bool
-       , stInList         :: Bool
-       , stTocTitle       :: [Inline]
-       , stDynamicParaProps :: Set.Set ParaStyleName
-       , stDynamicTextProps :: Set.Set CharStyleName
-       , stCurId          :: Int
-       }
-
-defaultWriterState :: WriterState
-defaultWriterState = WriterState{
-        stFootnotes      = defaultFootnotes
-      , stComments       = []
-      , stSectionIds     = Set.empty
-      , stExternalLinks  = M.empty
-      , stImages         = M.empty
-      , stLists          = [NoMarker]
-      , stInsId          = 1
-      , stDelId          = 1
-      , stStyleMaps      = StyleMaps M.empty M.empty
-      , stFirstPara      = False
-      , stInTable        = False
-      , stInList         = False
-      , stTocTitle       = [Str "Table of Contents"]
-      , stDynamicParaProps = Set.empty
-      , stDynamicTextProps = Set.empty
-      , stCurId          = 20
-      }
-
-type WS m = ReaderT WriterEnv (StateT WriterState m)
-
-renumIdMap :: Int -> [Element] -> M.Map String String
+renumIdMap :: Int -> [Element] -> M.Map Text Text
 renumIdMap _ [] = M.empty
 renumIdMap n (e:es)
   | Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
-      M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es)
+      M.insert oldId ("rId" <> tshow n) (renumIdMap (n+1) es)
   | otherwise = renumIdMap n es
 
-replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
+replaceAttr :: (QName -> Bool) -> Text -> [XML.Attr] -> [XML.Attr]
 replaceAttr f val = map $
     \a -> if f (attrKey a) then XML.Attr (attrKey a) val else a
 
-renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element
+renumId :: (QName -> Bool) -> M.Map Text Text -> Element -> Element
 renumId f renumMap e
   | Just oldId <- findAttrBy f e
   , Just newId <- M.lookup oldId renumMap =
@@ -184,18 +86,12 @@ renumId f renumMap e
      e { elAttribs = attrs' }
   | otherwise = e
 
-renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element]
+renumIds :: (QName -> Bool) -> M.Map Text Text -> [Element] -> [Element]
 renumIds f renumMap = map (renumId f renumMap)
 
-findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text
-findAttrTextBy x = fmap T.pack . findAttrBy x
-
-lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text
-lookupAttrTextBy x = fmap T.pack . lookupAttrBy x
-
 -- | Certain characters are invalid in XML even if escaped.
 -- See #1992
-stripInvalidChars :: T.Text -> T.Text
+stripInvalidChars :: Text -> Text
 stripInvalidChars = T.filter isValidChar
 
 -- | See XML reference
@@ -217,7 +113,7 @@ writeDocx opts doc = do
   let doc' = Pandoc meta blocks'
 
   username <- P.lookupEnv "USERNAME"
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
   oldUserDataDir <- P.getUserDataDir
   P.setUserDataDir Nothing
   res <- P.readDefaultDataFile "reference.docx"
@@ -234,11 +130,11 @@ writeDocx opts doc = do
 
   -- Gets the template size
   let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz"))
-  let mbAttrSzWidth = mbpgsz >>= lookupAttrTextBy ((=="w") . qName) . elAttribs
+  let mbAttrSzWidth = mbpgsz >>= lookupAttrBy ((=="w") . qName) . elAttribs
 
   let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar"))
-  let mbAttrMarLeft = mbpgmar >>= lookupAttrTextBy ((=="left") . qName) . elAttribs
-  let mbAttrMarRight = mbpgmar >>= lookupAttrTextBy ((=="right") . qName) . elAttribs
+  let mbAttrMarLeft = mbpgmar >>= lookupAttrBy ((=="left") . qName) . elAttribs
+  let mbAttrMarRight = mbpgmar >>= lookupAttrBy ((=="right") . qName) . elAttribs
 
   -- Get the available area (converting the size and the margins to int and
   -- doing the difference
@@ -250,24 +146,21 @@ writeDocx opts doc = do
 
   -- styles
   mblang <- toLang $ getLang opts meta
+  -- TODO FIXME avoid this generic traversal!
+  -- lang is in w:docDefaults /  w:rPr  /  w:lang
   let addLang :: Element -> Element
-      addLang e = case (\l -> XMLC.toTree . go (T.unpack $ renderLang l) $
-                                 XMLC.fromElement e) <$> mblang of
-                    Just (Elem e') -> e'
-                    _              -> e -- return original
-        where go :: String -> Cursor -> Cursor
-              go l cursor = case XMLC.findRec (isLangElt . current) cursor of
-                              Nothing -> cursor
-                              Just t  -> XMLC.modifyContent (setval l) t
-              setval :: String -> Content -> Content
-              setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $
-                                               elAttribs e' }
-              setval _ x         = x
-              setvalattr :: String -> XML.Attr -> XML.Attr
-              setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l
-              setvalattr _ x                                 = x
-              isLangElt (Elem e') = qName (elName e') == "lang"
-              isLangElt _         = False
+      addLang = case mblang of
+                  Nothing -> id
+                  Just l  -> everywhere (mkT (go (renderLang l)))
+        where
+          go :: Text -> Element -> Element
+          go l e'
+            | qName (elName e') == "lang"
+                = e'{ elAttribs = map (setvalattr l) $ elAttribs e' }
+            | otherwise = e'
+
+          setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l
+          setvalattr _ x                                 = x
 
   let stylepath = "word/styles.xml"
   styledoc <- addLang <$> parseXml refArchive distArchive stylepath
@@ -337,12 +230,13 @@ writeDocx opts doc = do
 
   -- [Content_Types].xml
   let mkOverrideNode (part', contentType') = mknode "Override"
-               [("PartName",part'),("ContentType",contentType')] ()
+               [("PartName", T.pack part')
+               ,("ContentType", contentType')] ()
   let mkImageOverride (_, imgpath, mbMimeType, _) =
-          mkOverrideNode ("/word/" ++ imgpath,
-                          maybe "application/octet-stream" T.unpack mbMimeType)
+          mkOverrideNode ("/word/" <> imgpath,
+                          fromMaybe "application/octet-stream" mbMimeType)
   let mkMediaOverride imgpath =
-          mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath)
+          mkOverrideNode ("/" <> imgpath, getMimeTypeDef imgpath)
   let overrides = map mkOverrideNode (
                   [("/word/webSettings.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
@@ -369,13 +263,14 @@ writeDocx opts doc = do
                   ,("/word/footnotes.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
                   ] ++
-                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                  map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x),
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
-                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                  map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x),
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
                     map mkImageOverride imgs ++
-                    [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive
-                                        , "word/media/" `isPrefixOf` eRelativePath e ]
+                    [ mkMediaOverride (eRelativePath e)
+                        | e <- zEntries refArchive
+                        , "word/media/" `isPrefixOf` eRelativePath e ]
 
   let defaultnodes = [mknode "Default"
               [("Extension","xml"),("ContentType","application/xml")] (),
@@ -421,7 +316,7 @@ writeDocx opts doc = do
   let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
   let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
   let baserels = baserels' ++ renumHeaders ++ renumFooters
-  let toImgRel (ident,path,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
+  let toImgRel (ident,path,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",T.pack ident),("Target",T.pack path)] ()
   let imgrels = map toImgRel imgs
   let toLinkRel (src,ident) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
   let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
@@ -441,7 +336,7 @@ writeDocx opts doc = do
         Nothing      -> mknode "w:sectPr" [] ()
 
   -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr'
-  let contents' = contents ++ [sectpr]
+  let contents' = contents ++ [Elem sectpr]
   let docContents = mknode "w:document" stdAttributes
                     $ mknode "w:body" [] contents'
 
@@ -489,10 +384,10 @@ writeDocx opts doc = do
   numbering <- parseXml refArchive distArchive numpath
   let newNumElts = mkNumbering (stLists st)
   let pandocAdded e =
-       case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of
+       case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of
          Just numid -> numid >= (990 :: Int)
          Nothing    ->
-           case findAttrTextBy ((== "numId") . qName) e >>= safeRead of
+           case findAttrBy ((== "numId") . qName) e >>= safeRead of
              Just numid -> numid >= (1000 :: Int)
              Nothing    -> False
   let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering)
@@ -514,7 +409,7 @@ writeDocx opts doc = do
   let extraCoreProps = ["subject","lang","category","description"]
   let extraCorePropsMap = M.fromList $ zip extraCoreProps
                        ["dc:subject","dc:language","cp:category","dc:description"]
-  let lookupMetaString' :: T.Text -> Meta -> T.Text
+  let lookupMetaString' :: Text -> Meta -> Text
       lookupMetaString' key' meta' =
         case key' of
              "description"    -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta')
@@ -530,20 +425,21 @@ writeDocx opts doc = do
           : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta))
           : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta)
             | k <- M.keys (unMeta meta), k `elem` extraCoreProps]
-          ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords)
+          ++ mknode "cp:keywords" [] (T.intercalate ", " keywords)
           : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
                    , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
-                   ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+                   ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime)
   let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
 
   -- docProps/custom.xml
-  let customProperties :: [(String, String)]
-      customProperties = [(T.unpack k, T.unpack $ lookupMetaString k meta) | k <- M.keys (unMeta meta)
+  let customProperties :: [(Text, Text)]
+      customProperties = [ (k, lookupMetaString k meta)
+                         | k <- M.keys (unMeta meta)
                          , k `notElem` (["title", "author", "keywords"]
                                        ++ extraCoreProps)]
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
-         ,("pid", show pid)
+         ,("pid", tshow pid)
          ,("name", k)] $ mknode "vt:lpwstr" [] v
   let customPropsPath = "docProps/custom.xml"
   let customProps = mknode "Properties"
@@ -574,12 +470,27 @@ writeDocx opts doc = do
   -- adds references to footnotes or endnotes we don't have...
   -- we do, however, copy some settings over from reference
   let settingsPath = "word/settings.xml"
-      settingsList = [ "w:autoHyphenation"
-                     , "w:consecutiveHyphenLimit"
-                     , "w:hyphenationZone"
-                     , "w:doNotHyphenateCap"
-                     , "w:evenAndOddHeaders"
-                     , "w:proofState"
+      settingsList = [ "zoom"
+                     , "embedSystemFonts"
+                     , "doNotTrackMoves"
+                     , "defaultTabStop"
+                     , "drawingGridHorizontalSpacing"
+                     , "drawingGridVerticalSpacing"
+                     , "displayHorizontalDrawingGridEvery"
+                     , "displayVerticalDrawingGridEvery"
+                     , "characterSpacingControl"
+                     , "savePreviewPicture"
+                     , "mathPr"
+                     , "themeFontLang"
+                     , "decimalSymbol"
+                     , "listSeparator"
+                     , "autoHyphenation"
+                     , "consecutiveHyphenLimit"
+                     , "hyphenationZone"
+                     , "doNotHyphenateCap"
+                     , "evenAndOddHeaders"
+                     , "proofState"
+                     , "compat"
                      ]
   settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
 
@@ -593,7 +504,8 @@ writeDocx opts doc = do
   fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
   webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
   headerFooterEntries <- mapM (entryFromArchive refArchive . ("word/" ++)) $
-                     mapMaybe extractTarget (headers ++ footers)
+                         mapMaybe (fmap T.unpack . extractTarget)
+                         (headers ++ footers)
   let miscRelEntries = [ e | e <- zEntries refArchive
                        , "word/_rels/" `isPrefixOf` eRelativePath e
                        , ".xml.rels" `isSuffixOf` eRelativePath e
@@ -619,8 +531,8 @@ newParaPropToOpenXml (fromStyleName -> s) =
   let styleId = T.filter (not . isSpace) s
   in mknode "w:style" [ ("w:type", "paragraph")
                       , ("w:customStyle", "1")
-                      , ("w:styleId", T.unpack styleId)]
-     [ mknode "w:name" [("w:val", T.unpack s)] ()
+                      , ("w:styleId", styleId)]
+     [ mknode "w:name" [("w:val", s)] ()
      , mknode "w:basedOn" [("w:val","BodyText")] ()
      , mknode "w:qFormat" [] ()
      ]
@@ -630,8 +542,8 @@ newTextPropToOpenXml (fromStyleName -> s) =
   let styleId = T.filter (not . isSpace) s
   in mknode "w:style" [ ("w:type", "character")
                       , ("w:customStyle", "1")
-                      , ("w:styleId", T.unpack styleId)]
-     [ mknode "w:name" [("w:val", T.unpack s)] ()
+                      , ("w:styleId", styleId)]
+     [ mknode "w:name" [("w:val", s)] ()
      , mknode "w:basedOn" [("w:val","BodyTextChar")] ()
      ]
 
@@ -642,13 +554,14 @@ styleToOpenXml sm style =
         toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing
                         | otherwise = Just $
                           mknode "w:style" [("w:type","character"),
-                           ("w:customStyle","1"),("w:styleId",show toktype)]
-                             [ mknode "w:name" [("w:val",show toktype)] ()
+                           ("w:customStyle","1"),("w:styleId", tshow toktype)]
+                             [ mknode "w:name" [("w:val", tshow toktype)] ()
                              , mknode "w:basedOn" [("w:val","VerbatimChar")] ()
                              , mknode "w:rPr" [] $
-                               [ mknode "w:color" [("w:val",tokCol toktype)] ()
+                               [ mknode "w:color" [("w:val", tokCol toktype)] ()
                                  | tokCol toktype /= "auto" ] ++
-                               [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
+                               [ mknode "w:shd" [("w:val","clear")
+                                                ,("w:fill",tokBg toktype)] ()
                                  | tokBg toktype /= "auto" ] ++
                                [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
                                [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
@@ -656,10 +569,10 @@ styleToOpenXml sm style =
                              ]
         tokStyles = tokenStyles style
         tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles
-        tokCol toktype = maybe "auto" (drop 1 . fromColor)
+        tokCol toktype = maybe "auto" (T.pack . drop 1 . fromColor)
                          $ (tokenColor =<< M.lookup toktype tokStyles)
                            `mplus` defaultColor style
-        tokBg toktype = maybe "auto" (drop 1 . fromColor)
+        tokBg toktype = maybe "auto" (T.pack . drop 1 . fromColor)
                          $ (tokenBackground =<< M.lookup toktype tokStyles)
                            `mplus` backgroundColor style
         parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing
@@ -672,23 +585,25 @@ styleToOpenXml sm style =
                              , mknode "w:pPr" []
                                $ mknode "w:wordWrap" [("w:val","off")] ()
                                :
-                         maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style)
+                         maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill", T.pack $ drop 1 $ fromColor col)] ()]) (backgroundColor style)
                              ]
 
-copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry
+copyChildren :: (PandocMonad m)
+             => Archive -> Archive -> String -> Integer -> [Text] -> m Entry
 copyChildren refArchive distArchive path timestamp elNames = do
   ref  <- parseXml refArchive distArchive path
   dist <- parseXml distArchive distArchive path
+  let elsToCopy =
+        map cleanElem $ filterChildrenName (\e -> qName e `elem` elNames) ref
+  let elsToKeep =
+        [e | Elem e <- elContent dist, not (any (hasSameNameAs e) elsToCopy)]
   return $ toEntry path timestamp $ renderXml dist{
-      elContent = elContent dist ++ copyContent ref
+      elContent = map Elem elsToKeep ++ map Elem elsToCopy
     }
   where
-    strName QName{qName=name, qPrefix=prefix}
-      | Just p <- prefix = p++":"++name
-      | otherwise        = name
-    shouldCopy = (`elem` elNames) . strName
-    cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
-    copyContent = map cleanElem . filterChildrenName shouldCopy
+    hasSameNameAs (Element {elName = n1}) (Element {elName = n2}) =
+      qName n1 == qName n2
+    cleanElem el@Element{elName=name} = el{elName=name{qURI=Nothing}}
 
 -- this is the lowest number used for a list numId
 baseListId :: Int
@@ -697,43 +612,42 @@ baseListId = 1000
 mkNumbering :: [ListMarker] -> [Element]
 mkNumbering lists =
   elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
-    where elts = zipWith mkAbstractNum (ordNub lists) $
-                     randomRs (0x10000000, 0xFFFFFFFF) $ mkStdGen 1848
+    where elts = map mkAbstractNum (ordNub lists)
 
 maxListLevel :: Int
 maxListLevel = 8
 
 mkNum :: ListMarker -> Int -> Element
 mkNum marker numid =
-  mknode "w:num" [("w:numId",show numid)]
+  mknode "w:num" [("w:numId",tshow numid)]
    $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] ()
    : case marker of
        NoMarker     -> []
        BulletMarker -> []
        NumberMarker _ _ start ->
-          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
-              $ mknode "w:startOverride" [("w:val",show start)] ())
+          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",tshow (lvl :: Int))]
+              $ mknode "w:startOverride" [("w:val",tshow start)] ())
                 [0..maxListLevel]
 
-mkAbstractNum :: ListMarker -> Integer -> Element
-mkAbstractNum marker nsid =
+mkAbstractNum :: ListMarker -> Element
+mkAbstractNum marker =
   mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
-    $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
+    $ mknode "w:nsid" [("w:val", "A" <> listMarkerToId marker)] ()
     : mknode "w:multiLevelType" [("w:val","multilevel")] ()
     : map (mkLvl marker)
       [0..maxListLevel]
 
 mkLvl :: ListMarker -> Int -> Element
 mkLvl marker lvl =
-  mknode "w:lvl" [("w:ilvl",show lvl)] $
+  mknode "w:lvl" [("w:ilvl",tshow lvl)] $
     [ mknode "w:start" [("w:val",start)] ()
       | marker /= NoMarker && marker /= BulletMarker ] ++
     [ mknode "w:numFmt" [("w:val",fmt)] ()
-    , mknode "w:lvlText" [("w:val",lvltxt)] ()
+    , mknode "w:lvlText" [("w:val", lvltxt)] ()
     , mknode "w:lvlJc" [("w:val","left")] ()
     , mknode "w:pPr" []
-      [ mknode "w:ind" [ ("w:left",show $ lvl * step + step)
-                       , ("w:hanging",show (hang :: Int))
+      [ mknode "w:ind" [ ("w:left",tshow $ lvl * step + step)
+                       , ("w:hanging",tshow (hang :: Int))
                        ] ()
       ]
     ]
@@ -742,8 +656,8 @@ mkLvl marker lvl =
                  NoMarker             -> ("bullet"," ","1")
                  BulletMarker         -> ("bullet",bulletFor lvl,"1")
                  NumberMarker st de n -> (styleFor st lvl
-                                         ,patternFor de ("%" ++ show (lvl + 1))
-                                         ,show n)
+                                         ,patternFor de ("%" <> tshow (lvl + 1))
+                                         ,tshow n)
           step = 720
           hang = 480
           bulletFor 0 = "\x2022"  -- filled circle
@@ -766,9 +680,9 @@ mkLvl marker lvl =
           styleFor DefaultStyle 5 = "lowerRoman"
           styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6)
           styleFor _ _            = "decimal"
-          patternFor OneParen s  = s ++ ")"
-          patternFor TwoParens s = "(" ++ s ++ ")"
-          patternFor _ s         = s ++ "."
+          patternFor OneParen s  = s <> ")"
+          patternFor TwoParens s = "(" <> s <> ")"
+          patternFor _ s         = s <> "."
 
 getNumId :: (PandocMonad m) => WS m Int
 getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
@@ -776,8 +690,8 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
 
 makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element]
 makeTOC opts = do
-  let depth = "1-"++show (writerTOCDepth opts)
-  let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
+  let depth = "1-" <> tshow (writerTOCDepth opts)
+  let tocCmd = "TOC \\o \"" <> depth <> "\" \\h \\z \\u"
   tocTitle <- gets stTocTitle
   title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
   return
@@ -788,7 +702,7 @@ makeTOC opts = do
           mknode "w:docPartUnique" [] ()]
          -- w:docPartObj
       ), -- w:sdtPr
-      mknode "w:sdtContent" [] (title++[
+      mknode "w:sdtContent" [] (title ++ [ Elem $
         mknode "w:p" [] (
           mknode "w:r" [] [
             mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
@@ -802,7 +716,9 @@ makeTOC opts = do
 
 -- | Convert Pandoc document to two lists of
 -- OpenXML elements (the main document and footnotes).
-writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element],[Element])
+writeOpenXML :: (PandocMonad m)
+             => WriterOptions -> Pandoc
+             -> WS m ([Content], [Element], [Element])
 writeOpenXML opts (Pandoc meta blocks) = do
   let tit = docTitle meta
   let auths = docAuthors meta
@@ -828,8 +744,9 @@ writeOpenXML opts (Pandoc meta blocks) = do
   let toComment (kvs, ils) = do
         annotation <- inlinesToOpenXML opts ils
         return $
-          mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs]
+          mknode "w:comment" [("w:" <> k, v) | (k,v) <- kvs]
             [ mknode "w:p" [] $
+              map Elem
               [ mknode "w:pPr" []
                 [ mknode "w:pStyle" [("w:val", "CommentText")] () ]
               , mknode "w:r" []
@@ -844,11 +761,11 @@ writeOpenXML opts (Pandoc meta blocks) = do
   toc <- if includeTOC
             then makeTOC opts
             else return []
-  let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
+  let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ map Elem toc
   return (meta' ++ doc', notes', comments')
 
 -- | Convert a list of Pandoc blocks to OpenXML.
-blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element]
+blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Content]
 blocksToOpenXML opts = fmap concat . mapM (blockToOpenXML opts) . separateTables
 
 -- Word combines adjacent tables unless you put an empty paragraph between
@@ -859,35 +776,29 @@ separateTables (x@Table{}:xs@(Table{}:_)) =
   x : RawBlock (Format "openxml") "<w:p />" : separateTables xs
 separateTables (x:xs) = x : separateTables xs
 
-pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
-pStyleM styleName = do
-  pStyleMap <- gets (smParaStyle . stStyleMaps)
-  let sty' = getStyleIdFromName styleName pStyleMap
-  return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] ()
-
 rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element
 rStyleM styleName = do
   cStyleMap <- gets (smCharStyle . stStyleMaps)
   let sty' = getStyleIdFromName styleName cStyleMap
-  return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] ()
+  return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] ()
 
-getUniqueId :: (PandocMonad m) => WS m String
+getUniqueId :: (PandocMonad m) => WS m Text
 -- the + 20 is to ensure that there are no clashes with the rIds
 -- already in word/document.xml.rel
 getUniqueId = do
   n <- gets stCurId
   modify $ \st -> st{stCurId = n + 1}
-  return $ show n
+  return $ tshow n
 
 -- | Key for specifying user-defined docx styles.
-dynamicStyleKey :: T.Text
+dynamicStyleKey :: Text
 dynamicStyleKey = "custom-style"
 
 -- | Convert a Pandoc block element to OpenXML.
-blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
+blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content]
 blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk
 
-blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
+blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content]
 blockToOpenXML' _ Null = return []
 blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do
   stylemod <- case lookup dynamicStyleKey kvs of
@@ -921,18 +832,18 @@ blockToOpenXML' opts (Header lev (ident,_,kvs) lst) = do
                 Just n -> do
                    num <- withTextPropM (rStyleM "SectionNumber")
                             (inlineToOpenXML opts (Str n))
-                   return $ num ++ [mknode "w:r" [] [mknode "w:tab" [] ()]]
+                   return $ num ++ [Elem $ mknode "w:r" [] [mknode "w:tab" [] ()]]
                 Nothing -> return []
            else return []
   contents <- (number ++) <$> inlinesToOpenXML opts lst
   if T.null ident
-     then return [mknode "w:p" [] (paraProps ++ contents)]
+     then return [Elem $ mknode "w:p" [] (map Elem paraProps ++ contents)]
      else do
        let bookmarkName = ident
        modify $ \s -> s{ stSectionIds = Set.insert bookmarkName
                                       $ stSectionIds s }
        bookmarkedContents <- wrapBookmark bookmarkName contents
-       return [mknode "w:p" [] (paraProps ++ bookmarkedContents)]
+       return [Elem $ mknode "w:p" [] (map Elem paraProps ++ bookmarkedContents)]
 blockToOpenXML' opts (Plain lst) = do
   isInTable <- gets stInTable
   isInList <- gets stInList
@@ -944,15 +855,31 @@ blockToOpenXML' opts (Plain lst) = do
 -- title beginning with fig: indicates that the image is a figure
 blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do
   setFirstPara
+  fignum <- gets stNextFigureNum
+  unless (null alt) $ modify $ \st -> st{ stNextFigureNum = fignum + 1 }
+  let figid = "fig" <> tshow fignum
+  figname <- translateTerm Term.Figure
   prop <- pStyleM $
         if null alt
         then "Figure"
         else "Captioned Figure"
   paraProps <- local (\env -> env { envParaProperties = EnvProps (Just prop) [] <> envParaProperties env }) (getParaProps False)
   contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
-  captionNode <- withParaPropM (pStyleM "Image Caption")
-                 $ blockToOpenXML opts (Para alt)
-  return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
+  captionNode <- if null alt
+                    then return []
+                    else withParaPropM (pStyleM "Image Caption")
+                         $ blockToOpenXML opts
+                            (Para $ Span (figid,[],[])
+                               [Str (figname <> "\160"),
+                                RawInline (Format "openxml")
+                                ("<w:fldSimple w:instr=\"SEQ Figure"
+                                <> " \\* ARABIC \"><w:r><w:t>"
+                                <> tshow fignum
+                                <> "</w:t></w:r></w:fldSimple>"),
+                                Str ":", Space] : alt)
+  return $
+    Elem (mknode "w:p" [] (map Elem paraProps ++ contents))
+    : captionNode
 blockToOpenXML' opts (Para lst)
   | null lst && not (isEnabled Ext_empty_paragraphs opts) = return []
   | otherwise = do
@@ -969,10 +896,12 @@ blockToOpenXML' opts (Para lst)
             ps               -> ps
       modify $ \s -> s { stFirstPara = False }
       contents <- inlinesToOpenXML opts lst
-      return [mknode "w:p" [] (paraProps' ++ contents)]
+      return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)]
 blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
 blockToOpenXML' _ b@(RawBlock format str)
-  | format == Format "openxml" = return [ x | Elem x <- parseXML str ]
+  | format == Format "openxml" = return [
+        Text (CData CDataRaw str Nothing)
+      ]
   | otherwise                  = do
       report $ BlockNotRendered b
       return []
@@ -987,73 +916,14 @@ blockToOpenXML' opts (CodeBlock attrs@(ident, _, _) str) = do
   wrapBookmark ident p
 blockToOpenXML' _ HorizontalRule = do
   setFirstPara
-  return [
+  return [ Elem $
     mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
     $ mknode "v:rect" [("style","width:0;height:1.5pt"),
                        ("o:hralign","center"),
                        ("o:hrstd","t"),("o:hr","t")] () ]
-blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
-  let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
-  setFirstPara
-  modify $ \s -> s { stInTable = True }
-  let captionStr = stringify caption
-  caption' <- if null caption
-                 then return []
-                 else withParaPropM (pStyleM "Table Caption")
-                      $ blockToOpenXML opts (Para caption)
-  let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
-  -- Table cells require a <w:p> element, even an empty one!
-  -- Not in the spec but in Word 2007, 2010. See #4953.
-  let cellToOpenXML (al, cell) = do
-        es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell
-        return $ if any (\e -> qName (elName e) == "p") es
-           then es
-           else es ++ [mknode "w:p" [] ()]
-  headers' <- mapM cellToOpenXML $ zip aligns headers
-  rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
-  let borderProps = mknode "w:tcPr" []
-                    [ mknode "w:tcBorders" []
-                      $ mknode "w:bottom" [("w:val","single")] ()
-                    , mknode "w:vAlign" [("w:val","bottom")] () ]
-  compactStyle <- pStyleM "Compact"
-  let emptyCell' = [mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
-  let mkcell border contents = mknode "w:tc" []
-                            $ [ borderProps | border ] ++
-                            if null contents
-                               then emptyCell'
-                               else contents
-  let mkrow border cells = mknode "w:tr" [] $
-                        [mknode "w:trPr" [] [
-                          mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border]
-                        ++ map (mkcell border) cells
-  let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
-  let fullrow = 5000 -- 100% specified in pct
-  let rowwidth = fullrow * sum widths
-  let mkgridcol w = mknode "w:gridCol"
-                       [("w:w", show (floor (textwidth * w) :: Integer))] ()
-  let hasHeader = not $ all null headers
-  modify $ \s -> s { stInTable = False }
-  return $
-    caption' ++
-    [mknode "w:tbl" []
-      ( mknode "w:tblPr" []
-        (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
-            mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
-                               ,("w:lastRow","0")
-                               ,("w:firstColumn","0")
-                               ,("w:lastColumn","0")
-                               ,("w:noHBand","0")
-                               ,("w:noVBand","0")] () :
-          [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] ()
-          | not (null caption) ] )
-      : mknode "w:tblGrid" []
-        (if all (==0) widths
-            then []
-            else map mkgridcol widths)
-      : [ mkrow True headers' | hasHeader ] ++
-      map (mkrow False) rows'
-      )]
+blockToOpenXML' opts (Table attr caption colspecs thead tbodies tfoot) =
+  tableToOpenXML (blocksToOpenXML opts)
+                 (Grid.toTable attr caption colspecs thead tbodies tfoot)
 blockToOpenXML' opts el
   | BulletList lst <- el = addOpenXMLList BulletMarker lst
   | OrderedList (start, numstyle, numdelim) lst <- el
@@ -1070,7 +940,9 @@ blockToOpenXML' opts (DefinitionList items) = do
   setFirstPara
   return l
 
-definitionListItemToOpenXML  :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element]
+definitionListItemToOpenXML  :: (PandocMonad m)
+                             => WriterOptions -> ([Inline],[[Block]])
+                             -> WS m [Content]
 definitionListItemToOpenXML opts (term,defs) = do
   term' <- withParaPropM (pStyleM "Definition Term")
            $ blockToOpenXML opts (Para term)
@@ -1083,8 +955,11 @@ addList marker = do
   lists <- gets stLists
   modify $ \st -> st{ stLists = lists ++ [marker] }
 
-listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element]
-listItemToOpenXML _ _ []                   = return []
+listItemToOpenXML :: (PandocMonad m)
+                  => WriterOptions
+                  -> Int -> [Block]
+                  -> WS m [Content]
+listItemToOpenXML _ _ []                  = return []
 listItemToOpenXML opts numid (first:rest) = do
   oldInList <- gets stInList
   modify $ \st -> st{ stInList = True }
@@ -1103,15 +978,8 @@ listItemToOpenXML opts numid (first:rest) = do
   modify $ \st -> st{ stInList = oldInList }
   return $ first'' ++ rest''
 
-alignmentToString :: Alignment -> [Char]
-alignmentToString alignment = case alignment of
-                                 AlignLeft    -> "left"
-                                 AlignRight   -> "right"
-                                 AlignCenter  -> "center"
-                                 AlignDefault -> "left"
-
 -- | Convert a list of inline elements to OpenXML.
-inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element]
+inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content]
 inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
 
 withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a
@@ -1120,10 +988,6 @@ withNumId numid = local $ \env -> env{ envListNumId = numid }
 asList :: (PandocMonad m) => WS m a -> WS m a
 asList = local $ \env -> env{ envListLevel = envListLevel env + 1 }
 
-isStyle :: Element -> Bool
-isStyle e = isElem [] "w" "rStyle" e ||
-            isElem [] "w" "pStyle" e
-
 getTextProps :: (PandocMonad m) => WS m [Element]
 getTextProps = do
   props <- asks envTextProperties
@@ -1146,23 +1010,13 @@ getParaProps displayMathPara = do
   listLevel <- asks envListLevel
   numid <- asks envListNumId
   let listPr = [mknode "w:numPr" []
-                [ mknode "w:ilvl" [("w:val",show listLevel)] ()
-                , mknode "w:numId" [("w:val",show numid)] () ] | listLevel >= 0 && not displayMathPara]
+                [ mknode "w:ilvl" [("w:val",tshow listLevel)] ()
+                , mknode "w:numId" [("w:val",tshow numid)] () ] | listLevel >= 0 && not displayMathPara]
   return $ case listPr ++ squashProps props of
                 [] -> []
                 ps -> [mknode "w:pPr" [] ps]
 
-withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
-withParaProp d p =
-  local (\env -> env {envParaProperties = ep <> envParaProperties env}) p
-  where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d]
-
-withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
-withParaPropM md p = do
-  d <- md
-  withParaProp d p
-
-formattedString :: PandocMonad m => T.Text -> WS m [Element]
+formattedString :: PandocMonad m => Text -> WS m [Element]
 formattedString str =
   -- properly handle soft hyphens
   case splitTextBy (=='\173') str of
@@ -1171,7 +1025,7 @@ formattedString str =
          sh <- formattedRun [mknode "w:softHyphen" [] ()]
          intercalate sh <$> mapM formattedString' ws
 
-formattedString' :: PandocMonad m => T.Text -> WS m [Element]
+formattedString' :: PandocMonad m => Text -> WS m [Element]
 formattedString' str = do
   inDel <- asks envInDel
   formattedRun [ mktnode (if inDel then "w:delText" else "w:t")
@@ -1182,16 +1036,13 @@ formattedRun els = do
   props <- getTextProps
   return [ mknode "w:r" [] $ props ++ els ]
 
-setFirstPara :: PandocMonad m => WS m ()
-setFirstPara =  modify $ \s -> s { stFirstPara = True }
-
 -- | Convert an inline element to OpenXML.
-inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
+inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]
 inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il
 
-inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
+inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]
 inlineToOpenXML' _ (Str str) =
-  formattedString str
+  map Elem <$> formattedString str
 inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
 inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
 inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) =
@@ -1199,10 +1050,11 @@ inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) =
 inlineToOpenXML' opts (Span ("",["csl-left-margin"],[]) ils) =
   inlinesToOpenXML opts ils
 inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) =
-  ([mknode "w:r" []
-    (mknode "w:t"
-      [("xml:space","preserve")]
-      ("\t" :: String))] ++)
+   ([Elem $
+     mknode "w:r" []
+     (mknode "w:t"
+       [("xml:space","preserve")]
+       ("\t" :: Text))] ++)
     <$> inlinesToOpenXML opts ils
 inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) =
   inlinesToOpenXML opts ils
@@ -1212,18 +1064,18 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
   let ident' = fromMaybe ident (lookup "id" kvs)
       kvs' = filter (("id" /=) . fst) kvs
   modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st }
-  return [ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ]
+  return [ Elem $ mknode "w:commentRangeStart" [("w:id", ident')] () ]
 inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) =
   -- prefer the "id" in kvs, since that is the one produced by the docx
   -- reader.
   let ident' = fromMaybe ident (lookup "id" kvs)
-  in
-    return [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] ()
-           , mknode "w:r" []
-             [ mknode "w:rPr" []
-               [ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
-             , mknode "w:commentReference" [("w:id", T.unpack ident')] () ]
-           ]
+  in return . map Elem $
+     [ mknode "w:commentRangeEnd" [("w:id", ident')] ()
+     , mknode "w:r" []
+       [ mknode "w:rPr" []
+         [ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
+       , mknode "w:commentReference" [("w:id", ident')] () ]
+     ]
 inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
   stylemod <- case lookup dynamicStyleKey kvs of
                    Just (fromString . T.unpack -> sty) -> do
@@ -1246,8 +1098,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
         defaultAuthor <- asks envChangesAuthor
         let author = fromMaybe defaultAuthor (lookup "author" kvs)
         let mdate = lookup "date" kvs
-        return $ ("w:author", T.unpack author) :
-                   maybe [] (\date -> [("w:date", T.unpack date)]) mdate
+        return $ ("w:author", author) :
+                   maybe [] (\date -> [("w:date", date)]) mdate
   insmod <- if "insertion" `elem` classes
                then do
                  changeAuthorDate <- getChangeAuthorDate
@@ -1255,8 +1107,9 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                  modify $ \s -> s{stInsId = insId + 1}
                  return $ \f -> do
                    x <- f
-                   return [ mknode "w:ins"
-                              (("w:id", show insId) : changeAuthorDate) x]
+                   return [Elem $
+                           mknode "w:ins"
+                             (("w:id", tshow insId) : changeAuthorDate) x]
                else return id
   delmod <- if "deletion" `elem` classes
                then do
@@ -1265,16 +1118,20 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                  modify $ \s -> s{stDelId = delId + 1}
                  return $ \f -> local (\env->env{envInDel=True}) $ do
                    x <- f
-                   return [mknode "w:del"
-                           (("w:id", show delId) : changeAuthorDate) x]
+                   return [Elem $ mknode "w:del"
+                             (("w:id", tshow delId) : changeAuthorDate) x]
                else return id
   contents <- insmod $ delmod $ dirmod $ stylemod $ pmod
                      $ inlinesToOpenXML opts ils
   wrapBookmark ident contents
 inlineToOpenXML' opts (Strong lst) =
-  withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
+  withTextProp (mknode "w:b" [] ()) $
+  withTextProp (mknode "w:bCs" [] ()) $ -- needed for LTR, #6911
+  inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Emph lst) =
-  withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst
+  withTextProp (mknode "w:i" [] ()) $
+  withTextProp (mknode "w:iCs" [] ()) $  -- needed for LTR, #6911
+  inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Underline lst) =
   withTextProp (mknode "w:u" [("w:val","single")] ()) $
     inlinesToOpenXML opts lst
@@ -1290,9 +1147,10 @@ inlineToOpenXML' opts (SmallCaps lst) =
 inlineToOpenXML' opts (Strikeout lst) =
   withTextProp (mknode "w:strike" [] ())
   $ inlinesToOpenXML opts lst
-inlineToOpenXML' _ LineBreak = return [br]
+inlineToOpenXML' _ LineBreak = return [Elem br]
 inlineToOpenXML' _ il@(RawInline f str)
-  | f == Format "openxml" = return [ x | Elem x <- parseXML str ]
+  | f == Format "openxml" = return
+                            [Text (CData CDataRaw str Nothing)]
   | otherwise             = do
       report $ InlineNotRendered il
       return []
@@ -1305,26 +1163,26 @@ inlineToOpenXML' opts (Math mathType str) = do
   when (mathType == DisplayMath) setFirstPara
   res <- (lift . lift) (convertMath writeOMML mathType str)
   case res of
-       Right r -> return [r]
+       Right r -> return [Elem $ fromXLElement r]
        Left il -> inlineToOpenXML' opts il
 inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Code attrs str) = do
   let alltoktypes = [KeywordTok ..]
   tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes
-  let unhighlighted = intercalate [br] `fmap`
+  let unhighlighted = (map Elem . intercalate [br]) `fmap`
                        mapM formattedString (T.lines str)
       formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
       toHlTok (toktype,tok) =
         mknode "w:r" []
           [ mknode "w:rPr" [] $
             maybeToList (lookup toktype tokTypesMap)
-            , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
+            , mknode "w:t" [("xml:space","preserve")] tok ]
   withTextPropM (rStyleM "Verbatim Char")
     $ if isNothing (writerHighlightStyle opts)
           then unhighlighted
           else case highlight (writerSyntaxMap opts)
                       formatOpenXML attrs str of
-                    Right h  -> return h
+                    Right h  -> return (map Elem h)
                     Left msg -> do
                       unless (T.null msg) $ report $ CouldNotHighlight msg
                       unhighlighted
@@ -1335,7 +1193,7 @@ inlineToOpenXML' opts (Note bs) = do
   let notemarker = mknode "w:r" []
                    [ mknode "w:rPr" [] footnoteStyle
                    , mknode "w:footnoteRef" [] () ]
-  let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker
+  let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
   let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs
       insertNoteRef (Para ils  : xs) = Para  (notemarkerXml : Space : ils) : xs
       insertNoteRef xs               = Para [notemarkerXml] : xs
@@ -1347,26 +1205,26 @@ inlineToOpenXML' opts (Note bs) = do
                 $ insertNoteRef bs)
   let newnote = mknode "w:footnote" [("w:id", notenum)] contents
   modify $ \s -> s{ stFootnotes = newnote : notes }
-  return [ mknode "w:r" []
+  return [ Elem $ mknode "w:r" []
            [ mknode "w:rPr" [] footnoteStyle
            , mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
 -- internal link:
 inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   return
-    [ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ]
+    [ Elem $ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ]
 -- external link:
 inlineToOpenXML' opts (Link _ txt (src,_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   extlinks <- gets stExternalLinks
-  id' <- case M.lookup (T.unpack src) extlinks of
+  id' <- case M.lookup src extlinks of
             Just i   -> return i
             Nothing  -> do
-              i <- ("rId"++) `fmap` getUniqueId
+              i <- ("rId" <>) <$> getUniqueId
               modify $ \st -> st{ stExternalLinks =
-                        M.insert (T.unpack src) i extlinks }
+                        M.insert src i extlinks }
               return i
-  return [ mknode "w:hyperlink" [("r:id",id')] contents ]
+  return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ]
 inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
   pageWidth <- asks envPrintWidth
   imgs <- gets stImages
@@ -1384,17 +1242,17 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
                                              ,("noChangeAspect","1")] ()
         nvPicPr  = mknode "pic:nvPicPr" []
                         [ mknode "pic:cNvPr"
-                            [("descr",T.unpack src),("id","0"),("name","Picture")] ()
+                            [("descr",src),("id","0"),("name","Picture")] ()
                         , cNvPicPr ]
         blipFill = mknode "pic:blipFill" []
-          [ mknode "a:blip" [("r:embed",ident)] ()
+          [ mknode "a:blip" [("r:embed",T.pack ident)] ()
           , mknode "a:stretch" [] $
               mknode "a:fillRect" [] ()
           ]
         xfrm =    mknode "a:xfrm" []
                         [ mknode "a:off" [("x","0"),("y","0")] ()
-                        , mknode "a:ext" [("cx",show xemu)
-                                         ,("cy",show yemu)] () ]
+                        , mknode "a:ext" [("cx",tshow xemu)
+                                         ,("cy",tshow yemu)] () ]
         prstGeom = mknode "a:prstGeom" [("prst","rect")] $
                          mknode "a:avLst" [] ()
         ln =      mknode "a:ln" [("w","9525")]
@@ -1415,12 +1273,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
         imgElt = mknode "w:r" [] $
           mknode "w:drawing" [] $
             mknode "wp:inline" []
-              [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
+              [ mknode "wp:extent" [("cx",tshow xemu),("cy",tshow yemu)] ()
               , mknode "wp:effectExtent"
                 [("b","0"),("l","0"),("r","0"),("t","0")] ()
               , mknode "wp:docPr"
-                [ ("descr", T.unpack $ stringify alt)
-                , ("title", T.unpack title)
+                [ ("descr", stringify alt)
+                , ("title", title)
                 , ("id","1")
                 , ("name","Picture")
                 ] ()
@@ -1430,10 +1288,10 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
         imgElt
 
   wrapBookmark imgident =<< case stImage of
-    Just imgData -> return [generateImgElt imgData]
+    Just imgData -> return [Elem $ generateImgElt imgData]
     Nothing -> ( do --try
       (img, mt) <- P.fetchItem src
-      ident <- ("rId"++) `fmap` getUniqueId
+      ident <- ("rId" <>) <$> getUniqueId
 
       let
         imgext = case mt >>= extensionFromMimeType of
@@ -1446,11 +1304,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
             Just Eps  -> ".eps"
             Just Svg  -> ".svg"
             Just Emf  -> ".emf"
+            Just Tiff -> ".tiff"
             Nothing   -> ""
-        imgpath = "media/" <> ident <> T.unpack imgext
-        mbMimeType = mt <|> getMimeType imgpath
+        imgpath = "media/" <> ident <> imgext
+        mbMimeType = mt <|> getMimeType (T.unpack imgpath)
 
-        imgData = (ident, imgpath, mbMimeType, img)
+        imgData = (T.unpack ident, T.unpack imgpath, mbMimeType, img)
 
       if T.null imgext
          then -- without an extension there is no rule for content type
@@ -1458,7 +1317,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
          else do
            -- insert mime type to use in constructing [Content_Types].xml
            modify $ \st -> st { stImages = M.insert (T.unpack src) imgData $ stImages st }
-           return [generateImgElt imgData]
+           return [Elem $ generateImgElt imgData]
       )
       `catchError` ( \e -> do
         report $ CouldNotFetchResource src $ T.pack (show e)
@@ -1469,22 +1328,6 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
 br :: Element
 br = mknode "w:r" [] [mknode "w:br" [] ()]
 
--- Word will insert these footnotes into the settings.xml file
--- (whether or not they're visible in the document). If they're in the
--- file, but not in the footnotes.xml file, it will produce
--- problems. So we want to make sure we insert them into our document.
-defaultFootnotes :: [Element]
-defaultFootnotes = [ mknode "w:footnote"
-                     [("w:type", "separator"), ("w:id", "-1")]
-                     [ mknode "w:p" []
-                       [mknode "w:r" []
-                        [ mknode "w:separator" [] ()]]]
-                   , mknode "w:footnote"
-                     [("w:type", "continuationSeparator"), ("w:id", "0")]
-                     [ mknode "w:p" []
-                       [ mknode "w:r" []
-                         [ mknode "w:continuationSeparator" [] ()]]]]
-
 
 withDirection :: PandocMonad m => WS m a -> WS m a
 withDirection x = do
@@ -1508,20 +1351,20 @@ withDirection x = do
                                     , envTextProperties = EnvProps textStyle textProps'
                                     }
 
-wrapBookmark :: (PandocMonad m) => T.Text -> [Element] -> WS m [Element]
+wrapBookmark :: (PandocMonad m) => Text -> [Content] -> WS m [Content]
 wrapBookmark "" contents = return contents
 wrapBookmark ident contents = do
   id' <- getUniqueId
   let bookmarkStart = mknode "w:bookmarkStart"
                        [("w:id", id')
-                       ,("w:name", T.unpack $ toBookmarkName ident)] ()
+                       ,("w:name", toBookmarkName ident)] ()
       bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
-  return $ bookmarkStart : contents ++ [bookmarkEnd]
+  return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd]
 
 -- Word imposes a 40 character limit on bookmark names and requires
 -- that they begin with a letter.  So we just use a hash of the
 -- identifier when otherwise we'd have an illegal bookmark name.
-toBookmarkName :: T.Text -> T.Text
+toBookmarkName :: Text -> Text
 toBookmarkName s
   | Just (c, _) <- T.uncons s
   , isLetter c
diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
index c3c54c7e5..04868eaad 100644
--- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs
+++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
@@ -2,7 +2,7 @@
 {- |
    Module : Text.Pandoc.Writers.Docx.StyleMap
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015-2019 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
new file mode 100644
index 000000000..7a84c5278
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -0,0 +1,227 @@
+{-# LANGUAGE LambdaCase          #-}
+{-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{- |
+Module      : Text.Pandoc.Writers.Docx.Table
+Copyright   : Copyright (C) 2012-2021 John MacFarlane
+License     : GNU GPL, version 2 or above
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Conversion of table blocks to docx.
+-}
+module Text.Pandoc.Writers.Docx.Table
+  ( tableToOpenXML
+  ) where
+
+import Control.Monad.State.Strict
+import Data.Array
+import Data.Text (Text)
+import Text.Pandoc.Definition
+import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm)
+import Text.Pandoc.Writers.Docx.Types
+import Text.Pandoc.Shared
+import Text.Printf (printf)
+import Text.Pandoc.Writers.GridTable hiding (Table)
+import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML hiding (Attr)
+import qualified Data.Text as T
+import qualified Text.Pandoc.Translations as Term
+import qualified Text.Pandoc.Writers.GridTable as Grid
+
+tableToOpenXML :: PandocMonad m
+               => ([Block] -> WS m [Content])
+               -> Grid.Table
+               -> WS m [Content]
+tableToOpenXML blocksToOpenXML gridTable = do
+  setFirstPara
+  let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) =
+        gridTable
+  let (Caption _maybeShortCaption captionBlocks) = caption
+  tablenum <- gets stNextTableNum
+  unless (null captionBlocks) $
+    modify $ \st -> st{ stNextTableNum = tablenum + 1 }
+  let tableid = if T.null ident
+                   then "table" <> tshow tablenum
+                   else ident
+  tablename <- translateTerm Term.Table
+  let captionStr = stringify captionBlocks
+  let aligns = map fst $ elems colspecs
+  captionXml <- if null captionBlocks
+                then return []
+                else withParaPropM (pStyleM "Table Caption")
+                     $ blocksToOpenXML
+                     $ addLabel tableid tablename tablenum captionBlocks
+  -- We set "in table" after processing the caption, because we don't
+  -- want the "Table Caption" style to be overwritten with "Compact".
+  modify $ \s -> s { stInTable = True }
+  head' <- cellGridToOpenXML blocksToOpenXML HeadRow aligns thead
+  bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow aligns) tbodies
+  foot' <- cellGridToOpenXML blocksToOpenXML FootRow aligns tfoot
+
+  let hasHeader = not . null . indices . partRowAttrs $ thead
+  let hasFooter = not . null . indices . partRowAttrs $ tfoot
+  -- for compatibility with Word <= 2007, we include a val with a bitmask
+  -- 0×0020  Apply first row conditional formatting
+  -- 0×0040  Apply last row conditional formatting
+  -- 0×0080  Apply first column conditional formatting
+  -- 0×0100  Apply last column conditional formatting
+  -- 0×0200  Do not apply row banding conditional formatting
+  -- 0×0400  Do not apply column banding conditional formattin
+  let tblLookVal = if hasHeader then (0x20 :: Int) else 0
+  let (gridCols, tblWattr) = tableLayout (elems colspecs)
+  let tbl = mknode "w:tbl" []
+        ( mknode "w:tblPr" []
+          ( mknode "w:tblStyle" [("w:val","Table")] () :
+            mknode "w:tblW" tblWattr () :
+            mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
+                               ,("w:lastRow",if hasFooter then "1" else "0")
+                               ,("w:firstColumn","0")
+                               ,("w:lastColumn","0")
+                               ,("w:noHBand","0")
+                               ,("w:noVBand","0")
+                               ,("w:val", T.pack $ printf "%04x" tblLookVal)
+                               ] () :
+            [ mknode "w:tblCaption" [("w:val", captionStr)] ()
+            | not (T.null captionStr) ]
+          )
+          : mknode "w:tblGrid" [] gridCols
+          : head' ++ mconcat bodies ++ foot'
+        )
+  modify $ \s -> s { stInTable = False }
+  return $ captionXml ++ [Elem tbl]
+
+addLabel :: Text -> Text -> Int -> [Block] -> [Block]
+addLabel tableid tablename tablenum bs =
+  case bs of
+    (Para ils : rest)  -> Para (label : Space : ils) : rest
+    (Plain ils : rest) -> Plain (label : Space : ils) : rest
+    _ -> Para [label] : bs
+ where
+  label = Span (tableid,[],[])
+            [Str (tablename <> "\160"),
+             RawInline (Format "openxml")
+               ("<w:fldSimple w:instr=\"SEQ Table"
+               <> " \\* ARABIC \"><w:r><w:t>"
+               <> tshow tablenum
+               <> "</w:t></w:r></w:fldSimple>"),
+             Str ":"]
+
+-- | Parts of a table
+data RowType = HeadRow | BodyRow | FootRow
+
+alignmentToString :: Alignment -> Text
+alignmentToString = \case
+  AlignLeft    -> "left"
+  AlignRight   -> "right"
+  AlignCenter  -> "center"
+  AlignDefault -> "left"
+
+tableLayout :: [ColSpec] -> ([Element], [(Text, Text)])
+tableLayout specs =
+  let
+    textwidth = 7920  -- 5.5 in in twips       (1 twip == 1/20 pt)
+    fullrow   = 5000  -- 100% specified in pct (1 pct  == 1/50th of a percent)
+    ncols = length specs
+    getWidth = \case
+      ColWidth n -> n
+      _          -> 0
+    widths = map (getWidth . snd) specs
+    rowwidth  = round (fullrow * sum widths) :: Int
+    widthToTwips w = floor (textwidth * w)   :: Int
+    mkGridCol w = mknode "w:gridCol" [("w:w", tshow (widthToTwips w))] ()
+  in if all (== 0) widths
+     then ( replicate ncols $ mkGridCol (1.0 / fromIntegral ncols)
+          , [ ("w:type", "auto"), ("w:w", "0")])
+     else ( map mkGridCol widths
+          , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ])
+
+cellGridToOpenXML :: PandocMonad m
+                  => ([Block] -> WS m [Content])
+                  -> RowType
+                  -> [Alignment]
+                  -> Part
+                  -> WS m [Element]
+cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ cellArray _) =
+  if null (elems cellArray)
+  then return mempty
+  else mapM (rowToOpenXML blocksToOpenXML) $
+       partToRows rowType aligns part
+
+data OOXMLCell
+  = OOXMLCell Attr Alignment RowSpan ColSpan [Block]
+  | OOXMLCellMerge ColSpan
+
+data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell]
+
+partToRows :: RowType -> [Alignment] -> Part -> [OOXMLRow]
+partToRows rowType aligns part =
+  let
+    toOOXMLCell :: Alignment -> RowIndex -> ColIndex -> GridCell -> [OOXMLCell]
+    toOOXMLCell columnAlign ridx cidx = \case
+      ContentCell attr align rowspan colspan blocks ->
+        -- Respect non-default, cell specific alignment.
+        let align' = case align of
+              AlignDefault -> columnAlign
+              _            -> align
+        in [OOXMLCell attr align' rowspan colspan blocks]
+      ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' ->
+        case (partCellArray part)!idx' of
+          (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan]
+          x -> error $ "Content cell expected, got, " ++ show x ++
+                       " at index " ++ show idx'
+      _ -> mempty
+    mkRow :: (RowIndex, Attr) -> OOXMLRow
+    mkRow (ridx, attr) = OOXMLRow rowType attr
+                       . mconcat
+                       . zipWith (\align -> uncurry $ toOOXMLCell align ridx)
+                                 aligns
+                       . assocs
+                       . rowArray ridx
+                       $ partCellArray part
+  in map mkRow $ assocs (partRowAttrs part)
+
+rowToOpenXML :: PandocMonad m
+             => ([Block] -> WS m [Content])
+             -> OOXMLRow
+             -> WS m Element
+rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do
+  xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells
+  let addTrPr = case rowType of
+        HeadRow -> (mknode "w:trPr" []
+                    [mknode "w:tblHeader" [("w:val", "true")] ()] :)
+        BodyRow -> id
+        FootRow -> id
+  return $ mknode "w:tr" [] (addTrPr xmlcells)
+
+ooxmlCellToOpenXML :: PandocMonad m
+                   => ([Block] -> WS m [Content])
+                   -> OOXMLCell
+                   -> WS m Element
+ooxmlCellToOpenXML blocksToOpenXML = \case
+  OOXMLCellMerge (ColSpan colspan) -> do
+    return $ mknode "w:tc" []
+      [ mknode "w:tcPr" [] [ mknode "w:gridSpan" [("w:val", tshow colspan)] ()
+                           , mknode "w:vMerge"   [("w:val", "continue")] () ]
+      , mknode "w:p" [] [mknode "w:pPr" [] ()]]
+  OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do
+    compactStyle <- pStyleM "Compact"
+    es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents
+    -- Table cells require a <w:p> element, even an empty one!
+    -- Not in the spec but in Word 2007, 2010. See #4953. And
+    -- apparently the last element must be a <w:p>, see #6983.
+    return . mknode "w:tc" [] $
+      Elem
+       (mknode "w:tcPr" [] ([ mknode "w:gridSpan" [("w:val", tshow colspan)] ()
+                            | colspan > 1] ++
+                            [ mknode "w:vMerge" [("w:val", "restart")] ()
+                            | rowspan > RowSpan 1 ])) :
+      if null contents
+      then [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
+      else case reverse (onlyElems es) of
+             b:e:_ | qName (elName b) == "bookmarkEnd"  -- y tho?
+                   , qName (elName e) == "p" -> es
+             e:_   | qName (elName e) == "p" -> es
+             _ -> es ++ [Elem $ mknode "w:p" [] ()]
+
+alignmentFor :: Alignment -> Element
+alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs
new file mode 100644
index 000000000..74b8d2753
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx/Types.hs
@@ -0,0 +1,185 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{- |
+Module      : Text.Pandoc.Writers.Docx
+Copyright   : Copyright (C) 2012-2021 John MacFarlane
+License     : GNU GPL, version 2 or above
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Conversion of table blocks to docx.
+-}
+module Text.Pandoc.Writers.Docx.Types
+  ( EnvProps (..)
+  , WriterEnv (..)
+  , defaultWriterEnv
+  , WriterState (..)
+  , defaultWriterState
+  , WS
+  , ListMarker (..)
+  , listMarkerToId
+  , pStyleM
+  , isStyle
+  , setFirstPara
+  , withParaProp
+  , withParaPropM
+  ) where
+
+import Control.Applicative ((<|>))
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import Data.Text (Text)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.MIME (MimeType)
+import Text.Pandoc.Writers.Docx.StyleMap
+import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML
+import qualified Data.ByteString as B
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import qualified Data.Text as T
+
+data ListMarker = NoMarker
+                | BulletMarker
+                | NumberMarker ListNumberStyle ListNumberDelim Int
+                deriving (Show, Read, Eq, Ord)
+
+listMarkerToId :: ListMarker -> Text
+listMarkerToId NoMarker = "990"
+listMarkerToId BulletMarker = "991"
+listMarkerToId (NumberMarker sty delim n) = T.pack $
+  '9' : '9' : styNum : delimNum : show n
+  where styNum = case sty of
+                      DefaultStyle -> '2'
+                      Example      -> '3'
+                      Decimal      -> '4'
+                      LowerRoman   -> '5'
+                      UpperRoman   -> '6'
+                      LowerAlpha   -> '7'
+                      UpperAlpha   -> '8'
+        delimNum = case delim of
+                      DefaultDelim -> '0'
+                      Period       -> '1'
+                      OneParen     -> '2'
+                      TwoParens    -> '3'
+
+
+data EnvProps = EnvProps{ styleElement  :: Maybe Element
+                        , otherElements :: [Element]
+                        }
+
+instance Semigroup EnvProps where
+  EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es')
+
+instance Monoid EnvProps where
+  mempty = EnvProps Nothing []
+  mappend = (<>)
+
+data WriterEnv = WriterEnv
+  { envTextProperties :: EnvProps
+  , envParaProperties :: EnvProps
+  , envRTL            :: Bool
+  , envListLevel      :: Int
+  , envListNumId      :: Int
+  , envInDel          :: Bool
+  , envChangesAuthor  :: Text
+  , envChangesDate    :: Text
+  , envPrintWidth     :: Integer
+  }
+
+defaultWriterEnv :: WriterEnv
+defaultWriterEnv = WriterEnv
+  { envTextProperties = mempty
+  , envParaProperties = mempty
+  , envRTL = False
+  , envListLevel = -1
+  , envListNumId = 1
+  , envInDel = False
+  , envChangesAuthor  = "unknown"
+  , envChangesDate    = "1969-12-31T19:00:00Z"
+  , envPrintWidth     = 1
+  }
+
+
+data WriterState = WriterState{
+         stFootnotes      :: [Element]
+       , stComments       :: [([(Text, Text)], [Inline])]
+       , stSectionIds     :: Set.Set Text
+       , stExternalLinks  :: M.Map Text Text
+       , stImages         :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
+       , stLists          :: [ListMarker]
+       , stInsId          :: Int
+       , stDelId          :: Int
+       , stStyleMaps      :: StyleMaps
+       , stFirstPara      :: Bool
+       , stInTable        :: Bool
+       , stInList         :: Bool
+       , stTocTitle       :: [Inline]
+       , stDynamicParaProps :: Set.Set ParaStyleName
+       , stDynamicTextProps :: Set.Set CharStyleName
+       , stCurId          :: Int
+       , stNextFigureNum  :: Int
+       , stNextTableNum   :: Int
+       }
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+        stFootnotes      = defaultFootnotes
+      , stComments       = []
+      , stSectionIds     = Set.empty
+      , stExternalLinks  = M.empty
+      , stImages         = M.empty
+      , stLists          = [NoMarker]
+      , stInsId          = 1
+      , stDelId          = 1
+      , stStyleMaps      = StyleMaps M.empty M.empty
+      , stFirstPara      = False
+      , stInTable        = False
+      , stInList         = False
+      , stTocTitle       = [Str "Table of Contents"]
+      , stDynamicParaProps = Set.empty
+      , stDynamicTextProps = Set.empty
+      , stCurId          = 20
+      , stNextFigureNum  = 1
+      , stNextTableNum   = 1
+      }
+
+setFirstPara :: PandocMonad m => WS m ()
+setFirstPara =  modify $ \s -> s { stFirstPara = True }
+
+type WS m = ReaderT WriterEnv (StateT WriterState m)
+
+-- Word will insert these footnotes into the settings.xml file
+-- (whether or not they're visible in the document). If they're in the
+-- file, but not in the footnotes.xml file, it will produce
+-- problems. So we want to make sure we insert them into our document.
+defaultFootnotes :: [Element]
+defaultFootnotes = [ mknode "w:footnote"
+                     [("w:type", "separator"), ("w:id", "-1")]
+                     [ mknode "w:p" []
+                       [mknode "w:r" []
+                        [ mknode "w:separator" [] ()]]]
+                   , mknode "w:footnote"
+                     [("w:type", "continuationSeparator"), ("w:id", "0")]
+                     [ mknode "w:p" []
+                       [ mknode "w:r" []
+                         [ mknode "w:continuationSeparator" [] ()]]]]
+
+pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
+pStyleM styleName = do
+  pStyleMap <- gets (smParaStyle . stStyleMaps)
+  let sty' = getStyleIdFromName styleName pStyleMap
+  return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
+
+withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
+withParaProp d p =
+  local (\env -> env {envParaProperties = ep <> envParaProperties env}) p
+  where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d]
+
+withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
+withParaPropM md p = do
+  d <- md
+  withParaProp d p
+
+isStyle :: Element -> Bool
+isStyle e = isElem [] "w" "rStyle" e ||
+            isElem [] "w" "pStyle" e
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 90ec6824f..602c70ebe 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.DokuWiki
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Clare Macrae <clare.macrae@googlemail.com>
@@ -27,6 +27,7 @@ import Control.Monad.Reader (ReaderT, asks, local, runReaderT)
 import Control.Monad.State.Strict (StateT, evalStateT)
 import Data.Default (Default (..))
 import Data.List (transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -172,7 +173,8 @@ blockToDokuWiki opts (Table _ blkCapt specs thead tbody tfoot) = do
                  then return []
                  else zipWithM (tableItemToDokuWiki opts) aligns headers
   rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows
-  let widths = map (maximum . map T.length) $ transpose (headers':rows')
+  let widths = map (maybe 0 maximum . nonEmpty . map T.length)
+                   $ transpose (headers':rows')
   let padTo (width, al) s =
           case width - T.length s of
                x | x > 0 ->
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 12004889f..508fb6a98 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Writers.EPUB
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -24,12 +24,13 @@ import Control.Monad.State.Strict (StateT, evalState, evalStateT, get,
                                    gets, lift, modify)
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as B8
-import Data.Char (isAlphaNum, isAscii, isDigit, toLower)
+import Data.Char (isAlphaNum, isAscii, isDigit)
 import Data.List (isInfixOf, isPrefixOf)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust)
 import qualified Data.Set as Set
-import qualified Data.Text as TS
+import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import Network.HTTP (urlEncode)
 import System.FilePath (takeExtension, takeFileName, makeRelative)
@@ -48,15 +49,13 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..),
                             ObfuscationMethod (NoObfuscation), WrapOption (..),
                             WriterOptions (..))
 import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags',
-                           safeRead, stringify, trim, uniqueIdent, tshow)
+                           stringify, uniqueIdent, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.UUID (getRandomUUID)
 import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
 import Text.Printf (printf)
-import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
-                       add_attrs, lookupAttr, node, onlyElems, parseXML,
-                       ppElement, showElement, strContent, unode, unqual)
+import Text.Pandoc.XML.Light
 import Text.Pandoc.XML (escapeStringForXML)
 import Text.DocTemplates (FromContext(lookupContext), Context(..),
                           ToContext(toVal), Val(..))
@@ -68,69 +67,72 @@ newtype Chapter = Chapter [Block]
 data EPUBState = EPUBState {
         stMediaPaths  :: [(FilePath, (FilePath, Maybe Entry))]
       , stMediaNextId :: Int
-      , stEpubSubdir  :: String
+      , stEpubSubdir  :: FilePath
       }
 
 type E m = StateT EPUBState m
 
 data EPUBMetadata = EPUBMetadata{
-    epubIdentifier    :: [Identifier]
-  , epubTitle         :: [Title]
-  , epubDate          :: [Date]
-  , epubLanguage      :: String
-  , epubCreator       :: [Creator]
-  , epubContributor   :: [Creator]
-  , epubSubject       :: [String]
-  , epubDescription   :: Maybe String
-  , epubType          :: Maybe String
-  , epubFormat        :: Maybe String
-  , epubPublisher     :: Maybe String
-  , epubSource        :: Maybe String
-  , epubRelation      :: Maybe String
-  , epubCoverage      :: Maybe String
-  , epubRights        :: Maybe String
-  , epubCoverImage    :: Maybe String
-  , epubStylesheets   :: [FilePath]
-  , epubPageDirection :: Maybe ProgressionDirection
-  , epubIbooksFields  :: [(String, String)]
-  , epubCalibreFields :: [(String, String)]
+    epubIdentifier          :: [Identifier]
+  , epubTitle               :: [Title]
+  , epubDate                :: [Date]
+  , epubLanguage            :: Text
+  , epubCreator             :: [Creator]
+  , epubContributor         :: [Creator]
+  , epubSubject             :: [Text]
+  , epubDescription         :: Maybe Text
+  , epubType                :: Maybe Text
+  , epubFormat              :: Maybe Text
+  , epubPublisher           :: Maybe Text
+  , epubSource              :: Maybe Text
+  , epubRelation            :: Maybe Text
+  , epubCoverage            :: Maybe Text
+  , epubRights              :: Maybe Text
+  , epubBelongsToCollection :: Maybe Text
+  , epubGroupPosition       :: Maybe Text
+  , epubCoverImage          :: Maybe FilePath
+  , epubStylesheets         :: [FilePath]
+  , epubPageDirection       :: Maybe ProgressionDirection
+  , epubIbooksFields        :: [(Text, Text)]
+  , epubCalibreFields       :: [(Text, Text)]
   } deriving Show
 
 data Date = Date{
-    dateText  :: String
-  , dateEvent :: Maybe String
+    dateText  :: Text
+  , dateEvent :: Maybe Text
   } deriving Show
 
 data Creator = Creator{
-    creatorText   :: String
-  , creatorRole   :: Maybe String
-  , creatorFileAs :: Maybe String
+    creatorText   :: Text
+  , creatorRole   :: Maybe Text
+  , creatorFileAs :: Maybe Text
   } deriving Show
 
 data Identifier = Identifier{
-    identifierText   :: String
-  , identifierScheme :: Maybe String
+    identifierText   :: Text
+  , identifierScheme :: Maybe Text
   } deriving Show
 
 data Title = Title{
-    titleText   :: String
-  , titleFileAs :: Maybe String
-  , titleType   :: Maybe String
+    titleText   :: Text
+  , titleFileAs :: Maybe Text
+  , titleType   :: Maybe Text
   } deriving Show
 
 data ProgressionDirection = LTR | RTL deriving Show
 
-dcName :: String -> QName
+dcName :: Text -> QName
 dcName n = QName n Nothing (Just "dc")
 
-dcNode :: Node t => String -> t -> Element
+dcNode :: Node t => Text -> t -> Element
 dcNode = node . dcName
 
-opfName :: String -> QName
+opfName :: Text -> QName
 opfName n = QName n Nothing (Just "opf")
 
-toId :: FilePath -> String
-toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
+toId :: FilePath -> Text
+toId = T.pack .
+       map (\x -> if isAlphaNum x || x == '-' || x == '_'
                      then x
                      else '_') . takeFileName
 
@@ -138,8 +140,8 @@ removeNote :: Inline -> Inline
 removeNote (Note _) = Str ""
 removeNote x        = x
 
-toVal' :: String -> Val TS.Text
-toVal' = toVal . TS.pack
+toVal' :: Text -> Val T.Text
+toVal' = toVal
 
 mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry
 mkEntry path content = do
@@ -158,32 +160,37 @@ mkEntry path content = do
 getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
 getEPUBMetadata opts meta = do
   let md = metadataFromMeta opts meta
-  let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
+  elts <- case writerEpubMetadata opts of
+            Nothing -> return []
+            Just t -> case parseXMLContents (TL.fromStrict t) of
+                          Left msg -> throwError $
+                            PandocXMLError "epub metadata" msg
+                          Right ns -> return (onlyElems ns)
   let md' = foldr addMetadataFromXML md elts
   let addIdentifier m =
        if null (epubIdentifier m)
           then do
             randomId <- getRandomUUID
-            return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] }
+            return $ m{ epubIdentifier = [Identifier (tshow randomId) Nothing] }
           else return m
   let addLanguage m =
-       if null (epubLanguage m)
+       if T.null (epubLanguage m)
           then case lookupContext "lang" (writerVariables opts) of
-                     Just x  -> return m{ epubLanguage = TS.unpack x }
+                     Just x  -> return m{ epubLanguage = x }
                      Nothing -> do
                        mLang <- lift $ P.lookupEnv "LANG"
                        let localeLang =
                              case mLang of
                                Just lang ->
-                                 TS.map (\c -> if c == '_' then '-' else c) $
-                                 TS.takeWhile (/='.') lang
+                                 T.map (\c -> if c == '_' then '-' else c) $
+                                 T.takeWhile (/='.') lang
                                Nothing -> "en-US"
-                       return m{ epubLanguage = TS.unpack localeLang }
+                       return m{ epubLanguage = localeLang }
           else return m
   let fixDate m =
        if null (epubDate m)
           then do
-            currentTime <- lift P.getCurrentTime
+            currentTime <- lift P.getTimestamp
             return $ m{ epubDate = [ Date{
                              dateText = showDateTimeISO8601 currentTime
                            , dateEvent = Nothing } ] }
@@ -193,7 +200,7 @@ getEPUBMetadata opts meta = do
           then return m
           else do
             let authors' = map stringify $ docAuthors meta
-            let toAuthor name = Creator{ creatorText = TS.unpack name
+            let toAuthor name = Creator{ creatorText = name
                                        , creatorRole = Just "aut"
                                        , creatorFileAs = Nothing }
             return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
@@ -235,35 +242,37 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
   | name == "relation" = md { epubRelation = Just $ strContent e }
   | name == "coverage" = md { epubCoverage = Just $ strContent e }
   | name == "rights" = md { epubRights = Just $ strContent e }
+  | name == "belongs-to-collection" = md { epubBelongsToCollection = Just $ strContent e }
+  | name == "group-position" = md { epubGroupPosition = Just $ strContent e }  
   | otherwise = md
   where getAttr n = lookupAttr (opfName n) attrs
 addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md =
   case getAttr "property" of
-       Just s | "ibooks:" `isPrefixOf` s ->
-                md{ epubIbooksFields = (drop 7 s, strContent e) :
+       Just s | "ibooks:" `T.isPrefixOf` s ->
+                md{ epubIbooksFields = (T.drop 7 s, strContent e) :
                        epubIbooksFields md }
        _ -> case getAttr "name" of
-                 Just s | "calibre:" `isPrefixOf` s ->
+                 Just s | "calibre:" `T.isPrefixOf` s ->
                    md{ epubCalibreFields =
-                         (drop 8 s, fromMaybe "" $ getAttr "content") :
+                         (T.drop 8 s, fromMaybe "" $ getAttr "content") :
                           epubCalibreFields md }
                  _ -> md
   where getAttr n = lookupAttr (unqual n) attrs
 addMetadataFromXML _ md = md
 
-metaValueToString :: MetaValue -> String
-metaValueToString (MetaString s)    = TS.unpack s
-metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils
-metaValueToString (MetaBlocks bs)   = TS.unpack $ stringify bs
+metaValueToString :: MetaValue -> Text
+metaValueToString (MetaString s)    = s
+metaValueToString (MetaInlines ils) = stringify ils
+metaValueToString (MetaBlocks bs)   = stringify bs
 metaValueToString (MetaBool True)   = "true"
 metaValueToString (MetaBool False)  = "false"
 metaValueToString _                 = ""
 
 metaValueToPaths :: MetaValue -> [FilePath]
-metaValueToPaths (MetaList xs) = map metaValueToString xs
-metaValueToPaths x             = [metaValueToString x]
+metaValueToPaths (MetaList xs) = map (T.unpack . metaValueToString) xs
+metaValueToPaths x             = [T.unpack $ metaValueToString x]
 
-getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a]
+getList :: T.Text -> Meta -> (MetaValue -> a) -> [a]
 getList s meta handleMetaValue =
   case lookupMeta s meta of
        Just (MetaList xs) -> map handleMetaValue xs
@@ -287,7 +296,7 @@ getTitle meta = getList "title" meta handleMetaValue
                 , titleType = metaValueToString <$> M.lookup "type" m }
         handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
 
-getCreator :: TS.Text -> Meta -> [Creator]
+getCreator :: T.Text -> Meta -> [Creator]
 getCreator s meta = getList s meta handleMetaValue
   where handleMetaValue (MetaMap m) =
            Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
@@ -295,7 +304,7 @@ getCreator s meta = getList s meta handleMetaValue
                   , creatorRole = metaValueToString <$> M.lookup "role" m }
         handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
 
-getDate :: TS.Text -> Meta -> [Date]
+getDate :: T.Text -> Meta -> [Date]
 getDate s meta = getList s meta handleMetaValue
   where handleMetaValue (MetaMap m) =
            Date{ dateText = fromMaybe "" $
@@ -304,7 +313,7 @@ getDate s meta = getList s meta handleMetaValue
         handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv
                                   , dateEvent = Nothing }
 
-simpleList :: TS.Text -> Meta -> [String]
+simpleList :: T.Text -> Meta -> [Text]
 simpleList s meta =
   case lookupMeta s meta of
        Just (MetaList xs) -> map metaValueToString xs
@@ -313,26 +322,28 @@ simpleList s meta =
 
 metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
 metadataFromMeta opts meta = EPUBMetadata{
-      epubIdentifier         = identifiers
-    , epubTitle              = titles
-    , epubDate               = date
-    , epubLanguage           = language
-    , epubCreator            = creators
-    , epubContributor        = contributors
-    , epubSubject            = subjects
-    , epubDescription        = description
-    , epubType               = epubtype
-    , epubFormat             = format
-    , epubPublisher          = publisher
-    , epubSource             = source
-    , epubRelation           = relation
-    , epubCoverage           = coverage
-    , epubRights             = rights
-    , epubCoverImage         = coverImage
-    , epubStylesheets        = stylesheets
-    , epubPageDirection      = pageDirection
-    , epubIbooksFields       = ibooksFields
-    , epubCalibreFields      = calibreFields
+      epubIdentifier           = identifiers
+    , epubTitle                = titles
+    , epubDate                 = date
+    , epubLanguage             = language
+    , epubCreator              = creators
+    , epubContributor          = contributors
+    , epubSubject              = subjects
+    , epubDescription          = description
+    , epubType                 = epubtype
+    , epubFormat               = format
+    , epubPublisher            = publisher
+    , epubSource               = source
+    , epubRelation             = relation
+    , epubCoverage             = coverage
+    , epubRights               = rights
+    , epubBelongsToCollection  = belongsToCollection
+    , epubGroupPosition        = groupPosition
+    , epubCoverImage           = coverImage
+    , epubStylesheets          = stylesheets
+    , epubPageDirection        = pageDirection
+    , epubIbooksFields         = ibooksFields
+    , epubCalibreFields        = calibreFields
     }
   where identifiers = getIdentifier meta
         titles = getTitle meta
@@ -350,30 +361,31 @@ metadataFromMeta opts meta = EPUBMetadata{
         relation = metaValueToString <$> lookupMeta "relation" meta
         coverage = metaValueToString <$> lookupMeta "coverage" meta
         rights = metaValueToString <$> lookupMeta "rights" meta
-        coverImage =
-            (TS.unpack <$> lookupContext "epub-cover-image"
-                              (writerVariables opts))
+        belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta
+        groupPosition = metaValueToString <$> lookupMeta "group-position" meta
+        coverImage = T.unpack <$>
+            lookupContext "epub-cover-image" (writerVariables opts)
             `mplus` (metaValueToString <$> lookupMeta "cover-image" meta)
         mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta
         stylesheets = maybe [] metaValueToPaths mCss ++
                       case lookupContext "css" (writerVariables opts) of
-                         Just xs -> map TS.unpack xs
+                         Just xs -> map T.unpack xs
                          Nothing ->
                            case lookupContext "css" (writerVariables opts) of
-                             Just x  -> [TS.unpack x]
+                             Just x  -> [T.unpack x]
                              Nothing -> []
-        pageDirection = case map toLower . metaValueToString <$>
+        pageDirection = case T.toLower . metaValueToString <$>
                              lookupMeta "page-progression-direction" meta of
                               Just "ltr" -> Just LTR
                               Just "rtl" -> Just RTL
                               _          -> Nothing
         ibooksFields = case lookupMeta "ibooks" meta of
                             Just (MetaMap mp)
-                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
+                               -> M.toList $ M.map metaValueToString mp
                             _  -> []
         calibreFields = case lookupMeta "calibre" meta of
                             Just (MetaMap mp)
-                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
+                               -> M.toList $ M.map metaValueToString mp
                             _  -> []
 
 -- | Produce an EPUB2 file from a Pandoc document.
@@ -399,9 +411,11 @@ writeEPUB :: PandocMonad m
 writeEPUB epubVersion opts doc = do
   let epubSubdir = writerEpubSubdirectory opts
   -- sanity check on epubSubdir
-  unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
+  unless (T.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
     throwError $ PandocEpubSubdirectoryError epubSubdir
-  let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir }
+  let initState = EPUBState { stMediaPaths = []
+                            , stMediaNextId = 0
+                            , stEpubSubdir = T.unpack epubSubdir }
   evalStateT (pandocToEPUB epubVersion opts doc) initState
 
 pandocToEPUB :: PandocMonad m
@@ -425,7 +439,7 @@ pandocToEPUB version opts doc = do
                         [] -> case epubTitle metadata of
                                    []    -> "UNTITLED"
                                    (x:_) -> titleText x
-                        x  -> TS.unpack $ stringify x
+                        x  -> stringify x
 
   -- stylesheet
   stylesheets <- case epubStylesheets metadata of
@@ -447,7 +461,8 @@ pandocToEPUB version opts doc = do
                            (ListVal $ map
                              (\e -> toVal' $
                                 (if useprefix then "../" else "") <>
-                                makeRelative epubSubdir (eRelativePath e))
+                                T.pack
+                                 (makeRelative epubSubdir (eRelativePath e)))
                              stylesheetEntries)
                              mempty
 
@@ -465,28 +480,34 @@ pandocToEPUB version opts doc = do
                 case epubCoverImage metadata of
                      Nothing   -> return ([],[])
                      Just img  -> do
-                       let coverImage = takeFileName img
+                       let fp = takeFileName img
+                       mediaPaths <- gets (map (fst . snd) . stMediaPaths)
+                       coverImageName <-  -- see #4206
+                            if ("media/" <> fp) `elem` mediaPaths
+                               then getMediaNextNewName (takeExtension fp)
+                               else return fp
                        imgContent <- lift $ P.readFileLazy img
                        (coverImageWidth, coverImageHeight) <-
                              case imageSize opts' (B.toStrict imgContent) of
                                Right sz  -> return $ sizeInPixels sz
                                Left err' -> (0, 0) <$ report
-                                 (CouldNotDetermineImageSize (TS.pack img) err')
+                                 (CouldNotDetermineImageSize (T.pack img) err')
                        cpContent <- lift $ writeHtml
                             opts'{ writerVariables =
                                    Context (M.fromList [
                                     ("coverpage", toVal' "true"),
                                     ("pagetitle", toVal $
-                                      escapeStringForXML $ TS.pack plainTitle),
-                                    ("cover-image", toVal' coverImage),
+                                      escapeStringForXML plainTitle),
+                                    ("cover-image",
+                                       toVal' $ T.pack coverImageName),
                                     ("cover-image-width", toVal' $
-                                       show coverImageWidth),
+                                       tshow coverImageWidth),
                                     ("cover-image-height", toVal' $
-                                       show coverImageHeight)]) <>
+                                       tshow coverImageHeight)]) <>
                                      cssvars True <> vars }
                             (Pandoc meta [])
                        coverEntry <- mkEntry "text/cover.xhtml" cpContent
-                       coverImageEntry <- mkEntry ("media/" ++ coverImage)
+                       coverImageEntry <- mkEntry ("media/" ++ coverImageName)
                                              imgContent
                        return ( [ coverEntry ]
                               , [ coverImageEntry ] )
@@ -498,7 +519,7 @@ pandocToEPUB version opts doc = do
                                         ("titlepage", toVal' "true"),
                                         ("body-type",  toVal' "frontmatter"),
                                         ("pagetitle", toVal $
-                                            escapeStringForXML $ TS.pack plainTitle)])
+                                            escapeStringForXML plainTitle)])
                                       <> cssvars True <> vars }
                                (Pandoc meta [])
   tpEntry <- mkEntry "text/title_page.xhtml" tpContent
@@ -507,7 +528,7 @@ pandocToEPUB version opts doc = do
   let matchingGlob f = do
         xs <- lift $ P.glob f
         when (null xs) $
-          report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files"
+          report $ CouldNotFetchResource (T.pack f) "glob did not match any font files"
         return xs
   let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<<
                         lift (P.readFileLazy f)
@@ -554,16 +575,42 @@ pandocToEPUB version opts doc = do
 
   let chapters' = secsToChapters secs
 
-  let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)]
+  let extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)]
       extractLinkURL' num (Span (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
+      extractLinkURL' num (Link (ident, _, _) _ _)
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
+      extractLinkURL' num (Image (ident, _, _) _ _)
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
+      extractLinkURL' num (RawInline fmt raw)
+        | isHtmlFormat fmt
+        = foldr (\tag ->
+                   case tag of
+                     TagOpen{} ->
+                       case fromAttrib "id" tag of
+                         "" -> id
+                         x  -> ((x, showChapter num <> "#" <> x):)
+                     _ -> id)
+            [] (parseTags raw)
       extractLinkURL' _ _ = []
 
-  let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)]
+  let extractLinkURL :: Int -> Block -> [(T.Text, T.Text)]
       extractLinkURL num (Div (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (Header _ (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
+      extractLinkURL num (Table (ident,_,_) _ _ _ _ _)
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
+      extractLinkURL num (RawBlock fmt raw)
+        | isHtmlFormat fmt
+        = foldr (\tag ->
+                   case tag of
+                     TagOpen{} ->
+                       case fromAttrib "id" tag of
+                         "" -> id
+                         x  -> ((x, showChapter num <> "#" <> x):)
+                     _ -> id)
+            [] (parseTags raw)
       extractLinkURL num b = query (extractLinkURL' num) b
 
   let reftable = concat $ zipWith (\(Chapter bs) num ->
@@ -572,7 +619,7 @@ pandocToEPUB version opts doc = do
 
   let fixInternalReferences :: Inline -> Inline
       fixInternalReferences (Link attr lab (src, tit))
-        | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of
+        | Just ('#', xs) <- T.uncons src = case lookup xs reftable of
              Just ys -> Link attr lab (ys, tit)
              Nothing -> Link attr lab (src, tit)
       fixInternalReferences x = x
@@ -585,7 +632,7 @@ pandocToEPUB version opts doc = do
                  chapters'
 
   let chapToEntry num (Chapter bs) =
-        mkEntry ("text/" ++ showChapter num) =<<
+        mkEntry ("text/" ++ T.unpack (showChapter num)) =<<
         writeHtml opts'{ writerVariables =
                             Context (M.fromList
                                      [("body-type", toVal' bodyType),
@@ -632,12 +679,12 @@ pandocToEPUB version opts doc = do
   let chapterNode ent = unode "item" !
                            ([("id", toId $ makeRelative epubSubdir
                                          $ eRelativePath ent),
-                             ("href", makeRelative epubSubdir
+                             ("href", T.pack $ makeRelative epubSubdir
                                       $ eRelativePath ent),
                              ("media-type", "application/xhtml+xml")]
                             ++ case props ent of
                                     [] -> []
-                                    xs -> [("properties", unwords xs)])
+                                    xs -> [("properties", T.unwords xs)])
                         $ ()
 
   let chapterRefNode ent = unode "itemref" !
@@ -646,17 +693,17 @@ pandocToEPUB version opts doc = do
   let pictureNode ent = unode "item" !
                            [("id", toId $ makeRelative epubSubdir
                                         $ eRelativePath ent),
-                            ("href", makeRelative epubSubdir
+                            ("href", T.pack $ makeRelative epubSubdir
                                      $ eRelativePath ent),
                             ("media-type",
-                               maybe "application/octet-stream" TS.unpack
+                               fromMaybe "application/octet-stream"
                                $ mediaTypeOf $ eRelativePath ent)] $ ()
   let fontNode ent = unode "item" !
                            [("id", toId $ makeRelative epubSubdir
                                         $ eRelativePath ent),
-                            ("href", makeRelative epubSubdir
+                            ("href", T.pack $ makeRelative epubSubdir
                                      $ eRelativePath ent),
-                            ("media-type", maybe "" TS.unpack $
+                            ("media-type", fromMaybe "" $
                                   getMimeType $ eRelativePath ent)] $ ()
 
   let tocTitle = maybe plainTitle
@@ -664,8 +711,8 @@ pandocToEPUB version opts doc = do
   uuid <- case epubIdentifier metadata of
             (x:_) -> return $ identifierText x  -- use first identifier as UUID
             []    -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null"  -- shouldn't happen
-  currentTime <- lift P.getCurrentTime
-  let contentsData = UTF8.fromStringLazy $ ppTopElement $
+  currentTime <- lift P.getTimestamp
+  let contentsData = UTF8.fromTextLazy $ TL.fromStrict $ ppTopElement $
         unode "package" !
           ([("version", case version of
                              EPUB2 -> "2.0"
@@ -683,7 +730,8 @@ pandocToEPUB version opts doc = do
                                ,("media-type","application/xhtml+xml")] ++
                                [("properties","nav") | epub3 ]) $ ()
              ] ++
-             [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp)
+             [ unode "item" ! [("id","stylesheet" <> tshow n)
+                              , ("href", T.pack fp)
                               ,("media-type","text/css")] $ () |
                              (n :: Int, fp) <- zip [1..] (map
                                (makeRelative epubSubdir . eRelativePath)
@@ -728,7 +776,7 @@ pandocToEPUB version opts doc = do
   let tocLevel = writerTOCDepth opts
 
   let navPointNode :: PandocMonad m
-                   => (Int -> [Inline] -> TS.Text -> [Element] -> Element)
+                   => (Int -> [Inline] -> T.Text -> [Element] -> Element)
                    -> Block -> StateT Int m [Element]
       navPointNode formatter (Div (ident,_,_)
                                 (Header lvl (_,_,kvs) ils : children)) =
@@ -738,7 +786,7 @@ pandocToEPUB version opts doc = do
              n <- get
              modify (+1)
              let num = fromMaybe "" $ lookup "number" kvs
-             let tit = if writerNumberSections opts && not (TS.null num)
+             let tit = if writerNumberSections opts && not (T.null num)
                           then Span ("", ["section-header-number"], [])
                                 [Str num] : Space : ils
                           else ils
@@ -752,21 +800,21 @@ pandocToEPUB version opts doc = do
         concat <$> mapM (navPointNode formatter) bs
       navPointNode _ _ = return []
 
-  let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
+  let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
       navMapFormatter n tit src subs = unode "navPoint" !
-               [("id", "navPoint-" ++ show n)] $
-                  [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit
-                  , unode "content" ! [("src", "text/" <> TS.unpack src)] $ ()
+               [("id", "navPoint-" <> tshow n)] $
+                  [ unode "navLabel" $ unode "text" $ stringify tit
+                  , unode "content" ! [("src", "text/" <> src)] $ ()
                   ] ++ subs
 
   let tpNode = unode "navPoint" !  [("id", "navPoint-0")] $
-                  [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta)
+                  [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
                   , unode "content" ! [("src", "text/title_page.xhtml")]
                   $ () ]
 
   navMap <- lift $ evalStateT
              (concat <$> mapM (navPointNode navMapFormatter) secs) 1
-  let tocData = UTF8.fromStringLazy $ ppTopElement $
+  let tocData = B.fromStrict $ UTF8.fromText $ ppTopElement $
         unode "ncx" ! [("version","2005-1")
                        ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
           [ unode "head" $
@@ -788,23 +836,24 @@ pandocToEPUB version opts doc = do
           ]
   tocEntry <- mkEntry "toc.ncx" tocData
 
-  let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
+  let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
       navXhtmlFormatter n tit src subs = unode "li" !
-                                       [("id", "toc-li-" ++ show n)] $
+                                       [("id", "toc-li-" <> tshow n)] $
                                             (unode "a" !
-                                                [("href", "text/" <> TS.unpack src)]
+                                                [("href", "text/" <> src)]
                                              $ titElements)
                                             : case subs of
                                                  []    -> []
                                                  (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
-          where titElements = parseXML titRendered
+          where titElements = either (const []) id $
+                                parseXMLContents (TL.fromStrict titRendered)
                 titRendered = case P.runPure
                                (writeHtmlStringForEPUB version
                                  opts{ writerTemplate = Nothing
                                      , writerVariables =
                                          Context (M.fromList
                                            [("pagetitle", toVal $
-                                             escapeStringForXML $ TS.pack plainTitle)])
+                                             escapeStringForXML plainTitle)])
                                        <> writerVariables opts}
                                  (Pandoc nullMeta
                                    [Plain $ walk clean tit])) of
@@ -819,33 +868,40 @@ pandocToEPUB version opts doc = do
   tocBlocks <- lift $ evalStateT
                  (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1
   let navBlocks = [RawBlock (Format "html")
-                  $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces
+                  $ showElement $ -- prettyprinting introduces bad spaces
                    unode navtag ! ([("epub:type","toc") | epub3] ++
                                    [("id","toc")]) $
                     [ unode "h1" ! [("id","toc-title")] $ tocTitle
                     , unode "ol" ! [("class","toc")] $ tocBlocks ]]
   let landmarkItems = if epub3
-                         then [ unode "li"
+                         then unode "li"
+                                [ unode "a" ! [("href",
+                                                  "text/title_page.xhtml")
+                                               ,("epub:type", "titlepage")] $
+                                  ("Title Page" :: Text) ] :
+                              [ unode "li"
                                 [ unode "a" ! [("href", "text/cover.xhtml")
                                               ,("epub:type", "cover")] $
-                                  ("Cover" :: String)] |
+                                  ("Cover" :: Text)] |
                                   isJust (epubCoverImage metadata)
                               ] ++
                               [ unode "li"
                                 [ unode "a" ! [("href", "#toc")
                                               ,("epub:type", "toc")] $
-                                    ("Table of contents" :: String)
+                                    ("Table of Contents" :: Text)
                                 ] | writerTableOfContents opts
                               ]
                          else []
-  let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $
+  let landmarks = [RawBlock (Format "html") $ ppElement $
                     unode "nav" ! [("epub:type","landmarks")
                                   ,("id","landmarks")
                                   ,("hidden","hidden")] $
                     [ unode "ol" landmarkItems ]
                   | not (null landmarkItems)]
   navData <- lift $ writeHtml opts'{ writerVariables =
-                     Context (M.fromList [("navpage", toVal' "true")])
+                     Context (M.fromList [("navpage", toVal' "true")
+                                         ,("body-type",  toVal' "frontmatter")
+                                         ])
                      <> cssvars False <> vars }
             (Pandoc (setMeta "title"
                      (walk removeNote $ fromList $ docTitle' meta) nullMeta)
@@ -857,22 +913,22 @@ pandocToEPUB version opts doc = do
                         UTF8.fromStringLazy "application/epub+zip"
 
   -- container.xml
-  let containerData = UTF8.fromStringLazy $ ppTopElement $
+  let containerData = B.fromStrict $ UTF8.fromText $ ppTopElement $
        unode "container" ! [("version","1.0")
               ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
          unode "rootfiles" $
            unode "rootfile" ! [("full-path",
                     (if null epubSubdir
                         then ""
-                        else epubSubdir ++ "/") ++ "content.opf")
+                        else T.pack epubSubdir <> "/") <> "content.opf")
                ,("media-type","application/oebps-package+xml")] $ ()
   containerEntry <- mkEntry "META-INF/container.xml" containerData
 
   -- com.apple.ibooks.display-options.xml
-  let apple = UTF8.fromStringLazy $ ppTopElement $
+  let apple = B.fromStrict $ UTF8.fromText $ ppTopElement $
         unode "display_options" $
           unode "platform" ! [("name","*")] $
-            unode "option" ! [("name","specified-fonts")] $ ("true" :: String)
+            unode "option" ! [("name","specified-fonts")] $ ("true" :: Text)
   appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
 
   -- construct archive
@@ -893,8 +949,9 @@ metadataElement version md currentTime =
                   ++ descriptionNodes ++ typeNodes ++ formatNodes
                   ++ publisherNodes ++ sourceNodes ++ relationNodes
                   ++ coverageNodes ++ rightsNodes ++ coverImageNodes
-                  ++ modifiedNodes
-        withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
+                  ++ modifiedNodes ++ belongsToCollectionNodes
+        withIds base f = concat . zipWith f (map (\x -> base <>
+                                                        T.cons '-' (tshow x))
                          ([1..] :: [Int]))
         identifierNodes = withIds "epub-id" toIdentifierNode $
                           epubIdentifier md
@@ -908,9 +965,9 @@ metadataElement version md currentTime =
                                  (x:_) -> [dcNode "date" ! [("id","epub-date")]
                                             $ dateText x]
         ibooksNodes = map ibooksNode (epubIbooksFields md)
-        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v
+        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" <> k)] $ v
         calibreNodes = map calibreNode (epubCalibreFields md)
-        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k),
+        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" <> k),
                                              ("content", v)] $ ()
         languageNodes = [dcTag "language" $ epubLanguage md]
         creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
@@ -932,7 +989,16 @@ metadataElement version md currentTime =
             $ epubCoverImage md
         modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
                showDateTimeISO8601 currentTime | version == EPUB3 ]
-        dcTag n s = unode ("dc:" ++ n) s
+        belongsToCollectionNodes = 
+            maybe []
+                (\belongsToCollection -> (unode "meta" !  [("property", "belongs-to-collection"), ("id", "epub-collection-1")] $ belongsToCollection )
+                :
+                [unode "meta" !  [("refines", "#epub-collection-1"), ("property", "collection-type")] $ ("series" :: Text) ])
+                (epubBelongsToCollection md)++
+            maybe []
+                (\groupPosition -> [unode "meta" !  [("refines", "#epub-collection-1"), ("property", "group-position")] $ groupPosition ])
+                (epubGroupPosition md)
+        dcTag n s = unode ("dc:" <> n) s
         dcTag' n s = [dcTag n s]
         toIdentifierNode id' (Identifier txt scheme)
           | version == EPUB2 = [dcNode "identifier" !
@@ -940,7 +1006,7 @@ metadataElement version md currentTime =
               txt]
           | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) :
               maybe [] ((\x -> [unode "meta" !
-                                [ ("refines",'#':id')
+                                [ ("refines","#" <> id')
                                 , ("property","identifier-type")
                                 , ("scheme","onix:codelist5")
                                 ]
@@ -956,10 +1022,10 @@ metadataElement version md currentTime =
                (creatorRole creator >>= toRelator)) $ creatorText creator]
           | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","file-as")] $ x])
+                   [("refines","#" <> id'),("property","file-as")] $ x])
                    (creatorFileAs creator) ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","role"),
+                   [("refines","#" <> id'),("property","role"),
                      ("scheme","marc:relators")] $ x])
                    (creatorRole creator >>= toRelator)
         toTitleNode id' title
@@ -971,16 +1037,16 @@ metadataElement version md currentTime =
           | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
               ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","file-as")] $ x])
+                   [("refines","#" <> id'),("property","file-as")] $ x])
                    (titleFileAs title) ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","title-type")] $ x])
+                   [("refines","#" <> id'),("property","title-type")] $ x])
                    (titleType title)
         toDateNode id' date = [dcNode "date" !
              (("id",id') :
                 maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
                  dateText date]
-        schemeToOnix :: String -> String
+        schemeToOnix :: Text -> Text
         schemeToOnix "ISBN-10"              = "02"
         schemeToOnix "GTIN-13"              = "03"
         schemeToOnix "UPC"                  = "04"
@@ -998,59 +1064,64 @@ metadataElement version md currentTime =
         schemeToOnix "OLCC"                 = "28"
         schemeToOnix _                      = "01"
 
-showDateTimeISO8601 :: UTCTime -> String
-showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
+showDateTimeISO8601 :: UTCTime -> Text
+showDateTimeISO8601 = T.pack . formatTime defaultTimeLocale "%FT%TZ"
 
 transformTag :: PandocMonad m
-             => Tag TS.Text
-             -> E m (Tag TS.Text)
+             => Tag T.Text
+             -> E m (Tag T.Text)
 transformTag tag@(TagOpen name attr)
   | name `elem` ["video", "source", "img", "audio"] &&
     isNothing (lookup "data-external" attr) = do
   let src = fromAttrib "src" tag
   let poster = fromAttrib "poster" tag
-  newsrc <- modifyMediaRef $ TS.unpack src
-  newposter <- modifyMediaRef $ TS.unpack poster
+  newsrc <- modifyMediaRef $ T.unpack src
+  newposter <- modifyMediaRef $ T.unpack poster
   let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
-              [("src", "../" <> newsrc) | not (TS.null newsrc)] ++
-              [("poster", "../" <> newposter) | not (TS.null newposter)]
+              [("src", "../" <> newsrc) | not (T.null newsrc)] ++
+              [("poster", "../" <> newposter) | not (T.null newposter)]
   return $ TagOpen name attr'
 transformTag tag = return tag
 
 modifyMediaRef :: PandocMonad m
                => FilePath
-               -> E m TS.Text
+               -> E m T.Text
 modifyMediaRef "" = return ""
 modifyMediaRef oldsrc = do
   media <- gets stMediaPaths
   case lookup oldsrc media of
-         Just (n,_) -> return $ TS.pack n
+         Just (n,_) -> return $ T.pack n
          Nothing    -> catchError
-           (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc
-               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack
+           (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc
+               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack
                          (("." <>) <$> (mbMime >>= extensionFromMimeType))
                newName <- getMediaNextNewName ext
                let newPath = "media/" ++ newName
                entry <- mkEntry newPath (B.fromChunks . (:[]) $ img)
                modify $ \st -> st{ stMediaPaths =
                             (oldsrc, (newPath, Just entry)):media}
-               return $ TS.pack newPath)
+               return $ T.pack newPath)
            (\e -> do
-                report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e)
-                return $ TS.pack oldsrc)
+                report $ CouldNotFetchResource (T.pack oldsrc) (tshow e)
+                return $ T.pack oldsrc)
 
-getMediaNextNewName :: PandocMonad m => String -> E m String
+getMediaNextNewName :: PandocMonad m => FilePath -> E m FilePath
 getMediaNextNewName ext = do
   nextId <- gets stMediaNextId
   modify $ \st -> st { stMediaNextId = nextId + 1 }
-  let nextName = "file" ++ show nextId ++ ext
-  (P.fetchItem (TS.pack nextName) >> getMediaNextNewName ext) `catchError` const (return nextName)
+  return $ "file" ++ show nextId ++ ext
+
+isHtmlFormat :: Format -> Bool
+isHtmlFormat (Format "html") = True
+isHtmlFormat (Format "html4") = True
+isHtmlFormat (Format "html5") = True
+isHtmlFormat _ = False
 
 transformBlock  :: PandocMonad m
                 => Block
                 -> E m Block
 transformBlock (RawBlock fmt raw)
-  | fmt == Format "html" = do
+  | isHtmlFormat fmt = do
   let tags = parseTags raw
   tags' <- mapM transformTag tags
   return $ RawBlock fmt (renderTags' tags')
@@ -1060,56 +1131,43 @@ transformInline  :: PandocMonad m
                  => WriterOptions
                  -> Inline
                  -> E m Inline
-transformInline _opts (Image attr lab (src,tit)) = do
-    newsrc <- modifyMediaRef $ TS.unpack src
+transformInline _opts (Image attr@(_,_,kvs) lab (src,tit))
+  | isNothing (lookup "external" kvs) = do
+    newsrc <- modifyMediaRef $ T.unpack src
     return $ Image attr lab ("../" <> newsrc, tit)
 transformInline opts x@(Math t m)
   | WebTeX url <- writerHTMLMathMethod opts = do
-    newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m))
+    newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m))
     let mathclass = if t == DisplayMath then "display" else "inline"
     return $ Span ("",["math",mathclass],[])
                 [Image nullAttr [x] ("../" <> newsrc, "")]
 transformInline _opts (RawInline fmt raw)
-  | fmt == Format "html" = do
+  | isHtmlFormat fmt = do
   let tags = parseTags raw
   tags' <- mapM transformTag tags
   return $ RawInline fmt (renderTags' tags')
 transformInline _ x = return x
 
-(!) :: (t -> Element) -> [(String, String)] -> t -> Element
+(!) :: (t -> Element) -> [(Text, Text)] -> t -> Element
 (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
 
--- | Version of 'ppTopElement' that specifies UTF-8 encoding.
-ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
-  -- unEntity removes numeric  entities introduced by ppElement
-  -- (kindlegen seems to choke on these).
-  where unEntity [] = ""
-        unEntity ('&':'#':xs) =
-                   let (ds,ys) = break (==';') xs
-                       rest = drop 1 ys
-                   in  case safeRead (TS.pack $ "'\\" <> ds <> "'") of
-                          Just x  -> x : unEntity rest
-                          Nothing -> '&':'#':unEntity xs
-        unEntity (x:xs) = x : unEntity xs
-
 mediaTypeOf :: FilePath -> Maybe MimeType
 mediaTypeOf x =
   let mediaPrefixes = ["image", "video", "audio"] in
   case getMimeType x of
-    Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y
+    Just y | any (`T.isPrefixOf` y) mediaPrefixes -> Just y
     _      -> Nothing
 
 -- Returns filename for chapter number.
-showChapter :: Int -> String
-showChapter = printf "ch%03d.xhtml"
+showChapter :: Int -> Text
+showChapter = T.pack . printf "ch%03d.xhtml"
 
 -- Add identifiers to any headers without them.
 addIdentifiers :: WriterOptions -> [Block] -> [Block]
 addIdentifiers opts bs = evalState (mapM go bs) Set.empty
  where go (Header n (ident,classes,kvs) ils) = do
          ids <- get
-         let ident' = if TS.null ident
+         let ident' = if T.null ident
                          then uniqueIdent (writerExtensions opts) ils ids
                          else ident
          modify $ Set.insert ident'
@@ -1117,27 +1175,27 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty
        go x = return x
 
 -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
-normalizeDate' :: String -> Maybe String
-normalizeDate' = fmap TS.unpack . go . trim . TS.pack
+normalizeDate' :: Text -> Maybe Text
+normalizeDate' = go . T.strip
   where
     go xs
-      | TS.length xs == 4            -- YYY
-      , TS.all isDigit xs = Just xs
-      | (y, s) <- TS.splitAt 4 xs    -- YYY-MM
-      , Just ('-', m) <- TS.uncons s
-      , TS.length m == 2
-      , TS.all isDigit y && TS.all isDigit m = Just xs
+      | T.length xs == 4            -- YYY
+      , T.all isDigit xs = Just xs
+      | (y, s) <- T.splitAt 4 xs    -- YYY-MM
+      , Just ('-', m) <- T.uncons s
+      , T.length m == 2
+      , T.all isDigit y && T.all isDigit m = Just xs
       | otherwise = normalizeDate xs
 
-toRelator :: String -> Maybe String
+toRelator :: Text -> Maybe Text
 toRelator x
   | x `elem` relators = Just x
-  | otherwise         = lookup (map toLower x) relatorMap
+  | otherwise         = lookup (T.toLower x) relatorMap
 
-relators :: [String]
+relators :: [Text]
 relators = map snd relatorMap
 
-relatorMap :: [(String, String)]
+relatorMap :: [(Text, Text)]
 relatorMap =
            [("abridger", "abr")
            ,("actor", "act")
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 701ff3d9b..3b5d04427 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -3,7 +3,7 @@
 {- |
 Module      : Text.Pandoc.Writers.FB2
 Copyright   : Copyright (C) 2011-2012 Sergey Astanin
-                            2012-2020 John MacFarlane
+                            2012-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane
@@ -19,29 +19,29 @@ FictionBook is an XML-based e-book format. For more information see:
 module Text.Pandoc.Writers.FB2 (writeFB2)  where
 
 import Control.Monad (zipWithM)
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAscii, isControl, isSpace)
 import Data.Either (lefts, rights)
 import Data.List (intercalate)
-import Data.Text (Text, pack)
+import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Network.HTTP (urlEncode)
-import Text.XML.Light
-import qualified Text.XML.Light as X
-import qualified Text.XML.Light.Cursor as XC
-import qualified Text.XML.Light.Input as XI
+import Text.Pandoc.XML.Light as X
 
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
 import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
                            makeSections, tshow, stringify)
 import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable)
+import Data.Generics (everywhere, mkT)
 
 -- | Data to be written at the end of the document:
 -- (foot)notes, URLs, references, images.
@@ -86,7 +86,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do
      (imgs,missing) <- get >>= (lift . fetchImages . imagesToFetch)
      let body' = replaceImagesWithAlt missing body
      let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
-     return $ pack $ xml_head ++ showContent fb2_xml ++ "\n"
+     return $ xml_head <> showContent fb2_xml <> "\n"
   where
   xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
   fb2_attrs =
@@ -98,8 +98,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do
 description :: PandocMonad m => Meta -> FBM m Content
 description meta' = do
   let genre = case lookupMetaString "genre" meta' of
-                "" -> el "genre" ("unrecognised" :: String)
-                s  -> el "genre" (T.unpack s)
+                "" -> el "genre" ("unrecognised" :: Text)
+                s  -> el "genre" s
   bt <- booktitle meta'
   let as = authors meta'
   dd <- docdate meta'
@@ -110,7 +110,7 @@ description meta' = do
                Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s]
                Just (MetaString s)        -> [el "lang" $ iso639 s]
                _                          -> []
-             where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639
+             where iso639 = T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639
   let coverimage url = do
         let img = Image nullAttr mempty (url, "")
         im <- insertImage InlineImage img
@@ -122,7 +122,7 @@ description meta' = do
   return $ el "description"
     [ el "title-info" (genre :
                       (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang))
-    , el "document-info" [el "program-used" ("pandoc" :: String)]
+    , el "document-info" [el "program-used" ("pandoc" :: Text)]
     ]
 
 booktitle :: PandocMonad m => Meta -> FBM m [Content]
@@ -135,15 +135,15 @@ authors meta' = cMap author (docAuthors meta')
 
 author :: [Inline] -> [Content]
 author ss =
-  let ws = words . cMap plain $ ss
-      email = el "email" <$> take 1 (filter ('@' `elem`) ws)
-      ws' = filter ('@' `notElem`) ws
+  let ws = T.words $ mconcat $ map plain ss
+      email = el "email" <$> take 1 (filter (T.any (=='@')) ws)
+      ws' = filter (not . T.any (== '@')) ws
       names = case ws' of
                 [nickname] -> [ el "nickname" nickname ]
                 [fname, lname] -> [ el "first-name" fname
                                     , el "last-name" lname ]
                 (fname:rest) -> [ el "first-name" fname
-                                , el "middle-name" (concat . init $ rest)
+                                , el "middle-name" (T.concat . init $ rest)
                                 , el "last-name" (last rest) ]
                 [] -> []
   in  list $ el "author" (names ++ email)
@@ -204,7 +204,7 @@ renderFootnotes = do
          el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
   where
     renderFN (n, idstr, cs) =
-        let fn_texts = el "title" (el "p" (show n)) : cs
+        let fn_texts = el "title" (el "p" (tshow n)) : cs
         in  el "section" ([uattr "id" idstr], fn_texts)
 
 -- | Fetch images and encode them for the FictionBook XML.
@@ -280,7 +280,7 @@ isMimeType s =
  where
    types =  ["text","image","audio","video","application","message","multipart"]
    valid c = isAscii c && not (isControl c) && not (isSpace c) &&
-             c `notElem` ("()<>@,;:\\\"/[]?=" :: String)
+             c `notElem` ("()<>@,;:\\\"/[]?=" :: [Char])
 
 footnoteID :: Int -> Text
 footnoteID i = "n" <> tshow i
@@ -304,10 +304,13 @@ blockToXml (Para [Image atr alt (src,tgt)])
   = insertImage NormalImage (Image atr alt (src,tit))
 blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss
 blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
-                             map (el "p" . el "code" . T.unpack) . T.lines $ s
+                             map (el "p" . el "code") . T.lines $ s
 blockToXml (RawBlock f str) =
   if f == Format "fb2"
-    then return $ XI.parseXML str
+    then
+      case parseXMLContents (TL.fromStrict str) of
+         Left msg  -> throwError $ PandocXMLError "" msg
+         Right nds -> return nds
     else return []
 blockToXml (Div _ bs) = cMapM blockToXml bs
 blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs
@@ -341,11 +344,11 @@ blockToXml (Table _ blkCapt specs thead tbody tfoot) = do
     c <- el "emphasis" <$> cMapM toXml caption
     return [el "table" (hd <> bd), el "p" c]
     where
-      mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content
+      mkrow :: PandocMonad m => Text -> [[Block]] -> [Alignment] -> FBM m Content
       mkrow tag cells aligns' =
         el "tr" <$> mapM (mkcell tag) (zip cells aligns')
       --
-      mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content
+      mkcell :: PandocMonad m => Text -> ([Block], Alignment) -> FBM m Content
       mkcell tag (cell, align) = do
         cblocks <- cMapM blockToXml cell
         return $ el tag ([align_attr align], cblocks)
@@ -419,7 +422,7 @@ toXml (Quoted DoubleQuote ss) = do
   inner <- cMapM toXml ss
   return $ [txt "“"] ++ inner ++ [txt "”"]
 toXml (Cite _ ss) = cMapM toXml ss  -- FIXME: support citation styles
-toXml (Code _ s) = return [el "code" $ T.unpack s]
+toXml (Code _ s) = return [el "code" s]
 toXml Space = return [txt " "]
 toXml SoftBreak = return [txt "\n"]
 toXml LineBreak = return [txt "\n"]
@@ -451,7 +454,7 @@ insertMath immode formula = do
        let imgurl = url <> T.pack (urlEncode $ T.unpack formula)
        let img = Image nullAttr alt (imgurl, "")
        insertImage immode img
-    _ -> return [el "code" $ T.unpack formula]
+    _ -> return [el "code" formula]
 
 insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content]
 insertImage immode (Image _ alt (url,ttl)) = do
@@ -466,31 +469,16 @@ insertImage immode (Image _ alt (url,ttl)) = do
          el "image" $
             [ attr ("l","href") ("#" <> fname)
             , attr ("l","type") (tshow immode)
-            , uattr "alt" (T.pack $ cMap plain alt) ]
+            , uattr "alt" (mconcat $ map plain alt) ]
             ++ ttlattr
 insertImage _ _ = error "unexpected inline instead of image"
 
 replaceImagesWithAlt :: [Text] -> Content -> Content
-replaceImagesWithAlt missingHrefs body =
-  let cur = XC.fromContent body
-      cur' = replaceAll cur
-  in  XC.toTree . XC.root $ cur'
+replaceImagesWithAlt missingHrefs = everywhere (mkT go)
   where
-  --
-    replaceAll :: XC.Cursor -> XC.Cursor
-    replaceAll c =
-        let n = XC.current c
-            c' = if isImage n && isMissing n
-                 then XC.modifyContent replaceNode c
-                 else c
-        in  case XC.nextDF c' of
-              (Just cnext) -> replaceAll cnext
-              Nothing      -> c'  -- end of document
-  --
-    isImage :: Content -> Bool
-    isImage (Elem e) = elName e == uname "image"
-    isImage _        = False
-  --
+    go c = if isMissing c
+              then replaceNode c
+              else c
     isMissing (Elem img@Element{}) =
         let imgAttrs = elAttribs img
             badAttrs = map (attr ("l","href")) missingHrefs
@@ -500,18 +488,18 @@ replaceImagesWithAlt missingHrefs body =
     replaceNode :: Content -> Content
     replaceNode n@(Elem img@Element{}) =
         let attrs = elAttribs img
-            alt = getAttrVal attrs (uname "alt")
+            alt = getAttrVal attrs (unqual "alt")
             imtype = getAttrVal attrs (qname "l" "type")
         in case (alt, imtype) of
              (Just alt', Just imtype') ->
-                 if imtype' == show NormalImage
+                 if imtype' == tshow NormalImage
                  then el "p" alt'
-                 else txt $ T.pack alt'
-             (Just alt', Nothing) -> txt $ T.pack alt'  -- no type attribute
+                 else txt alt'
+             (Just alt', Nothing) -> txt alt'  -- no type attribute
              _ -> n   -- don't replace if alt text is not found
     replaceNode n = n
   --
-    getAttrVal :: [X.Attr] -> QName -> Maybe String
+    getAttrVal :: [X.Attr] -> QName -> Maybe Text
     getAttrVal attrs name =
         case filter ((name ==) . attrKey) attrs of
            (a:_) -> Just (attrVal a)
@@ -519,7 +507,7 @@ replaceImagesWithAlt missingHrefs body =
 
 
 -- | Wrap all inlines with an XML tag (given its unqualified name).
-wrap :: PandocMonad m => String -> [Inline] -> FBM m Content
+wrap :: PandocMonad m => Text -> [Inline] -> FBM m Content
 wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
 
 -- " Create a singleton list.
@@ -527,31 +515,31 @@ list :: a -> [a]
 list = (:[])
 
 -- | Convert an 'Inline' to plaintext.
-plain :: Inline -> String
-plain (Str s)               = T.unpack s
-plain (Emph ss)             = cMap plain ss
-plain (Underline ss)        = cMap plain ss
-plain (Span _ ss)           = cMap plain ss
-plain (Strong ss)           = cMap plain ss
-plain (Strikeout ss)        = cMap plain ss
-plain (Superscript ss)      = cMap plain ss
-plain (Subscript ss)        = cMap plain ss
-plain (SmallCaps ss)        = cMap plain ss
-plain (Quoted _ ss)         = cMap plain ss
-plain (Cite _ ss)           = cMap plain ss  -- FIXME
-plain (Code _ s)            = T.unpack s
+plain :: Inline -> Text
+plain (Str s)               = s
+plain (Emph ss)             = mconcat $ map plain ss
+plain (Underline ss)        = mconcat $ map plain ss
+plain (Span _ ss)           = mconcat $ map plain ss
+plain (Strong ss)           = mconcat $ map plain ss
+plain (Strikeout ss)        = mconcat $ map plain ss
+plain (Superscript ss)      = mconcat $ map plain ss
+plain (Subscript ss)        = mconcat $ map plain ss
+plain (SmallCaps ss)        = mconcat $ map plain ss
+plain (Quoted _ ss)         = mconcat $ map plain ss
+plain (Cite _ ss)           = mconcat $ map plain ss  -- FIXME
+plain (Code _ s)            = s
 plain Space                 = " "
 plain SoftBreak             = " "
 plain LineBreak             = "\n"
-plain (Math _ s)            = T.unpack s
+plain (Math _ s)            = s
 plain (RawInline _ _)       = ""
-plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"])
-plain (Image _ alt _)       = cMap plain alt
+plain (Link _ text (url,_)) = mconcat (map plain text ++ [" <", url, ">"])
+plain (Image _ alt _)       = mconcat $ map plain alt
 plain (Note _)              = ""  -- FIXME
 
 -- | Create an XML element.
 el :: (Node t)
-   => String   -- ^ unqualified element name
+   => Text     -- ^ unqualified element name
    -> t        -- ^ node contents
    -> Content  -- ^ XML content
 el name cs = Elem $ unode name cs
@@ -564,22 +552,18 @@ spaceBeforeAfter cs =
 
 -- | Create a plain-text XML content.
 txt :: Text -> Content
-txt s = Text $ CData CDataText (T.unpack s) Nothing
+txt s = Text $ CData CDataText s Nothing
 
 -- | Create an XML attribute with an unqualified name.
-uattr :: String -> Text -> Text.XML.Light.Attr
-uattr name = Attr (uname name) . T.unpack
+uattr :: Text -> Text -> X.Attr
+uattr name = Attr (unqual name)
 
 -- | Create an XML attribute with a qualified name from given namespace.
-attr :: (String, String) -> Text -> Text.XML.Light.Attr
-attr (ns, name) = Attr (qname ns name) . T.unpack
-
--- | Unqualified name
-uname :: String -> QName
-uname name = QName name Nothing Nothing
+attr :: (Text, Text) -> Text -> X.Attr
+attr (ns, name) = Attr (qname ns name)
 
 -- | Qualified name
-qname :: String -> String -> QName
+qname :: Text -> Text -> QName
 qname ns name = QName name Nothing (Just ns)
 
 -- | Abbreviation for 'concatMap'.
diff --git a/src/Text/Pandoc/Writers/GridTable.hs b/src/Text/Pandoc/Writers/GridTable.hs
new file mode 100644
index 000000000..bc468febc
--- /dev/null
+++ b/src/Text/Pandoc/Writers/GridTable.hs
@@ -0,0 +1,157 @@
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE LambdaCase                 #-}
+{-# LANGUAGE TupleSections              #-}
+
+{- |
+Module      : Text.Pandoc.Writers.GridTable
+Copyright   : © 2020-2021 Albert Krewinkel
+License     : GNU GPL, version 2 or above
+
+Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
+
+Grid representation of pandoc tables.
+-}
+module Text.Pandoc.Writers.GridTable
+  ( Table (..)
+  , GridCell (..)
+  , RowIndex (..)
+  , ColIndex (..)
+  , CellIndex
+  , Part (..)
+  , toTable
+  , rowArray
+  ) where
+
+import Control.Monad (forM_)
+import Control.Monad.ST
+import Data.Array
+import Data.Array.MArray
+import Data.Array.ST
+import Data.Maybe (listToMaybe)
+import Data.STRef
+import Text.Pandoc.Definition hiding (Table)
+import qualified Text.Pandoc.Builder as B
+
+-- | A grid cell contains either a real table cell, or is the
+-- continuation of a column or row-spanning cell. In the latter case,
+-- the index of the continued cell is provided.
+data GridCell
+  = ContentCell Attr Alignment RowSpan ColSpan [Block]
+  | ContinuationCell CellIndex
+  deriving (Show)
+
+-- | Row index in a table part.
+newtype RowIndex = RowIndex Int deriving (Enum, Eq, Ix, Ord, Show)
+-- | Column index in a table part.
+newtype ColIndex = ColIndex Int deriving (Enum, Eq, Ix, Ord, Show)
+
+-- | Index to a cell in a table part.
+type CellIndex = (RowIndex, ColIndex)
+
+-- | Cells are placed on a grid. Row attributes are stored in a separate
+-- array.
+data Part = Part
+  { partAttr :: Attr
+  , partCellArray :: Array (RowIndex,ColIndex) GridCell
+  , partRowAttrs  :: Array RowIndex Attr
+  }
+
+data Table = Table
+  { tableAttr     :: Attr
+  , tableCaption  :: Caption
+  , tableColSpecs :: Array ColIndex ColSpec
+  , tableRowHeads :: RowHeadColumns
+  , tableHead     :: Part
+  , tableBodies   :: [Part]
+  , tableFoot     :: Part
+  }
+
+toTable
+  :: B.Attr
+  -> B.Caption
+  -> [B.ColSpec]
+  -> B.TableHead
+  -> [B.TableBody]
+  -> B.TableFoot
+  -> Table
+toTable attr caption colSpecs  thead tbodies tfoot =
+  Table attr caption colSpecs' rowHeads thGrid tbGrids tfGrid
+  where
+    colSpecs' = listArray (ColIndex 1, ColIndex $ length colSpecs) colSpecs
+    rowHeads = case listToMaybe tbodies of
+      Nothing -> RowHeadColumns 0
+      Just (TableBody _attr rowHeadCols _headerRows _rows) -> rowHeadCols
+    thGrid = let (TableHead headAttr rows) = thead
+             in rowsToPart headAttr rows
+    tbGrids = map bodyToGrid tbodies
+    tfGrid = let (TableFoot footAttr rows) = tfoot
+             in rowsToPart footAttr rows
+    bodyToGrid (TableBody bodyAttr _rowHeadCols headRows rows) =
+      rowsToPart bodyAttr (headRows ++ rows)
+
+data BuilderCell
+  = FilledCell GridCell
+  | FreeCell
+
+fromBuilderCell :: BuilderCell -> GridCell
+fromBuilderCell = \case
+  FilledCell c -> c
+  FreeCell -> error "Found an unassigned cell."
+
+rowsToPart :: Attr -> [B.Row] -> Part
+rowsToPart attr = \case
+  [] -> Part
+        attr
+        (listArray ((RowIndex 1, ColIndex 1), (RowIndex 0, ColIndex 0)) [])
+        (listArray (RowIndex 1, RowIndex 0) [])
+  rows@(Row _attr firstRow:_) ->
+    let nrows = length rows
+        ncols = sum $ map (\(Cell _ _ _ (ColSpan cs) _) -> cs) firstRow
+        gbounds = ((RowIndex 1, ColIndex 1), (RowIndex nrows, ColIndex ncols))
+        mutableGrid :: ST s (STArray s CellIndex GridCell)
+        mutableGrid = do
+          grid <- newArray gbounds FreeCell
+          ridx <- newSTRef (RowIndex 1)
+          forM_ rows $ \(Row _attr cells) -> do
+            cidx <- newSTRef (ColIndex 1)
+            forM_ cells $ \(Cell cellAttr align rs cs blks) -> do
+              ridx' <- readSTRef ridx
+              let nextFreeInRow colindex@(ColIndex c) = do
+                    readArray grid (ridx', colindex) >>= \case
+                      FreeCell -> pure colindex
+                      _ -> nextFreeInRow $ ColIndex (c + 1)
+              cidx' <- readSTRef cidx >>= nextFreeInRow
+              writeArray grid (ridx', cidx') . FilledCell $
+                ContentCell cellAttr align rs cs blks
+              forM_ (continuationIndices ridx' cidx' rs cs) $ \idx -> do
+                writeArray grid idx . FilledCell $
+                  ContinuationCell (ridx', cidx')
+              -- go to new column
+              writeSTRef cidx cidx'
+            -- go to next row
+            modifySTRef ridx (incrRowIndex 1)
+          -- Swap BuilderCells with normal GridCells.
+          mapArray fromBuilderCell grid
+    in Part
+       { partCellArray = runSTArray mutableGrid
+       , partRowAttrs = listArray (RowIndex 1, RowIndex nrows) $
+                        map (\(Row rowAttr _) -> rowAttr) rows
+       , partAttr = attr
+       }
+
+continuationIndices :: RowIndex -> ColIndex -> RowSpan -> ColSpan -> [CellIndex]
+continuationIndices (RowIndex ridx) (ColIndex cidx) rowspan colspan =
+  let (RowSpan rs) = rowspan
+      (ColSpan cs) = colspan
+  in [ (RowIndex r, ColIndex c) | r <- [ridx..(ridx + rs - 1)]
+                                , c <- [cidx..(cidx + cs - 1)]
+                                , (r, c) /= (ridx, cidx)]
+
+rowArray :: RowIndex -> Array CellIndex GridCell -> Array ColIndex GridCell
+rowArray ridx grid =
+  let ((_minRidx, minCidx), (_maxRidx, maxCidx)) = bounds grid
+  in ixmap (minCidx, maxCidx) (ridx,) grid
+
+incrRowIndex :: RowSpan -> RowIndex -> RowIndex
+incrRowIndex (RowSpan n) (RowIndex r) = RowIndex $ r + n
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index cba6b7d1c..6f91d1965 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -31,9 +31,9 @@ module Text.Pandoc.Writers.HTML (
 import Control.Monad.Identity (runIdentity)
 import Control.Monad.State.Strict
 import Data.Char (ord)
-import Data.List (intercalate, intersperse, partition, delete, (\\))
+import Data.List (intercalate, intersperse, partition, delete, (\\), foldl')
 import Data.List.NonEmpty (NonEmpty((:|)))
-import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe)
+import Data.Maybe (fromMaybe, isJust, isNothing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -265,6 +265,8 @@ pandocToHtml opts (Pandoc meta blocks) = do
   let stringifyHTML = escapeStringForXML . stringify
   let authsMeta = map stringifyHTML $ docAuthors meta
   let dateMeta  = stringifyHTML $ docDate meta
+  let descriptionMeta = escapeStringForXML $
+                          lookupMetaString "description" meta
   slideVariant <- gets stSlideVariant
   let sects = adjustNumbers opts $
               makeSections (writerNumberSections opts) Nothing $
@@ -354,6 +356,52 @@ pandocToHtml opts (Pandoc meta blocks) = do
                         PlainMath -> defField "displaymath-css" True
                         WebTeX _  -> defField "displaymath-css" True
                         _         -> id) .
+                  (if slideVariant == RevealJsSlides
+                      then -- set boolean options explicitly, since
+                           -- template can't distinguish False/undefined
+                         defField "controls" True .
+                         defField "controlsTutorial" True .
+                         defField "controlsLayout" ("bottom-right" :: Text) .
+                         defField "controlsBackArrows" ("faded" :: Text) .
+                         defField "progress" True .
+                         defField "slideNumber" False .
+                         defField "showSlideNumber" ("all" :: Text) .
+                         defField "hashOneBasedIndex" False .
+                         defField "hash" False .
+                         defField "respondToHashChanges" True .
+                         defField "history" False .
+                         defField "keyboard" True .
+                         defField "overview" True .
+                         defField "disableLayout" False .
+                         defField "center" True .
+                         defField "touch" True .
+                         defField "loop" False .
+                         defField "rtl" False .
+                         defField "navigationMode" ("default" :: Text) .
+                         defField "shuffle" False .
+                         defField "fragments" True .
+                         defField "fragmentInURL" True .
+                         defField "embedded" False .
+                         defField "help" True .
+                         defField "pause" True .
+                         defField "showNotes" False .
+                         defField "autoPlayMedia" ("null" :: Text) .
+                         defField "preloadIframes" ("null" :: Text) .
+                         defField "autoSlide" ("0" :: Text) .
+                         defField "autoSlideStoppable" True .
+                         defField "autoSlideMethod" ("null" :: Text) .
+                         defField "defaultTiming" ("null" :: Text) .
+                         defField "mouseWheel" False .
+                         defField "display" ("block" :: Text) .
+                         defField "hideInactiveCursor" True .
+                         defField "hideCursorTime" ("5000" :: Text) .
+                         defField "previewLinks" False .
+                         defField "transition" ("slide" :: Text) .
+                         defField "transitionSpeed" ("default" :: Text) .
+                         defField "backgroundTransition" ("fade" :: Text) .
+                         defField "viewDistance" ("3" :: Text) .
+                         defField "mobileViewDistance" ("2" :: Text)
+                      else id) .
                   defField "document-css" (isNothing mCss && slideVariant == NoSlides) .
                   defField "quotes" (stQuotes st) .
                   -- for backwards compatibility we populate toc
@@ -364,6 +412,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
                   defField "author-meta" authsMeta .
                   maybe id (defField "date-meta")
                     (normalizeDate dateMeta) .
+                  defField "description-meta" descriptionMeta .
                   defField "pagetitle"
                       (stringifyHTML . docTitle $ meta) .
                   defField "idprefix" (writerIdentifierPrefix opts) .
@@ -553,30 +602,35 @@ tagWithAttributes opts html5 selfClosing tagname attr =
 
 addAttrs :: PandocMonad m
          => WriterOptions -> Attr -> Html -> StateT WriterState m Html
-addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr
+addAttrs opts attr h = foldl' (!) h <$> attrsToHtml opts attr
 
 toAttrs :: PandocMonad m
         => [(Text, Text)] -> StateT WriterState m [Attribute]
 toAttrs kvs = do
   html5 <- gets stHtml5
   mbEpubVersion <- gets stEPUBVersion
-  return $ mapMaybe (\(x,y) ->
-            if html5
-               then
-                  if x `Set.member` (html5Attributes <> rdfaAttributes)
-                     || T.any (== ':') x -- e.g. epub: namespace
-                     || "data-" `T.isPrefixOf` x
-                     || "aria-" `T.isPrefixOf` x
-                     then Just $ customAttribute (textTag x) (toValue y)
-                     else Just $ customAttribute (textTag ("data-" <> x))
-                                  (toValue y)
-               else
-                 if mbEpubVersion == Just EPUB2 &&
-                    not (x `Set.member` (html4Attributes <> rdfaAttributes) ||
-                         "xml:" `T.isPrefixOf` x)
-                    then Nothing
-                    else Just $ customAttribute (textTag x) (toValue y))
-            kvs
+  reverse . snd <$> foldM (go html5 mbEpubVersion) (Set.empty, []) kvs
+ where
+  go html5 mbEpubVersion (keys, attrs) (k,v) = do
+    if k `Set.member` keys
+       then do
+         report $ DuplicateAttribute k v
+         return (keys, attrs)
+       else return (Set.insert k keys, addAttr html5 mbEpubVersion k v attrs)
+  addAttr html5 mbEpubVersion x y
+    | html5
+      = if x `Set.member` (html5Attributes <> rdfaAttributes)
+             || T.any (== ':') x -- e.g. epub: namespace
+             || "data-" `T.isPrefixOf` x
+             || "aria-" `T.isPrefixOf` x
+           then (customAttribute (textTag x) (toValue y) :)
+           else (customAttribute (textTag ("data-" <> x)) (toValue y) :)
+    | mbEpubVersion == Just EPUB2
+    , not (x `Set.member` (html4Attributes <> rdfaAttributes) ||
+      "xml:" `T.isPrefixOf` x)
+      = id
+    | otherwise
+      = (customAttribute (textTag x) (toValue y) :)
 
 attrsToHtml :: PandocMonad m
             => WriterOptions -> Attr -> StateT WriterState m [Attribute]
@@ -617,17 +671,20 @@ dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height
 figure :: PandocMonad m
        => WriterOptions -> Attr -> [Inline] -> (Text, Text)
        -> StateT WriterState m Html
-figure opts attr txt (s,tit) = do
+figure opts attr@(_, _, attrList) txt (s,tit) = do
   html5 <- gets stHtml5
   -- Screen-readers will normally read the @alt@ text and the figure; we
   -- want to avoid them reading the same text twice. With HTML5 we can
   -- use aria-hidden for the caption; with HTML4, we use an empty
   -- alt-text instead.
+  -- When the alt text differs from the caption both should be read.
   let alt = if html5 then txt else [Str ""]
   let tocapt = if html5
-                  then H5.figcaption !
-                       H5.customAttribute (textTag "aria-hidden")
-                                          (toValue @Text "true")
+                  then (H5.figcaption !) $
+                       if isJust (lookup "alt" attrList)
+                          then mempty
+                          else H5.customAttribute (textTag "aria-hidden")
+                                                  (toValue @Text "true")
                   else H.p ! A.class_ "caption"
   img <- inlineToHtml opts (Image attr alt (s,tit))
   capt <- if null txt
@@ -707,12 +764,12 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs)
   let fragmentClass = case slideVariant of
                            RevealJsSlides -> "fragment"
                            _              -> "incremental"
-  let inDiv zs = RawBlock (Format "html") ("<div class=\""
+  let inDiv' zs = RawBlock (Format "html") ("<div class=\""
                        <> fragmentClass <> "\">") :
                    (zs ++ [RawBlock (Format "html") "</div>"])
   let breakOnPauses zs = case splitBy isPause zs of
                            []   -> []
-                           y:ys -> y ++ concatMap inDiv ys
+                           y:ys -> y ++ concatMap inDiv' ys
   let (titleBlocks, innerSecs) =
         if titleSlide
            -- title slides have no content of their own
@@ -774,9 +831,10 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
                                            lookup "entry-spacing" kvs' >>=
                                            safeRead }
   let isCslBibEntry = "csl-entry" `elem` classes
-  let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++
-            [("style", "width:" <> w <> ";") | "column" `elem` classes,
-             ("width", w) <- kvs'] ++
+  let kvs = [(k,v) | (k,v) <- kvs'
+                   , k /= "width" || "column" `notElem` classes] ++
+            [("style", "width:" <> w <> ";") | "column" `elem` classes
+                                             , ("width", w) <- kvs'] ++
             [("role", "doc-bibliography") | isCslBibBody && html5] ++
             [("role", "doc-biblioentry") | isCslBibEntry && html5]
   let speakerNotes = "notes" `elem` classes
@@ -790,14 +848,17 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
       classes' = case slideVariant of
         NoSlides -> classes
         _ -> filter (\k -> k /= "incremental" && k /= "nonincremental") classes
+  let paraToPlain (Para ils) = Plain ils
+      paraToPlain x          = x
+  let bs' = if "csl-entry" `elem` classes'
+               then walk paraToPlain bs
+               else bs
   contents <- if "columns" `elem` classes'
                  then -- we don't use blockListToHtml because it inserts
                       -- a newline between the column divs, which throws
                       -- off widths! see #4028
-                      mconcat <$> mapM (blockToHtml opts) bs
-                 else if isCslBibEntry
-                         then mconcat <$> mapM (cslEntryToHtml opts') bs
-                         else blockListToHtml opts' bs
+                      mconcat <$> mapM (blockToHtml opts) bs'
+                 else blockListToHtml opts' bs'
   let contents' = nl opts >> contents >> nl opts
   let (divtag, classes'') = if html5 && "section" `elem` classes'
                             then (H5.section, filter (/= "section") classes')
@@ -883,7 +944,7 @@ blockToHtml opts (BlockQuote blocks) = do
      else do
        contents <- blockListToHtml opts blocks
        return $ H.blockquote $ nl opts >> contents >> nl opts
-blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do
+blockToHtml opts (Header level (ident,classes,kvs) lst) = do
   contents <- inlineListToHtml opts lst
   let secnum = fromMaybe mempty $ lookup "number" kvs
   let contents' = if writerNumberSections opts && not (T.null secnum)
@@ -891,7 +952,13 @@ blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do
                      then (H.span ! A.class_ "header-section-number"
                              $ toHtml secnum) >> strToHtml " " >> contents
                      else contents
-  addAttrs opts attr
+  html5 <- gets stHtml5
+  let kvs' = if html5
+             then kvs
+             else [ (k, v) | (k, v) <- kvs
+                           , k `elem` (["lang", "dir", "title", "style"
+                                      , "align"] ++ intrinsicEventsHTML4)]
+  addAttrs opts (ident,classes,kvs')
          $ case level of
               1 -> H.h1 contents'
               2 -> H.h2 contents'
@@ -927,7 +994,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
                                    numstyle']
                    else [])
   l <- ordList opts contents
-  return $ foldl (!) l attribs
+  return $ foldl' (!) l attribs
 blockToHtml opts (DefinitionList lst) = do
   contents <- mapM (\(term, defs) ->
                   do term' <- liftM H.dt $ inlineListToHtml opts term
@@ -1225,6 +1292,10 @@ inlineToHtml opts inline = do
     LineBreak      -> return $ do
                         if html5 then H5.br else H.br
                         strToHtml "\n"
+    (Span ("",[cls],[]) ils)
+        | cls == "csl-block" || cls == "csl-left-margin" ||
+          cls == "csl-right-inline" || cls == "csl-indent"
+        -> inlineListToHtml opts ils >>= inDiv cls
 
     (Span (id',classes,kvs) ils) ->
                         let spanLikeTag = case classes of
@@ -1377,7 +1448,7 @@ inlineToHtml opts inline = do
                         return $ if T.null tit
                                     then link'
                                     else link' ! A.title (toValue tit)
-    (Image attr txt (s,tit)) -> do
+    (Image attr@(_, _, attrList) txt (s, tit)) -> do
                         let alternate = stringify txt
                         slideVariant <- gets stSlideVariant
                         let isReveal = slideVariant == RevealJsSlides
@@ -1390,7 +1461,8 @@ inlineToHtml opts inline = do
                               [A.title $ toValue tit | not (T.null tit)] ++
                               attrs
                             imageTag = (if html5 then H5.img else H.img
-                              , [A.alt $ toValue alternate | not (null txt)] )
+                              , [A.alt $ toValue alternate | not (null txt) &&
+                                  isNothing (lookup "alt" attrList)] )
                             mediaTag tg fallbackTxt =
                               let linkTxt = if null txt
                                             then fallbackTxt
@@ -1404,7 +1476,7 @@ inlineToHtml opts inline = do
                               Just "audio" -> mediaTag H5.audio "Audio"
                               Just _       -> (H5.embed, [])
                               _            -> imageTag
-                        return $ foldl (!) tag $ attributes ++ specAttrs
+                        return $ foldl' (!) tag $ attributes ++ specAttrs
                         -- note:  null title included, as in Markdown.pl
     (Note contents) -> do
                         notes <- gets stNotes
@@ -1457,11 +1529,15 @@ blockListToNote opts ref blocks = do
                     else let lastBlock   = last blocks
                              otherBlocks = init blocks
                          in  case lastBlock of
-                                  (Para lst)  -> otherBlocks ++
+                                  Para [Image _ _ (_,tit)]
+                                      | "fig:" `T.isPrefixOf` tit
+                                            -> otherBlocks ++ [lastBlock,
+                                                  Plain backlink]
+                                  Para lst  -> otherBlocks ++
                                                  [Para (lst ++ backlink)]
-                                  (Plain lst) -> otherBlocks ++
+                                  Plain lst -> otherBlocks ++
                                                  [Plain (lst ++ backlink)]
-                                  _           -> otherBlocks ++ [lastBlock,
+                                  _         -> otherBlocks ++ [lastBlock,
                                                  Plain backlink]
   contents <- blockListToHtml opts blocks'
   let noteItem = H.li ! prefixedId opts ("fn" <> ref) $ contents
@@ -1474,22 +1550,12 @@ blockListToNote opts ref blocks = do
                        _          -> noteItem
   return $ nl opts >> noteItem'
 
-cslEntryToHtml :: PandocMonad m
-               => WriterOptions
-               -> Block
-               -> StateT WriterState m Html
-cslEntryToHtml opts (Para xs) = do
+inDiv :: PandocMonad m=> Text -> Html -> StateT WriterState m Html
+inDiv cls x = do
   html5 <- gets stHtml5
-  let inDiv :: Text -> Html -> Html
-      inDiv cls x = (if html5 then H5.div else H.div)
-                      x ! A.class_ (toValue cls)
-  let go (Span ("",[cls],[]) ils)
-        | cls == "csl-block" || cls == "csl-left-margin" ||
-          cls == "csl-right-inline" || cls == "csl-indent"
-        = inDiv cls <$> inlineListToHtml opts ils
-      go il = inlineToHtml opts il
-  mconcat <$> mapM go xs
-cslEntryToHtml opts x = blockToHtml opts x
+  return $
+    (if html5 then H5.div else H.div)
+                x ! A.class_ (toValue cls)
 
 isMathEnvironment :: Text -> Bool
 isMathEnvironment s = "\\begin{" `T.isPrefixOf` s &&
@@ -1529,6 +1595,12 @@ allowsMathEnvironments MathML      = True
 allowsMathEnvironments (WebTeX _)  = True
 allowsMathEnvironments _           = False
 
+-- | List of intrinsic event attributes allowed on all elements in HTML4.
+intrinsicEventsHTML4 :: [Text]
+intrinsicEventsHTML4 =
+  [ "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover"
+  , "onmouseout", "onmouseout", "onkeypress", "onkeydown", "onkeyup"]
+
 isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool
 isRawHtml f = do
   html5 <- gets stHtml5
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index aaa19ed07..75e14714b 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -15,6 +15,7 @@ Haddock:  <http://www.haskell.org/haddock/doc/html/>
 -}
 module Text.Pandoc.Writers.Haddock (writeHaddock) where
 import Control.Monad.State.Strict
+import Data.Char (isAlphaNum)
 import Data.Default
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -71,8 +72,18 @@ notesToHaddock opts notes =
 
 -- | Escape special characters for Haddock.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing haddockEscapes
-  where haddockEscapes = backslashEscapes "\\/'`\"@<"
+escapeString t
+  | T.all isAlphaNum t = t
+  | otherwise = T.concatMap escChar t
+ where
+  escChar '\\' = "\\\\"
+  escChar '/'  = "\\/"
+  escChar '\'' = "\\'"
+  escChar '`'  = "\\`"
+  escChar '"'  = "\\\""
+  escChar '@'  = "\\@"
+  escChar '<'  = "\\<"
+  escChar c    = T.singleton c
 
 -- | Convert Pandoc block element to haddock.
 blockToHaddock :: PandocMonad m
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
index dcf5acfef..c254fbc58 100644
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -621,7 +621,12 @@ imageICML opts style attr (src, _) = do
       image  = inTags True "Image"
                    [("Self","ue6"), ("ItemTransform", scale<>" -"<>hw<>" -"<>hh)]
                  $ vcat [
-                     inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded"
+                     inTags True "Properties" [] $ vcat [
+                         inTags True "Profile" [("type","string")] $ text "$ID/Embedded"
+                       , selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0")
+                         , ("Right",  showFl $ ow*ow / imgWidth)
+                         , ("Bottom", showFl $ oh*oh / imgHeight)]
+                       ]
                    , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')]
                    ]
       doc    = inTags True "CharacterStyleRange" attrs
diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs
index d01d5a7e5..2613851c5 100644
--- a/src/Text/Pandoc/Writers/Ipynb.hs
+++ b/src/Text/Pandoc/Writers/Ipynb.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Writers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index 7058a4557..9db8723d1 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -1,9 +1,10 @@
+{-# LANGUAGE LambdaCase          #-}
+{-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -20,15 +21,17 @@ module Text.Pandoc.Writers.JATS
   , writeJatsPublishing
   , writeJatsArticleAuthoring
   ) where
+import Control.Applicative ((<|>))
 import Control.Monad.Reader
 import Control.Monad.State
 import Data.Generics (everywhere, mkT)
 import Data.List (partition)
 import qualified Data.Map as M
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, listToMaybe)
 import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime)
 import qualified Data.Text as T
 import Data.Text (Text)
+import Text.Pandoc.Citeproc (getReferences)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (languages, languagesByExtension)
@@ -40,6 +43,7 @@ import Text.DocLayout
 import Text.Pandoc.Shared
 import Text.Pandoc.Templates (renderTemplate)
 import Text.DocTemplates (Context(..), Val(..))
+import Text.Pandoc.Writers.JATS.References (referencesToJATS)
 import Text.Pandoc.Writers.JATS.Table (tableToJATS)
 import Text.Pandoc.Writers.JATS.Types
 import Text.Pandoc.Writers.Math
@@ -71,15 +75,19 @@ writeJATS = writeJatsArchiving
 
 -- | Convert a @'Pandoc'@ document to JATS.
 writeJats :: PandocMonad m => JATSTagSet -> WriterOptions -> Pandoc -> m Text
-writeJats tagSet opts d =
-  runReaderT (evalStateT (docToJATS opts d) initialState)
-             environment
-  where initialState = JATSState { jatsNotes = [] }
-        environment = JATSEnv
+writeJats tagSet opts d = do
+  refs <- if extensionEnabled Ext_element_citations $ writerExtensions opts
+          then getReferences Nothing d
+          else pure []
+  let environment = JATSEnv
           { jatsTagSet = tagSet
           , jatsInlinesWriter = inlinesToJATS
-          , jatsBlockWriter = blockToJATS
+          , jatsBlockWriter = wrappedBlocksToJATS
+          , jatsReferences = refs
           }
+  let initialState = JATSState { jatsNotes = [] }
+  runReaderT (evalStateT (docToJATS opts d) initialState)
+             environment
 
 -- | Convert Pandoc document to string in JATS format.
 docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text
@@ -168,13 +176,15 @@ plainToPara x         = x
 -- | Convert a list of pairs of terms and definitions into a list of
 -- JATS varlistentrys.
 deflistItemsToJATS :: PandocMonad m
-                      => WriterOptions -> [([Inline],[[Block]])] -> JATS m (Doc Text)
+                   => WriterOptions
+                   -> [([Inline],[[Block]])] -> JATS m (Doc Text)
 deflistItemsToJATS opts items =
   vcat <$> mapM (uncurry (deflistItemToJATS opts)) items
 
 -- | Convert a term and a list of blocks into a JATS varlistentry.
 deflistItemToJATS :: PandocMonad m
-                     => WriterOptions -> [Inline] -> [[Block]] -> JATS m (Doc Text)
+                  => WriterOptions
+                  -> [Inline] -> [[Block]] -> JATS m (Doc Text)
 deflistItemToJATS opts term defs = do
   term' <- inlinesToJATS opts term
   def' <- wrappedBlocksToJATS (not . isPara)
@@ -186,7 +196,8 @@ deflistItemToJATS opts term defs = do
 
 -- | Convert a list of lists of blocks to a list of JATS list items.
 listItemsToJATS :: PandocMonad m
-                => WriterOptions -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text)
+                => WriterOptions
+                -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text)
 listItemsToJATS opts markers items =
   case markers of
        Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items
@@ -194,12 +205,13 @@ listItemsToJATS opts markers items =
 
 -- | Convert a list of blocks into a JATS list item.
 listItemToJATS :: PandocMonad m
-               => WriterOptions -> Maybe Text -> [Block] -> JATS m (Doc Text)
+               => WriterOptions
+               -> Maybe Text -> [Block] -> JATS m (Doc Text)
 listItemToJATS opts mbmarker item = do
   contents <- wrappedBlocksToJATS (not . isParaOrList) opts
                  (walk demoteHeaderAndRefs item)
   return $ inTagsIndented "list-item" $
-           maybe empty (\lbl -> inTagsSimple "label" (text $ T.unpack lbl)) mbmarker
+           maybe empty (inTagsSimple "label" . text . T.unpack) mbmarker
            $$ contents
 
 imageMimeType :: Text -> [(Text, Text)] -> (Text, Text)
@@ -213,33 +225,36 @@ imageMimeType src kvs =
                   (T.drop 1 . T.dropWhile (/='/') <$> mbMT)
   in (maintype, subtype)
 
-languageFor :: [Text] -> Text
-languageFor classes =
+languageFor :: WriterOptions -> [Text] -> Text
+languageFor opts classes =
   case langs of
      (l:_) -> escapeStringForXML l
      []    -> ""
-    where isLang l    = T.toLower l `elem` map T.toLower languages
+    where
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 
-codeAttr :: Attr -> (Text, [(Text, Text)])
-codeAttr (ident,classes,kvs) = (lang, attr)
+codeAttr :: WriterOptions -> Attr -> (Text, [(Text, Text)])
+codeAttr opts (ident,classes,kvs) = (lang, attr)
     where
-       attr = [("id",ident) | not (T.null ident)] ++
+       attr = [("id", escapeNCName ident) | not (T.null ident)] ++
               [("language",lang) | not (T.null lang)] ++
               [(k,v) | (k,v) <- kvs, k `elem` ["code-type",
                 "code-version", "executable",
                 "language-version", "orientation",
                     "platforms", "position", "specific-use"]]
-       lang  = languageFor classes
+       lang  = languageFor opts classes
 
 -- | Convert a Pandoc block element to JATS.
 blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text)
 blockToJATS _ Null = return empty
 blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do
-  let idAttr = [("id", writerIdentifierPrefix opts <> id') | not (T.null id')]
+  let idAttr = [ ("id", writerIdentifierPrefix opts <> escapeNCName id')
+               | not (T.null id')]
   let otherAttrs = ["sec-type", "specific-use"]
   let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs]
   title' <- inlinesToJATS opts ils
@@ -247,21 +262,26 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do
   return $ inTags True "sec" attribs $
       inTagsSimple "title" title' $$ contents
 -- Bibliography reference:
-blockToJATS opts (Div (T.stripPrefix "ref-" -> Just _,_,_) [Para lst]) =
+blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident =
+  inTags True "ref" [("id", escapeNCName ident)] .
+  inTagsSimple "mixed-citation" <$>
   inlinesToJATS opts lst
 blockToJATS opts (Div ("refs",_,_) xs) = do
-  contents <- blocksToJATS opts xs
+  refs <- asks jatsReferences
+  contents <- if null refs
+              then blocksToJATS opts xs
+              else referencesToJATS opts refs
   return $ inTagsIndented "ref-list" contents
 blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do
   contents <- blocksToJATS opts bs
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("xml:lang",l) | ("lang",l) <- kvs] ++
              [(k,v) | (k,v) <- kvs, k `elem` ["specific-use",
                  "content-type", "orientation", "position"]]
   return $ inTags True cls attr contents
 blockToJATS opts (Div (ident,_,kvs) bs) = do
   contents <- blocksToJATS opts bs
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("xml:lang",l) | ("lang",l) <- kvs] ++
              [(k,v) | (k,v) <- kvs, k `elem` ["specific-use",
                  "content-type", "orientation", "position"]]
@@ -279,7 +299,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt
   let capt = if null txt
                 then empty
                 else inTagsSimple "caption" $ inTagsSimple "p" alt
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation",
                                               "position", "specific-use"]]
   let graphicattr = [("mimetype",maintype),
@@ -290,7 +310,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt
               capt $$ selfClosingTag "graphic" graphicattr
 blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do
   let (maintype, subtype) = imageMimeType src kvs
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("mimetype", maintype),
               ("mime-subtype", subtype),
               ("xlink:href", src)] ++
@@ -306,13 +326,16 @@ blockToJATS opts (LineBlock lns) =
   blockToJATS opts $ linesToPara lns
 blockToJATS opts (BlockQuote blocks) = do
   tagSet <- asks jatsTagSet
-  let blocksToJats' = if tagSet == TagSetArticleAuthoring
-                      then wrappedBlocksToJATS (not . isPara)
-                      else blocksToJATS
-  inTagsIndented "disp-quote" <$> blocksToJats' opts blocks
-blockToJATS _ (CodeBlock a str) = return $
+  let needsWrap = if tagSet == TagSetArticleAuthoring
+                  then not . isPara
+                  else \case
+                    Header{}       -> True
+                    HorizontalRule -> True
+                    _              -> False
+  inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks
+blockToJATS opts (CodeBlock a str) = return $
   inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str)))
-    where (lang, attr) = codeAttr a
+    where (lang, attr) = codeAttr opts a
           tag          = if T.null lang then "preformat" else "code"
 blockToJATS _ (BulletList []) = return empty
 blockToJATS opts (BulletList lst) =
@@ -392,9 +415,9 @@ inlineToJATS opts (Quoted SingleQuote lst) = do
 inlineToJATS opts (Quoted DoubleQuote lst) = do
   contents <- inlinesToJATS opts lst
   return $ char '“' <> contents <> char '”'
-inlineToJATS _ (Code a str) =
+inlineToJATS opts (Code a str) =
   return $ inTags False tag attr $ literal (escapeStringForXML str)
-    where (lang, attr) = codeAttr a
+    where (lang, attr) = codeAttr opts a
           tag          = if T.null lang then "monospace" else "code"
 inlineToJATS _ il@(RawInline f x)
   | f == "jats" = return $ literal x
@@ -417,7 +440,8 @@ inlineToJATS opts (Note contents) = do
       let notenum = case notes of
                       (n, _):_ -> n + 1
                       []       -> 1
-      thenote <- inTags True "fn" [("id","fn" <> tshow notenum)]
+      thenote <- inTags True "fn" [("id", "fn" <> tshow notenum)]
+                     . (inTagsSimple "label" (literal $ tshow notenum) <>)
                     <$> wrappedBlocksToJATS (not . isPara) opts
                          (walk demoteHeaderAndRefs contents)
       modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes }
@@ -425,18 +449,34 @@ inlineToJATS opts (Note contents) = do
                                     ("rid", "fn" <> tshow notenum)]
              $ text (show notenum)
 inlineToJATS opts (Cite _ lst) =
-  -- TODO revisit this after examining the jats.csl pipeline
   inlinesToJATS opts lst
-inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils
-inlineToJATS opts (Span (ident,_,kvs) ils) = do
+inlineToJATS opts (Span (ident,classes,kvs) ils) = do
   contents <- inlinesToJATS opts ils
-  let attr = [("id",ident) | not (T.null ident)] ++
-             [("xml:lang",l) | ("lang",l) <- kvs] ++
-             [(k,v) | (k,v) <- kvs
-                    ,  k `elem` ["content-type", "rationale",
-                                 "rid", "specific-use"]]
-  return $ selfClosingTag "milestone-start" attr <> contents <>
-           selfClosingTag "milestone-end" []
+  let commonAttr = [("id", escapeNCName ident) | not (T.null ident)] ++
+                   [("xml:lang",l) | ("lang",l) <- kvs] ++
+                   [(k,v) | (k,v) <- kvs,  k `elem` ["alt", "specific-use"]]
+  -- A named-content element is a good fit for spans, but requires a
+  -- content-type attribute to be present. We use either the explicit
+  -- attribute or the first class as content type. If neither is
+  -- available, then we fall back to using a @styled-content@ element.
+  let (tag, specificAttr) =
+        case lookup "content-type" kvs <|> listToMaybe classes of
+          Just ct -> ( "named-content"
+                     , ("content-type", ct) :
+                       [(k, v) | (k, v) <- kvs
+                       , k `elem` ["rid", "vocab", "vocab-identifier",
+                                   "vocab-term", "vocab-term-identifier"]])
+          -- Fall back to styled-content
+          Nothing -> ("styled-content"
+                     , [(k, v) | (k,v) <- kvs
+                       , k `elem` ["style", "style-type", "style-detail",
+                                   "toggle"]])
+  let attr = commonAttr ++ specificAttr
+  -- unwrap if wrapping element would have no attributes
+  return $
+    if null attr
+    then contents
+    else inTags False tag attr contents
 inlineToJATS _ (Math t str) = do
   let addPref (Xml.Attr q v)
          | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v
@@ -470,17 +510,20 @@ inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _))
   | escapeURI t == email =
   return $ inTagsSimple "email" $ literal (escapeStringForXML email)
 inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do
-  let attr = [("id", ident) | not (T.null ident)] ++
-             [("alt", stringify txt) | not (null txt)] ++
-             [("rid", src)] ++
-             [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]]
+  let attr = mconcat
+             [ [("id", escapeNCName ident) | not (T.null ident)]
+             , [("alt", stringify txt) | not (null txt)]
+             , [("rid", escapeNCName src)]
+             , [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]]
+             , [("ref-type", "bibr") | "ref-" `T.isPrefixOf` src]
+             ]
   if null txt
      then return $ selfClosingTag "xref" attr
      else do
         contents <- inlinesToJATS opts txt
         return $ inTags False "xref" attr contents
 inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("ext-link-type", "uri"),
               ("xlink:href", src)] ++
              [("xlink:title", tit) | not (T.null tit)] ++
@@ -498,7 +541,7 @@ inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do
   let subtype = fromMaybe "" $
                   lookup "mime-subtype" kvs `mplus`
                   (T.drop 1 . T.dropWhile (/='/') <$> mbMT)
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("mimetype", maintype),
               ("mime-subtype", subtype),
               ("xlink:href", src)] ++
@@ -529,7 +572,7 @@ demoteHeaderAndRefs (Div ("refs",cls,kvs) bs) =
 demoteHeaderAndRefs x = x
 
 parseDate :: Text -> Maybe Day
-parseDate s = msum (map (\fs -> parsetimeWith fs $ T.unpack s) formats) :: Maybe Day
+parseDate s = msum (map (`parsetimeWith` T.unpack s) formats)
   where parsetimeWith = parseTimeM True defaultTimeLocale
         formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
                     "%e %B %Y", "%b. %e, %Y", "%B %e, %Y",
diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs
new file mode 100644
index 000000000..5b19fd034
--- /dev/null
+++ b/src/Text/Pandoc/Writers/JATS/References.hs
@@ -0,0 +1,164 @@
+{-# LANGUAGE LambdaCase        #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.JATS.References
+   Copyright   : © 2021 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
+   Stability   : alpha
+   Portability : portable
+
+Creation of a bibliography list using @<element-citation>@ elements in
+reference items.
+-}
+module Text.Pandoc.Writers.JATS.References
+  ( referencesToJATS
+  , referenceToJATS
+  ) where
+
+import Citeproc.Pandoc ()
+import Citeproc.Types
+  ( Date (..), DateParts (..), ItemId (..), Name (..), Reference (..)
+  , Val (..) , lookupVariable, valToText
+  )
+import Data.Text (Text)
+import Text.DocLayout (Doc, empty, isEmpty, literal, vcat)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Builder (Inlines)
+import Text.Pandoc.Options (WriterOptions)
+import Text.Pandoc.Shared (tshow)
+import Text.Pandoc.Writers.JATS.Types
+import Text.Pandoc.XML (escapeNCName, escapeStringForXML, inTags)
+import qualified Data.Text as T
+
+referencesToJATS :: PandocMonad m
+                 => WriterOptions
+                 -> [Reference Inlines]
+                 -> JATS m (Doc Text)
+referencesToJATS opts =
+  fmap (inTags True "ref-list" [] . vcat) . mapM (referenceToJATS opts)
+
+referenceToJATS :: PandocMonad m
+                => WriterOptions
+                -> Reference Inlines
+                -> JATS m (Doc Text)
+referenceToJATS _opts ref = do
+  let refType = referenceType ref
+  let pubType = [("publication-type", refType) | not (T.null refType)]
+  let ident = escapeNCName $ "ref-" <> unItemId (referenceId ref)
+  let wrap = inTags True "ref" [("id", ident)]
+           . inTags True "element-citation" pubType
+  return . wrap . vcat $
+    [ authors
+    , "title" `varInTag`
+      if refType == "book"
+      then "source"
+      else "article-title"
+    , if refType == "book"
+      then empty
+      else "container-title" `varInTag` "source"
+    , editors
+    , "publisher"       `varInTag` "publisher-name"
+    , "publisher-place" `varInTag` "publisher-loc"
+    , yearTag
+    , accessed
+    , "volume"          `varInTag` "volume"
+    , "issue"           `varInTag` "issue"
+    , "page-first"      `varInTag` "fpage"
+    , "page-last"       `varInTag` "lpage"
+    , "pages"           `varInTag` "page-range"
+    , "ISBN"            `varInTag` "isbn"
+    , "ISSN"            `varInTag` "issn"
+    , varInTagWith "doi"  "pub-id" [("pub-id-type", "doi")]
+    , varInTagWith "pmid" "pub-id" [("pub-id-type", "pmid")]
+    ]
+  where
+    varInTag var tagName = varInTagWith var tagName []
+
+    varInTagWith var tagName tagAttribs =
+      case lookupVariable var ref >>= valToText of
+        Nothing  -> mempty
+        Just val -> inTags' tagName tagAttribs . literal $
+                    escapeStringForXML val
+
+    authors = case lookupVariable "author" ref of
+      Just (NamesVal names) ->
+        inTags True "person-group" [("person-group-type", "author")] . vcat $
+        map toNameElements names
+      _                     -> empty
+
+    editors = case lookupVariable "editor" ref of
+      Just (NamesVal names) ->
+        inTags True "person-group" [("person-group-type", "editor")] . vcat $
+        map toNameElements names
+      _                     -> empty
+
+    yearTag =
+      case lookupVariable "issued" ref of
+        Just (DateVal date) -> toDateElements date
+        _ -> empty
+
+    accessed =
+      case lookupVariable "accessed" ref of
+        Just (DateVal d) -> inTags' "date-in-citation"
+                                    [("content-type", "access-date")]
+                                    (toDateElements d)
+        _ -> empty
+
+toDateElements :: Date -> Doc Text
+toDateElements date =
+  case dateParts date of
+    dp@(DateParts (y:m:d:_)):_ -> yearElement y dp <>
+                                  monthElement m <>
+                                  dayElement d
+    dp@(DateParts (y:m:_)):_   -> yearElement y dp <> monthElement m
+    dp@(DateParts (y:_)):_     -> yearElement y dp
+    _                          -> empty
+
+yearElement :: Int -> DateParts -> Doc Text
+yearElement year dp =
+  inTags' "year" [("iso-8601-date", iso8601 dp)] $ literal (fourDigits year)
+
+monthElement :: Int -> Doc Text
+monthElement month = inTags' "month" [] . literal $ twoDigits month
+
+dayElement :: Int -> Doc Text
+dayElement day = inTags' "day" [] . literal $ twoDigits day
+
+iso8601 :: DateParts -> Text
+iso8601 = T.intercalate "-" . \case
+  DateParts (y:m:d:_) -> [fourDigits y, twoDigits m, twoDigits d]
+  DateParts (y:m:_)   -> [fourDigits y, twoDigits m]
+  DateParts (y:_)     -> [fourDigits y]
+  _                   -> []
+
+twoDigits :: Int -> Text
+twoDigits n = T.takeEnd 2 $ '0' `T.cons` tshow n
+
+fourDigits :: Int -> Text
+fourDigits n = T.takeEnd 4 $ "000" <> tshow n
+
+toNameElements :: Name -> Doc Text
+toNameElements name =
+  if not (isEmpty nameTags)
+  then inTags' "name" [] nameTags
+  else nameLiteral name `inNameTag` "string-name"
+    where
+      inNameTag mVal tag = case mVal of
+        Nothing  -> empty
+        Just val -> inTags' tag [] . literal $ escapeStringForXML val
+      surnamePrefix = maybe mempty (`T.snoc` ' ') $
+                      nameNonDroppingParticle name
+      givenSuffix = maybe mempty (T.cons ' ') $
+                    nameDroppingParticle name
+      nameTags = mconcat
+        [ ((surnamePrefix <>) <$> nameFamily name) `inNameTag` "surname"
+        , ((<> givenSuffix) <$> nameGiven name) `inNameTag` "given-names"
+        , nameSuffix name `inNameTag` "suffix"
+        ]
+
+-- | Put the supplied contents between start and end tags of tagType,
+--   with specified attributes.
+inTags' :: Text -> [(Text, Text)] -> Doc Text -> Doc Text
+inTags' = inTags False
diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs
index a4d42832d..70569bdcd 100644
--- a/src/Text/Pandoc/Writers/JATS/Table.hs
+++ b/src/Text/Pandoc/Writers/JATS/Table.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS.Table
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
@@ -24,7 +24,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options (WriterOptions)
 import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Writers.JATS.Types
-import Text.Pandoc.XML (inTags, inTagsIndented, selfClosingTag)
+import Text.Pandoc.XML (escapeNCName, inTags, inTagsIndented, selfClosingTag)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
 
@@ -34,13 +34,19 @@ tableToJATS :: PandocMonad m
             -> JATS m (Doc Text)
 tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do
   let (Caption _maybeShortCaption captionBlocks) = caption
+  -- Only paragraphs are allowed in captions, all other blocks must be
+  -- wrapped in @<p>@ elements.
+  let needsWrapping = \case
+        Plain{} -> False
+        Para{}  -> False
+        _       -> True
   tbl <- captionlessTable opts attr colspecs thead tbodies tfoot
   captionDoc <- if null captionBlocks
                 then return empty
                 else do
                   blockToJATS <- asks jatsBlockWriter
-                  inTagsIndented "caption" . vcat <$>
-                    mapM (blockToJATS opts) captionBlocks
+                  inTagsIndented "caption" <$>
+                    blockToJATS needsWrapping opts captionBlocks
   return $ inTags True "table-wrap" [] $ captionDoc $$ tbl
 
 captionlessTable :: PandocMonad m
@@ -216,7 +222,7 @@ cellToJats opts celltype (Ann.Cell (colspec :| _) _colNum cell) =
 
 toAttribs :: Attr -> [Text] -> [(Text, Text)]
 toAttribs (ident, _classes, kvs) knownAttribs =
-  (if T.null ident then id else (("id", ident) :)) $
+  (if T.null ident then id else (("id", escapeNCName ident) :)) $
   filter ((`elem` knownAttribs) . fst) kvs
 
 tableCellToJats :: PandocMonad m
@@ -230,7 +236,7 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
   inlinesToJats <- asks jatsInlinesWriter
   let cellContents = \case
         [Plain inlines] -> inlinesToJats opts inlines
-        blocks          -> vcat <$> mapM (blockToJats opts) blocks
+        blocks          -> blockToJats needsWrapInCell opts blocks
   let tag' = case ctype of
         BodyCell   -> "td"
         HeaderCell -> "th"
@@ -246,3 +252,17 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
               . maybeCons (colspanAttrib colspan)
               $ toAttribs attr validAttribs
   inTags False tag' attribs <$> cellContents item
+
+-- | Whether the JATS produced from this block should be wrapped in a
+-- @<p>@ element when put directly below a @<td>@ element.
+needsWrapInCell :: Block -> Bool
+needsWrapInCell = \case
+  Plain{}          -> False  -- should be unwrapped anyway
+  Para{}           -> False
+  BulletList{}     -> False
+  OrderedList{}    -> False
+  DefinitionList{} -> False
+  HorizontalRule   -> False
+  CodeBlock{}      -> False
+  RawBlock{}       -> False  -- responsibility of the user
+  _                -> True
diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs
index 8162f3bc0..8d8673cf6 100644
--- a/src/Text/Pandoc/Writers/JATS/Types.hs
+++ b/src/Text/Pandoc/Writers/JATS/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Writers.JATS.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -17,11 +17,12 @@ module Text.Pandoc.Writers.JATS.Types
   )
 where
 
+import Citeproc.Types (Reference)
 import Control.Monad.Reader (ReaderT)
 import Control.Monad.State (StateT)
 import Data.Text (Text)
 import Text.DocLayout (Doc)
-import Text.Pandoc.Definition (Block, Inline)
+import Text.Pandoc.Builder (Block, Inline, Inlines)
 import Text.Pandoc.Options (WriterOptions)
 
 -- | JATS tag set variant
@@ -36,10 +37,20 @@ newtype JATSState = JATSState
   { jatsNotes :: [(Int, Doc Text)]
   }
 
+-- | Environment containing all information relevant for rendering.
 data JATSEnv m = JATSEnv
-  { jatsTagSet :: JATSTagSet
+  { jatsTagSet :: JATSTagSet  -- ^ The tag set that's being ouput
+
+  , jatsBlockWriter   :: (Block -> Bool)
+                      -> WriterOptions -> [Block]  -> JATS m (Doc Text)
+    -- ^ Converts a block list to JATS, wrapping top-level blocks into a
+    -- @<p>@ element if the property evaluates to @True@.
+    -- See #7227.
+
   , jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text)
-  , jatsBlockWriter   :: WriterOptions -> Block    -> JATS m (Doc Text)
+    -- ^ Converts an inline list to JATS.
+
+  , jatsReferences    :: [Reference Inlines] -- ^ List of references
   }
 
 -- | JATS writer type
diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index 6bc048a61..1351814e9 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE PatternGuards #-}
 {- |
    Module      : Text.Pandoc.Writers.Jira
-   Copyright   : © 2010-2020 Albert Krewinkel, John MacFarlane
+   Copyright   : © 2010-2021 Albert Krewinkel, John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -39,11 +39,17 @@ writeJira :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeJira opts = runDefaultConverter (writerWrapText opts) (pandocToJira opts)
 
 -- | State to keep track of footnotes.
-newtype ConverterState = ConverterState { stNotes :: [Text] }
+data ConverterState = ConverterState
+  { stNotes   :: [Text] -- ^ Footnotes to be appended to the end of the text
+  , stInPanel :: Bool   -- ^ whether we are in a @{panel}@ block
+  }
 
 -- | Initial converter state.
 startState :: ConverterState
-startState = ConverterState { stNotes = [] }
+startState = ConverterState
+  { stNotes = []
+  , stInPanel = False
+  }
 
 -- | Converter monad
 type JiraConverter m = ReaderT WrapOption (StateT ConverterState m)
@@ -126,13 +132,19 @@ toJiraCode :: PandocMonad m
            -> Text
            -> JiraConverter m [Jira.Block]
 toJiraCode (ident, classes, _attribs) code = do
-  let lang = case find (\c -> T.toLower c `elem` knownLanguages) classes of
-               Nothing -> Jira.Language "java"
-               Just l  -> Jira.Language l
-  let addAnchor b = if T.null ident
-                    then b
-                    else [Jira.Para (singleton (Jira.Anchor ident))] <> b
-  return . addAnchor . singleton $ Jira.Code lang mempty code
+  return . addAnchor ident . singleton $
+    case find (\c -> T.toLower c `elem` knownLanguages) classes of
+      Nothing -> Jira.NoFormat mempty code
+      Just l  -> Jira.Code (Jira.Language l) mempty code
+
+-- | Prepends an anchor with the given identifier.
+addAnchor :: Text -> [Jira.Block] -> [Jira.Block]
+addAnchor ident =
+  if T.null ident
+  then id
+  else \case
+    Jira.Para xs : bs -> (Jira.Para (Jira.Anchor ident : xs) : bs)
+    bs                -> (Jira.Para (singleton (Jira.Anchor ident)) : bs)
 
 -- | Creates a Jira definition list
 toJiraDefinitionList :: PandocMonad m
@@ -149,11 +161,16 @@ toJiraDefinitionList defItems = do
 toJiraPanel :: PandocMonad m
             => Attr -> [Block]
             -> JiraConverter m [Jira.Block]
-toJiraPanel attr blocks = do
-  jiraBlocks <- toJiraBlocks blocks
-  return $ if attr == nullAttr
-           then jiraBlocks
-           else singleton (Jira.Panel [] jiraBlocks)
+toJiraPanel (ident, classes, attribs) blocks = do
+  inPanel <- gets stInPanel
+  if inPanel || ("panel" `notElem` classes && null attribs)
+    then addAnchor ident <$> toJiraBlocks blocks
+    else do
+      modify $ \st -> st{ stInPanel = True }
+      jiraBlocks <- toJiraBlocks blocks
+      modify $ \st -> st{ stInPanel = inPanel }
+      let params = map (uncurry Jira.Parameter) attribs
+      return $ singleton (Jira.Panel params $ addAnchor ident jiraBlocks)
 
 -- | Creates a Jira header
 toJiraHeader :: PandocMonad m
@@ -263,6 +280,8 @@ toJiraLink (_, classes, _) (url, _) alias = do
     | Just email <- T.stripPrefix "mailto:" url' = (Jira.Email, email)
     | "user-account" `elem` classes              = (Jira.User, dropTilde url)
     | "attachment" `elem` classes                = (Jira.Attachment, url)
+    | "smart-card" `elem` classes                = (Jira.SmartCard, url)
+    | "smart-link" `elem` classes                = (Jira.SmartLink, url)
     | otherwise                                  = (Jira.External, url)
   dropTilde txt = case T.uncons txt of
     Just ('~', username) -> username
@@ -292,7 +311,13 @@ quotedToJira qtype xs = do
 spanToJira :: PandocMonad m
            => Attr -> [Inline]
            -> JiraConverter m [Jira.Inline]
-spanToJira (_, _classes, _) = toJiraInlines
+spanToJira (ident, _classes, attribs) inls =
+  let wrap = case lookup "color" attribs of
+               Nothing -> id
+               Just color -> singleton . Jira.ColorInline (Jira.ColorName color)
+  in wrap <$> case ident of
+    "" -> toJiraInlines inls
+    _  -> (Jira.Anchor ident :) <$> toJiraInlines inls
 
 registerNotes :: PandocMonad m => [Block] -> JiraConverter m [Jira.Inline]
 registerNotes contents = do
@@ -308,7 +333,7 @@ registerNotes contents = do
 knownLanguages :: [Text]
 knownLanguages =
   [ "actionscript", "ada", "applescript", "bash", "c", "c#", "c++"
-  , "css", "erlang", "go", "groovy", "haskell", "html", "javascript"
+  , "css", "erlang", "go", "groovy", "haskell", "html", "java", "javascript"
   , "json", "lua", "nyan", "objc", "perl", "php", "python", "r", "ruby"
   , "scala", "sql", "swift", "visualbasic", "xml", "yaml"
   ]
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index d665269ab..063e347fb 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -1,11 +1,12 @@
 {-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE LambdaCase          #-}
 {-# LANGUAGE TupleSections       #-}
 {-# LANGUAGE PatternGuards       #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -18,12 +19,9 @@ module Text.Pandoc.Writers.LaTeX (
     writeLaTeX
   , writeBeamer
   ) where
-import Control.Applicative ((<|>))
 import Control.Monad.State.Strict
-import Data.Monoid (Any(..))
-import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace,
-                  isPunctuation, ord)
-import Data.List (foldl', intersperse, nubBy, (\\), uncons)
+import Data.Char (isDigit)
+import Data.List (intersperse, nubBy, (\\))
 import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing)
 import qualified Data.Map as M
 import Data.Text (Text)
@@ -31,79 +29,30 @@ import qualified Data.Text as T
 import Network.URI (unEscapeString)
 import Text.DocTemplates (FromContext(lookupContext), renderTemplate,
                           Val(..), Context(..))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import Text.Collate.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
-                                 styleToLaTeX, toListingsLanguage)
+                                 styleToLaTeX)
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.DocLayout
 import Text.Pandoc.Shared
 import Text.Pandoc.Slides
-import Text.Pandoc.Walk
+import Text.Pandoc.Walk (query, walk, walkM)
+import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
+import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX)
+import Text.Pandoc.Writers.LaTeX.Citation (citationsToNatbib,
+                                           citationsToBiblatex)
+import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState)
+import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossia, toBabel)
+import Text.Pandoc.Writers.LaTeX.Util (stringToLaTeX, StringContext(..),
+                                       toLabel, inCmd,
+                                       wrapDiv, hypertarget, labelFor,
+                                       getListingsLanguage, mbBraced)
 import Text.Pandoc.Writers.Shared
-import Text.Printf (printf)
-import qualified Data.Text.Normalize as Normalize
-
-data WriterState =
-  WriterState { stInNote        :: Bool          -- true if we're in a note
-              , stInQuote       :: Bool          -- true if in a blockquote
-              , stExternalNotes :: Bool          -- true if in context where
-                                                 -- we need to store footnotes
-              , stInMinipage    :: Bool          -- true if in minipage
-              , stInHeading     :: Bool          -- true if in a section heading
-              , stInItem        :: Bool          -- true if in \item[..]
-              , stNotes         :: [Doc Text]    -- notes in a minipage
-              , stOLLevel       :: Int           -- level of ordered list nesting
-              , stOptions       :: WriterOptions -- writer options, so they don't have to be parameter
-              , stVerbInNote    :: Bool          -- true if document has verbatim text in note
-              , stTable         :: Bool          -- true if document has a table
-              , stStrikeout     :: Bool          -- true if document has strikeout
-              , stUrl           :: Bool          -- true if document has visible URL link
-              , stGraphics      :: Bool          -- true if document contains images
-              , stLHS           :: Bool          -- true if document has literate haskell code
-              , stHasChapters   :: Bool          -- true if document has chapters
-              , stCsquotes      :: Bool          -- true if document uses csquotes
-              , stHighlighting  :: Bool          -- true if document has highlighted code
-              , stIncremental   :: Bool          -- true if beamer lists should be displayed bit by bit
-              , stInternalLinks :: [Text]      -- list of internal link targets
-              , stBeamer        :: Bool          -- produce beamer
-              , stEmptyLine     :: Bool          -- true if no content on line
-              , stHasCslRefs    :: Bool          -- has a Div with class refs
-              , stIsFirstInDefinition :: Bool    -- first block in a defn list
-              }
-
-startingState :: WriterOptions -> WriterState
-startingState options = WriterState {
-                  stInNote = False
-                , stInQuote = False
-                , stExternalNotes = False
-                , stInHeading = False
-                , stInMinipage = False
-                , stInItem = False
-                , stNotes = []
-                , stOLLevel = 1
-                , stOptions = options
-                , stVerbInNote = False
-                , stTable = False
-                , stStrikeout = False
-                , stUrl = False
-                , stGraphics = False
-                , stLHS = False
-                , stHasChapters = case writerTopLevelDivision options of
-                                    TopLevelPart    -> True
-                                    TopLevelChapter -> True
-                                    _               -> False
-                , stCsquotes = False
-                , stHighlighting = False
-                , stIncremental = writerIncremental options
-                , stInternalLinks = []
-                , stBeamer = False
-                , stEmptyLine = True
-                , stHasCslRefs = False
-                , stIsFirstInDefinition = False }
+import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
 
 -- | Convert Pandoc to LaTeX.
 writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@@ -117,8 +66,6 @@ writeBeamer options document =
   evalStateT (pandocToLaTeX options document) $
     (startingState options){ stBeamer = True }
 
-type LW m = StateT WriterState m
-
 pandocToLaTeX :: PandocMonad m
               => WriterOptions -> Pandoc -> LW m Text
 pandocToLaTeX options (Pandoc meta blocks) = do
@@ -199,6 +146,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do
 
   let dirs = query (extract "dir") blocks
 
+  let nociteIds = query (\case
+                           Cite cs _ -> map citationId cs
+                           _         -> [])
+                    $ lookupMetaInlines "nocite" meta
+
   let context  =  defField "toc" (writerTableOfContents options) $
                   defField "toc-depth" (tshow
                                         (writerTOCDepth options -
@@ -212,6 +164,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                   defField "documentclass" documentClass $
                   defField "verbatim-in-note" (stVerbInNote st) $
                   defField "tables" (stTable st) $
+                  defField "multirow" (stMultiRow st) $
                   defField "strikeout" (stStrikeout st) $
                   defField "url" (stUrl st) $
                   defField "numbersections" (writerNumberSections options) $
@@ -220,6 +173,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                   defField "has-chapters" (stHasChapters st) $
                   defField "has-frontmatter" (documentClass `elem` frontmatterClasses) $
                   defField "listings" (writerListings options || stLHS st) $
+                  defField "zero-width-non-joiner" (stZwnj st) $
                   defField "beamer" beamer $
                   (if stHighlighting st
                       then case writerHighlightStyle options of
@@ -230,9 +184,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                       else id) $
                   (case writerCiteMethod options of
                          Natbib   -> defField "biblio-title" biblioTitle .
-                                     defField "natbib" True
+                                     defField "natbib" True .
+                                     defField "nocite-ids" nociteIds
                          Biblatex -> defField "biblio-title" biblioTitle .
-                                     defField "biblatex" True
+                                     defField "biblatex" True .
+                                     defField "nocite-ids" nociteIds
                          _        -> id) $
                   defField "colorlinks" (any hasStringValue
                            ["citecolor", "urlcolor", "linkcolor", "toccolor",
@@ -297,154 +253,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
        Nothing  -> main
        Just tpl -> renderTemplate tpl context'
 
-data StringContext = TextString
-                   | URLString
-                   | CodeString
-                   deriving (Eq)
-
--- escape things as needed for LaTeX
-stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text
-stringToLaTeX context zs = do
-  opts <- gets stOptions
-  return $ T.pack $
-    foldr (go opts context) mempty $ T.unpack $
-    if writerPreferAscii opts
-       then Normalize.normalize Normalize.NFD zs
-       else zs
- where
-  go :: WriterOptions -> StringContext -> Char -> String -> String
-  go opts ctx x xs   =
-    let ligatures = isEnabled Ext_smart opts && ctx == TextString
-        isUrl = ctx == URLString
-        mbAccentCmd =
-          if writerPreferAscii opts && ctx == TextString
-             then uncons xs >>= \(c,_) -> M.lookup c accents
-             else Nothing
-        emits s =
-          case mbAccentCmd of
-               Just cmd ->
-                 cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent
-               Nothing  -> s <> xs
-        emitc c =
-          case mbAccentCmd of
-               Just cmd ->
-                 cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent
-               Nothing  -> c : xs
-        emitcseq cs =
-          case xs of
-            c:_ | isLetter c
-                , ctx == TextString
-                             -> cs <> " " <> xs
-                | isSpace c  -> cs <> "{}" <> xs
-                | ctx == TextString
-                             -> cs <> xs
-            _ -> cs <> "{}" <> xs
-        emitquote cs =
-          case xs of
-            '`':_  -> cs <> "\\," <> xs -- add thin space
-            '\'':_ -> cs <> "\\," <> xs -- add thin space
-            _      -> cs <> xs
-    in case x of
-         '?' | ligatures ->  -- avoid ?` ligature
-           case xs of
-             '`':_ -> emits "?{}"
-             _     -> emitc x
-         '!' | ligatures ->  -- avoid !` ligature
-           case xs of
-             '`':_ -> emits "!{}"
-             _     -> emitc x
-         '{' -> emits "\\{"
-         '}' -> emits "\\}"
-         '`' | ctx == CodeString -> emitcseq "\\textasciigrave"
-         '$' | not isUrl -> emits "\\$"
-         '%' -> emits "\\%"
-         '&' -> emits "\\&"
-         '_' | not isUrl -> emits "\\_"
-         '#' -> emits "\\#"
-         '-' | not isUrl -> case xs of
-                     -- prevent adjacent hyphens from forming ligatures
-                     ('-':_) -> emits "-\\/"
-                     _       -> emitc '-'
-         '~' | not isUrl -> emitcseq "\\textasciitilde"
-         '^' -> emits "\\^{}"
-         '\\'| isUrl     -> emitc '/' -- NB. / works as path sep even on Windows
-             | otherwise -> emitcseq "\\textbackslash"
-         '|' | not isUrl -> emitcseq "\\textbar"
-         '<' -> emitcseq "\\textless"
-         '>' -> emitcseq "\\textgreater"
-         '[' -> emits "{[}"  -- to avoid interpretation as
-         ']' -> emits "{]}"  -- optional arguments
-         '\'' | ctx == CodeString -> emitcseq "\\textquotesingle"
-         '\160' -> emits "~"
-         '\x200B' -> emits "\\hspace{0pt}"  -- zero-width space
-         '\x202F' -> emits "\\,"
-         '\x2026' -> emitcseq "\\ldots"
-         '\x2018' | ligatures -> emitquote "`"
-         '\x2019' | ligatures -> emitquote "'"
-         '\x201C' | ligatures -> emitquote "``"
-         '\x201D' | ligatures -> emitquote "''"
-         '\x2014' | ligatures -> emits "---"
-         '\x2013' | ligatures -> emits "--"
-         _ | writerPreferAscii opts
-             -> case x of
-                  'ı' -> emitcseq "\\i"
-                  'ȷ' -> emitcseq "\\j"
-                  'å' -> emitcseq "\\aa"
-                  'Å' -> emitcseq "\\AA"
-                  'ß' -> emitcseq "\\ss"
-                  'ø' -> emitcseq "\\o"
-                  'Ø' -> emitcseq "\\O"
-                  'Ł' -> emitcseq "\\L"
-                  'ł' -> emitcseq "\\l"
-                  'æ' -> emitcseq "\\ae"
-                  'Æ' -> emitcseq "\\AE"
-                  'œ' -> emitcseq "\\oe"
-                  'Œ' -> emitcseq "\\OE"
-                  '£' -> emitcseq "\\pounds"
-                  '€' -> emitcseq "\\euro"
-                  '©' -> emitcseq "\\copyright"
-                  _   -> emitc x
-           | otherwise -> emitc x
-
-accents :: M.Map Char String
-accents = M.fromList
-  [ ('\779' , "\\H")
-  , ('\768' , "\\`")
-  , ('\769' , "\\'")
-  , ('\770' , "\\^")
-  , ('\771' , "\\~")
-  , ('\776' , "\\\"")
-  , ('\775' , "\\.")
-  , ('\772' , "\\=")
-  , ('\781' , "\\|")
-  , ('\817' , "\\b")
-  , ('\807' , "\\c")
-  , ('\783' , "\\G")
-  , ('\777' , "\\h")
-  , ('\803' , "\\d")
-  , ('\785' , "\\f")
-  , ('\778' , "\\r")
-  , ('\865' , "\\t")
-  , ('\782' , "\\U")
-  , ('\780' , "\\v")
-  , ('\774' , "\\u")
-  , ('\808' , "\\k")
-  , ('\785' , "\\newtie")
-  , ('\8413', "\\textcircled")
-  ]
-
-toLabel :: PandocMonad m => Text -> LW m Text
-toLabel z = go `fmap` stringToLaTeX URLString z
- where
-   go = T.concatMap $ \x -> case x of
-     _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x
-       | x `elemText` "_-+=:;." -> T.singleton x
-       | otherwise -> T.pack $ "ux" <> printf "%x" (ord x)
-
--- | Puts contents into LaTeX command.
-inCmd :: Text -> Doc Text -> Doc Text
-inCmd cmd contents = char '\\' <> literal cmd <> braces contents
-
 toSlides :: PandocMonad m => [Block] -> LW m [Block]
 toSlides bs = do
   opts <- gets stOptions
@@ -483,7 +291,12 @@ blockToLaTeX :: PandocMonad m
              => Block     -- ^ Block to convert
              -> LW m (Doc Text)
 blockToLaTeX Null = return empty
-blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do
+blockToLaTeX (Div attr@(identifier,"block":dclasses,_)
+             (Header _ _ ils : bs)) = do
+  let blockname
+        | "example" `elem` dclasses = "exampleblock"
+        | "alert" `elem` dclasses = "alertblock"
+        | otherwise = "block"
   ref <- toLabel identifier
   let anchor = if T.null identifier
                   then empty
@@ -491,8 +304,8 @@ blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do
                        braces (literal ref) <> braces empty
   title' <- inlineListToLaTeX ils
   contents <- blockListToLaTeX bs
-  wrapDiv attr $ ("\\begin{block}" <> braces title' <> anchor) $$
-                 contents $$ "\\end{block}"
+  wrapDiv attr $ ("\\begin" <> braces blockname <> braces title' <> anchor) $$
+                 contents $$ "\\end" <> braces blockname
 blockToLaTeX (Div (identifier,"slide":dclasses,dkvs)
                (Header _ (_,hclasses,hkvs) ils : bs)) = do
   -- note: [fragile] is required or verbatim breaks
@@ -553,17 +366,16 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
                              Just s  -> braces (literal s))
                           $$ inner
                           $+$ "\\end{CSLReferences}"
-               else if "csl-entry" `elem` classes
-                       then vcat <$> mapM cslEntryToLaTeX bs
-                       else blockListToLaTeX bs
+               else blockListToLaTeX bs
   modify $ \st -> st{ stIncremental = oldIncremental }
   linkAnchor' <- hypertarget True identifier empty
-  -- see #2704 for the motivation for adding \leavevmode:
+  -- see #2704 for the motivation for adding \leavevmode
+  -- and #7078 for \vadjust pre
   let linkAnchor =
         case bs of
           Para _ : _
             | not (isEmpty linkAnchor')
-              -> "\\leavevmode" <> linkAnchor' <> "%"
+              -> "\\leavevmode\\vadjust pre{" <> linkAnchor' <> "}%"
           _ -> linkAnchor'
       wrapNotes txt = if beamer && "notes" `elem` classes
                          then "\\note" <> braces txt -- speaker notes
@@ -575,7 +387,7 @@ blockToLaTeX (Plain lst) =
 blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,tgt)])
   | Just tit <- T.stripPrefix "fig:" tgt
   = do
-      (capt, captForLof, footnotes) <- getCaption True txt
+      (capt, captForLof, footnotes) <- getCaption inlineListToLaTeX True txt
       lab <- labelFor ident
       let caption = "\\caption" <> captForLof <> braces capt <> lab
       img <- inlineToLaTeX (Image attr txt (src,tit))
@@ -776,181 +588,14 @@ blockToLaTeX (Header level (id',classes,_) lst) = do
   hdr <- sectionHeader classes id' level lst
   modify $ \s -> s{stInHeading = False}
   return hdr
-blockToLaTeX (Table _ blkCapt specs thead tbody tfoot) = do
-  let (caption, aligns, widths, heads, rows) = toLegacyTable blkCapt specs thead tbody tfoot
-  -- simple tables have to have simple cells:
-  let isSimple [Plain _] = True
-      isSimple [Para  _] = True
-      isSimple []        = True
-      isSimple _         = False
-  let widths' = if all (== 0) widths && not (all (all isSimple) rows)
-                   then replicate (length aligns)
-                          (1 / fromIntegral (length aligns))
-                   else widths
-  (captionText, captForLof, captNotes) <- getCaption False caption
-  let toHeaders hs = do contents <- tableRowToLaTeX True aligns hs
-                        return ("\\toprule" $$ contents $$ "\\midrule")
-  let removeNote (Note _) = Span ("", [], []) []
-      removeNote x        = x
-  firsthead <- if isEmpty captionText || all null heads
-                  then return empty
-                  else ($$ text "\\endfirsthead") <$> toHeaders heads
-  head' <- if all null heads
-              then return "\\toprule"
-              -- avoid duplicate notes in head and firsthead:
-              else toHeaders (if isEmpty firsthead
-                                 then heads
-                                 else walk removeNote heads)
-  let capt = if isEmpty captionText
-                then empty
-                else "\\caption" <> captForLof <> braces captionText
-                         <> "\\tabularnewline"
-  rows' <- mapM (tableRowToLaTeX False aligns) rows
-  let colDescriptors =
-         (if all (== 0) widths'
-             then hcat . map literal
-             else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $
-         zipWith (toColDescriptor (length widths')) aligns widths'
-  modify $ \s -> s{ stTable = True }
-  notes <- notesToLaTeX <$> gets stNotes
-  return $ "\\begin{longtable}[]" <>
-              braces ("@{}" <> colDescriptors <> "@{}")
-              -- the @{} removes extra space at beginning and end
-         $$ capt
-         $$ firsthead
-         $$ head'
-         $$ "\\endhead"
-         $$ vcat rows'
-         $$ "\\bottomrule"
-         $$ "\\end{longtable}"
-         $$ captNotes
-         $$ notes
-
-getCaption :: PandocMonad m
-           => Bool -> [Inline] -> LW m (Doc Text, Doc Text, Doc Text)
-getCaption externalNotes txt = do
-  oldExternalNotes <- gets stExternalNotes
-  modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] }
-  capt <- inlineListToLaTeX txt
-  footnotes <- if externalNotes
-                  then notesToLaTeX <$> gets stNotes
-                  else return empty
-  modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] }
-  -- We can't have footnotes in the list of figures/tables, so remove them:
-  let getNote (Note _) = Any True
-      getNote _        = Any False
-  let hasNotes = getAny . query getNote
-  captForLof <- if hasNotes txt
-                   then brackets <$> inlineListToLaTeX (walk deNote txt)
-                   else return empty
-  return (capt, captForLof, footnotes)
-
-toColDescriptor :: Int -> Alignment -> Double -> Text
-toColDescriptor _numcols align 0 =
-  case align of
-         AlignLeft    -> "l"
-         AlignRight   -> "r"
-         AlignCenter  -> "c"
-         AlignDefault -> "l"
-toColDescriptor numcols align width =
-  T.pack $ printf
-     ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}"
-     align'
-     ((numcols - 1) * 2)
-     width
- where
-   align' :: String
-   align' = case align of
-              AlignLeft    -> "\\raggedright"
-              AlignRight   -> "\\raggedleft"
-              AlignCenter  -> "\\centering"
-              AlignDefault -> "\\raggedright"
+blockToLaTeX (Table attr blkCapt specs thead tbodies tfoot) =
+  tableToLaTeX inlineListToLaTeX blockListToLaTeX
+               (Ann.toTable attr blkCapt specs thead tbodies tfoot)
 
 blockListToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text)
 blockListToLaTeX lst =
   vsep `fmap` mapM (\b -> setEmptyLine True >> blockToLaTeX b) lst
 
-tableRowToLaTeX :: PandocMonad m
-                => Bool
-                -> [Alignment]
-                -> [[Block]]
-                -> LW m (Doc Text)
-tableRowToLaTeX header aligns cols = do
-  cells <- mapM (tableCellToLaTeX header) $ zip aligns cols
-  return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace"
-
--- For simple latex tables (without minipages or parboxes),
--- we need to go to some lengths to get line breaks working:
--- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}.
-fixLineBreaks :: Block -> Block
-fixLineBreaks (Para ils)  = Para $ fixLineBreaks' ils
-fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils
-fixLineBreaks x           = x
-
-fixLineBreaks' :: [Inline] -> [Inline]
-fixLineBreaks' ils = case splitBy (== LineBreak) ils of
-                       []     -> []
-                       [xs]   -> xs
-                       chunks -> RawInline "tex" "\\vtop{" :
-                                 concatMap tohbox chunks <>
-                                 [RawInline "tex" "}"]
-  where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <>
-                    [RawInline "tex" "}"]
-
--- We also change display math to inline math, since display
--- math breaks in simple tables.
-displayMathToInline :: Inline -> Inline
-displayMathToInline (Math DisplayMath x) = Math InlineMath x
-displayMathToInline x                    = x
-
-tableCellToLaTeX :: PandocMonad m
-                 => Bool -> (Alignment, [Block])
-                 -> LW m (Doc Text)
-tableCellToLaTeX header (align, blocks) = do
-  beamer <- gets stBeamer
-  externalNotes <- gets stExternalNotes
-  inMinipage <- gets stInMinipage
-  -- See #5367 -- footnotehyper/footnote don't work in beamer,
-  -- so we need to produce the notes outside the table...
-  modify $ \st -> st{ stExternalNotes = beamer }
-  let isPlainOrPara Para{}  = True
-      isPlainOrPara Plain{} = True
-      isPlainOrPara _       = False
-  result <-
-    if all isPlainOrPara blocks
-       then
-         blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
-       else do
-         modify $ \st -> st{ stInMinipage = True }
-         cellContents <- blockListToLaTeX blocks
-         modify $ \st -> st{ stInMinipage = inMinipage }
-         let valign = text $ if header then "[b]" else "[t]"
-         let halign = case align of
-                      AlignLeft    -> "\\raggedright"
-                      AlignRight   -> "\\raggedleft"
-                      AlignCenter  -> "\\centering"
-                      AlignDefault -> "\\raggedright"
-         return $ "\\begin{minipage}" <> valign <>
-                  braces "\\linewidth" <> halign <> cr <>
-                  cellContents <> cr <>
-                  "\\end{minipage}"
-  modify $ \st -> st{ stExternalNotes = externalNotes }
-  return result
-
-
-notesToLaTeX :: [Doc Text] -> Doc Text
-notesToLaTeX [] = empty
-notesToLaTeX ns = (case length ns of
-                              n | n > 1 -> "\\addtocounter" <>
-                                           braces "footnote" <>
-                                           braces (text $ show $ 1 - n)
-                                | otherwise -> empty)
-                   $$
-                   vcat (intersperse
-                     ("\\addtocounter" <> braces "footnote" <> braces "1")
-                     $ map (\x -> "\\footnotetext" <> braces x)
-                     $ reverse ns)
-
 listItemToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text)
 listItemToLaTeX lst
   -- we need to put some text before a header if it's the first
@@ -1077,81 +722,6 @@ sectionHeader classes ident level lst = do
                                 braces txtNoNotes
                          else empty
 
-mapAlignment :: Text -> Text
-mapAlignment a = case a of
-                   "top" -> "T"
-                   "top-baseline" -> "t"
-                   "bottom" -> "b"
-                   "center" -> "c"
-                   _ -> a 
-
-wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text)
-wrapDiv (_,classes,kvs) t = do
-  beamer <- gets stBeamer
-  let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
-  lang <- toLang $ lookup "lang" kvs
-  let wrapColumns = if beamer && "columns" `elem` classes
-                    then \contents ->
-                           let valign = maybe "T" mapAlignment (lookup "align" kvs)
-                               totalwidth = maybe [] (\x -> ["totalwidth=" <> x])
-                                 (lookup "totalwidth" kvs)
-                               onlytextwidth = filter ("onlytextwidth" ==) classes
-                               options = text $ T.unpack $ T.intercalate "," $
-                                 valign : totalwidth ++ onlytextwidth 
-                           in inCmd "begin" "columns" <> brackets options
-                              $$ contents
-                              $$ inCmd "end" "columns"
-                    else id
-      wrapColumn  = if beamer && "column" `elem` classes
-                    then \contents ->
-                           let valign =
-                                 maybe ""
-                                 (brackets . text . T.unpack . mapAlignment)
-                                 (lookup "align" kvs)
-                               w = maybe "0.48" fromPct (lookup "width" kvs) 
-                           in  inCmd "begin" "column" <> 
-                               valign <>
-                               braces (literal w <> "\\textwidth")
-                               $$ contents
-                               $$ inCmd "end" "column"
-                    else id
-      fromPct xs =
-        case T.unsnoc xs of
-          Just (ds, '%') -> case safeRead ds of
-                              Just digits -> showFl (digits / 100 :: Double)
-                              Nothing -> xs
-          _              -> xs
-      wrapDir = case lookup "dir" kvs of
-                  Just "rtl" -> align "RTL"
-                  Just "ltr" -> align "LTR"
-                  _          -> id
-      wrapLang txt = case lang of
-                       Just lng -> let (l, o) = toPolyglossiaEnv lng
-                                       ops = if T.null o
-                                             then ""
-                                             else brackets $ literal o
-                                   in  inCmd "begin" (literal l) <> ops
-                                       $$ blankline <> txt <> blankline
-                                       $$ inCmd "end" (literal l)
-                       Nothing  -> txt
-  return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t
-
-hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text)
-hypertarget _ "" x    = return x
-hypertarget addnewline ident x = do
-  ref <- literal `fmap` toLabel ident
-  return $ text "\\hypertarget"
-              <> braces ref
-              <> braces ((if addnewline && not (isEmpty x)
-                             then "%" <> cr
-                             else empty) <> x)
-
-labelFor :: PandocMonad m => Text -> LW m (Doc Text)
-labelFor ""    = return empty
-labelFor ident = do
-  ref <- literal `fmap` toLabel ident
-  return $ text "\\label" <> braces ref
-
 -- | Convert list of inline elements to LaTeX.
 inlineListToLaTeX :: PandocMonad m
                   => [Inline]  -- ^ Inlines to convert
@@ -1176,27 +746,6 @@ inlineListToLaTeX lst = hcat <$>
            fixInitialLineBreaks xs
        fixInitialLineBreaks xs = xs
 
-isQuoted :: Inline -> Bool
-isQuoted (Quoted _ _) = True
-isQuoted _            = False
-
-cslEntryToLaTeX :: PandocMonad m
-                => Block
-                -> LW m (Doc Text)
-cslEntryToLaTeX (Para xs) =
-  mconcat <$> mapM go xs
- where
-   go (Span ("",["csl-block"],[]) ils) =
-     (cr <>) . inCmd "CSLBlock" <$> inlineListToLaTeX ils
-   go (Span ("",["csl-left-margin"],[]) ils) =
-     inCmd "CSLLeftMargin" <$> inlineListToLaTeX ils
-   go (Span ("",["csl-right-inline"],[]) ils) =
-     (cr <>) . inCmd "CSLRightInline" <$> inlineListToLaTeX ils
-   go (Span ("",["csl-indent"],[]) ils) =
-     (cr <>) . inCmd "CSLIndent" <$> inlineListToLaTeX ils
-   go il = inlineToLaTeX il
-cslEntryToLaTeX x = blockToLaTeX x
-
 -- | Convert inline element to LaTeX
 inlineToLaTeX :: PandocMonad m
               => Inline    -- ^ Inline to convert
@@ -1204,23 +753,38 @@ inlineToLaTeX :: PandocMonad m
 inlineToLaTeX (Span (id',classes,kvs) ils) = do
   linkAnchor <- hypertarget False id' empty
   lang <- toLang $ lookup "lang" kvs
-  let cmds = ["textup" | "csl-no-emph" `elem` classes] ++
-             ["textnormal" | "csl-no-strong" `elem` classes ||
-                             "csl-no-smallcaps" `elem` classes] ++
-             ["RL" | ("dir", "rtl") `elem` kvs] ++
-             ["LR" | ("dir", "ltr") `elem` kvs] ++
-             (case lang of
-                Just lng -> let (l, o) = toPolyglossia lng
-                                ops = if T.null o then "" else "[" <> o <> "]"
-                            in  ["text" <> l <> ops]
-                Nothing  -> [])
+  let classToCmd "csl-no-emph" = Just "textup"
+      classToCmd "csl-no-strong" = Just "textnormal"
+      classToCmd "csl-no-smallcaps" = Just "textnormal"
+      classToCmd "csl-block" = Just "CSLBlock"
+      classToCmd "csl-left-margin" = Just "CSLLeftMargin"
+      classToCmd "csl-right-inline" = Just "CSLRightInline"
+      classToCmd "csl-indent" = Just "CSLIndent"
+      classToCmd _ = Nothing
+      kvToCmd ("dir","rtl") = Just "RL"
+      kvToCmd ("dir","ltr") = Just "LR"
+      kvToCmd _ = Nothing
+      langCmds =
+        case lang of
+           Just lng -> let (l, o) = toPolyglossia lng
+                           ops = if T.null o then "" else "[" <> o <> "]"
+                       in  ["text" <> l <> ops]
+           Nothing  -> []
+  let cmds = mapMaybe classToCmd classes ++ mapMaybe kvToCmd kvs ++ langCmds
   contents <- inlineListToLaTeX ils
-  return $ (if T.null id'
-               then empty
-               else "\\protect" <> linkAnchor) <>
-           (if null cmds
-               then braces contents
-               else foldr inCmd contents cmds)
+  return $
+    (case classes of
+              ["csl-block"] -> (cr <>)
+              ["csl-left-margin"] -> (cr <>)
+              ["csl-right-inline"] -> (cr <>)
+              ["csl-indent"] -> (cr <>)
+              _ -> id) $
+    (if T.null id'
+        then empty
+        else "\\protect" <> linkAnchor) <>
+    (if null cmds
+        then braces contents
+        else foldr inCmd contents cmds)
 inlineToLaTeX (Emph lst) = inCmd "emph" <$> inlineListToLaTeX lst
 inlineToLaTeX (Underline lst) = inCmd "underline" <$> inlineListToLaTeX lst
 inlineToLaTeX (Strong lst) = inCmd "textbf" <$> inlineListToLaTeX lst
@@ -1242,8 +806,8 @@ inlineToLaTeX (Cite cits lst) = do
   st <- get
   let opts = stOptions st
   case writerCiteMethod opts of
-     Natbib   -> citationsToNatbib cits
-     Biblatex -> citationsToBiblatex cits
+     Natbib   -> citationsToNatbib inlineListToLaTeX cits
+     Biblatex -> citationsToBiblatex inlineListToLaTeX cits
      _        -> inlineListToLaTeX lst
 
 inlineToLaTeX (Code (_,classes,kvs) str) = do
@@ -1267,7 +831,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do
         let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
                        (c:_) -> c
                        []    -> '!'
-        let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str
+        let isEscapable '\\' = True
+            isEscapable '{'  = True
+            isEscapable '}'  = True
+            isEscapable '%'  = True
+            isEscapable '~'  = True
+            isEscapable '_'  = True
+            isEscapable '&'  = True
+            isEscapable '#'  = True
+            isEscapable '^'  = True
+            isEscapable _    = False
+        let escChar c | isEscapable c = T.pack ['\\',c]
+                      | otherwise     = T.singleton c
+        let str' = T.concatMap escChar str
         -- we always put lstinline in a dummy 'passthrough' command
         -- (defined in the default template) so that we don't have
         -- to change the way we escape characters depending on whether
@@ -1317,6 +893,10 @@ inlineToLaTeX (Quoted qt lst) = do
                    if isEnabled Ext_smart opts
                       then char '`' <> inner <> char '\''
                       else char '\x2018' <> inner <> char '\x2019'
+    where
+      isQuoted (Span _ (x:_)) = isQuoted x
+      isQuoted (Quoted _ _)   = True
+      isQuoted _              = False
 inlineToLaTeX (Str str) = do
   setEmptyLine False
   liftM literal $ stringToLaTeX TextString str
@@ -1339,7 +919,7 @@ inlineToLaTeX il@(RawInline f str) = do
 inlineToLaTeX LineBreak = do
   emptyLine <- gets stEmptyLine
   setEmptyLine True
-  return $ (if emptyLine then "~" else "") <> "\\\\" <> cr
+  return $ (if emptyLine then "\\strut " else "") <> "\\\\" <> cr
 inlineToLaTeX SoftBreak = do
   wrapText <- gets (writerWrapText . stOptions)
   case wrapText of
@@ -1454,153 +1034,6 @@ protectCode x = [x]
 setEmptyLine :: PandocMonad m => Bool -> LW m ()
 setEmptyLine b = modify $ \st -> st{ stEmptyLine = b }
 
-citationsToNatbib :: PandocMonad m => [Citation] -> LW m (Doc Text)
-citationsToNatbib
-            [one]
-  = citeCommand c p s k
-  where
-    Citation { citationId = k
-             , citationPrefix = p
-             , citationSuffix = s
-             , citationMode = m
-             }
-      = one
-    c = case m of
-             AuthorInText   -> "citet"
-             SuppressAuthor -> "citeyearpar"
-             NormalCitation -> "citep"
-
-citationsToNatbib cits
-  | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
-  = citeCommand "citep" p s ks
-  where
-     noPrefix  = all (null . citationPrefix)
-     noSuffix  = all (null . citationSuffix)
-     ismode m  = all ((==) m  . citationMode)
-     p         = citationPrefix  $
-                 head cits
-     s         = citationSuffix  $
-                 last cits
-     ks        = T.intercalate ", " $ map citationId cits
-
-citationsToNatbib (c:cs) | citationMode c == AuthorInText = do
-     author <- citeCommand "citeauthor" [] [] (citationId c)
-     cits   <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs)
-     return $ author <+> cits
-
-citationsToNatbib cits = do
-  cits' <- mapM convertOne cits
-  return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}"
-  where
-    combineTwo a b | isEmpty a = b
-                   | otherwise = a <> text "; " <> b
-    convertOne Citation { citationId = k
-                        , citationPrefix = p
-                        , citationSuffix = s
-                        , citationMode = m
-                        }
-        = case m of
-               AuthorInText   -> citeCommand "citealt"  p s k
-               SuppressAuthor -> citeCommand "citeyear" p s k
-               NormalCitation -> citeCommand "citealp"  p s k
-
-citeCommand :: PandocMonad m
-            => Text -> [Inline] -> [Inline] -> Text -> LW m (Doc Text)
-citeCommand c p s k = do
-  args <- citeArguments p s k
-  return $ literal ("\\" <> c) <> args
-
-type Prefix = [Inline]
-type Suffix = [Inline]
-type CiteId = Text
-data CiteGroup = CiteGroup Prefix Suffix [CiteId]
-
-citeArgumentsList :: PandocMonad m
-              => CiteGroup -> LW m (Doc Text)
-citeArgumentsList (CiteGroup _ _ []) = return empty
-citeArgumentsList (CiteGroup pfxs sfxs ids) = do
-      pdoc <- inlineListToLaTeX pfxs
-      sdoc <- inlineListToLaTeX sfxs'
-      return $ optargs pdoc sdoc <>
-              braces (literal (T.intercalate "," (reverse ids)))
-      where sfxs' = stripLocatorBraces $ case sfxs of
-                (Str t : r) -> case T.uncons t of
-                  Just (x, xs)
-                    | T.null xs
-                    , isPunctuation x -> dropWhile (== Space) r
-                    | isPunctuation x -> Str xs : r
-                  _ -> sfxs
-                _   -> sfxs
-            optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of
-                 (True, True ) -> empty
-                 (True, False) -> brackets sdoc
-                 (_   , _    ) -> brackets pdoc <> brackets sdoc
-
-citeArguments :: PandocMonad m
-              => [Inline] -> [Inline] -> Text -> LW m (Doc Text)
-citeArguments p s k = citeArgumentsList (CiteGroup p s [k])
-
--- strip off {} used to define locator in pandoc-citeproc; see #5722
-stripLocatorBraces :: [Inline] -> [Inline]
-stripLocatorBraces = walk go
-  where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs
-        go x        = x
-
-citationsToBiblatex :: PandocMonad m => [Citation] -> LW m (Doc Text)
-citationsToBiblatex
-            [one]
-  = citeCommand cmd p s k
-    where
-       Citation { citationId = k
-                , citationPrefix = p
-                , citationSuffix = s
-                , citationMode = m
-                } = one
-       cmd = case m of
-                  SuppressAuthor -> "autocite*"
-                  AuthorInText   -> "textcite"
-                  NormalCitation -> "autocite"
-
-citationsToBiblatex (c:cs)
-  | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs)
-    = do
-      let cmd = case citationMode c of
-                    SuppressAuthor -> "\\autocite*"
-                    AuthorInText   -> "\\textcite"
-                    NormalCitation -> "\\autocite"
-      return $ text cmd <>
-               braces (literal (T.intercalate "," (map citationId (c:cs))))
-  | otherwise
-    = do
-      let cmd = case citationMode c of
-                    SuppressAuthor -> "\\autocites*"
-                    AuthorInText   -> "\\textcites"
-                    NormalCitation -> "\\autocites"
-
-      groups <- mapM citeArgumentsList (reverse (foldl' grouper [] (c:cs)))
-
-      return $ text cmd <> mconcat groups
-
-  where grouper prev cit = case prev of
-         ((CiteGroup oPfx oSfx ids):rest)
-             | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest
-         _ -> CiteGroup pfx sfx [cid] : prev
-         where pfx = citationPrefix cit
-               sfx = citationSuffix cit
-               cid = citationId cit
-
-citationsToBiblatex _ = return empty
-
--- Determine listings language from list of class attributes.
-getListingsLanguage :: [Text] -> Maybe Text
-getListingsLanguage xs
-  = foldr ((<|>) . toListingsLanguage) Nothing xs
-
-mbBraced :: Text -> Text
-mbBraced x = if not (T.all isAlphaNum x)
-                then "{" <> x <> "}"
-                else x
-
 -- Extract a key from divs and spans
 extract :: Text -> Block -> [Text]
 extract key (Div attr _)     = lookKey key attr
@@ -1617,175 +1050,3 @@ extractInline _ _               = []
 -- Look up a key in an attribute and give a list of its values
 lookKey :: Text -> Attr -> [Text]
 lookKey key (_,_,kvs) =  maybe [] T.words $ lookup key kvs
-
--- In environments \Arabic instead of \arabic is used
-toPolyglossiaEnv :: Lang -> (Text, Text)
-toPolyglossiaEnv l =
-  case toPolyglossia l of
-    ("arabic", o) -> ("Arabic", o)
-    x             -> x
-
--- Takes a list of the constituents of a BCP 47 language code and
--- converts it to a Polyglossia (language, options) tuple
--- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
-toPolyglossia :: Lang -> (Text, Text)
-toPolyglossia (Lang "ar" _ "DZ" _)        = ("arabic", "locale=algeria")
-toPolyglossia (Lang "ar" _ "IQ" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "JO" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "LB" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "LY" _)        = ("arabic", "locale=libya")
-toPolyglossia (Lang "ar" _ "MA" _)        = ("arabic", "locale=morocco")
-toPolyglossia (Lang "ar" _ "MR" _)        = ("arabic", "locale=mauritania")
-toPolyglossia (Lang "ar" _ "PS" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "SY" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "TN" _)        = ("arabic", "locale=tunisia")
-toPolyglossia (Lang "de" _ _ vars)
-  | "1901" `elem` vars                    = ("german", "spelling=old")
-toPolyglossia (Lang "de" _ "AT" vars)
-  | "1901" `elem` vars                    = ("german", "variant=austrian, spelling=old")
-toPolyglossia (Lang "de" _ "AT" _)        = ("german", "variant=austrian")
-toPolyglossia (Lang "de" _ "CH" vars)
-  | "1901" `elem` vars                    = ("german", "variant=swiss, spelling=old")
-toPolyglossia (Lang "de" _ "CH" _)        = ("german", "variant=swiss")
-toPolyglossia (Lang "de" _ _ _)           = ("german", "")
-toPolyglossia (Lang "dsb" _ _ _)          = ("lsorbian", "")
-toPolyglossia (Lang "el" _ "polyton" _)   = ("greek",   "variant=poly")
-toPolyglossia (Lang "en" _ "AU" _)        = ("english", "variant=australian")
-toPolyglossia (Lang "en" _ "CA" _)        = ("english", "variant=canadian")
-toPolyglossia (Lang "en" _ "GB" _)        = ("english", "variant=british")
-toPolyglossia (Lang "en" _ "NZ" _)        = ("english", "variant=newzealand")
-toPolyglossia (Lang "en" _ "UK" _)        = ("english", "variant=british")
-toPolyglossia (Lang "en" _ "US" _)        = ("english", "variant=american")
-toPolyglossia (Lang "grc" _ _ _)          = ("greek",   "variant=ancient")
-toPolyglossia (Lang "hsb" _ _  _)         = ("usorbian", "")
-toPolyglossia (Lang "la" _ _ vars)
-  | "x-classic" `elem` vars               = ("latin", "variant=classic")
-toPolyglossia (Lang "pt" _ "BR" _)        = ("portuguese", "variant=brazilian")
-toPolyglossia (Lang "sl" _ _ _)           = ("slovenian", "")
-toPolyglossia x                           = (commonFromBcp47 x, "")
-
--- Takes a list of the constituents of a BCP 47 language code and
--- converts it to a Babel language string.
--- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
--- List of supported languages (slightly outdated):
--- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
-toBabel :: Lang -> Text
-toBabel (Lang "de" _ "AT" vars)
-  | "1901" `elem` vars                  = "austrian"
-  | otherwise                           = "naustrian"
-toBabel (Lang "de" _ "CH" vars)
-  | "1901" `elem` vars                  = "swissgerman"
-  | otherwise                           = "nswissgerman"
-toBabel (Lang "de" _ _ vars)
-  | "1901" `elem` vars                  = "german"
-  | otherwise                           = "ngerman"
-toBabel (Lang "dsb" _ _ _)              = "lowersorbian"
-toBabel (Lang "el" _ _ vars)
-  | "polyton" `elem` vars               = "polutonikogreek"
-toBabel (Lang "en" _ "AU" _)            = "australian"
-toBabel (Lang "en" _ "CA" _)            = "canadian"
-toBabel (Lang "en" _ "GB" _)            = "british"
-toBabel (Lang "en" _ "NZ" _)            = "newzealand"
-toBabel (Lang "en" _ "UK" _)            = "british"
-toBabel (Lang "en" _ "US" _)            = "american"
-toBabel (Lang "fr" _ "CA" _)            = "canadien"
-toBabel (Lang "fra" _ _ vars)
-  | "aca" `elem` vars                   = "acadian"
-toBabel (Lang "grc" _ _ _)              = "polutonikogreek"
-toBabel (Lang "hsb" _ _ _)              = "uppersorbian"
-toBabel (Lang "la" _ _ vars)
-  | "x-classic" `elem` vars             = "classiclatin"
-toBabel (Lang "pt" _ "BR" _)            = "brazilian"
-toBabel (Lang "sl" _ _ _)               = "slovene"
-toBabel x                               = commonFromBcp47 x
-
--- Takes a list of the constituents of a BCP 47 language code
--- and converts it to a string shared by Babel and Polyglossia.
--- https://tools.ietf.org/html/bcp47#section-2.1
-commonFromBcp47 :: Lang -> Text
-commonFromBcp47 (Lang "sr" "Cyrl" _ _)          = "serbianc"
-commonFromBcp47 (Lang "zh" "Latn" _ vars)
-  | "pinyin" `elem` vars                        = "pinyin"
-commonFromBcp47 (Lang l _ _ _) = fromIso l
-  where
-    fromIso "af"  = "afrikaans"
-    fromIso "am"  = "amharic"
-    fromIso "ar"  = "arabic"
-    fromIso "as"  = "assamese"
-    fromIso "ast" = "asturian"
-    fromIso "bg"  = "bulgarian"
-    fromIso "bn"  = "bengali"
-    fromIso "bo"  = "tibetan"
-    fromIso "br"  = "breton"
-    fromIso "ca"  = "catalan"
-    fromIso "cy"  = "welsh"
-    fromIso "cs"  = "czech"
-    fromIso "cop" = "coptic"
-    fromIso "da"  = "danish"
-    fromIso "dv"  = "divehi"
-    fromIso "el"  = "greek"
-    fromIso "en"  = "english"
-    fromIso "eo"  = "esperanto"
-    fromIso "es"  = "spanish"
-    fromIso "et"  = "estonian"
-    fromIso "eu"  = "basque"
-    fromIso "fa"  = "farsi"
-    fromIso "fi"  = "finnish"
-    fromIso "fr"  = "french"
-    fromIso "fur" = "friulan"
-    fromIso "ga"  = "irish"
-    fromIso "gd"  = "scottish"
-    fromIso "gez" = "ethiopic"
-    fromIso "gl"  = "galician"
-    fromIso "he"  = "hebrew"
-    fromIso "hi"  = "hindi"
-    fromIso "hr"  = "croatian"
-    fromIso "hu"  = "magyar"
-    fromIso "hy"  = "armenian"
-    fromIso "ia"  = "interlingua"
-    fromIso "id"  = "indonesian"
-    fromIso "ie"  = "interlingua"
-    fromIso "is"  = "icelandic"
-    fromIso "it"  = "italian"
-    fromIso "jp"  = "japanese"
-    fromIso "km"  = "khmer"
-    fromIso "kmr" = "kurmanji"
-    fromIso "kn"  = "kannada"
-    fromIso "ko"  = "korean"
-    fromIso "la"  = "latin"
-    fromIso "lo"  = "lao"
-    fromIso "lt"  = "lithuanian"
-    fromIso "lv"  = "latvian"
-    fromIso "ml"  = "malayalam"
-    fromIso "mn"  = "mongolian"
-    fromIso "mr"  = "marathi"
-    fromIso "nb"  = "norsk"
-    fromIso "nl"  = "dutch"
-    fromIso "nn"  = "nynorsk"
-    fromIso "no"  = "norsk"
-    fromIso "nqo" = "nko"
-    fromIso "oc"  = "occitan"
-    fromIso "pa"  = "panjabi"
-    fromIso "pl"  = "polish"
-    fromIso "pms" = "piedmontese"
-    fromIso "pt"  = "portuguese"
-    fromIso "rm"  = "romansh"
-    fromIso "ro"  = "romanian"
-    fromIso "ru"  = "russian"
-    fromIso "sa"  = "sanskrit"
-    fromIso "se"  = "samin"
-    fromIso "sk"  = "slovak"
-    fromIso "sq"  = "albanian"
-    fromIso "sr"  = "serbian"
-    fromIso "sv"  = "swedish"
-    fromIso "syr" = "syriac"
-    fromIso "ta"  = "tamil"
-    fromIso "te"  = "telugu"
-    fromIso "th"  = "thai"
-    fromIso "ti"  = "ethiopic"
-    fromIso "tk"  = "turkmen"
-    fromIso "tr"  = "turkish"
-    fromIso "uk"  = "ukrainian"
-    fromIso "ur"  = "urdu"
-    fromIso "vi"  = "vietnamese"
-    fromIso _     = ""
diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
new file mode 100644
index 000000000..ab4d365cc
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
@@ -0,0 +1,47 @@
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Caption
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Write figure or table captions as LaTeX.
+-}
+module Text.Pandoc.Writers.LaTeX.Caption
+  ( getCaption
+  ) where
+
+import Control.Monad.State.Strict
+import Data.Monoid (Any(..))
+import Data.Text (Text)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Text.DocLayout (Doc, brackets, empty)
+import Text.Pandoc.Shared
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX)
+import Text.Pandoc.Writers.LaTeX.Types
+  ( LW, WriterState (stExternalNotes, stNotes) )
+
+getCaption :: PandocMonad m
+           => ([Inline] -> LW m (Doc Text))
+           -> Bool -> [Inline]
+           -> LW m (Doc Text, Doc Text, Doc Text)
+getCaption inlineListToLaTeX externalNotes txt = do
+  oldExternalNotes <- gets stExternalNotes
+  modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] }
+  capt <- inlineListToLaTeX txt
+  footnotes <- if externalNotes
+                  then notesToLaTeX <$> gets stNotes
+                  else return empty
+  modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] }
+  -- We can't have footnotes in the list of figures/tables, so remove them:
+  let getNote (Note _) = Any True
+      getNote _        = Any False
+  let hasNotes = getAny . query getNote
+  captForLof <- if hasNotes txt
+                   then brackets <$> inlineListToLaTeX (walk deNote txt)
+                   else return empty
+  return (capt, captForLof, footnotes)
diff --git a/src/Text/Pandoc/Writers/LaTeX/Citation.hs b/src/Text/Pandoc/Writers/LaTeX/Citation.hs
new file mode 100644
index 000000000..f48a43d7a
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Citation.hs
@@ -0,0 +1,181 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Citation
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.LaTeX.Citation
+  ( citationsToNatbib,
+    citationsToBiblatex
+  ) where
+
+import Data.Text (Text)
+import Data.Char (isPunctuation)
+import qualified Data.Text as T
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Data.List (foldl')
+import Text.DocLayout (Doc, brackets, empty, (<+>), text, isEmpty, literal,
+                       braces)
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.LaTeX.Types ( LW )
+
+citationsToNatbib :: PandocMonad m
+                  => ([Inline] -> LW m (Doc Text))
+                  -> [Citation]
+                  -> LW m (Doc Text)
+citationsToNatbib inlineListToLaTeX [one]
+  = citeCommand inlineListToLaTeX c p s k
+  where
+    Citation { citationId = k
+             , citationPrefix = p
+             , citationSuffix = s
+             , citationMode = m
+             }
+      = one
+    c = case m of
+             AuthorInText   -> "citet"
+             SuppressAuthor -> "citeyearpar"
+             NormalCitation -> "citep"
+
+citationsToNatbib inlineListToLaTeX cits
+  | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
+  = citeCommand inlineListToLaTeX "citep" p s ks
+  where
+     noPrefix  = all (null . citationPrefix)
+     noSuffix  = all (null . citationSuffix)
+     ismode m  = all ((==) m  . citationMode)
+     p         = citationPrefix  $
+                 head cits
+     s         = citationSuffix  $
+                 last cits
+     ks        = T.intercalate ", " $ map citationId cits
+
+citationsToNatbib inlineListToLaTeX (c:cs)
+  | citationMode c == AuthorInText = do
+     author <- citeCommand inlineListToLaTeX "citeauthor" [] [] (citationId c)
+     cits   <- citationsToNatbib inlineListToLaTeX
+                  (c { citationMode = SuppressAuthor } : cs)
+     return $ author <+> cits
+
+citationsToNatbib inlineListToLaTeX cits = do
+  cits' <- mapM convertOne cits
+  return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}"
+  where
+    citeCommand' = citeCommand inlineListToLaTeX
+    combineTwo a b | isEmpty a = b
+                   | otherwise = a <> text "; " <> b
+    convertOne Citation { citationId = k
+                        , citationPrefix = p
+                        , citationSuffix = s
+                        , citationMode = m
+                        }
+        = case m of
+               AuthorInText   -> citeCommand' "citealt"  p s k
+               SuppressAuthor -> citeCommand' "citeyear" p s k
+               NormalCitation -> citeCommand' "citealp"  p s k
+
+citeCommand :: PandocMonad m
+            => ([Inline] -> LW m (Doc Text))
+            -> Text
+            -> [Inline]
+            -> [Inline]
+            -> Text
+            -> LW m (Doc Text)
+citeCommand inlineListToLaTeX c p s k = do
+  args <- citeArguments inlineListToLaTeX p s k
+  return $ literal ("\\" <> c) <> args
+
+type Prefix = [Inline]
+type Suffix = [Inline]
+type CiteId = Text
+data CiteGroup = CiteGroup Prefix Suffix [CiteId]
+
+citeArgumentsList :: PandocMonad m
+              => ([Inline] -> LW m (Doc Text))
+              -> CiteGroup
+              -> LW m (Doc Text)
+citeArgumentsList _inlineListToLaTeX (CiteGroup _ _ []) = return empty
+citeArgumentsList inlineListToLaTeX (CiteGroup pfxs sfxs ids) = do
+      pdoc <- inlineListToLaTeX pfxs
+      sdoc <- inlineListToLaTeX sfxs'
+      return $ optargs pdoc sdoc <>
+              braces (literal (T.intercalate "," (reverse ids)))
+      where sfxs' = stripLocatorBraces $ case sfxs of
+                (Str t : r) -> case T.uncons t of
+                  Just (x, xs)
+                    | T.null xs
+                    , isPunctuation x -> dropWhile (== Space) r
+                    | isPunctuation x -> Str xs : r
+                  _ -> sfxs
+                _   -> sfxs
+            optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of
+                 (True, True ) -> empty
+                 (True, False) -> brackets sdoc
+                 (_   , _    ) -> brackets pdoc <> brackets sdoc
+
+citeArguments :: PandocMonad m
+              => ([Inline] -> LW m (Doc Text))
+              -> [Inline]
+              -> [Inline]
+              -> Text
+              -> LW m (Doc Text)
+citeArguments inlineListToLaTeX p s k =
+  citeArgumentsList inlineListToLaTeX (CiteGroup p s [k])
+
+-- strip off {} used to define locator in pandoc-citeproc; see #5722
+stripLocatorBraces :: [Inline] -> [Inline]
+stripLocatorBraces = walk go
+  where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs
+        go x        = x
+
+citationsToBiblatex :: PandocMonad m
+                    => ([Inline] -> LW m (Doc Text))
+                    -> [Citation] -> LW m (Doc Text)
+citationsToBiblatex inlineListToLaTeX [one]
+  = citeCommand inlineListToLaTeX cmd p s k
+    where
+       Citation { citationId = k
+                , citationPrefix = p
+                , citationSuffix = s
+                , citationMode = m
+                } = one
+       cmd = case m of
+                  SuppressAuthor -> "autocite*"
+                  AuthorInText   -> "textcite"
+                  NormalCitation -> "autocite"
+
+citationsToBiblatex inlineListToLaTeX (c:cs)
+  | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs)
+    = do
+      let cmd = case citationMode c of
+                    SuppressAuthor -> "\\autocite*"
+                    AuthorInText   -> "\\textcite"
+                    NormalCitation -> "\\autocite"
+      return $ text cmd <>
+               braces (literal (T.intercalate "," (map citationId (c:cs))))
+  | otherwise
+    = do
+      let cmd = case citationMode c of
+                    SuppressAuthor -> "\\autocites*"
+                    AuthorInText   -> "\\textcites"
+                    NormalCitation -> "\\autocites"
+
+      groups <- mapM (citeArgumentsList inlineListToLaTeX)
+                     (reverse (foldl' grouper [] (c:cs)))
+
+      return $ text cmd <> mconcat groups
+
+  where grouper prev cit = case prev of
+         ((CiteGroup oPfx oSfx ids):rest)
+             | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest
+         _ -> CiteGroup pfx sfx [cid] : prev
+         where pfx = citationPrefix cit
+               sfx = citationSuffix cit
+               cid = citationId cit
+
+citationsToBiblatex _ _ = return empty
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
new file mode 100644
index 000000000..0ba68b74e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -0,0 +1,192 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Lang
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.LaTeX.Lang
+  ( toPolyglossiaEnv,
+    toPolyglossia,
+    toBabel
+  ) where
+import Data.Text (Text)
+import Text.Collate.Lang (Lang(..))
+
+
+-- In environments \Arabic instead of \arabic is used
+toPolyglossiaEnv :: Lang -> (Text, Text)
+toPolyglossiaEnv l =
+  case toPolyglossia l of
+    ("arabic", o) -> ("Arabic", o)
+    x             -> x
+
+-- Takes a list of the constituents of a BCP47 language code and
+-- converts it to a Polyglossia (language, options) tuple
+-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
+toPolyglossia :: Lang -> (Text, Text)
+toPolyglossia (Lang "ar" _ (Just "DZ") _ _ _)  = ("arabic", "locale=algeria")
+toPolyglossia (Lang "ar" _ (Just "IQ") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "JO") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "LB") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "LY") _ _ _)  = ("arabic", "locale=libya")
+toPolyglossia (Lang "ar" _ (Just "MA") _ _ _)  = ("arabic", "locale=morocco")
+toPolyglossia (Lang "ar" _ (Just "MR") _ _ _)  = ("arabic", "locale=mauritania")
+toPolyglossia (Lang "ar" _ (Just "PS") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "SY") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "TN") _ _ _)  = ("arabic", "locale=tunisia")
+toPolyglossia (Lang "de" _ _ vars _ _)
+  | "1901" `elem` vars                         = ("german", "spelling=old")
+toPolyglossia (Lang "de" _ (Just "AT") vars _ _)
+  | "1901" `elem` vars         = ("german", "variant=austrian, spelling=old")
+toPolyglossia (Lang "de" _ (Just "AT") _ _ _)  = ("german", "variant=austrian")
+toPolyglossia (Lang "de" _ (Just "CH") vars _ _)
+  | "1901" `elem` vars        = ("german", "variant=swiss, spelling=old")
+toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ _ _ _ _)           = ("german", "")
+toPolyglossia (Lang "dsb" _ _ _ _ _)          = ("lsorbian", "")
+toPolyglossia (Lang "el" _ _ vars _ _)
+  | "polyton" `elem` vars                     = ("greek",   "variant=poly")
+toPolyglossia (Lang "en" _ (Just "AU") _ _ _) = ("english", "variant=australian")
+toPolyglossia (Lang "en" _ (Just "CA") _ _ _) = ("english", "variant=canadian")
+toPolyglossia (Lang "en" _ (Just "GB") _ _ _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ (Just "NZ") _ _ _) = ("english", "variant=newzealand")
+toPolyglossia (Lang "en" _ (Just "UK") _ _ _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ (Just "US") _ _ _) = ("english", "variant=american")
+toPolyglossia (Lang "grc" _ _ _ _ _)          = ("greek",   "variant=ancient")
+toPolyglossia (Lang "hsb" _ _ _ _ _)          = ("usorbian", "")
+toPolyglossia (Lang "la" _ _ vars _ _)
+  | "x-classic" `elem` vars                   = ("latin", "variant=classic")
+toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian")
+toPolyglossia (Lang "sl" _ _ _ _ _)           = ("slovenian", "")
+toPolyglossia x                               = (commonFromBcp47 x, "")
+
+-- Takes a list of the constituents of a BCP47 language code and
+-- converts it to a Babel language string.
+-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
+-- List of supported languages (slightly outdated):
+-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
+toBabel :: Lang -> Text
+toBabel (Lang "de" _ (Just "AT") vars _ _)
+  | "1901" `elem` vars                  = "austrian"
+  | otherwise                           = "naustrian"
+toBabel (Lang "de" _ (Just "CH") vars _ _)
+  | "1901" `elem` vars                  = "swissgerman"
+  | otherwise                           = "nswissgerman"
+toBabel (Lang "de" _ _ vars _ _)
+  | "1901" `elem` vars                  = "german"
+  | otherwise                           = "ngerman"
+toBabel (Lang "dsb" _ _ _ _ _)          = "lowersorbian"
+toBabel (Lang "el" _ _ vars _ _)
+  | "polyton" `elem` vars               = "polutonikogreek"
+toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian"
+toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian"
+toBabel (Lang "en" _ (Just "GB") _ _ _) = "british"
+toBabel (Lang "en" _ (Just "NZ") _ _ _) = "newzealand"
+toBabel (Lang "en" _ (Just "UK") _ _ _) = "british"
+toBabel (Lang "en" _ (Just "US") _ _ _) = "american"
+toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien"
+toBabel (Lang "fra" _ _ vars _ _)
+  | "aca" `elem` vars                   = "acadian"
+toBabel (Lang "grc" _ _ _ _ _)          = "polutonikogreek"
+toBabel (Lang "hsb" _ _ _ _ _)          = "uppersorbian"
+toBabel (Lang "la" _ _ vars _ _)
+  | "x-classic" `elem` vars             = "classiclatin"
+toBabel (Lang "pt" _ (Just "BR") _ _ _) = "brazilian"
+toBabel (Lang "sl" _ _ _ _ _)           = "slovene"
+toBabel x                               = commonFromBcp47 x
+
+-- Takes a list of the constituents of a BCP47 language code
+-- and converts it to a string shared by Babel and Polyglossia.
+-- https://tools.ietf.org/html/bcp47#section-2.1
+commonFromBcp47 :: Lang -> Text
+commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _)      = "serbianc"
+commonFromBcp47 (Lang "zh" (Just "Latn") _ vars _ _)
+  | "pinyin" `elem` vars                               = "pinyin"
+commonFromBcp47 (Lang l _ _ _ _ _) = fromIso l
+  where
+    fromIso "af"  = "afrikaans"
+    fromIso "am"  = "amharic"
+    fromIso "ar"  = "arabic"
+    fromIso "as"  = "assamese"
+    fromIso "ast" = "asturian"
+    fromIso "bg"  = "bulgarian"
+    fromIso "bn"  = "bengali"
+    fromIso "bo"  = "tibetan"
+    fromIso "br"  = "breton"
+    fromIso "ca"  = "catalan"
+    fromIso "cy"  = "welsh"
+    fromIso "cs"  = "czech"
+    fromIso "cop" = "coptic"
+    fromIso "da"  = "danish"
+    fromIso "dv"  = "divehi"
+    fromIso "el"  = "greek"
+    fromIso "en"  = "english"
+    fromIso "eo"  = "esperanto"
+    fromIso "es"  = "spanish"
+    fromIso "et"  = "estonian"
+    fromIso "eu"  = "basque"
+    fromIso "fa"  = "farsi"
+    fromIso "fi"  = "finnish"
+    fromIso "fr"  = "french"
+    fromIso "fur" = "friulan"
+    fromIso "ga"  = "irish"
+    fromIso "gd"  = "scottish"
+    fromIso "gez" = "ethiopic"
+    fromIso "gl"  = "galician"
+    fromIso "he"  = "hebrew"
+    fromIso "hi"  = "hindi"
+    fromIso "hr"  = "croatian"
+    fromIso "hu"  = "magyar"
+    fromIso "hy"  = "armenian"
+    fromIso "ia"  = "interlingua"
+    fromIso "id"  = "indonesian"
+    fromIso "ie"  = "interlingua"
+    fromIso "is"  = "icelandic"
+    fromIso "it"  = "italian"
+    fromIso "ja"  = "japanese"
+    fromIso "km"  = "khmer"
+    fromIso "kmr" = "kurmanji"
+    fromIso "kn"  = "kannada"
+    fromIso "ko"  = "korean"
+    fromIso "la"  = "latin"
+    fromIso "lo"  = "lao"
+    fromIso "lt"  = "lithuanian"
+    fromIso "lv"  = "latvian"
+    fromIso "ml"  = "malayalam"
+    fromIso "mn"  = "mongolian"
+    fromIso "mr"  = "marathi"
+    fromIso "nb"  = "norsk"
+    fromIso "nl"  = "dutch"
+    fromIso "nn"  = "nynorsk"
+    fromIso "no"  = "norsk"
+    fromIso "nqo" = "nko"
+    fromIso "oc"  = "occitan"
+    fromIso "pa"  = "panjabi"
+    fromIso "pl"  = "polish"
+    fromIso "pms" = "piedmontese"
+    fromIso "pt"  = "portuguese"
+    fromIso "rm"  = "romansh"
+    fromIso "ro"  = "romanian"
+    fromIso "ru"  = "russian"
+    fromIso "sa"  = "sanskrit"
+    fromIso "se"  = "samin"
+    fromIso "sk"  = "slovak"
+    fromIso "sq"  = "albanian"
+    fromIso "sr"  = "serbian"
+    fromIso "sv"  = "swedish"
+    fromIso "syr" = "syriac"
+    fromIso "ta"  = "tamil"
+    fromIso "te"  = "telugu"
+    fromIso "th"  = "thai"
+    fromIso "ti"  = "ethiopic"
+    fromIso "tk"  = "turkmen"
+    fromIso "tr"  = "turkish"
+    fromIso "uk"  = "ukrainian"
+    fromIso "ur"  = "urdu"
+    fromIso "vi"  = "vietnamese"
+    fromIso _     = ""
diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
new file mode 100644
index 000000000..f225ef0c5
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
@@ -0,0 +1,34 @@
+{-# LANGUAGE LambdaCase        #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Notes
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Output tables as LaTeX.
+-}
+module Text.Pandoc.Writers.LaTeX.Notes
+  ( notesToLaTeX
+  ) where
+
+import Data.List (intersperse)
+import Text.DocLayout ( Doc, braces, empty, text, vcat, ($$))
+import Data.Text (Text)
+
+notesToLaTeX :: [Doc Text] -> Doc Text
+notesToLaTeX = \case
+  [] -> empty
+  ns -> (case length ns of
+            n | n > 1 -> "\\addtocounter" <>
+                         braces "footnote" <>
+                         braces (text $ show $ 1 - n)
+              | otherwise -> empty)
+        $$
+        vcat (intersperse
+               ("\\addtocounter" <> braces "footnote" <> braces "1")
+               $ map (\x -> "\\footnotetext" <> braces x)
+               $ reverse ns)
diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
new file mode 100644
index 000000000..27a8a0257
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -0,0 +1,307 @@
+{-# LANGUAGE LambdaCase        #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Table
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Output LaTeX formatted tables.
+-}
+module Text.Pandoc.Writers.LaTeX.Table
+  ( tableToLaTeX
+  ) where
+import Control.Monad.State.Strict
+import Data.List (intersperse)
+import qualified Data.List.NonEmpty as NonEmpty
+import Data.List.NonEmpty (NonEmpty ((:|)))
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Text.DocLayout
+  ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest
+  , text, vcat, ($$) )
+import Text.Pandoc.Shared (blocksToInlines, splitBy, tshow)
+import Text.Pandoc.Walk (walk, query)
+import Data.Monoid (Any(..))
+import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
+import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX)
+import Text.Pandoc.Writers.LaTeX.Types
+  ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stMultiRow
+                    , stNotes, stTable) )
+import Text.Printf (printf)
+import qualified Text.Pandoc.Builder as B
+import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
+
+tableToLaTeX :: PandocMonad m
+             => ([Inline] -> LW m (Doc Text))
+             -> ([Block]  -> LW m (Doc Text))
+             -> Ann.Table
+             -> LW m (Doc Text)
+tableToLaTeX inlnsToLaTeX blksToLaTeX tbl = do
+  let (Ann.Table _attr caption _specs thead tbodies tfoot) = tbl
+  CaptionDocs capt captNotes <- captionToLaTeX inlnsToLaTeX caption
+  let removeNote (Note _) = Span ("", [], []) []
+      removeNote x        = x
+  firsthead <- if isEmpty capt || isEmptyHead thead
+               then return empty
+               else ($$ text "\\endfirsthead") <$>
+                    headToLaTeX blksToLaTeX thead
+  head' <- if isEmptyHead thead
+           then return "\\toprule"
+           -- avoid duplicate notes in head and firsthead:
+           else headToLaTeX blksToLaTeX
+                (if isEmpty firsthead
+                 then thead
+                 else walk removeNote thead)
+  rows' <- mapM (rowToLaTeX blksToLaTeX BodyCell) $
+                mconcat (map bodyRows tbodies) <> footRows tfoot
+  modify $ \s -> s{ stTable = True }
+  notes <- notesToLaTeX <$> gets stNotes
+  return
+    $  "\\begin{longtable}[]" <>
+          braces ("@{}" <> colDescriptors tbl <> "@{}")
+          -- the @{} removes extra space at beginning and end
+    $$ capt
+    $$ firsthead
+    $$ head'
+    $$ "\\endhead"
+    $$ vcat rows'
+    $$ "\\bottomrule"
+    $$ "\\end{longtable}"
+    $$ captNotes
+    $$ notes
+
+-- | Creates column descriptors for the table.
+colDescriptors :: Ann.Table -> Doc Text
+colDescriptors (Ann.Table _attr _caption specs thead tbodies tfoot) =
+  let (aligns, widths) = unzip specs
+
+      defaultWidthsOnly = all (== ColWidthDefault) widths
+      isSimpleTable = all (all isSimpleCell) $ mconcat
+                      [ headRows thead
+                      , concatMap bodyRows tbodies
+                      , footRows tfoot
+                      ]
+
+      relativeWidths = if defaultWidthsOnly
+                       then replicate (length specs)
+                            (1 / fromIntegral (length specs))
+                       else map toRelWidth widths
+  in if defaultWidthsOnly && isSimpleTable
+     then hcat $ map (literal . colAlign) aligns
+     else (cr <>) . nest 2 . vcat . map literal $
+          zipWith (toColDescriptor (length specs))
+                  aligns
+                  relativeWidths
+  where
+    toColDescriptor :: Int -> Alignment -> Double -> Text
+    toColDescriptor numcols align width =
+      T.pack $ printf
+      ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}"
+      (T.unpack (alignCommand align))
+      ((numcols - 1) * 2)
+      width
+
+    isSimpleCell (Ann.Cell _ _ (Cell _attr _align _rowspan _colspan blocks)) =
+      case blocks of
+        [Para _]  -> True
+        [Plain _] -> True
+        []        -> True
+        _         -> False
+
+    toRelWidth ColWidthDefault = 0
+    toRelWidth (ColWidth w)    = w
+
+alignCommand :: Alignment -> Text
+alignCommand = \case
+  AlignLeft    -> "\\raggedright"
+  AlignRight   -> "\\raggedleft"
+  AlignCenter  -> "\\centering"
+  AlignDefault -> "\\raggedright"
+
+colAlign :: Alignment -> Text
+colAlign = \case
+  AlignLeft    -> "l"
+  AlignRight   -> "r"
+  AlignCenter  -> "c"
+  AlignDefault -> "l"
+
+data CaptionDocs =
+  CaptionDocs
+  { captionCommand :: Doc Text
+  , captionNotes :: Doc Text
+  }
+
+captionToLaTeX :: PandocMonad m
+               => ([Inline] -> LW m (Doc Text))
+               -> Caption
+               -> LW m CaptionDocs
+captionToLaTeX inlnsToLaTeX (Caption _maybeShort longCaption) = do
+  let caption = blocksToInlines longCaption
+  (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption
+  return $ CaptionDocs
+    { captionNotes = captNotes
+    , captionCommand = if isEmpty captionText
+                       then empty
+                       else "\\caption" <> captForLof <>
+                            braces captionText <> "\\tabularnewline"
+    }
+
+type BlocksWriter m = [Block] -> LW m (Doc Text)
+
+headToLaTeX :: PandocMonad m
+            => BlocksWriter m
+            -> Ann.TableHead
+            -> LW m (Doc Text)
+headToLaTeX blocksWriter (Ann.TableHead _attr headerRows) = do
+  rowsContents <- mapM (rowToLaTeX blocksWriter HeaderCell . headerRowCells)
+                       headerRows
+  return ("\\toprule" $$ vcat rowsContents $$ "\\midrule")
+
+-- | Converts a row of table cells into a LaTeX row.
+rowToLaTeX :: PandocMonad m
+           => BlocksWriter m
+           -> CellType
+           -> [Ann.Cell]
+           -> LW m (Doc Text)
+rowToLaTeX blocksWriter celltype row = do
+  cellsDocs <- mapM (cellToLaTeX blocksWriter celltype) (fillRow row)
+  return $ hsep (intersperse "&" cellsDocs) <> " \\\\"
+
+-- | Pads row with empty cells to adjust for rowspans above this row.
+fillRow :: [Ann.Cell] -> [Ann.Cell]
+fillRow = go 0
+  where
+    go _ [] = []
+    go n (acell@(Ann.Cell _spec (Ann.ColNumber colnum) cell):cells) =
+      let (Cell _ _ _ (ColSpan colspan) _) = cell
+      in map mkEmptyCell [n .. colnum - 1] ++
+         acell : go (colnum + colspan) cells
+
+    mkEmptyCell :: Int -> Ann.Cell
+    mkEmptyCell colnum =
+      Ann.Cell ((AlignDefault, ColWidthDefault):|[])
+               (Ann.ColNumber colnum)
+               B.emptyCell
+
+isEmptyHead :: Ann.TableHead -> Bool
+isEmptyHead (Ann.TableHead _attr []) = True
+isEmptyHead (Ann.TableHead _attr rows) = all (null . headerRowCells) rows
+
+-- | Gets all cells in a header row.
+headerRowCells :: Ann.HeaderRow -> [Ann.Cell]
+headerRowCells (Ann.HeaderRow _attr _rownum cells) = cells
+
+-- | Gets all cells in a body row.
+bodyRowCells :: Ann.BodyRow -> [Ann.Cell]
+bodyRowCells (Ann.BodyRow _attr _rownum rowhead cells) = rowhead <> cells
+
+-- | Gets a list of rows of the table body, where a row is a simple
+-- list of cells.
+bodyRows :: Ann.TableBody -> [[Ann.Cell]]
+bodyRows (Ann.TableBody _attr _rowheads headerRows rows) =
+  map headerRowCells headerRows <> map bodyRowCells rows
+
+-- | Gets a list of rows of the table head, where a row is a simple
+-- list of cells.
+headRows :: Ann.TableHead -> [[Ann.Cell]]
+headRows (Ann.TableHead _attr rows) = map headerRowCells rows
+
+-- | Gets a list of rows from the foot, where a row is a simple list
+-- of cells.
+footRows :: Ann.TableFoot -> [[Ann.Cell]]
+footRows (Ann.TableFoot _attr rows) = map headerRowCells rows
+
+-- For simple latex tables (without minipages or parboxes),
+-- we need to go to some lengths to get line breaks working:
+-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}.
+fixLineBreaks :: Block -> Block
+fixLineBreaks = walk fixLineBreaks'
+
+fixLineBreaks' :: [Inline] -> [Inline]
+fixLineBreaks' ils = case splitBy (== LineBreak) ils of
+                       []     -> []
+                       [xs]   -> xs
+                       chunks -> RawInline "tex" "\\vtop{" :
+                                 concatMap tohbox chunks <>
+                                 [RawInline "tex" "}"]
+  where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <>
+                    [RawInline "tex" "}"]
+
+-- We also change display math to inline math, since display
+-- math breaks in simple tables.
+displayMathToInline :: Inline -> Inline
+displayMathToInline (Math DisplayMath x) = Math InlineMath x
+displayMathToInline x                    = x
+
+cellToLaTeX :: PandocMonad m
+            => BlocksWriter m
+            -> CellType
+            -> Ann.Cell
+            -> LW m (Doc Text)
+cellToLaTeX blockListToLaTeX celltype annotatedCell = do
+  let (Ann.Cell specs _colnum cell) = annotatedCell
+  let hasWidths = snd (NonEmpty.head specs) /= ColWidthDefault
+  let specAlign = fst (NonEmpty.head specs)
+  let (Cell _attr align' rowspan colspan blocks) = cell
+  let align = case align' of
+                AlignDefault -> specAlign
+                _            -> align'
+  beamer <- gets stBeamer
+  externalNotes <- gets stExternalNotes
+  inMinipage <- gets stInMinipage
+  -- See #5367 -- footnotehyper/footnote don't work in beamer,
+  -- so we need to produce the notes outside the table...
+  modify $ \st -> st{ stExternalNotes = beamer }
+  let isPlainOrPara = \case
+        Para{}  -> True
+        Plain{} -> True
+        _       -> False
+  let hasLineBreak LineBreak = Any True
+      hasLineBreak _ = Any False
+  let hasLineBreaks = getAny $ query hasLineBreak blocks
+  result <-
+    if not hasWidths || (celltype /= HeaderCell
+                           && all isPlainOrPara blocks
+                           && not hasLineBreaks)
+       then
+         blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
+       else do
+         modify $ \st -> st{ stInMinipage = True }
+         cellContents <- blockListToLaTeX blocks
+         modify $ \st -> st{ stInMinipage = inMinipage }
+         let valign = text $ case celltype of
+                               HeaderCell -> "[b]"
+                               BodyCell   -> "[t]"
+         let halign = literal $ alignCommand align
+         return $ "\\begin{minipage}" <> valign <>
+                  braces "\\linewidth" <> halign <> cr <>
+                  cellContents <>
+                  (if hasLineBreaks then "\\strut" else mempty)
+                  <> cr <>
+                  "\\end{minipage}"
+  modify $ \st -> st{ stExternalNotes = externalNotes }
+  when (rowspan /= RowSpan 1) $
+    modify (\st -> st{ stMultiRow = True })
+  let inMultiColumn x = case colspan of
+                          (ColSpan 1) -> x
+                          (ColSpan n) -> "\\multicolumn"
+                                         <> braces (literal (tshow n))
+                                         <> braces (literal $ colAlign align)
+                                         <> braces x
+  let inMultiRow x = case rowspan of
+                       (RowSpan 1) -> x
+                       (RowSpan n) -> let nrows = literal (tshow n)
+                                      in "\\multirow" <> braces nrows
+                                         <> braces "*" <> braces x
+  return . inMultiColumn . inMultiRow $ result
+
+data CellType
+  = HeaderCell
+  | BodyCell
+  deriving Eq
diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs
new file mode 100644
index 000000000..c06b7e923
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs
@@ -0,0 +1,83 @@
+module Text.Pandoc.Writers.LaTeX.Types
+  ( LW
+  , WriterState (..)
+  , startingState
+  ) where
+
+import Control.Monad.State.Strict (StateT)
+import Data.Text (Text)
+import Text.DocLayout (Doc)
+import Text.Pandoc.Options
+  ( WriterOptions (writerIncremental, writerTopLevelDivision)
+  , TopLevelDivision (..)
+  )
+
+-- | LaTeX writer type. The type constructor @m@ will typically be an
+-- instance of PandocMonad.
+type LW m = StateT WriterState m
+
+data WriterState =
+  WriterState
+  { stInNote        :: Bool          -- ^ true if we're in a note
+  , stInQuote       :: Bool          -- ^ true if in a blockquote
+  , stExternalNotes :: Bool          -- ^ true if in context where
+                                     --   we need to store footnotes
+  , stInMinipage    :: Bool          -- ^ true if in minipage
+  , stInHeading     :: Bool          -- ^ true if in a section heading
+  , stInItem        :: Bool          -- ^ true if in \item[..]
+  , stNotes         :: [Doc Text]    -- ^ notes in a minipage
+  , stOLLevel       :: Int           -- ^ level of ordered list nesting
+  , stOptions       :: WriterOptions -- ^ writer options, so they don't have to
+                                     --   be parameter
+  , stVerbInNote    :: Bool          -- ^ true if document has verbatim text in note
+  , stTable         :: Bool          -- ^ true if document has a table
+  , stMultiRow      :: Bool          -- ^ true if document has multirow cells
+  , stStrikeout     :: Bool          -- ^ true if document has strikeout
+  , stUrl           :: Bool          -- ^ true if document has visible URL link
+  , stGraphics      :: Bool          -- ^ true if document contains images
+  , stLHS           :: Bool          -- ^ true if document has literate haskell code
+  , stHasChapters   :: Bool          -- ^ true if document has chapters
+  , stCsquotes      :: Bool          -- ^ true if document uses csquotes
+  , stHighlighting  :: Bool          -- ^ true if document has highlighted code
+  , stIncremental   :: Bool          -- ^ true if beamer lists should be
+  , stZwnj          :: Bool          -- ^ true if document has a ZWNJ character
+  , stInternalLinks :: [Text]        -- ^ list of internal link targets
+  , stBeamer        :: Bool          -- ^ produce beamer
+  , stEmptyLine     :: Bool          -- ^ true if no content on line
+  , stHasCslRefs    :: Bool          -- ^ has a Div with class refs
+  , stIsFirstInDefinition :: Bool    -- ^ first block in a defn list
+  }
+
+startingState :: WriterOptions -> WriterState
+startingState options =
+  WriterState
+  { stInNote = False
+  , stInQuote = False
+  , stExternalNotes = False
+  , stInHeading = False
+  , stInMinipage = False
+  , stInItem = False
+  , stNotes = []
+  , stOLLevel = 1
+  , stOptions = options
+  , stVerbInNote = False
+  , stTable = False
+  , stMultiRow = False
+  , stStrikeout = False
+  , stUrl = False
+  , stGraphics = False
+  , stLHS = False
+  , stHasChapters = case writerTopLevelDivision options of
+                      TopLevelPart    -> True
+                      TopLevelChapter -> True
+                      _               -> False
+  , stCsquotes = False
+  , stHighlighting = False
+  , stIncremental = writerIncremental options
+  , stZwnj = False
+  , stInternalLinks = []
+  , stBeamer = False
+  , stEmptyLine = True
+  , stHasCslRefs = False
+  , stIsFirstInDefinition = False
+  }
diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs
new file mode 100644
index 000000000..c34338121
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs
@@ -0,0 +1,275 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Util
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.LaTeX.Util (
+    stringToLaTeX
+  , StringContext(..)
+  , toLabel
+  , inCmd
+  , wrapDiv
+  , hypertarget
+  , labelFor
+  , getListingsLanguage
+  , mbBraced
+  )
+where
+
+import Control.Applicative ((<|>))
+import Control.Monad (when)
+import Text.Pandoc.Class (PandocMonad, toLang)
+import Text.Pandoc.Options (WriterOptions(..), isEnabled)
+import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..))
+import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv)
+import Text.Pandoc.Highlighting (toListingsLanguage)
+import Text.DocLayout
+import Text.Pandoc.Definition
+import Text.Pandoc.ImageSize (showFl)
+import Control.Monad.State.Strict (gets, modify)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Extensions (Extension(Ext_smart))
+import Data.Char (isLetter, isSpace, isDigit, isAscii, ord, isAlphaNum)
+import Text.Printf (printf)
+import Text.Pandoc.Shared (safeRead, elemText)
+import qualified Data.Text.Normalize as Normalize
+import Data.List (uncons)
+
+data StringContext = TextString
+                   | URLString
+                   | CodeString
+                   deriving (Eq)
+
+-- escape things as needed for LaTeX
+stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text
+stringToLaTeX context zs = do
+  opts <- gets stOptions
+  when ('\x200c' `elemText` zs) $
+    modify (\s -> s { stZwnj = True })
+  return $ T.pack $
+    foldr (go opts context) mempty $ T.unpack $
+    if writerPreferAscii opts
+       then Normalize.normalize Normalize.NFD zs
+       else zs
+ where
+  go :: WriterOptions -> StringContext -> Char -> String -> String
+  go opts ctx x xs   =
+    let ligatures = isEnabled Ext_smart opts && ctx == TextString
+        isUrl = ctx == URLString
+        mbAccentCmd =
+          if writerPreferAscii opts && ctx == TextString
+             then uncons xs >>= \(c,_) -> lookupAccent c
+             else Nothing
+        emits s =
+          case mbAccentCmd of
+               Just cmd ->
+                 cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent
+               Nothing  -> s <> xs
+        emitc c =
+          case mbAccentCmd of
+               Just cmd ->
+                 cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent
+               Nothing  -> c : xs
+        emitcseq cs =
+          case xs of
+            c:_ | isLetter c
+                , ctx == TextString
+                             -> cs <> " " <> xs
+                | isSpace c  -> cs <> "{}" <> xs
+                | ctx == TextString
+                             -> cs <> xs
+            _ -> cs <> "{}" <> xs
+        emitquote cs =
+          case xs of
+            '`':_  -> cs <> "\\," <> xs -- add thin space
+            '\'':_ -> cs <> "\\," <> xs -- add thin space
+            _      -> cs <> xs
+    in case x of
+         '?' | ligatures ->  -- avoid ?` ligature
+           case xs of
+             '`':_ -> emits "?{}"
+             _     -> emitc x
+         '!' | ligatures ->  -- avoid !` ligature
+           case xs of
+             '`':_ -> emits "!{}"
+             _     -> emitc x
+         '{' -> emits "\\{"
+         '}' -> emits "\\}"
+         '`' | ctx == CodeString -> emitcseq "\\textasciigrave"
+         '$' | not isUrl -> emits "\\$"
+         '%' -> emits "\\%"
+         '&' -> emits "\\&"
+         '_' | not isUrl -> emits "\\_"
+         '#' -> emits "\\#"
+         '-' | not isUrl -> case xs of
+                     -- prevent adjacent hyphens from forming ligatures
+                     ('-':_) -> emits "-\\/"
+                     _       -> emitc '-'
+         '~' | not isUrl -> emitcseq "\\textasciitilde"
+         '^' -> emits "\\^{}"
+         '\\'| isUrl     -> emitc '/' -- NB. / works as path sep even on Windows
+             | otherwise -> emitcseq "\\textbackslash"
+         '|' | not isUrl -> emitcseq "\\textbar"
+         '<' -> emitcseq "\\textless"
+         '>' -> emitcseq "\\textgreater"
+         '[' -> emits "{[}"  -- to avoid interpretation as
+         ']' -> emits "{]}"  -- optional arguments
+         '\'' | ctx == CodeString -> emitcseq "\\textquotesingle"
+         '\160' -> emits "~"
+         '\x200B' -> emits "\\hspace{0pt}"  -- zero-width space
+         '\x202F' -> emits "\\,"
+         '\x2026' -> emitcseq "\\ldots"
+         '\x2018' | ligatures -> emitquote "`"
+         '\x2019' | ligatures -> emitquote "'"
+         '\x201C' | ligatures -> emitquote "``"
+         '\x201D' | ligatures -> emitquote "''"
+         '\x2014' | ligatures -> emits "---"
+         '\x2013' | ligatures -> emits "--"
+         _ | writerPreferAscii opts
+             -> case x of
+                  'ı' -> emitcseq "\\i"
+                  'ȷ' -> emitcseq "\\j"
+                  'å' -> emitcseq "\\aa"
+                  'Å' -> emitcseq "\\AA"
+                  'ß' -> emitcseq "\\ss"
+                  'ø' -> emitcseq "\\o"
+                  'Ø' -> emitcseq "\\O"
+                  'Ł' -> emitcseq "\\L"
+                  'ł' -> emitcseq "\\l"
+                  'æ' -> emitcseq "\\ae"
+                  'Æ' -> emitcseq "\\AE"
+                  'œ' -> emitcseq "\\oe"
+                  'Œ' -> emitcseq "\\OE"
+                  '£' -> emitcseq "\\pounds"
+                  '€' -> emitcseq "\\euro"
+                  '©' -> emitcseq "\\copyright"
+                  _   -> emitc x
+           | otherwise -> emitc x
+
+lookupAccent :: Char -> Maybe String
+lookupAccent '\779'  = Just "\\H"
+lookupAccent '\768'  = Just "\\`"
+lookupAccent '\769'  = Just "\\'"
+lookupAccent '\770'  = Just "\\^"
+lookupAccent '\771'  = Just "\\~"
+lookupAccent '\776'  = Just "\\\""
+lookupAccent '\775'  = Just "\\."
+lookupAccent '\772'  = Just "\\="
+lookupAccent '\781'  = Just "\\|"
+lookupAccent '\817'  = Just "\\b"
+lookupAccent '\807'  = Just "\\c"
+lookupAccent '\783'  = Just "\\G"
+lookupAccent '\777'  = Just "\\h"
+lookupAccent '\803'  = Just "\\d"
+lookupAccent '\785'  = Just "\\f"
+lookupAccent '\778'  = Just "\\r"
+lookupAccent '\865'  = Just "\\t"
+lookupAccent '\782'  = Just "\\U"
+lookupAccent '\780'  = Just "\\v"
+lookupAccent '\774'  = Just "\\u"
+lookupAccent '\808'  = Just "\\k"
+lookupAccent '\8413' = Just "\\textcircled"
+lookupAccent _       = Nothing
+
+toLabel :: PandocMonad m => Text -> LW m Text
+toLabel z = go `fmap` stringToLaTeX URLString z
+ where
+   go = T.concatMap $ \x -> case x of
+     _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x
+       | x `elemText` "_-+=:;." -> T.singleton x
+       | otherwise -> T.pack $ "ux" <> printf "%x" (ord x)
+
+-- | Puts contents into LaTeX command.
+inCmd :: Text -> Doc Text -> Doc Text
+inCmd cmd contents = char '\\' <> literal cmd <> braces contents
+
+mapAlignment :: Text -> Text
+mapAlignment a = case a of
+                   "top" -> "T"
+                   "top-baseline" -> "t"
+                   "bottom" -> "b"
+                   "center" -> "c"
+                   _ -> a
+
+wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text)
+wrapDiv (_,classes,kvs) t = do
+  beamer <- gets stBeamer
+  let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
+  lang <- toLang $ lookup "lang" kvs
+  let wrapColumns = if beamer && "columns" `elem` classes
+                    then \contents ->
+                           let valign = maybe "T" mapAlignment (lookup "align" kvs)
+                               totalwidth = maybe [] (\x -> ["totalwidth=" <> x])
+                                 (lookup "totalwidth" kvs)
+                               onlytextwidth = filter ("onlytextwidth" ==) classes
+                               options = text $ T.unpack $ T.intercalate "," $
+                                 valign : totalwidth ++ onlytextwidth
+                           in inCmd "begin" "columns" <> brackets options
+                              $$ contents
+                              $$ inCmd "end" "columns"
+                    else id
+      wrapColumn  = if beamer && "column" `elem` classes
+                    then \contents ->
+                           let valign =
+                                 maybe ""
+                                 (brackets . text . T.unpack . mapAlignment)
+                                 (lookup "align" kvs)
+                               w = maybe "0.48" fromPct (lookup "width" kvs)
+                           in  inCmd "begin" "column" <>
+                               valign <>
+                               braces (literal w <> "\\textwidth")
+                               $$ contents
+                               $$ inCmd "end" "column"
+                    else id
+      fromPct xs =
+        case T.unsnoc xs of
+          Just (ds, '%') -> case safeRead ds of
+                              Just digits -> showFl (digits / 100 :: Double)
+                              Nothing -> xs
+          _              -> xs
+      wrapDir = case lookup "dir" kvs of
+                  Just "rtl" -> align "RTL"
+                  Just "ltr" -> align "LTR"
+                  _          -> id
+      wrapLang txt = case lang of
+                       Just lng -> let (l, o) = toPolyglossiaEnv lng
+                                       ops = if T.null o
+                                             then ""
+                                             else brackets $ literal o
+                                   in  inCmd "begin" (literal l) <> ops
+                                       $$ blankline <> txt <> blankline
+                                       $$ inCmd "end" (literal l)
+                       Nothing  -> txt
+  return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t
+
+hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text)
+hypertarget _ "" x    = return x
+hypertarget addnewline ident x = do
+  ref <- literal `fmap` toLabel ident
+  return $ text "\\hypertarget"
+              <> braces ref
+              <> braces ((if addnewline && not (isEmpty x)
+                             then "%" <> cr
+                             else empty) <> x)
+
+labelFor :: PandocMonad m => Text -> LW m (Doc Text)
+labelFor ""    = return empty
+labelFor ident = do
+  ref <- literal `fmap` toLabel ident
+  return $ text "\\label" <> braces ref
+
+-- Determine listings language from list of class attributes.
+getListingsLanguage :: [Text] -> Maybe Text
+getListingsLanguage xs
+  = foldr ((<|>) . toListingsLanguage) Nothing xs
+
+mbBraced :: Text -> Text
+mbBraced x = if not (T.all isAlphaNum x)
+                then "{" <> x <> "}"
+                else x
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 4eb0db042..87b2d8d21 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Man
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -16,6 +16,7 @@ Conversion of 'Pandoc' documents to roff man page format.
 module Text.Pandoc.Writers.Man ( writeMan ) where
 import Control.Monad.State.Strict
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -175,8 +176,7 @@ blockToMan opts (BulletList items) = do
   return (vcat contents)
 blockToMan opts (OrderedList attribs items) = do
   let markers = take (length items) $ orderedListMarkers attribs
-  let indent = 1 +
-                     maximum (map T.length markers)
+  let indent = 1 + maybe 0 maximum (nonEmpty (map T.length markers))
   contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
               zip markers items
   return (vcat contents)
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 6aec6b244..fda2bbcef 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -14,7 +14,7 @@
 
 Conversion of 'Pandoc' documents to markdown-formatted plain text.
 
-Markdown:  <http://daringfireball.net/projects/markdown/>
+Markdown:  <https://daringfireball.net/projects/markdown/>
 -}
 module Text.Pandoc.Writers.Markdown (
   writeMarkdown,
@@ -22,15 +22,14 @@ module Text.Pandoc.Writers.Markdown (
   writePlain) where
 import Control.Monad.Reader
 import Control.Monad.State.Strict
-import Data.Char (isAlphaNum)
 import Data.Default
-import Data.List (find, intersperse, sortOn, transpose)
+import Data.List (intersperse, sortOn, transpose)
+import Data.List.NonEmpty (nonEmpty, NonEmpty(..))
 import qualified Data.Map as M
-import Data.Maybe (fromMaybe, mapMaybe)
+import Data.Maybe (fromMaybe, mapMaybe, isNothing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import Network.HTTP (urlEncode)
 import Text.HTML.TagSoup (Tag (..), isTagText, parseTags)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Definition
@@ -44,59 +43,11 @@ import Text.Pandoc.Templates (renderTemplate)
 import Text.DocTemplates (Val(..), Context(..), FromContext(..))
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.HTML (writeHtml5String)
-import Text.Pandoc.Writers.Math (texMathToInlines)
-import Text.Pandoc.XML (toHtml5Entities)
-import Data.Coerce (coerce)
-
-type Notes = [[Block]]
-type Ref   = (Text, Target, Attr)
-type Refs  = [Ref]
-
-type MD m = ReaderT WriterEnv (StateT WriterState m)
-
-evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a
-evalMD md env st = evalStateT (runReaderT md env) st
-
-data WriterEnv = WriterEnv { envInList          :: Bool
-                           , envVariant         :: MarkdownVariant
-                           , envRefShortcutable :: Bool
-                           , envBlockLevel      :: Int
-                           , envEscapeSpaces    :: Bool
-                           }
-
-data MarkdownVariant =
-      PlainText
-    | Commonmark
-    | Markdown
-    deriving (Show, Eq)
-
-instance Default WriterEnv
-  where def = WriterEnv { envInList          = False
-                        , envVariant         = Markdown
-                        , envRefShortcutable = True
-                        , envBlockLevel      = 0
-                        , envEscapeSpaces    = False
-                        }
-
-data WriterState = WriterState { stNotes   :: Notes
-                               , stPrevRefs :: Refs
-                               , stRefs    :: Refs
-                               , stKeys    :: M.Map Key
-                                                (M.Map (Target, Attr) Int)
-                               , stLastIdx  :: Int
-                               , stIds     :: Set.Set Text
-                               , stNoteNum :: Int
-                               }
-
-instance Default WriterState
-  where def = WriterState{ stNotes = []
-                         , stPrevRefs = []
-                         , stRefs = []
-                         , stKeys = M.empty
-                         , stLastIdx = 0
-                         , stIds = Set.empty
-                         , stNoteNum = 1
-                         }
+import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown, linkAttributes, attrsToMarkdown)
+import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..),
+                                           WriterState(..),
+                                           WriterEnv(..),
+                                           Ref, Refs, MD, evalMD)
 
 -- | Convert Pandoc to Markdown.
 writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@@ -116,7 +67,16 @@ writePlain opts document =
 -- | Convert Pandoc to Commonmark.
 writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeCommonMark opts document =
-  evalMD (pandocToMarkdown opts document) def{ envVariant = Commonmark } def
+  evalMD (pandocToMarkdown opts' document) def{ envVariant = Commonmark } def
+ where
+  opts' = opts{ writerExtensions =
+                   -- These extensions can't be enabled or disabled
+                   -- for commonmark because they're part of the core;
+                   -- we set them here so that escapeText will behave
+                   -- properly.
+                   enableExtension Ext_all_symbols_escapable $
+                   enableExtension Ext_intraword_underscores $
+                     writerExtensions opts }
 
 pandocTitleBlock :: Doc Text -> [Doc Text] -> Doc Text -> Doc Text
 pandocTitleBlock tit auths dat =
@@ -174,23 +134,24 @@ valToYaml :: Val Text -> Doc Text
 valToYaml (ListVal xs) =
   vcat $ map (\v -> hang 2 "- " (valToYaml v)) xs
 valToYaml (MapVal c) = contextToYaml c
+valToYaml (BoolVal True) = "true"
+valToYaml (BoolVal False) = "false"
 valToYaml (SimpleVal x)
   | isEmpty x = empty
   | otherwise =
       if hasNewlines x
          then hang 0 ("|" <> cr) x
-         else if fst $ foldr needsDoubleQuotes (False, True) x
+         else if isNothing $ foldM needsDoubleQuotes True x
            then "\"" <> fmap escapeInDoubleQuotes x <> "\""
            else x
     where
-      needsDoubleQuotes t (positive, isFirst)
+      needsDoubleQuotes isFirst t
         = if T.any isBadAnywhere t ||
              (isFirst && T.any isYamlPunct (T.take 1 t))
-              then (True, False)
-              else (positive, False)
+              then Nothing
+              else Just False
       isBadAnywhere '#' = True
       isBadAnywhere ':' = True
-      isBadAnywhere '`' = False
       isBadAnywhere _   = False
       hasNewlines NewLine = True
       hasNewlines BlankLines{} = True
@@ -295,75 +256,11 @@ noteToMarkdown opts num blocks = do
               then hang (writerTabStop opts) (marker <> spacer) contents
               else marker <> spacer <> contents
 
--- | Escape special characters for Markdown.
-escapeText :: WriterOptions -> Text -> Text
-escapeText opts = T.pack . go . T.unpack
-  where
-  go [] = []
-  go (c:cs) =
-    case c of
-       '<' | isEnabled Ext_all_symbols_escapable opts ->
-              '\\' : '<' : go cs
-           | otherwise -> "&lt;" ++ go cs
-       '>' | isEnabled Ext_all_symbols_escapable opts ->
-              '\\' : '>' : go cs
-           | otherwise -> "&gt;" ++ go cs
-       '@' | isEnabled Ext_citations opts ->
-               case cs of
-                    (d:_)
-                      | isAlphaNum d || d == '_'
-                         -> '\\':'@':go cs
-                    _ -> '@':go cs
-       _ | c `elem` ['\\','`','*','_','[',']','#'] ->
-              '\\':c:go cs
-       '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs
-       '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs
-       '~' | isEnabled Ext_subscript opts ||
-             isEnabled Ext_strikeout opts -> '\\':'~':go cs
-       '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':go cs
-       '\'' | isEnabled Ext_smart opts -> '\\':'\'':go cs
-       '"' | isEnabled Ext_smart opts -> '\\':'"':go cs
-       '-' | isEnabled Ext_smart opts ->
-              case cs of
-                   '-':_ -> '\\':'-':go cs
-                   _     -> '-':go cs
-       '.' | isEnabled Ext_smart opts ->
-              case cs of
-                   '.':'.':rest -> '\\':'.':'.':'.':go rest
-                   _            -> '.':go cs
-       _   -> case cs of
-                '_':x:xs
-                  | isEnabled Ext_intraword_underscores opts
-                  , isAlphaNum c
-                  , isAlphaNum x -> c : '_' : x : go xs
-                _                -> c : go cs
-
-attrsToMarkdown :: Attr -> Doc Text
-attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
-        where attribId = case attribs of
-                                ("",_,_) -> empty
-                                (i,_,_)  -> "#" <> escAttr i
-              attribClasses = case attribs of
-                                (_,[],_) -> empty
-                                (_,cs,_) -> hsep $
-                                            map (escAttr . ("."<>))
-                                            cs
-              attribKeys = case attribs of
-                                (_,_,[]) -> empty
-                                (_,_,ks) -> hsep $
-                                            map (\(k,v) -> escAttr k
-                                              <> "=\"" <>
-                                              escAttr v <> "\"") ks
-              escAttr          = mconcat . map escAttrChar . T.unpack
-              escAttrChar '"'  = literal "\\\""
-              escAttrChar '\\' = literal "\\\\"
-              escAttrChar c    = literal $ T.singleton c
-
-linkAttributes :: WriterOptions -> Attr -> Doc Text
-linkAttributes opts attr =
-  if isEnabled Ext_link_attributes opts && attr /= nullAttr
-     then attrsToMarkdown attr
-     else empty
+-- | (Code) blocks with a single class and no attributes can just use it
+-- standalone, no need to bother with curly braces.
+classOrAttrsToMarkdown :: Attr -> Doc Text
+classOrAttrsToMarkdown ("",[cls],[]) = literal cls
+classOrAttrsToMarkdown attrs = attrsToMarkdown attrs
 
 -- | Ordered list start parser for use in Para below.
 olMarker :: Parser Text ParserState ()
@@ -424,9 +321,12 @@ blockToMarkdown' opts (Div attrs ils) = do
     case () of
          _ | isEnabled Ext_fenced_divs opts &&
              attrs /= nullAttr ->
-                nowrap (literal ":::" <+> attrsToMarkdown attrs) $$
-                chomp contents $$
-                literal ":::" <> blankline
+                let attrsToMd = if variant == Commonmark
+                                then attrsToMarkdown
+                                else classOrAttrsToMarkdown
+                in nowrap (literal ":::" <+> attrsToMd attrs) $$
+                   chomp contents $$
+                   literal ":::" <> blankline
            | isEnabled Ext_native_divs opts ||
              (isEnabled Ext_raw_html opts &&
               (variant == Commonmark ||
@@ -468,7 +368,7 @@ blockToMarkdown' opts (Plain inlines) = do
 -- title beginning with fig: indicates figure
 blockToMarkdown' opts (Para [Image attr alt (src,tgt@(T.stripPrefix "fig:" -> Just tit))])
   | isEnabled Ext_raw_html opts &&
-    not (isEnabled Ext_link_attributes opts) &&
+    not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) &&
     attr /= nullAttr = -- use raw HTML
     (<> blankline) . literal . T.strip <$>
       writeHtml5String opts{ writerTemplate = Nothing }
@@ -492,25 +392,24 @@ blockToMarkdown' opts b@(RawBlock f str) = do
   let renderEmpty = mempty <$ report (BlockNotRendered b)
   case variant of
     PlainText -> renderEmpty
-    _ | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
-                  "markdown_mmd", "markdown_strict"] ->
-            return $ literal str <> literal "\n"
-      | isEnabled Ext_raw_attribute opts -> rawAttribBlock
-      | f `elem` ["html", "html5", "html4"] ->
-            case () of
-              _ | isEnabled Ext_markdown_attribute opts -> return $
-                    literal (addMarkdownAttribute str) <> literal "\n"
-                | isEnabled Ext_raw_html opts -> return $
-                    literal str <> literal "\n"
-                | isEnabled Ext_raw_attribute opts -> rawAttribBlock
-                | otherwise -> renderEmpty
-      | f `elem` ["latex", "tex"] ->
-            case () of
-              _ | isEnabled Ext_raw_tex opts -> return $
-                    literal str <> literal "\n"
-                | isEnabled Ext_raw_attribute opts -> rawAttribBlock
-                | otherwise -> renderEmpty
-      | otherwise -> renderEmpty
+    Commonmark
+      | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"]
+         -> return $ literal str <> literal "\n"
+    Markdown
+      | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+                  "markdown_mmd", "markdown_strict"]
+         -> return $ literal str <> literal "\n"
+    _ | isEnabled Ext_raw_attribute opts -> rawAttribBlock
+      | f `elem` ["html", "html5", "html4"]
+      , isEnabled Ext_markdown_attribute opts
+         -> return $ literal (addMarkdownAttribute str) <> literal "\n"
+      | f `elem` ["html", "html5", "html4"]
+      , isEnabled Ext_raw_html opts
+         -> return $ literal str <> literal "\n"
+      | f `elem` ["latex", "tex"]
+      , isEnabled Ext_raw_tex opts
+         -> return $ literal str <> literal "\n"
+    _ -> renderEmpty
 blockToMarkdown' opts HorizontalRule =
   return $ blankline <> literal (T.replicate (writerColumns opts) "-") <> blankline
 blockToMarkdown' opts (Header level attr inlines) = do
@@ -534,7 +433,8 @@ blockToMarkdown' opts (Header level attr inlines) = do
                                  && id' == autoId -> empty
                    (id',_,_)   | isEnabled Ext_mmd_header_identifiers opts ->
                                     space <> brackets (literal id')
-                   _ | isEnabled Ext_header_attributes opts ->
+                   _ | isEnabled Ext_header_attributes opts ||
+                       isEnabled Ext_attributes opts ->
                                     space <> attrsToMarkdown attr
                      | otherwise -> empty
   contents <- inlineListToMarkdown opts $
@@ -584,19 +484,21 @@ blockToMarkdown' opts (CodeBlock attribs str) = do
            | isEnabled Ext_fenced_code_blocks opts ->
           tildes <> attrs <> cr <> literal str <> cr <> tildes <> blankline
      _ -> nest (writerTabStop opts) (literal str) <> blankline
-   where endline c = literal $ case [T.length ln
-                                   | ln <- map trim (T.lines str)
-                                   , T.pack [c,c,c] `T.isPrefixOf` ln
-                                   , T.all (== c) ln] of
-                               [] -> T.replicate 3 $ T.singleton c
-                               xs -> T.replicate (maximum xs + 1) $ T.singleton c
-         backticks = endline '`'
-         tildes = endline '~'
-         attrs  = if isEnabled Ext_fenced_code_attributes opts
-                     then nowrap $ " " <> attrsToMarkdown attribs
-                     else case attribs of
-                                (_,cls:_,_) -> " " <> literal cls
-                                _             -> empty
+   where
+     endlineLen c = maybe 3 ((+1) . maximum) $ nonEmpty $
+                        [T.length ln
+                         | ln <- map trim (T.lines str)
+                         , T.pack [c,c,c] `T.isPrefixOf` ln
+                         , T.all (== c) ln]
+     endline c = literal $ T.replicate (endlineLen c) $ T.singleton c
+     backticks = endline '`'
+     tildes = endline '~'
+     attrs  = if isEnabled Ext_fenced_code_attributes opts ||
+                 isEnabled Ext_attributes opts
+                 then nowrap $ " " <> classOrAttrsToMarkdown attribs
+                 else case attribs of
+                            (_,cls:_,_) -> " " <> literal cls
+                            _             -> empty
 blockToMarkdown' opts (BlockQuote blocks) = do
   variant <- asks envVariant
   -- if we're writing literate haskell, put a space before the bird tracks
@@ -609,7 +511,7 @@ blockToMarkdown' opts (BlockQuote blocks) = do
   return $ prefixed leader contents <> blankline
 blockToMarkdown' opts t@(Table _ blkCapt specs thead tbody tfoot) = do
   let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
-  let numcols = maximum (length aligns : length widths :
+  let numcols = maximum (length aligns :| length widths :
                            map length (headers:rows))
   caption' <- inlineListToMarkdown opts caption
   let caption''
@@ -672,7 +574,10 @@ blockToMarkdown' opts (BulletList items) = do
   contents <- inList $ mapM (bulletListItemToMarkdown opts) items
   return $ (if isTightList items then vcat else vsep) contents <> blankline
 blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do
-  let start' = if isEnabled Ext_startnum opts then start else 1
+  variant <- asks envVariant
+  let start' = if variant == Commonmark || isEnabled Ext_startnum opts
+                  then start
+                  else 1
   let sty'   = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle
   let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim
   let attribs = (start', sty', delim')
@@ -708,7 +613,8 @@ pipeTable headless aligns rawHeaders rawRows = do
       blockFor AlignCenter x y = cblock (x + 2) (sp <> y <> sp) <> lblock 0 empty
       blockFor AlignRight  x y = rblock (x + 2) (y <> sp) <> lblock 0 empty
       blockFor _           x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
-  let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows)
+  let widths = map (max 3 . maybe 3 maximum . nonEmpty . map offset) $
+                     transpose (rawHeaders : rawRows)
   let torow cs = nowrap $ literal "|" <>
                     hcat (intersperse (literal "|") $
                           zipWith3 blockFor aligns widths (map chomp cs))
@@ -742,11 +648,11 @@ pandocTable opts multiline headless aligns widths rawHeaders rawRows = do
   -- Number of characters per column necessary to output every cell
   -- without requiring a line break.
   -- The @+2@ is needed for specifying the alignment.
-  let numChars    = (+ 2) . maximum . map offset
+  let numChars    = (+ 2) . maybe 0 maximum . nonEmpty . map offset
   -- Number of characters per column necessary to output every cell
   -- without requiring a line break *inside a word*.
   -- The @+2@ is needed for specifying the alignment.
-  let minNumChars = (+ 2) . maximum . map minOffset
+  let minNumChars = (+ 2) . maybe 0 maximum . nonEmpty . map minOffset
   let columns = transpose (rawHeaders : rawRows)
   -- minimal column width without wrapping a single word
   let relWidth w col =
@@ -885,6 +791,9 @@ blockListToMarkdown opts blocks = do
            b1 : commentSep : fixBlocks (b2:bs)
       fixBlocks (Plain ils : bs@(RawBlock{}:_)) =
            Plain ils : fixBlocks bs
+      fixBlocks (Plain ils : bs@(Div{}:_))
+          | isEnabled Ext_fenced_divs opts =
+           Para ils : fixBlocks bs
       fixBlocks (Plain ils : bs) | inlist =
            Plain ils : fixBlocks bs
       fixBlocks (Plain ils : bs) =
@@ -908,488 +817,7 @@ blockListToMarkdown opts blocks = do
         | otherwise                   = RawBlock "markdown" "&nbsp;\n"
   mconcat <$> mapM (blockToMarkdown opts) (fixBlocks blocks)
 
-getKey :: Doc Text -> Key
-getKey = toKey . render Nothing
-
-findUsableIndex :: [Text] -> Int -> Int
-findUsableIndex lbls i = if tshow i `elem` lbls
-                         then findUsableIndex lbls (i + 1)
-                         else i
-
-getNextIndex :: PandocMonad m => MD m Int
-getNextIndex = do
-  prevRefs <- gets stPrevRefs
-  refs <- gets stRefs
-  i <- (+ 1) <$> gets stLastIdx
-  modify $ \s -> s{ stLastIdx = i }
-  let refLbls = map (\(r,_,_) -> r) $ prevRefs ++ refs
-  return $ findUsableIndex refLbls i
-
--- | Get reference for target; if none exists, create unique one and return.
---   Prefer label if possible; otherwise, generate a unique key.
-getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text
-getReference attr label target = do
-  refs <- gets stRefs
-  case find (\(_,t,a) -> t == target && a == attr) refs of
-    Just (ref, _, _) -> return ref
-    Nothing       -> do
-      keys <- gets stKeys
-      let key = getKey label
-      let rawkey = coerce key
-      case M.lookup key keys of
-           Nothing -> do -- no other refs with this label
-             (lab', idx) <- if T.null rawkey ||
-                                 T.length rawkey > 999 ||
-                                 T.any (\c -> c == '[' || c == ']') rawkey
-                               then do
-                                 i <- getNextIndex
-                                 return (tshow i, i)
-                               else
-                                 return (render Nothing label, 0)
-             modify (\s -> s{
-               stRefs = (lab', target, attr) : refs,
-               stKeys = M.insert (getKey label)
-                           (M.insert (target, attr) idx mempty)
-                                 (stKeys s) })
-             return lab'
-
-           Just km ->    -- we have refs with this label
-             case M.lookup (target, attr) km of
-                  Just i -> do
-                    let lab' = render Nothing $
-                               label <> if i == 0
-                                           then mempty
-                                           else literal (tshow i)
-                    -- make sure it's in stRefs; it may be
-                    -- a duplicate that was printed in a previous
-                    -- block:
-                    when ((lab', target, attr) `notElem` refs) $
-                       modify (\s -> s{
-                         stRefs = (lab', target, attr) : refs })
-                    return lab'
-                  Nothing -> do -- but this one is to a new target
-                    i <- getNextIndex
-                    let lab' = tshow i
-                    modify (\s -> s{
-                       stRefs = (lab', target, attr) : refs,
-                       stKeys = M.insert key
-                                   (M.insert (target, attr) i km)
-                                         (stKeys s) })
-                    return lab'
-
--- | Convert list of Pandoc inline elements to markdown.
-inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text)
-inlineListToMarkdown opts lst = do
-  inlist <- asks envInList
-  go (if inlist then avoidBadWrapsInList lst else lst)
-  where go [] = return empty
-        go (i:is) = case i of
-            Link {} -> case is of
-                -- If a link is followed by another link, or '[', '(' or ':'
-                -- then we don't shortcut
-                Link {}:_                                       -> unshortcutable
-                Space:Link {}:_                                 -> unshortcutable
-                Space:(Str(thead -> Just '[')):_                -> unshortcutable
-                Space:(RawInline _ (thead -> Just '[')):_       -> unshortcutable
-                Space:(Cite _ _):_                              -> unshortcutable
-                SoftBreak:Link {}:_                             -> unshortcutable
-                SoftBreak:(Str(thead -> Just '[')):_            -> unshortcutable
-                SoftBreak:(RawInline _ (thead -> Just '[')):_   -> unshortcutable
-                SoftBreak:(Cite _ _):_                          -> unshortcutable
-                LineBreak:Link {}:_                             -> unshortcutable
-                LineBreak:(Str(thead -> Just '[')):_            -> unshortcutable
-                LineBreak:(RawInline _ (thead -> Just '[')):_   -> unshortcutable
-                LineBreak:(Cite _ _):_                          -> unshortcutable
-                (Cite _ _):_                                    -> unshortcutable
-                Str (thead -> Just '['):_                       -> unshortcutable
-                Str (thead -> Just '('):_                       -> unshortcutable
-                Str (thead -> Just ':'):_                       -> unshortcutable
-                (RawInline _ (thead -> Just '[')):_             -> unshortcutable
-                (RawInline _ (thead -> Just '(')):_             -> unshortcutable
-                (RawInline _ (thead -> Just ':')):_             -> unshortcutable
-                (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable
-                _                                               -> shortcutable
-            _ -> shortcutable
-          where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
-                unshortcutable = do
-                    iMark <- local
-                             (\env -> env { envRefShortcutable = False })
-                             (inlineToMarkdown opts i)
-                    fmap (iMark <>) (go is)
-                thead = fmap fst . T.uncons
-
-isSp :: Inline -> Bool
-isSp Space     = True
-isSp SoftBreak = True
-isSp _         = False
-
-avoidBadWrapsInList :: [Inline] -> [Inline]
-avoidBadWrapsInList [] = []
-avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s =
-  Str (" >" <> cs) : avoidBadWrapsInList xs
-avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))]
-  | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]]
-avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs)
-  | T.null cs && isSp s && c `elem` ['-','*','+'] =
-    Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs
-avoidBadWrapsInList (s:Str cs:Space:xs)
-  | isSp s && isOrderedListMarker cs =
-    Str (" " <> cs) : Space : avoidBadWrapsInList xs
-avoidBadWrapsInList [s, Str cs]
-  | isSp s && isOrderedListMarker cs = [Str $ " " <> cs]
-avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
-
-isOrderedListMarker :: Text -> Bool
-isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) &&
-              isRight (runParser (anyOrderedListMarker >> eof)
-                       defaultParserState "" xs)
-
-isRight :: Either a b -> Bool
-isRight (Right _) = True
-isRight (Left  _) = False
-
--- | Convert Pandoc inline element to markdown.
-inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text)
-inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) =
-  case lookup "data-emoji" kvs of
-       Just emojiname | isEnabled Ext_emoji opts ->
-            return $ ":" <> literal emojiname <> ":"
-       _ -> inlineToMarkdown opts (Str s)
-inlineToMarkdown opts (Span attrs ils) = do
-  variant <- asks envVariant
-  contents <- inlineListToMarkdown opts ils
-  return $ case variant of
-                PlainText -> contents
-                _     | attrs == nullAttr -> contents
-                      | isEnabled Ext_bracketed_spans opts ->
-                        let attrs' = if attrs /= nullAttr
-                                        then attrsToMarkdown attrs
-                                        else empty
-                        in "[" <> contents <> "]" <> attrs'
-                      | isEnabled Ext_raw_html opts ||
-                        isEnabled Ext_native_spans opts ->
-                        tagWithAttrs "span" attrs <> contents <> literal "</span>"
-                      | otherwise -> contents
-inlineToMarkdown _ (Emph []) = return empty
-inlineToMarkdown opts (Emph lst) = do
-  variant <- asks envVariant
-  contents <- inlineListToMarkdown opts lst
-  return $ case variant of
-             PlainText
-               | isEnabled Ext_gutenberg opts -> "_" <> contents <> "_"
-               | otherwise ->  contents
-             _ -> "*" <> contents <> "*"
-inlineToMarkdown _ (Underline []) = return empty
-inlineToMarkdown opts (Underline lst) = do
-  variant <- asks envVariant
-  contents <- inlineListToMarkdown opts lst
-  case variant of
-    PlainText -> return contents
-    _     | isEnabled Ext_bracketed_spans opts ->
-            return $ "[" <> contents <> "]" <> "{.ul}"
-          | isEnabled Ext_native_spans opts ->
-            return $ tagWithAttrs "span" ("", ["underline"], [])
-              <> contents
-              <> literal "</span>"
-          | isEnabled Ext_raw_html opts ->
-            return $ "<u>" <> contents <> "</u>"
-          | otherwise -> inlineToMarkdown opts (Emph lst)
-inlineToMarkdown _ (Strong []) = return empty
-inlineToMarkdown opts (Strong lst) = do
-  variant <- asks envVariant
-  case variant of
-    PlainText ->
-             inlineListToMarkdown opts $
-               if isEnabled Ext_gutenberg opts
-                  then capitalize lst
-                  else lst
-    _ -> do
-       contents <- inlineListToMarkdown opts lst
-       return $ "**" <> contents <> "**"
-inlineToMarkdown _ (Strikeout []) = return empty
-inlineToMarkdown opts (Strikeout lst) = do
-  contents <- inlineListToMarkdown opts lst
-  return $ if isEnabled Ext_strikeout opts
-              then "~~" <> contents <> "~~"
-              else if isEnabled Ext_raw_html opts
-                       then "<s>" <> contents <> "</s>"
-                       else contents
-inlineToMarkdown _ (Superscript []) = return empty
-inlineToMarkdown opts (Superscript lst) =
-  local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do
-    contents <- inlineListToMarkdown opts lst
-    if isEnabled Ext_superscript opts
-       then return $ "^" <> contents <> "^"
-       else if isEnabled Ext_raw_html opts
-                then return $ "<sup>" <> contents <> "</sup>"
-                else
-                  case traverse toSuperscriptInline lst of
-                    Just xs' | not (writerPreferAscii opts)
-                      -> inlineListToMarkdown opts xs'
-                    _ -> do
-                      let rendered = render Nothing contents
-                      return $
-                        case mapM toSuperscript (T.unpack rendered) of
-                           Just r  -> literal $ T.pack r
-                           Nothing -> literal $ "^(" <> rendered <> ")"
-inlineToMarkdown _ (Subscript []) = return empty
-inlineToMarkdown opts (Subscript lst) =
-  local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do
-    contents <- inlineListToMarkdown opts lst
-    if isEnabled Ext_subscript opts
-       then return $ "~" <> contents <> "~"
-       else if isEnabled Ext_raw_html opts
-                then return $ "<sub>" <> contents <> "</sub>"
-                else
-                  case traverse toSubscriptInline lst of
-                    Just xs' | not (writerPreferAscii opts)
-                      -> inlineListToMarkdown opts xs'
-                    _ -> do
-                      let rendered = render Nothing contents
-                      return $
-                        case mapM toSuperscript (T.unpack rendered) of
-                           Just r  -> literal $ T.pack r
-                           Nothing -> literal $ "_(" <> rendered <> ")"
-inlineToMarkdown opts (SmallCaps lst) = do
-  variant <- asks envVariant
-  if variant /= PlainText &&
-     (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
-     then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst)
-     else inlineListToMarkdown opts $ capitalize lst
-inlineToMarkdown opts (Quoted SingleQuote lst) = do
-  contents <- inlineListToMarkdown opts lst
-  return $ if isEnabled Ext_smart opts
-              then "'" <> contents <> "'"
-              else
-                if writerPreferAscii opts
-                   then "&lsquo;" <> contents <> "&rsquo;"
-                   else "‘" <> contents <> "’"
-inlineToMarkdown opts (Quoted DoubleQuote lst) = do
-  contents <- inlineListToMarkdown opts lst
-  return $ if isEnabled Ext_smart opts
-              then "\"" <> contents <> "\""
-              else
-                if writerPreferAscii opts
-                   then "&ldquo;" <> contents <> "&rdquo;"
-                   else "“" <> contents <> "”"
-inlineToMarkdown opts (Code attr str) = do
-  let tickGroups = filter (T.any (== '`')) $ T.group str
-  let longest    = if null tickGroups
-                     then 0
-                     else maximum $ map T.length tickGroups
-  let marker     = T.replicate (longest + 1) "`"
-  let spacer     = if longest == 0 then "" else " "
-  let attrs      = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
-                      then attrsToMarkdown attr
-                      else empty
-  variant <- asks envVariant
-  case variant of
-     PlainText -> return $ literal str
-     _     ->  return $ literal
-                  (marker <> spacer <> str <> spacer <> marker) <> attrs
-inlineToMarkdown opts (Str str) = do
-  variant <- asks envVariant
-  let str' = (if writerPreferAscii opts
-                 then toHtml5Entities
-                 else id) .
-             (if isEnabled Ext_smart opts
-                 then unsmartify opts
-                 else id) .
-             (if variant == PlainText
-                 then id
-                 else escapeText opts) $ str
-  return $ literal str'
-inlineToMarkdown opts (Math InlineMath str) =
-  case writerHTMLMathMethod opts of
-       WebTeX url -> inlineToMarkdown opts
-                       (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str))
-       _ | isEnabled Ext_tex_math_dollars opts ->
-             return $ "$" <> literal str <> "$"
-         | isEnabled Ext_tex_math_single_backslash opts ->
-             return $ "\\(" <> literal str <> "\\)"
-         | isEnabled Ext_tex_math_double_backslash opts ->
-             return $ "\\\\(" <> literal str <> "\\\\)"
-         | otherwise -> do
-             variant <- asks envVariant
-             texMathToInlines InlineMath str >>=
-               inlineListToMarkdown opts .
-                 (if variant == PlainText then makeMathPlainer else id)
-inlineToMarkdown opts (Math DisplayMath str) =
-  case writerHTMLMathMethod opts of
-      WebTeX url -> (\x -> blankline <> x <> blankline) `fmap`
-             inlineToMarkdown opts (Image nullAttr [Str str]
-                    (url <> T.pack (urlEncode $ T.unpack str), str))
-      _ | isEnabled Ext_tex_math_dollars opts ->
-            return $ "$$" <> literal str <> "$$"
-        | isEnabled Ext_tex_math_single_backslash opts ->
-            return $ "\\[" <> literal str <> "\\]"
-        | isEnabled Ext_tex_math_double_backslash opts ->
-            return $ "\\\\[" <> literal str <> "\\\\]"
-        | otherwise -> (\x -> cr <> x <> cr) `fmap`
-            (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
-inlineToMarkdown opts il@(RawInline f str) = do
-  let tickGroups = filter (T.any (== '`')) $ T.group str
-  let numticks   = if null tickGroups
-                     then 1
-                     else 1 + maximum (map T.length tickGroups)
-  variant <- asks envVariant
-  let Format fmt = f
-  let rawAttribInline = return $
-         literal (T.replicate numticks "`") <> literal str <>
-         literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}"
-  let renderEmpty = mempty <$ report (InlineNotRendered il)
-  case variant of
-    PlainText -> renderEmpty
-    _ | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
-                  "markdown_mmd", "markdown_strict"] ->
-            return $ literal str
-      | isEnabled Ext_raw_attribute opts -> rawAttribInline
-      | f `elem` ["html", "html5", "html4"] ->
-            case () of
-              _ | isEnabled Ext_raw_html opts -> return $ literal str
-                | isEnabled Ext_raw_attribute opts -> rawAttribInline
-                | otherwise -> renderEmpty
-      | f `elem` ["latex", "tex"] ->
-            case () of
-              _ | isEnabled Ext_raw_tex opts -> return $ literal str
-                | isEnabled Ext_raw_attribute opts -> rawAttribInline
-                | otherwise -> renderEmpty
-      | otherwise -> renderEmpty
-inlineToMarkdown opts LineBreak = do
-  variant <- asks envVariant
-  if variant == PlainText || isEnabled Ext_hard_line_breaks opts
-     then return cr
-     else return $
-          if isEnabled Ext_escaped_line_breaks opts
-             then "\\" <> cr
-             else "  " <> cr
-inlineToMarkdown _ Space = do
-  escapeSpaces <- asks envEscapeSpaces
-  return $ if escapeSpaces then "\\ " else space
-inlineToMarkdown opts SoftBreak = do
-  escapeSpaces <- asks envEscapeSpaces
-  let space' = if escapeSpaces then "\\ " else space
-  return $ case writerWrapText opts of
-                WrapNone     -> space'
-                WrapAuto     -> space'
-                WrapPreserve -> cr
-inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
-inlineToMarkdown opts (Cite (c:cs) lst)
-  | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst
-  | otherwise =
-      if citationMode c == AuthorInText
-         then do
-           suffs <- inlineListToMarkdown opts $ citationSuffix c
-           rest <- mapM convertOne cs
-           let inbr = suffs <+> joincits rest
-               br   = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
-           return $ literal ("@" <> citationId c) <+> br
-         else do
-           cits <- mapM convertOne (c:cs)
-           return $ literal "[" <> joincits cits <> literal "]"
-  where
-        joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty)
-        convertOne Citation { citationId      = k
-                            , citationPrefix  = pinlines
-                            , citationSuffix  = sinlines
-                            , citationMode    = m }
-                               = do
-           pdoc <- inlineListToMarkdown opts pinlines
-           sdoc <- inlineListToMarkdown opts sinlines
-           let k' = literal (modekey m <> "@" <> k)
-               r = case sinlines of
-                        Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
-                        _                                         -> k' <+> sdoc
-           return $ pdoc <+> r
-        modekey SuppressAuthor = "-"
-        modekey _              = ""
-inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do
-  variant <- asks envVariant
-  linktext <- inlineListToMarkdown opts txt
-  let linktitle = if T.null tit
-                     then empty
-                     else literal $ " \"" <> tit <> "\""
-  let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src)
-  let useAuto = isURI src &&
-                case txt of
-                      [Str s] | escapeURI s == srcSuffix -> True
-                      _       -> False
-  let useRefLinks = writerReferenceLinks opts && not useAuto
-  shortcutable <- asks envRefShortcutable
-  let useShortcutRefLinks = shortcutable &&
-                            isEnabled Ext_shortcut_reference_links opts
-  reftext <- if useRefLinks
-                then literal <$> getReference attr linktext (src, tit)
-                else return mempty
-  case variant of
-    PlainText
-      | useAuto -> return $ literal srcSuffix
-      | otherwise -> return linktext
-    _ | useAuto -> return $ "<" <> literal srcSuffix <> ">"
-      | useRefLinks ->
-           let first  = "[" <> linktext <> "]"
-               second = if getKey linktext == getKey reftext
-                           then if useShortcutRefLinks
-                                   then ""
-                                   else "[]"
-                           else "[" <> reftext <> "]"
-           in  return $ first <> second
-      | isEnabled Ext_raw_html opts
-      , not (isEnabled Ext_link_attributes opts)
-      , attr /= nullAttr -> -- use raw HTML to render attributes
-          literal . T.strip <$>
-            writeHtml5String opts{ writerTemplate = Nothing }
-            (Pandoc nullMeta [Plain [lnk]])
-      | otherwise -> return $
-         "[" <> linktext <> "](" <> literal src <> linktitle <> ")" <>
-         linkAttributes opts attr
-inlineToMarkdown opts img@(Image attr alternate (source, tit))
-  | isEnabled Ext_raw_html opts &&
-    not (isEnabled Ext_link_attributes opts) &&
-    attr /= nullAttr = -- use raw HTML
-    literal . T.strip <$>
-      writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]])
-  | otherwise = do
-  variant <- asks envVariant
-  let txt = if null alternate || alternate == [Str source]
-                                 -- to prevent autolinks
-               then [Str ""]
-               else alternate
-  linkPart <- inlineToMarkdown opts (Link attr txt (source, tit))
-  return $ case variant of
-             PlainText -> "[" <> linkPart <> "]"
-             _     -> "!" <> linkPart
-inlineToMarkdown opts (Note contents) = do
-  modify (\st -> st{ stNotes = contents : stNotes st })
-  st <- get
-  let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + length (stNotes st) - 1)
-  if isEnabled Ext_footnotes opts
-     then return $ "[^" <> ref <> "]"
-     else return $ "[" <> ref <> "]"
-
-makeMathPlainer :: [Inline] -> [Inline]
-makeMathPlainer = walk go
-  where
-  go (Emph xs) = Span nullAttr xs
-  go x         = x
-
 lineBreakToSpace :: Inline -> Inline
 lineBreakToSpace LineBreak = Space
 lineBreakToSpace SoftBreak = Space
 lineBreakToSpace x         = x
-
-toSubscriptInline :: Inline -> Maybe Inline
-toSubscriptInline Space = Just Space
-toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
-toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s)
-toSubscriptInline LineBreak = Just LineBreak
-toSubscriptInline SoftBreak = Just SoftBreak
-toSubscriptInline _ = Nothing
-
-toSuperscriptInline :: Inline -> Maybe Inline
-toSuperscriptInline Space = Just Space
-toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils
-toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s)
-toSuperscriptInline LineBreak = Just LineBreak
-toSuperscriptInline SoftBreak = Just SoftBreak
-toSuperscriptInline _ = Nothing
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
new file mode 100644
index 000000000..cd5f5b896
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -0,0 +1,616 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns        #-}
+{- |
+   Module      : Text.Pandoc.Writers.Markdown.Inline
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.Markdown.Inline (
+  inlineListToMarkdown,
+  linkAttributes,
+  attrsToMarkdown
+  ) where
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import Data.Char (isAlphaNum, isDigit)
+import Data.List (find, intersperse)
+import Data.List.NonEmpty (nonEmpty)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.HTTP (urlEncode)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
+import Text.DocLayout
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.HTML (writeHtml5String)
+import Text.Pandoc.Writers.Math (texMathToInlines)
+import Text.Pandoc.XML (toHtml5Entities)
+import Data.Coerce (coerce)
+import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..),
+                                           WriterState(..),
+                                           WriterEnv(..), MD)
+
+-- | Escape special characters for Markdown.
+escapeText :: WriterOptions -> Text -> Text
+escapeText opts = T.pack . go . T.unpack
+ where
+  startsWithSpace (' ':_) = True
+  startsWithSpace ('\t':_) = True
+  startsWithSpace [] = True
+  startsWithSpace _ = False
+  go [] = []
+  go (c:cs) =
+    case c of
+       '<' | isEnabled Ext_all_symbols_escapable opts ->
+              '\\' : '<' : go cs
+           | otherwise -> "&lt;" ++ go cs
+       '>' | isEnabled Ext_all_symbols_escapable opts ->
+              '\\' : '>' : go cs
+           | otherwise -> "&gt;" ++ go cs
+       '@' | isEnabled Ext_citations opts ->
+               case cs of
+                    (d:_)
+                      | isAlphaNum d || d == '_' || d == '{'
+                         -> '\\':'@':go cs
+                    _ -> '@':go cs
+       '#' | isEnabled Ext_space_in_atx_header opts
+           , startsWithSpace cs
+           -> '\\':'#':go cs
+       _ | c `elem` ['\\','`','*','_','[',']'] ->
+              '\\':c:go cs
+       '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs
+       '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs
+       '~' | isEnabled Ext_subscript opts ||
+             isEnabled Ext_strikeout opts -> '\\':'~':go cs
+       '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':go cs
+       '\'' | isEnabled Ext_smart opts -> '\\':'\'':go cs
+       '"' | isEnabled Ext_smart opts -> '\\':'"':go cs
+       '-' | isEnabled Ext_smart opts ->
+              case cs of
+                   '-':_ -> '\\':'-':go cs
+                   _     -> '-':go cs
+       '.' | isEnabled Ext_smart opts ->
+              case cs of
+                   '.':'.':rest -> '\\':'.':'.':'.':go rest
+                   _            -> '.':go cs
+       _   -> case cs of
+                '_':x:xs
+                  | isEnabled Ext_intraword_underscores opts
+                  , isAlphaNum c
+                  , isAlphaNum x -> c : '_' : x : go xs
+                '#':xs           -> c : '#' : go xs
+                '>':xs           -> c : '>' : go xs
+                _                -> c : go cs
+
+attrsToMarkdown :: Attr -> Doc Text
+attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
+        where attribId = case attribs of
+                                ("",_,_) -> empty
+                                (i,_,_)  -> "#" <> escAttr i
+              attribClasses = case attribs of
+                                (_,[],_) -> empty
+                                (_,cs,_) -> hsep $
+                                            map (escAttr . ("."<>))
+                                            cs
+              attribKeys = case attribs of
+                                (_,_,[]) -> empty
+                                (_,_,ks) -> hsep $
+                                            map (\(k,v) -> escAttr k
+                                              <> "=\"" <>
+                                              escAttr v <> "\"") ks
+              escAttr          = mconcat . map escAttrChar . T.unpack
+              escAttrChar '"'  = literal "\\\""
+              escAttrChar '\\' = literal "\\\\"
+              escAttrChar c    = literal $ T.singleton c
+
+linkAttributes :: WriterOptions -> Attr -> Doc Text
+linkAttributes opts attr =
+  if (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr
+     then attrsToMarkdown attr
+     else empty
+
+getKey :: Doc Text -> Key
+getKey = toKey . render Nothing
+
+findUsableIndex :: [Text] -> Int -> Int
+findUsableIndex lbls i = if tshow i `elem` lbls
+                         then findUsableIndex lbls (i + 1)
+                         else i
+
+getNextIndex :: PandocMonad m => MD m Int
+getNextIndex = do
+  prevRefs <- gets stPrevRefs
+  refs <- gets stRefs
+  i <- (+ 1) <$> gets stLastIdx
+  modify $ \s -> s{ stLastIdx = i }
+  let refLbls = map (\(r,_,_) -> r) $ prevRefs ++ refs
+  return $ findUsableIndex refLbls i
+
+-- | Get reference for target; if none exists, create unique one and return.
+--   Prefer label if possible; otherwise, generate a unique key.
+getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text
+getReference attr label target = do
+  refs <- gets stRefs
+  case find (\(_,t,a) -> t == target && a == attr) refs of
+    Just (ref, _, _) -> return ref
+    Nothing       -> do
+      keys <- gets stKeys
+      let key = getKey label
+      let rawkey = coerce key
+      case M.lookup key keys of
+           Nothing -> do -- no other refs with this label
+             (lab', idx) <- if T.null rawkey ||
+                                 T.length rawkey > 999 ||
+                                 T.any (\c -> c == '[' || c == ']') rawkey
+                               then do
+                                 i <- getNextIndex
+                                 return (tshow i, i)
+                               else
+                                 return (render Nothing label, 0)
+             modify (\s -> s{
+               stRefs = (lab', target, attr) : refs,
+               stKeys = M.insert (getKey label)
+                           (M.insert (target, attr) idx mempty)
+                                 (stKeys s) })
+             return lab'
+
+           Just km ->    -- we have refs with this label
+             case M.lookup (target, attr) km of
+                  Just i -> do
+                    let lab' = render Nothing $
+                               label <> if i == 0
+                                           then mempty
+                                           else literal (tshow i)
+                    -- make sure it's in stRefs; it may be
+                    -- a duplicate that was printed in a previous
+                    -- block:
+                    when ((lab', target, attr) `notElem` refs) $
+                       modify (\s -> s{
+                         stRefs = (lab', target, attr) : refs })
+                    return lab'
+                  Nothing -> do -- but this one is to a new target
+                    i <- getNextIndex
+                    let lab' = tshow i
+                    modify (\s -> s{
+                       stRefs = (lab', target, attr) : refs,
+                       stKeys = M.insert key
+                                   (M.insert (target, attr) i km)
+                                         (stKeys s) })
+                    return lab'
+
+-- | Convert list of Pandoc inline elements to markdown.
+inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text)
+inlineListToMarkdown opts lst = do
+  inlist <- asks envInList
+  go (if inlist then avoidBadWrapsInList lst else lst)
+  where go [] = return empty
+        go (x@Math{}:y@(Str t):zs)
+          | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058
+          = liftM2 (<>) (inlineToMarkdown opts x)
+              (go (RawInline (Format "html") "<!-- -->" : y : zs))
+        go (i:is) = case i of
+            Link {} -> case is of
+                -- If a link is followed by another link, or '[', '(' or ':'
+                -- then we don't shortcut
+                Link {}:_                                       -> unshortcutable
+                Space:Link {}:_                                 -> unshortcutable
+                Space:(Str(thead -> Just '[')):_                -> unshortcutable
+                Space:(RawInline _ (thead -> Just '[')):_       -> unshortcutable
+                Space:(Cite _ _):_                              -> unshortcutable
+                SoftBreak:Link {}:_                             -> unshortcutable
+                SoftBreak:(Str(thead -> Just '[')):_            -> unshortcutable
+                SoftBreak:(RawInline _ (thead -> Just '[')):_   -> unshortcutable
+                SoftBreak:(Cite _ _):_                          -> unshortcutable
+                LineBreak:Link {}:_                             -> unshortcutable
+                LineBreak:(Str(thead -> Just '[')):_            -> unshortcutable
+                LineBreak:(RawInline _ (thead -> Just '[')):_   -> unshortcutable
+                LineBreak:(Cite _ _):_                          -> unshortcutable
+                (Cite _ _):_                                    -> unshortcutable
+                Str (thead -> Just '['):_                       -> unshortcutable
+                Str (thead -> Just '('):_                       -> unshortcutable
+                Str (thead -> Just ':'):_                       -> unshortcutable
+                (RawInline _ (thead -> Just '[')):_             -> unshortcutable
+                (RawInline _ (thead -> Just '(')):_             -> unshortcutable
+                (RawInline _ (thead -> Just ':')):_             -> unshortcutable
+                (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable
+                _                                               -> shortcutable
+            _ -> shortcutable
+          where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
+                unshortcutable = do
+                    iMark <- local
+                             (\env -> env { envRefShortcutable = False })
+                             (inlineToMarkdown opts i)
+                    fmap (iMark <>) (go is)
+                thead = fmap fst . T.uncons
+
+isSp :: Inline -> Bool
+isSp Space     = True
+isSp SoftBreak = True
+isSp _         = False
+
+avoidBadWrapsInList :: [Inline] -> [Inline]
+avoidBadWrapsInList [] = []
+avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s =
+  Str (" >" <> cs) : avoidBadWrapsInList xs
+avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))]
+  | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]]
+avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs)
+  | T.null cs && isSp s && c `elem` ['-','*','+'] =
+    Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList (s:Str cs:Space:xs)
+  | isSp s && isOrderedListMarker cs =
+    Str (" " <> cs) : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList [s, Str cs]
+  | isSp s && isOrderedListMarker cs = [Str $ " " <> cs]
+avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
+
+isOrderedListMarker :: Text -> Bool
+isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) &&
+              isRight (runParser (anyOrderedListMarker >> eof)
+                       defaultParserState "" xs)
+ where
+  isRight (Right _) = True
+  isRight (Left  _) = False
+
+-- | Convert Pandoc inline element to markdown.
+inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text)
+inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) =
+  case lookup "data-emoji" kvs of
+       Just emojiname | isEnabled Ext_emoji opts ->
+            return $ ":" <> literal emojiname <> ":"
+       _ -> inlineToMarkdown opts (Str s)
+inlineToMarkdown opts (Span attrs ils) = do
+  variant <- asks envVariant
+  contents <- inlineListToMarkdown opts ils
+  return $ case attrs of
+             (_,["csl-block"],_) -> (cr <>)
+             (_,["csl-left-margin"],_) -> (cr <>)
+             (_,["csl-indent"],_) -> (cr <>)
+             _ -> id
+         $ case variant of
+                PlainText -> contents
+                _     | attrs == nullAttr -> contents
+                      | isEnabled Ext_bracketed_spans opts ->
+                        let attrs' = if attrs /= nullAttr
+                                        then attrsToMarkdown attrs
+                                        else empty
+                        in "[" <> contents <> "]" <> attrs'
+                      | isEnabled Ext_raw_html opts ||
+                        isEnabled Ext_native_spans opts ->
+                        tagWithAttrs "span" attrs <> contents <> literal "</span>"
+                      | otherwise -> contents
+inlineToMarkdown _ (Emph []) = return empty
+inlineToMarkdown opts (Emph lst) = do
+  variant <- asks envVariant
+  contents <- inlineListToMarkdown opts lst
+  return $ case variant of
+             PlainText
+               | isEnabled Ext_gutenberg opts -> "_" <> contents <> "_"
+               | otherwise ->  contents
+             _ -> "*" <> contents <> "*"
+inlineToMarkdown _ (Underline []) = return empty
+inlineToMarkdown opts (Underline lst) = do
+  variant <- asks envVariant
+  contents <- inlineListToMarkdown opts lst
+  case variant of
+    PlainText -> return contents
+    _     | isEnabled Ext_bracketed_spans opts ->
+            return $ "[" <> contents <> "]" <> "{.ul}"
+          | isEnabled Ext_native_spans opts ->
+            return $ tagWithAttrs "span" ("", ["underline"], [])
+              <> contents
+              <> literal "</span>"
+          | isEnabled Ext_raw_html opts ->
+            return $ "<u>" <> contents <> "</u>"
+          | otherwise -> inlineToMarkdown opts (Emph lst)
+inlineToMarkdown _ (Strong []) = return empty
+inlineToMarkdown opts (Strong lst) = do
+  variant <- asks envVariant
+  case variant of
+    PlainText ->
+             inlineListToMarkdown opts $
+               if isEnabled Ext_gutenberg opts
+                  then capitalize lst
+                  else lst
+    _ -> do
+       contents <- inlineListToMarkdown opts lst
+       return $ "**" <> contents <> "**"
+inlineToMarkdown _ (Strikeout []) = return empty
+inlineToMarkdown opts (Strikeout lst) = do
+  contents <- inlineListToMarkdown opts lst
+  return $ if isEnabled Ext_strikeout opts
+              then "~~" <> contents <> "~~"
+              else if isEnabled Ext_raw_html opts
+                       then "<s>" <> contents <> "</s>"
+                       else contents
+inlineToMarkdown _ (Superscript []) = return empty
+inlineToMarkdown opts (Superscript lst) =
+  local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do
+    contents <- inlineListToMarkdown opts lst
+    if isEnabled Ext_superscript opts
+       then return $ "^" <> contents <> "^"
+       else if isEnabled Ext_raw_html opts
+                then return $ "<sup>" <> contents <> "</sup>"
+                else
+                  case traverse toSuperscriptInline lst of
+                    Just xs' | not (writerPreferAscii opts)
+                      -> inlineListToMarkdown opts xs'
+                    _ -> do
+                      let rendered = render Nothing contents
+                      return $
+                        case mapM toSuperscript (T.unpack rendered) of
+                           Just r  -> literal $ T.pack r
+                           Nothing -> literal $ "^(" <> rendered <> ")"
+inlineToMarkdown _ (Subscript []) = return empty
+inlineToMarkdown opts (Subscript lst) =
+  local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do
+    contents <- inlineListToMarkdown opts lst
+    if isEnabled Ext_subscript opts
+       then return $ "~" <> contents <> "~"
+       else if isEnabled Ext_raw_html opts
+                then return $ "<sub>" <> contents <> "</sub>"
+                else
+                  case traverse toSubscriptInline lst of
+                    Just xs' | not (writerPreferAscii opts)
+                      -> inlineListToMarkdown opts xs'
+                    _ -> do
+                      let rendered = render Nothing contents
+                      return $
+                        case mapM toSuperscript (T.unpack rendered) of
+                           Just r  -> literal $ T.pack r
+                           Nothing -> literal $ "_(" <> rendered <> ")"
+inlineToMarkdown opts (SmallCaps lst) = do
+  variant <- asks envVariant
+  if variant /= PlainText &&
+     (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
+     then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst)
+     else inlineListToMarkdown opts $ capitalize lst
+inlineToMarkdown opts (Quoted SingleQuote lst) = do
+  contents <- inlineListToMarkdown opts lst
+  return $ if isEnabled Ext_smart opts
+              then "'" <> contents <> "'"
+              else
+                if writerPreferAscii opts
+                   then "&lsquo;" <> contents <> "&rsquo;"
+                   else "‘" <> contents <> "’"
+inlineToMarkdown opts (Quoted DoubleQuote lst) = do
+  contents <- inlineListToMarkdown opts lst
+  return $ if isEnabled Ext_smart opts
+              then "\"" <> contents <> "\""
+              else
+                if writerPreferAscii opts
+                   then "&ldquo;" <> contents <> "&rdquo;"
+                   else "“" <> contents <> "”"
+inlineToMarkdown opts (Code attr str) = do
+  let tickGroups   = filter (T.any (== '`')) $ T.group str
+  let longest      = maybe 0 maximum $ nonEmpty $ map T.length tickGroups
+  let marker       = T.replicate (longest + 1) "`"
+  let spacer       = if longest == 0 then "" else " "
+  let attrsEnabled = isEnabled Ext_inline_code_attributes opts ||
+                     isEnabled Ext_attributes opts
+  let attrs        = if attrsEnabled && attr /= nullAttr
+                        then attrsToMarkdown attr
+                        else empty
+  variant <- asks envVariant
+  case variant of
+     PlainText -> return $ literal str
+     _     ->  return $ literal
+                  (marker <> spacer <> str <> spacer <> marker) <> attrs
+inlineToMarkdown opts (Str str) = do
+  variant <- asks envVariant
+  let str' = (if writerPreferAscii opts
+                 then toHtml5Entities
+                 else id) .
+             (if isEnabled Ext_smart opts
+                 then unsmartify opts
+                 else id) .
+             (if variant == PlainText
+                 then id
+                 else escapeText opts) $ str
+  return $ literal str'
+inlineToMarkdown opts (Math InlineMath str) =
+  case writerHTMLMathMethod opts of
+       WebTeX url -> inlineToMarkdown opts
+                       (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str))
+       _ | isEnabled Ext_tex_math_dollars opts ->
+             return $ "$" <> literal str <> "$"
+         | isEnabled Ext_tex_math_single_backslash opts ->
+             return $ "\\(" <> literal str <> "\\)"
+         | isEnabled Ext_tex_math_double_backslash opts ->
+             return $ "\\\\(" <> literal str <> "\\\\)"
+         | otherwise -> do
+             variant <- asks envVariant
+             texMathToInlines InlineMath str >>=
+               inlineListToMarkdown opts .
+                 (if variant == PlainText then makeMathPlainer else id)
+inlineToMarkdown opts (Math DisplayMath str) =
+  case writerHTMLMathMethod opts of
+      WebTeX url -> (\x -> blankline <> x <> blankline) `fmap`
+             inlineToMarkdown opts (Image nullAttr [Str str]
+                    (url <> T.pack (urlEncode $ T.unpack str), str))
+      _ | isEnabled Ext_tex_math_dollars opts ->
+            return $ "$$" <> literal str <> "$$"
+        | isEnabled Ext_tex_math_single_backslash opts ->
+            return $ "\\[" <> literal str <> "\\]"
+        | isEnabled Ext_tex_math_double_backslash opts ->
+            return $ "\\\\[" <> literal str <> "\\\\]"
+        | otherwise -> (\x -> cr <> x <> cr) `fmap`
+            (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
+inlineToMarkdown opts il@(RawInline f str) = do
+  let tickGroups = filter (T.any (== '`')) $ T.group str
+  let numticks   = 1 + maybe 0 maximum (nonEmpty (map T.length tickGroups))
+  variant <- asks envVariant
+  let Format fmt = f
+  let rawAttribInline = return $
+         literal (T.replicate numticks "`") <> literal str <>
+         literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}"
+  let renderEmpty = mempty <$ report (InlineNotRendered il)
+  case variant of
+    PlainText -> renderEmpty
+    Commonmark
+      | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"]
+         -> return $ literal str
+    Markdown
+      | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+                  "markdown_mmd", "markdown_strict"]
+         -> return $ literal str
+    _ | isEnabled Ext_raw_attribute opts -> rawAttribInline
+      | f `elem` ["html", "html5", "html4"]
+      , isEnabled Ext_raw_html opts
+         -> return $ literal str
+      | f `elem` ["latex", "tex"]
+      , isEnabled Ext_raw_tex opts
+         -> return $ literal str
+    _ -> renderEmpty
+
+
+inlineToMarkdown opts LineBreak = do
+  variant <- asks envVariant
+  if variant == PlainText || isEnabled Ext_hard_line_breaks opts
+     then return cr
+     else return $
+          if isEnabled Ext_escaped_line_breaks opts
+             then "\\" <> cr
+             else "  " <> cr
+inlineToMarkdown _ Space = do
+  escapeSpaces <- asks envEscapeSpaces
+  return $ if escapeSpaces then "\\ " else space
+inlineToMarkdown opts SoftBreak = do
+  escapeSpaces <- asks envEscapeSpaces
+  let space' = if escapeSpaces then "\\ " else space
+  return $ case writerWrapText opts of
+                WrapNone     -> space'
+                WrapAuto     -> space'
+                WrapPreserve -> cr
+inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
+inlineToMarkdown opts (Cite (c:cs) lst)
+  | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst
+  | otherwise =
+      if citationMode c == AuthorInText
+         then do
+           suffs <- inlineListToMarkdown opts $ citationSuffix c
+           rest <- mapM convertOne cs
+           let inbr = suffs <+> joincits rest
+               br   = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
+           return $ literal ("@" <> maybeInBraces (citationId c)) <+> br
+         else do
+           cits <- mapM convertOne (c:cs)
+           return $ literal "[" <> joincits cits <> literal "]"
+  where
+        maybeInBraces key =
+          case readWith (citeKey False >> spaces >> eof)
+                 defaultParserState ("@" <> key) of
+            Left _  -> "{" <> key <> "}"
+            Right _ -> key
+        joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty)
+        convertOne Citation { citationId      = k
+                            , citationPrefix  = pinlines
+                            , citationSuffix  = sinlines
+                            , citationMode    = m }
+                               = do
+           pdoc <- inlineListToMarkdown opts pinlines
+           sdoc <- inlineListToMarkdown opts sinlines
+           let k' = literal (modekey m <> "@" <> maybeInBraces k)
+               r = case sinlines of
+                        Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
+                        _                                         -> k' <+> sdoc
+           return $ pdoc <+> r
+        modekey SuppressAuthor = "-"
+        modekey _              = ""
+inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do
+  variant <- asks envVariant
+  linktext <- inlineListToMarkdown opts txt
+  let linktitle = if T.null tit
+                     then empty
+                     else literal $ " \"" <> tit <> "\""
+  let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src)
+  let useAuto = isURI src &&
+                case txt of
+                      [Str s] | escapeURI s == srcSuffix -> True
+                      _       -> False
+  let useRefLinks = writerReferenceLinks opts && not useAuto
+  shortcutable <- asks envRefShortcutable
+  let useShortcutRefLinks = shortcutable &&
+                            isEnabled Ext_shortcut_reference_links opts
+  reftext <- if useRefLinks
+                then literal <$> getReference attr linktext (src, tit)
+                else return mempty
+  case variant of
+    PlainText
+      | useAuto -> return $ literal srcSuffix
+      | otherwise -> return linktext
+    _ | useAuto -> return $ "<" <> literal srcSuffix <> ">"
+      | useRefLinks ->
+           let first  = "[" <> linktext <> "]"
+               second = if getKey linktext == getKey reftext
+                           then if useShortcutRefLinks
+                                   then ""
+                                   else "[]"
+                           else "[" <> reftext <> "]"
+           in  return $ first <> second
+      | isEnabled Ext_raw_html opts
+      , not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts)
+      , attr /= nullAttr -> -- use raw HTML to render attributes
+          literal . T.strip <$>
+            writeHtml5String opts{ writerTemplate = Nothing }
+            (Pandoc nullMeta [Plain [lnk]])
+      | otherwise -> return $
+         "[" <> linktext <> "](" <> literal src <> linktitle <> ")" <>
+         linkAttributes opts attr
+inlineToMarkdown opts img@(Image attr alternate (source, tit))
+  | isEnabled Ext_raw_html opts &&
+    not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) &&
+    attr /= nullAttr = -- use raw HTML
+    literal . T.strip <$>
+      writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]])
+  | otherwise = do
+  variant <- asks envVariant
+  let txt = if null alternate || alternate == [Str source]
+                                 -- to prevent autolinks
+               then [Str ""]
+               else alternate
+  linkPart <- inlineToMarkdown opts (Link attr txt (source, tit))
+  return $ case variant of
+             PlainText -> "[" <> linkPart <> "]"
+             _     -> "!" <> linkPart
+inlineToMarkdown opts (Note contents) = do
+  modify (\st -> st{ stNotes = contents : stNotes st })
+  st <- get
+  let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + length (stNotes st) - 1)
+  if isEnabled Ext_footnotes opts
+     then return $ "[^" <> ref <> "]"
+     else return $ "[" <> ref <> "]"
+
+makeMathPlainer :: [Inline] -> [Inline]
+makeMathPlainer = walk go
+  where
+  go (Emph xs) = Span nullAttr xs
+  go x         = x
+
+toSubscriptInline :: Inline -> Maybe Inline
+toSubscriptInline Space = Just Space
+toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
+toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s)
+toSubscriptInline LineBreak = Just LineBreak
+toSubscriptInline SoftBreak = Just SoftBreak
+toSubscriptInline _ = Nothing
+
+toSuperscriptInline :: Inline -> Maybe Inline
+toSuperscriptInline Space = Just Space
+toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils
+toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s)
+toSuperscriptInline LineBreak = Just LineBreak
+toSuperscriptInline SoftBreak = Just SoftBreak
+toSuperscriptInline _ = Nothing
diff --git a/src/Text/Pandoc/Writers/Markdown/Types.hs b/src/Text/Pandoc/Writers/Markdown/Types.hs
new file mode 100644
index 000000000..a1d0d14e4
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown/Types.hs
@@ -0,0 +1,81 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{- |
+   Module      : Text.Pandoc.Writers.Markdown.Types
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.Markdown.Types (
+  MarkdownVariant(..),
+  WriterState(..),
+  WriterEnv(..),
+  Notes,
+  Ref,
+  Refs,
+  MD,
+  evalMD
+  ) where
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import Data.Default
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Text (Text)
+import Text.Pandoc.Parsing (Key)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+
+type Notes = [[Block]]
+type Ref   = (Text, Target, Attr)
+type Refs  = [Ref]
+
+type MD m = ReaderT WriterEnv (StateT WriterState m)
+
+evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a
+evalMD md env st = evalStateT (runReaderT md env) st
+
+data WriterEnv = WriterEnv { envInList          :: Bool
+                           , envVariant         :: MarkdownVariant
+                           , envRefShortcutable :: Bool
+                           , envBlockLevel      :: Int
+                           , envEscapeSpaces    :: Bool
+                           }
+
+data MarkdownVariant =
+      PlainText
+    | Commonmark
+    | Markdown
+    deriving (Show, Eq)
+
+instance Default WriterEnv
+  where def = WriterEnv { envInList          = False
+                        , envVariant         = Markdown
+                        , envRefShortcutable = True
+                        , envBlockLevel      = 0
+                        , envEscapeSpaces    = False
+                        }
+
+data WriterState = WriterState { stNotes   :: Notes
+                               , stPrevRefs :: Refs
+                               , stRefs    :: Refs
+                               , stKeys    :: M.Map Key
+                                                (M.Map (Target, Attr) Int)
+                               , stLastIdx  :: Int
+                               , stIds     :: Set.Set Text
+                               , stNoteNum :: Int
+                               }
+
+instance Default WriterState
+  where def = WriterState{ stNotes = []
+                         , stPrevRefs = []
+                         , stRefs = []
+                         , stKeys = M.empty
+                         , stLastIdx = 0
+                         , stIds = Set.empty
+                         , stNoteNum = 1
+                         }
+
+
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index d1912caa6..5029be69f 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.MediaWiki
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 96914d3c6..97c23f24d 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Ms
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -23,6 +23,7 @@ module Text.Pandoc.Writers.Ms ( writeMs ) where
 import Control.Monad.State.Strict
 import Data.Char (isLower, isUpper, ord)
 import Data.List (intercalate, intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Map as Map
 import Data.Maybe (catMaybes)
 import Data.Text (Text)
@@ -244,13 +245,17 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) =
       aligncode AlignDefault = "l"
   in do
   caption' <- inlineListToMs' opts caption
-  let iwidths = if all (== 0) widths
-                   then repeat ""
-                   else map (T.pack . printf "w(%0.1fn)" . (70 *)) widths
+  let isSimple = all (== 0) widths
+  let totalWidth = 70
   -- 78n default width - 8n indent = 70n
   let coldescriptions = literal $ T.unwords
-                        (zipWith (\align width -> aligncode align <> width)
-                        alignments iwidths) <> "."
+                        (zipWith (\align width -> aligncode align <>
+                                    if width == 0
+                                       then ""
+                                       else T.pack $
+                                              printf "w(%0.1fn)"
+                                              (totalWidth * width))
+                        alignments widths) <> "."
   colheadings <- mapM (blockListToMs opts) headers
   let makeRow cols = literal "T{" $$
                      vcat (intersperse (literal "T}\tT{") cols) $$
@@ -259,12 +264,26 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) =
                         then empty
                         else makeRow colheadings $$ char '_'
   body <- mapM (\row -> do
-                         cols <- mapM (blockListToMs opts) row
+                         cols <- mapM (\(cell, w) ->
+                                   (if isSimple
+                                       then id
+                                       else (literal (".nr LL " <>
+                                              T.pack (printf "%0.1fn"
+                                                (w * totalWidth))) $$)) <$>
+                                   blockListToMs opts cell) (zip row widths)
                          return $ makeRow cols) rows
   setFirstPara
   return $ literal ".PP" $$ caption' $$
+           literal ".na" $$ -- we don't want justification in table cells
+           (if isSimple
+               then ""
+               else ".nr LLold \\n[LL]") $$
            literal ".TS" $$ literal "delim(@@) tab(\t);" $$ coldescriptions $$
-           colheadings' $$ vcat body $$ literal ".TE"
+           colheadings' $$ vcat body $$ literal ".TE" $$
+           (if isSimple
+               then ""
+               else ".nr LL \\n[LLold]") $$
+           literal ".ad"
 
 blockToMs opts (BulletList items) = do
   contents <- mapM (bulletListItemToMs opts) items
@@ -272,8 +291,7 @@ blockToMs opts (BulletList items) = do
   return (vcat contents)
 blockToMs opts (OrderedList attribs items) = do
   let markers = take (length items) $ orderedListMarkers attribs
-  let indent = 2 +
-                     maximum (map T.length markers)
+  let indent = 2 + maybe 0 maximum (nonEmpty (map T.length markers))
   contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $
               zip markers items
   setFirstPara
diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs
index bf3265107..d5100f43f 100644
--- a/src/Text/Pandoc/Writers/Muse.hs
+++ b/src/Text/Pandoc/Writers/Muse.hs
@@ -31,6 +31,7 @@ import Control.Monad.State.Strict
 import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace)
 import Data.Default
 import Data.List (intersperse, transpose)
+import Data.List.NonEmpty (nonEmpty, NonEmpty(..))
 import qualified Data.Set as Set
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -158,7 +159,8 @@ simpleTable caption headers rows = do
   caption' <- inlineListToMuse caption
   headers' <- mapM blockListToMuse headers
   rows' <- mapM (mapM blockListToMuse) rows
-  let widthsInChars = maximum . map offset <$> transpose (headers' : rows')
+  let widthsInChars = maybe 0 maximum . nonEmpty . map offset <$>
+                       transpose (headers' : rows')
   let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks
         where sep' = lblock (T.length sep) $ literal sep
   let makeRow sep = hpipeBlocks sep . zipWith lblock widthsInChars
@@ -238,7 +240,7 @@ blockToMuse (DefinitionList items) = do
           label' <- local (\env -> env { envOneLine = True, envAfterSpace = True }) $ inlineListToMuse' label
           let ind = offset' label' -- using Text.DocLayout.offset results in round trip failures
           hang ind (nowrap label') . vcat <$> mapM descriptionToMuse defs
-          where offset' d = maximum (0: map T.length
+          where offset' d = maximum (0 :| map T.length
                                          (T.lines $ render Nothing d))
         descriptionToMuse :: PandocMonad m
                           => [Block]
@@ -269,7 +271,8 @@ blockToMuse (Table _ blkCapt specs thead tbody tfoot) =
     (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
     blocksToDoc opts blocks =
       local (\env -> env { envOptions = opts }) $ blockListToMuse blocks
-    numcols = maximum (length aligns : length widths : map length (headers:rows))
+    numcols = maximum
+              (length aligns :| length widths : map length (headers:rows))
     isSimple = onlySimpleTableCells (headers : rows) && all (== 0) widths
 blockToMuse (Div _ bs) = flatBlockListToMuse bs
 blockToMuse Null = return empty
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
index 4d4dfca15..9c2ce805d 100644
--- a/src/Text/Pandoc/Writers/Native.hs
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Native
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index e41fb7176..e4eb4fd25 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.ODT
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -13,9 +13,10 @@ Conversion of 'Pandoc' documents to ODT.
 -}
 module Text.Pandoc.Writers.ODT ( writeODT ) where
 import Codec.Archive.Zip
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
+import Data.Maybe (fromMaybe)
 import Data.Generics (everywhere', mkT)
 import Data.List (isPrefixOf)
 import qualified Data.Map as Map
@@ -23,10 +24,11 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import Text.Collate.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
 import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
@@ -34,13 +36,14 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
 import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
-                                   fixDisplayMath)
-import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
+                                   fixDisplayMath, getLang)
+import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.XML
+import Text.Pandoc.XML.Light
 import Text.TeXMath
-import Text.XML.Light
+import qualified Text.XML.Light as XL
 
 newtype ODTState = ODTState { stEntries :: [Entry]
                          }
@@ -66,7 +69,7 @@ pandocToODT :: PandocMonad m
 pandocToODT opts doc@(Pandoc meta _) = do
   let title = docTitle meta
   let authors = docAuthors meta
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
   lang <- toLang (getLang opts meta)
   refArchive <-
        case writerReferenceDoc opts of
@@ -172,24 +175,27 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive
 updateStyleWithLang Nothing arch = return arch
 updateStyleWithLang (Just lang) arch = do
   epochtime <- floor `fmap` lift P.getPOSIXTime
-  return arch{ zEntries = [if eRelativePath e == "styles.xml"
-                              then case parseXMLDoc
-                                      (toStringLazy (fromEntry e)) of
-                                      Nothing -> e
-                                      Just d ->
-                                        toEntry "styles.xml" epochtime
-                                        ( fromStringLazy
-                                        . ppTopElement
-                                        . addLang lang $ d )
-                              else e
-                            | e <- zEntries arch] }
+  entries <- mapM (\e -> if eRelativePath e == "styles.xml"
+                            then case parseXMLElement
+                                    (toTextLazy (fromEntry e)) of
+                                    Left msg -> throwError $
+                                        PandocXMLError "styles.xml" msg
+                                    Right d -> return $
+                                      toEntry "styles.xml" epochtime
+                                      ( fromTextLazy
+                                      . TL.fromStrict
+                                      . ppTopElement
+                                      . addLang lang $ d )
+                            else return e) (zEntries arch)
+  return arch{ zEntries = entries }
 
+-- TODO FIXME avoid this generic traversal!
 addLang :: Lang -> Element -> Element
 addLang lang = everywhere' (mkT updateLangAttr)
     where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
-                           = Attr n (T.unpack $ langLanguage lang)
+                           = Attr n (langLanguage lang)
           updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
-                           = Attr n (T.unpack $ langRegion lang)
+                           = Attr n (fromMaybe "" $ langRegion lang)
           updateLangAttr x = x
 
 -- | transform both Image and Math elements
@@ -235,8 +241,8 @@ transformPicMath _ (Math t math) = do
   case writeMathML dt <$> readTeX math of
        Left  _ -> return $ Math t math
        Right r -> do
-         let conf = useShortEmptyTags (const False) defaultConfigPP
-         let mathml = ppcTopElement conf r
+         let conf = XL.useShortEmptyTags (const False) XL.defaultConfigPP
+         let mathml = XL.ppcTopElement conf r
          epochtime <- floor `fmap` lift P.getPOSIXTime
          let dirname = "Formula-" ++ show (length entries) ++ "/"
          let fname = dirname ++ "content.xml"
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index ac991b594..0533d6c12 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.OOXML
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -29,32 +29,32 @@ import Control.Monad.Except (throwError)
 import Text.Pandoc.Error
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Char8 as BL8
 import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
+import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light as XML
+import Text.Pandoc.XML.Light
 
-mknode :: Node t => String -> [(String,String)] -> t -> Element
+mknode :: Node t => Text -> [(Text,Text)] -> t -> Element
 mknode s attrs =
   add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) .  node (nodename s)
 
-mktnode :: String -> [(String,String)] -> T.Text -> Element
-mktnode s attrs = mknode s attrs . T.unpack
+mktnode :: Text -> [(Text,Text)] -> T.Text -> Element
+mktnode s attrs = mknode s attrs
 
-nodename :: String -> QName
+nodename :: Text -> QName
 nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
- where (name, prefix) = case break (==':') s of
-                             (xs,[])    -> (xs, Nothing)
-                             (ys, _:zs) -> (zs, Just ys)
+ where (name, prefix) = case T.break (==':') s of
+                          (xs,ys) -> case T.uncons ys of
+                                       Nothing     -> (xs, Nothing)
+                                       Just (_,zs) -> (zs, Just xs)
 
 toLazy :: B.ByteString -> BL.ByteString
 toLazy = BL.fromChunks . (:[])
 
 renderXml :: Element -> BL.ByteString
-renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
-  UTF8.fromStringLazy (showElement elt)
+renderXml elt = BL.fromStrict (UTF8.fromText (showTopElement elt))
 
 parseXml :: PandocMonad m => Archive -> Archive -> String -> m Element
 parseXml refArchive distArchive relpath =
@@ -62,32 +62,32 @@ parseXml refArchive distArchive relpath =
          findEntryByPath relpath distArchive of
             Nothing -> throwError $ PandocSomeError $
                         T.pack relpath <> " missing in reference file"
-            Just e  -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
-                       Nothing -> throwError $ PandocSomeError $
-                                   T.pack relpath <> " corrupt in reference file"
-                       Just d  -> return d
+            Just e  -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of
+                       Left msg ->
+                         throwError $ PandocXMLError (T.pack relpath) msg
+                       Right d  -> return d
 
 -- Copied from Util
 
-attrToNSPair :: XML.Attr -> Maybe (String, String)
-attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair :: Attr -> Maybe (Text, Text)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
 attrToNSPair _                                     = Nothing
 
 
 elemToNameSpaces :: Element -> NameSpaces
 elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
 
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
   let ns' = ns ++ elemToNameSpaces element
   in qName (elName element) == name &&
      qURI (elName element) == lookup prefix ns'
 
-type NameSpaces = [(String, String)]
+type NameSpaces = [(Text, Text)]
 
 -- | Scales the image to fit the page
 -- sizes are passed in emu
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index 810a94775..8c9229fc0 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE CPP               #-}
 {- |
    Module      : Text.Pandoc.Writers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 071a5542f..5f3224c2f 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleContexts  #-}
+{-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE PatternGuards     #-}
 {-# LANGUAGE ViewPatterns      #-}
@@ -17,6 +18,7 @@ module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where
 import Control.Arrow ((***), (>>>))
 import Control.Monad.State.Strict hiding (when)
 import Data.Char (chr)
+import Data.Foldable (find)
 import Data.List (sortOn, sortBy, foldl')
 import qualified Data.Map as Map
 import Data.Maybe (fromMaybe, isNothing)
@@ -24,7 +26,7 @@ import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import Text.Collate.Lang (Lang (..), parseLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm,
                                       setTranslations, toLang)
 import Text.Pandoc.Definition
@@ -35,6 +37,7 @@ import Text.DocLayout
 import Text.Pandoc.Shared (linesToPara, tshow, blocksToInlines)
 import Text.Pandoc.Templates (renderTemplate)
 import qualified Text.Pandoc.Translations as Term (Term(Figure, Table))
+import Text.Pandoc.Walk
 import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
 import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
@@ -54,6 +57,11 @@ plainToPara x         = x
 
 type OD m = StateT WriterState m
 
+data ReferenceType
+  = HeaderRef
+  | TableRef
+  | ImageRef
+
 data WriterState =
     WriterState { stNotes          :: [Doc Text]
                 , stTableStyles    :: [Doc Text]
@@ -69,6 +77,7 @@ data WriterState =
                 , stImageId        :: Int
                 , stTableCaptionId :: Int
                 , stImageCaptionId :: Int
+                , stIdentTypes     :: [(Text,ReferenceType)]
                 }
 
 defaultWriterState :: WriterState
@@ -86,6 +95,7 @@ defaultWriterState =
                 , stImageId        = 1
                 , stTableCaptionId = 1
                 , stImageCaptionId = 1
+                , stIdentTypes     = []
                 }
 
 when :: Bool -> Doc Text -> Doc Text
@@ -227,7 +237,7 @@ handleSpaces s = case T.uncons s of
 -- | Convert Pandoc document to string in OpenDocument format.
 writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeOpenDocument opts (Pandoc meta blocks) = do
-  let defLang = Lang "en" "US" "" []
+  let defLang = Lang "en" (Just "US") Nothing [] [] []
   lang <- case lookupMetaString "lang" meta of
             "" -> pure defLang
             s  -> fromMaybe defLang <$> toLang (Just s)
@@ -243,6 +253,12 @@ writeOpenDocument opts (Pandoc meta blocks) = do
                         meta
   ((body, metadata),s) <- flip runStateT
         defaultWriterState $ do
+           let collectInlineIdent (Image (ident,_,_) _ _) = [(ident,ImageRef)]
+               collectInlineIdent _                       = []
+           let collectBlockIdent (Header _ (ident,_,_) _)      = [(ident,HeaderRef)]
+               collectBlockIdent (Table (ident,_,_) _ _ _ _ _) = [(ident,TableRef)]
+               collectBlockIdent _                             = []
+           modify $ \s -> s{ stIdentTypes = query collectBlockIdent blocks ++ query collectInlineIdent blocks }
            m <- metaToContext opts
                   (blocksToOpenDocument opts)
                   (fmap chomp . inlinesToOpenDocument opts)
@@ -357,36 +373,32 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
 
 -- | Convert a Pandoc block element to OpenDocument.
 blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m (Doc Text)
-blockToOpenDocument o bs
-    | Plain          b <- bs = if null b
-                                  then return empty
-                                  else inParagraphTags =<< inlinesToOpenDocument o b
-    | Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] <- bs
-                             = figure attr c s t
-    | Para           b <- bs = if null b &&
-                                    not (isEnabled Ext_empty_paragraphs o)
-                                  then return empty
-                                  else inParagraphTags =<< inlinesToOpenDocument o b
-    | LineBlock      b <- bs = blockToOpenDocument o $ linesToPara b
-    | Div attr xs      <- bs = mkDiv attr xs
-    | Header     i (ident,_,_) b
-                       <- bs = setFirstPara >> (inHeaderTags i ident
-                                  =<< inlinesToOpenDocument o b)
-    | BlockQuote     b <- bs = setFirstPara >> mkBlockQuote b
-    | DefinitionList b <- bs = setFirstPara >> defList b
-    | BulletList     b <- bs = setFirstPara >> bulletListToOpenDocument o b
-    | OrderedList  a b <- bs = setFirstPara >> orderedList a b
-    | CodeBlock    _ s <- bs = setFirstPara >> preformatted s
-    | Table a bc s th tb tf <- bs =  setFirstPara >> table (Ann.toTable a bc s th tb tf)
-    | HorizontalRule   <- bs = setFirstPara >> return (selfClosingTag "text:p"
-                                [ ("text:style-name", "Horizontal_20_Line") ])
-    | RawBlock f     s <- bs = if f == Format "opendocument"
-                                  then return $ text $ T.unpack s
-                                  else do
-                                    report $ BlockNotRendered bs
-                                    return empty
-    | Null             <- bs = return empty
-    | otherwise              = return empty
+blockToOpenDocument o = \case
+    Plain          b -> if null b
+                        then return empty
+                        else inParagraphTags =<< inlinesToOpenDocument o b
+    Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] -> figure attr c s t
+    Para           b -> if null b &&
+                           not (isEnabled Ext_empty_paragraphs o)
+                        then return empty
+                        else inParagraphTags =<< inlinesToOpenDocument o b
+    LineBlock      b -> blockToOpenDocument o $ linesToPara b
+    Div attr xs      -> mkDiv attr xs
+    Header     i (ident,_,_) b -> do
+      setFirstPara
+      inHeaderTags i ident =<< inlinesToOpenDocument o b
+    BlockQuote     b -> setFirstPara >> mkBlockQuote b
+    DefinitionList b -> setFirstPara >> defList b
+    BulletList     b -> setFirstPara >> bulletListToOpenDocument o b
+    OrderedList  a b -> setFirstPara >> orderedList a b
+    CodeBlock    _ s -> setFirstPara >> preformatted s
+    Table a bc s th tb tf -> setFirstPara >> table (Ann.toTable a bc s th tb tf)
+    HorizontalRule   -> setFirstPara >> return (selfClosingTag "text:p"
+                         [ ("text:style-name", "Horizontal_20_Line") ])
+    b@(RawBlock f s) -> if f == Format "opendocument"
+                        then return $ text $ T.unpack s
+                        else empty <$ report (BlockNotRendered b)
+    Null             -> return empty
     where
       defList       b = do setInDefinitionList True
                            r <- vcat  <$> mapM (deflistItemToOpenDocument o) b
@@ -411,7 +423,7 @@ blockToOpenDocument o bs
                            inTags True "text:list" [ ("text:style-name", "L" <> tshow ln)]
                                       <$> orderedListToOpenDocument o pn b
       table :: PandocMonad m => Ann.Table -> OD m (Doc Text)
-      table (Ann.Table _ (Caption _ c) colspecs thead tbodies _) = do
+      table (Ann.Table (ident, _, _) (Caption _ c) colspecs thead tbodies _) = do
         tn <- length <$> gets stTableStyles
         pn <- length <$> gets stParaStyles
         let  genIds      = map chr [65..]
@@ -433,7 +445,7 @@ blockToOpenDocument o bs
                       then return empty
                       else inlinesToOpenDocument o (blocksToInlines c) >>=
                              if isEnabled Ext_native_numbering o
-                                then numberedTableCaption
+                                then numberedTableCaption ident
                                 else unNumberedCaption "TableCaption"
         th <- colHeadsToOpenDocument o (map fst paraHStyles) thead
         tr <- mapM (tableBodyToOpenDocument o (map fst paraStyles)) tbodies
@@ -442,36 +454,39 @@ blockToOpenDocument o bs
                           , ("table:style-name", name)
                           ] (vcat columns $$ th $$ vcat tr)
         return $ captionDoc $$ tableDoc
-      figure attr caption source title | null caption =
+      figure attr@(ident, _, _) caption source title | null caption =
         withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]]
                                   | otherwise    = do
         imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]]
         captionDoc <- inlinesToOpenDocument o caption >>=
                          if isEnabled Ext_native_numbering o
-                            then numberedFigureCaption
+                            then numberedFigureCaption ident
                             else unNumberedCaption "FigureCaption"
         return $ imageDoc $$ captionDoc
 
 
-numberedTableCaption :: PandocMonad m => Doc Text -> OD m (Doc Text)
-numberedTableCaption caption = do
+numberedTableCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text)
+numberedTableCaption ident caption = do
     id' <- gets stTableCaptionId
     modify (\st -> st{ stTableCaptionId = id' + 1 })
     capterm <- translateTerm Term.Table
-    return $ numberedCaption "TableCaption" capterm "Table" id' caption
+    return $ numberedCaption "TableCaption" capterm "Table" id' ident caption
 
-numberedFigureCaption :: PandocMonad m => Doc Text -> OD m (Doc Text)
-numberedFigureCaption caption = do
+numberedFigureCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text)
+numberedFigureCaption ident caption = do
     id' <- gets stImageCaptionId
     modify (\st -> st{ stImageCaptionId = id' + 1 })
     capterm <- translateTerm Term.Figure
-    return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption
+    return $ numberedCaption "FigureCaption" capterm  "Illustration" id' ident caption
 
-numberedCaption :: Text -> Text -> Text -> Int -> Doc Text -> Doc Text
-numberedCaption style term name num caption =
+numberedCaption :: Text -> Text -> Text -> Int -> Text -> Doc Text -> Doc Text
+numberedCaption style term name num ident caption =
     let t = text $ T.unpack term
         r = num - 1
-        s = inTags False "text:sequence" [ ("text:ref-name", "ref" <> name <> tshow r),
+        ident' = case ident of
+          "" -> "ref" <> name <> tshow r
+          _ -> ident
+        s = inTags False "text:sequence" [ ("text:ref-name", ident'),
                                            ("text:name", name),
                                            ("text:formula", "ooow:" <> name <> "+1"),
                                            ("style:num-format", "1") ] $ text $ show num
@@ -607,7 +622,9 @@ inlineToOpenDocument o ils
                        else do
                          report $ InlineNotRendered ils
                          return empty
-    Link _ l (s,t) ->  mkLink s t <$> inlinesToOpenDocument o l
+    Link _ l (s,t) -> do
+      identTypes <- gets stIdentTypes
+      mkLink o identTypes s t <$> inlinesToOpenDocument o l
     Image attr _ (s,t) -> mkImg attr s t
     Note        l  -> mkNote l
     where
@@ -619,10 +636,6 @@ inlineToOpenDocument o ils
       unhighlighted s = inlinedCode $ preformatted s
       preformatted s = handleSpaces $ escapeStringForXML s
       inlinedCode s = return $ inTags False "text:span" [("text:style-name", "Source_Text")] s
-      mkLink   s t = inTags False "text:a" [ ("xlink:type" , "simple")
-                                           , ("xlink:href" , s       )
-                                           , ("office:name", t       )
-                                           ] . inSpanTags "Definition"
       mkImg (_, _, kvs) s _ = do
                id' <- gets stImageId
                modify (\st -> st{ stImageId = id' + 1 })
@@ -659,6 +672,45 @@ inlineToOpenDocument o ils
         addNote nn
         return nn
 
+mkLink :: WriterOptions -> [(Text,ReferenceType)] -> Text -> Text -> Doc Text -> Doc Text
+mkLink o identTypes s t d =
+  let maybeIdentAndType = case T.uncons s of
+                            Just ('#', ident) -> find ((ident ==) . fst) identTypes
+                            _                 -> Nothing
+      d' = inSpanTags "Definition" d
+      ref refType format ident       = inTags False refType
+                                       [ ("text:reference-format", format ),
+                                         ("text:ref-name", ident) ]
+      inlineSpace                    = selfClosingTag "text:s" []
+      bookmarkRef                    = ref "text:bookmark-ref"
+      bookmarkRefNumber ident        = bookmarkRef "number" ident mempty
+      bookmarkRefName ident          = bookmarkRef "text" ident d
+      bookmarkRefNameNumber ident    = bookmarkRefNumber ident <> inlineSpace <> bookmarkRefName ident
+      bookmarkRef'
+        | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = bookmarkRefNameNumber
+        | isEnabled Ext_xrefs_name o                                 = bookmarkRefName
+        | otherwise                                                  = bookmarkRefNumber
+      sequenceRef                    = ref "text:sequence-ref"
+      sequenceRefNumber ident        = sequenceRef "value" ident mempty
+      sequenceRefName ident          = sequenceRef "caption" ident d
+      sequenceRefNameNumber ident    = sequenceRefNumber ident <> inlineSpace <> sequenceRefName ident
+      sequenceRef'
+        | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = sequenceRefNameNumber
+        | isEnabled Ext_xrefs_name o                                 = sequenceRefName
+        | otherwise                                                  = sequenceRefNumber
+      link = inTags False "text:a" [ ("xlink:type" , "simple")
+                                          , ("xlink:href" , s       )
+                                          , ("office:name", t       )
+                                          ] d'
+      linkOrReference = case maybeIdentAndType of
+                          Just (ident, HeaderRef) -> bookmarkRef' ident
+                          Just (ident, TableRef)  -> sequenceRef' ident
+                          Just (ident, ImageRef)  -> sequenceRef' ident
+                          _                       -> link
+      in if isEnabled Ext_xrefs_name o || isEnabled Ext_xrefs_number o
+            then linkOrReference
+            else link
+
 bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc Text]))
 bulletListStyle l = do
   let doStyles  i = inTags True "text:list-level-style-bullet"
@@ -819,34 +871,33 @@ data TextStyle = Italic
 textStyleAttr :: Map.Map Text Text
               -> TextStyle
               -> Map.Map Text Text
-textStyleAttr m s
-    | Italic <- s = Map.insert "fo:font-style" "italic" .
-                    Map.insert "style:font-style-asian" "italic" .
-                    Map.insert "style:font-style-complex" "italic" $ m
-    | Bold   <- s = Map.insert "fo:font-weight" "bold" .
-                    Map.insert "style:font-weight-asian" "bold" .
-                    Map.insert "style:font-weight-complex" "bold" $ m
-    | Under  <- s = Map.insert "style:text-underline-style" "solid" .
-                    Map.insert "style:text-underline-width" "auto" .
-                    Map.insert "style:text-underline-color" "font-color" $ m
-    | Strike <- s = Map.insert "style:text-line-through-style" "solid" m
-    | Sub    <- s = Map.insert "style:text-position" "sub 58%" m
-    | Sup    <- s = Map.insert "style:text-position" "super 58%" m
-    | SmallC <- s = Map.insert "fo:font-variant" "small-caps" m
-    | Pre    <- s = Map.insert "style:font-name" "Courier New" .
-                    Map.insert "style:font-name-asian" "Courier New" .
-                    Map.insert "style:font-name-complex" "Courier New" $ m
-    | Language lang <- s
-                  = Map.insert "fo:language" (langLanguage lang) .
-                    Map.insert "fo:country" (langRegion lang) $ m
-    | otherwise   = m
+textStyleAttr m = \case
+  Italic -> Map.insert "fo:font-style" "italic" .
+                Map.insert "style:font-style-asian" "italic" .
+                Map.insert "style:font-style-complex" "italic" $ m
+  Bold   -> Map.insert "fo:font-weight" "bold" .
+                Map.insert "style:font-weight-asian" "bold" .
+                Map.insert "style:font-weight-complex" "bold" $ m
+  Under  -> Map.insert "style:text-underline-style" "solid" .
+                Map.insert "style:text-underline-width" "auto" .
+                Map.insert "style:text-underline-color" "font-color" $ m
+  Strike -> Map.insert "style:text-line-through-style" "solid" m
+  Sub    -> Map.insert "style:text-position" "sub 58%" m
+  Sup    -> Map.insert "style:text-position" "super 58%" m
+  SmallC -> Map.insert "fo:font-variant" "small-caps" m
+  Pre    -> Map.insert "style:font-name" "Courier New" .
+            Map.insert "style:font-name-asian" "Courier New" .
+            Map.insert "style:font-name-complex" "Courier New" $ m
+  Language lang ->
+            Map.insert "fo:language" (langLanguage lang) .
+            maybe id (Map.insert "fo:country") (langRegion lang) $ m
 
 withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
 withLangFromAttr (_,_,kvs) action =
   case lookup "lang" kvs of
        Nothing -> action
        Just l  ->
-         case parseBCP47 l of
+         case parseLang l of
               Right lang -> withTextStyle (Language lang) action
               Left _ -> do
                 report $ InvalidLang l
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 2af93017d..d404f1c8d 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -3,8 +3,8 @@
 {- |
    Module      : Text.Pandoc.Writers.Org
    Copyright   : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com>
-                   2010-2020 John MacFarlane <jgm@berkeley.edu>
-                   2016-2020 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+                   2010-2021 John MacFarlane <jgm@berkeley.edu>
+                   2016-2021 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
@@ -17,8 +17,9 @@ Org-Mode:  <http://orgmode.org>
 -}
 module Text.Pandoc.Writers.Org (writeOrg) where
 import Control.Monad.State.Strict
-import Data.Char (isAlphaNum)
+import Data.Char (isAlphaNum, isDigit)
 import Data.List (intersect, intersperse, partition, transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -83,12 +84,15 @@ noteToOrg num note = do
 
 -- | Escape special characters for Org.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing
-               [ ('\x2014',"---")
-               , ('\x2013',"--")
-               , ('\x2019',"'")
-               , ('\x2026',"...")
-               ]
+escapeString t
+  | T.all (\c -> c < '\x2013' || c > '\x2026') t = t
+  | otherwise = T.concatMap escChar t
+  where
+   escChar '\x2013' = "--"
+   escChar '\x2014' = "---"
+   escChar '\x2019' = "'"
+   escChar '\x2026' = "..."
+   escChar c        = T.singleton c
 
 isRawFormat :: Format -> Bool
 isRawFormat f =
@@ -163,7 +167,7 @@ blockToOrg (Table _ blkCapt specs thead tbody tfoot) =  do
                    else "#+caption: " <> caption''
   headers' <- mapM blockListToOrg headers
   rawRows <- mapM (mapM blockListToOrg) rows
-  let numChars = maximum . map offset
+  let numChars = maybe 0 maximum . nonEmpty . map offset
   -- FIXME: width is not being used.
   let widthsInChars =
        map numChars $ transpose (headers' : rawRows)
@@ -198,7 +202,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do
                     x         -> x
   let markers = take (length items) $ orderedListMarkers
                                       (start, Decimal, delim')
-  let maxMarkerLength = maximum $ map T.length markers
+  let maxMarkerLength = maybe 0 maximum . nonEmpty $ map T.length markers
   let markers' = map (\m -> let s = maxMarkerLength - T.length m
                             in  m <> T.replicate s " ") markers
   contents <- zipWithM orderedListItemToOrg markers' items
@@ -213,25 +217,35 @@ blockToOrg (DefinitionList items) = do
 -- | Convert bullet list item (list of blocks) to Org.
 bulletListItemToOrg :: PandocMonad m => [Block] -> Org m (Doc Text)
 bulletListItemToOrg items = do
-  contents <- blockListToOrg items
+  exts <- gets $ writerExtensions . stOptions
+  contents <- blockListToOrg (taskListItemToOrg exts items)
   return $ hang 2 "- " contents $$
           if endsWithPlain items
              then cr
              else blankline
 
-
 -- | Convert ordered list item (a list of blocks) to Org.
 orderedListItemToOrg :: PandocMonad m
                      => Text   -- ^ marker for list item
                      -> [Block]  -- ^ list item (list of blocks)
                      -> Org m (Doc Text)
 orderedListItemToOrg marker items = do
-  contents <- blockListToOrg items
+  exts <- gets $ writerExtensions . stOptions
+  contents <- blockListToOrg (taskListItemToOrg exts items)
   return $ hang (T.length marker + 1) (literal marker <> space) contents $$
           if endsWithPlain items
              then cr
              else blankline
 
+-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@
+-- or @U+2612 BALLOT BOX WITH X@ to org checkbox syntax (e.g. @[X]@).
+taskListItemToOrg :: Extensions -> [Block] -> [Block]
+taskListItemToOrg = handleTaskListItem toOrg
+  where
+    toOrg (Str "☐" : Space : is) = Str "[ ]" : Space : is
+    toOrg (Str "☒" : Space : is) = Str "[X]" : Space : is
+    toOrg is = is
+
 -- | Convert definition list item (label, list of blocks) to Org.
 definitionListItemToOrg :: PandocMonad m
                         => ([Inline], [[Block]]) -> Org m (Doc Text)
@@ -337,16 +351,20 @@ inlineListToOrg :: PandocMonad m
                 => [Inline]
                 -> Org m (Doc Text)
 inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst)
-  where fixMarkers [] = []  -- prevent note refs and list markers from wrapping, see #4171
+  where -- Prevent note refs and list markers from wrapping, see #4171
+        -- and #7132.
+        fixMarkers [] = []
         fixMarkers (Space : x : rest) | shouldFix x =
           Str " " : x : fixMarkers rest
         fixMarkers (SoftBreak : x : rest) | shouldFix x =
           Str " " : x : fixMarkers rest
         fixMarkers (x : rest) = x : fixMarkers rest
 
-        shouldFix Note{} = True -- Prevent footnotes
+        shouldFix Note{} = True    -- Prevent footnotes
         shouldFix (Str "-") = True -- Prevent bullet list items
-        -- TODO: prevent ordered list items
+        shouldFix (Str x)          -- Prevent ordered list items
+          | Just (cs, c) <- T.unsnoc x = T.all isDigit cs &&
+                                         (c == '.' || c == ')')
         shouldFix _ = False
 
 -- | Convert Pandoc inline element to Org.
@@ -386,9 +404,11 @@ inlineToOrg (Str str) = return . literal $ escapeString str
 inlineToOrg (Math t str) = do
   modify $ \st -> st{ stHasMath = True }
   return $ if t == InlineMath
-              then "$" <> literal str <> "$"
-              else "$$" <> literal str <> "$$"
+              then "\\(" <> literal str <> "\\)"
+              else "\\[" <> literal str <> "\\]"
 inlineToOrg il@(RawInline f str)
+  | elem f ["tex", "latex"] && T.isPrefixOf "\\begin" str =
+    return $ cr <> literal str <> cr
   | isRawFormat f = return $ literal str
   | otherwise     = do
       report $ InlineNotRendered il
diff --git a/src/Text/Pandoc/Writers/Powerpoint.hs b/src/Text/Pandoc/Writers/Powerpoint.hs
index ca3b74a1d..e0573beca 100644
--- a/src/Text/Pandoc/Writers/Powerpoint.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint.hs
@@ -39,5 +39,5 @@ writePowerpoint opts (Pandoc meta blks) = do
   let blks' = walk fixDisplayMath blks
   let (pres, logMsgs) = documentToPresentation opts (Pandoc meta blks')
   mapM_ report logMsgs
-  archv <- presentationToArchive opts pres
+  archv <- presentationToArchive opts meta pres
   return $ fromArchive archv
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 603a84acc..157810216 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -20,15 +20,16 @@ import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader
 import Control.Monad.State
 import Codec.Archive.Zip
-import Data.Char (toUpper)
 import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse)
 import Data.Default
+import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Read
 import Data.Time (formatTime, defaultTimeLocale)
 import Data.Time.Clock (UTCTime)
 import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
 import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
-import Text.XML.Light
+import Text.Pandoc.XML.Light as XML
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -37,17 +38,21 @@ import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Options
 import Text.Pandoc.MIME
 import qualified Data.ByteString.Lazy as BL
+import Text.Pandoc.Writers.Shared (metaToContext)
 import Text.Pandoc.Writers.OOXML
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes, isJust)
 import Text.Pandoc.ImageSize
 import Control.Applicative ((<|>))
 import System.FilePath.Glob
-import Text.DocTemplates (FromContext(lookupContext))
+import Text.DocTemplates (FromContext(lookupContext), Context)
+import Text.DocLayout (literal)
 import Text.TeXMath
 import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
+import Text.Pandoc.Shared (tshow, stringify)
 import Skylighting (fromColor)
+import Data.List.NonEmpty (nonEmpty)
 
 -- |The 'EMU' type is used to specify sizes in English Metric Units.
 type EMU = Integer
@@ -77,19 +82,24 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer)
 getPresentationSize refArchive distArchive = do
   entry <- findEntryByPath "ppt/presentation.xml" refArchive  `mplus`
            findEntryByPath "ppt/presentation.xml" distArchive
-  presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry
+  presElement <- either (const Nothing) return $
+                   parseXMLElement $ UTF8.toTextLazy $ fromEntry entry
   let ns = elemToNameSpaces presElement
   sldSize <- findChild (elemName ns "p" "sldSz") presElement
   cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
   cyS <- findAttr (QName "cy" Nothing Nothing) sldSize
-  (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String)
-  (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String)
+  cx <- readTextAsInteger cxS
+  cy <- readTextAsInteger cyS
   return (cx `div` 12700, cy `div` 12700)
 
+readTextAsInteger :: Text -> Maybe Integer
+readTextAsInteger = either (const Nothing) (Just . fst) . Data.Text.Read.decimal
+
 data WriterEnv = WriterEnv { envRefArchive :: Archive
                            , envDistArchive :: Archive
                            , envUTCTime :: UTCTime
                            , envOpts :: WriterOptions
+                           , envContext :: Context Text
                            , envPresentationSize :: (Integer, Integer)
                            , envSlideHasHeader :: Bool
                            , envInList :: Bool
@@ -115,6 +125,7 @@ instance Default WriterEnv where
                   , envDistArchive = emptyArchive
                   , envUTCTime = posixSecondsToUTCTime 0
                   , envOpts = def
+                  , envContext = mempty
                   , envPresentationSize = (720, 540)
                   , envSlideHasHeader = False
                   , envInList = False
@@ -159,20 +170,16 @@ runP env st p = evalStateT (runReaderT p env) st
 
 --------------------------------------------------------------------
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText n = fmap T.pack . findAttr n
-
 monospaceFont :: Monad m => P m T.Text
 monospaceFont = do
-  vars <- writerVariables <$> asks envOpts
+  vars <- asks envContext
   case lookupContext "monofont" vars of
     Just s -> return s
     Nothing -> return "Courier"
 
--- Kept as string for XML.Light
-fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)]
+fontSizeAttributes :: Monad m => RunProps -> P m [(Text, Text)]
 fontSizeAttributes RunProps { rPropForceSize = Just sz } =
-  return [("sz", show $ sz * 100)]
+  return [("sz", tshow $ sz * 100)]
 fontSizeAttributes _ = return []
 
 copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive
@@ -301,8 +308,9 @@ makeSpeakerNotesMap (Presentation _ slides) =
                                  then Nothing
                                  else Just n
 
-presentationToArchive :: PandocMonad m => WriterOptions -> Presentation -> m Archive
-presentationToArchive opts pres = do
+presentationToArchive :: PandocMonad m
+                      => WriterOptions -> Meta -> Presentation -> m Archive
+presentationToArchive opts meta pres = do
   distArchive <- toArchive . BL.fromStrict <$>
                       P.readDefaultDataFile "reference.pptx"
   refArchive <- case writerReferenceDoc opts of
@@ -310,7 +318,7 @@ presentationToArchive opts pres = do
                      Nothing -> toArchive . BL.fromStrict <$>
                         P.readDataFile "reference.pptx"
 
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
 
   presSize <- case getPresentationSize refArchive distArchive of
                 Just sz -> return sz
@@ -318,10 +326,18 @@ presentationToArchive opts pres = do
                            PandocSomeError
                            "Could not determine presentation size"
 
+  -- note, we need writerTemplate to be Just _ or metaToContext does
+  -- nothing
+  context <- metaToContext opts{ writerTemplate =
+                                  writerTemplate opts <|> Just mempty }
+                (return . literal . stringify)
+                (return . literal . stringify) meta
+
   let env = def { envRefArchive = refArchive
                 , envDistArchive = distArchive
                 , envUTCTime = utctime
                 , envOpts = opts
+                , envContext = context
                 , envPresentationSize = presSize
                 , envSlideIdMap = makeSlideIdMap pres
                 , envSpeakerNotesIdMap = makeSpeakerNotesMap pres
@@ -363,7 +379,7 @@ shapeHasId :: NameSpaces -> T.Text -> Element -> Bool
 shapeHasId ns ident element
   | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element
   , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr
-  , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr =
+  , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr =
       nm == ident
   | otherwise = False
 
@@ -394,10 +410,10 @@ getShapeDimensions ns element
       ext <- findChild (elemName ns "a" "ext") xfrm
       cxS <- findAttr (QName "cx" Nothing Nothing) ext
       cyS <- findAttr (QName "cy" Nothing Nothing) ext
-      (x, _) <- listToMaybe $ reads xS
-      (y, _) <- listToMaybe $ reads yS
-      (cx, _) <- listToMaybe $ reads cxS
-      (cy, _) <- listToMaybe $ reads cyS
+      x <- readTextAsInteger xS
+      y <- readTextAsInteger yS
+      cx <- readTextAsInteger cxS
+      cy <- readTextAsInteger cyS
       return ((x `div` 12700, y `div` 12700),
               (cx `div` 12700, cy `div` 12700))
   | otherwise = Nothing
@@ -428,7 +444,7 @@ getContentShapeSize ns layout master
         Nothing -> do let mbSz =
                             findChild (elemName ns "p" "nvSpPr") sp >>=
                             findChild (elemName ns "p" "cNvPr") >>=
-                            findAttrText (QName "id" Nothing Nothing) >>=
+                            findAttr (QName "id" Nothing Nothing) >>=
                             flip getMasterShapeDimensionsById master
                       case mbSz of
                         Just sz' -> return sz'
@@ -437,10 +453,10 @@ getContentShapeSize ns layout master
 getContentShapeSize _ _ _ = throwError $ PandocSomeError
                             "Attempted to find content shape size in non-layout"
 
-buildSpTree :: NameSpaces -> Element -> [Element] -> Element
+buildSpTree :: NameSpaces -> Element -> [Content] -> Element
 buildSpTree ns spTreeElem newShapes =
   emptySpTreeElem { elContent = newContent }
-  where newContent = elContent emptySpTreeElem <> map Elem newShapes
+  where newContent = elContent emptySpTreeElem <> newShapes
         emptySpTreeElem = spTreeElem { elContent = filter fn (elContent spTreeElem) }
         fn :: Content -> Bool
         fn (Elem e) = isElem ns "p" "nvGrpSpPr" e ||
@@ -448,8 +464,8 @@ buildSpTree ns spTreeElem newShapes =
         fn _        = True
 
 replaceNamedChildren :: NameSpaces
-                     -> String
-                     -> String
+                     -> Text
+                     -> Text
                      -> [Element]
                      -> Element
                      -> Element
@@ -472,15 +488,16 @@ registerLink link = do
   linkReg <- gets stLinkIds
   mediaReg <- gets stMediaIds
   hasSpeakerNotes <- curSlideHasSpeakerNotes
-  let maxLinkId = case M.lookup curSlideId linkReg of
-        Just mp -> case M.keys mp of
-          [] -> if hasSpeakerNotes then 2 else 1
-          ks -> maximum ks
-        Nothing -> if hasSpeakerNotes then 2 else 1
-      maxMediaId = case M.lookup curSlideId mediaReg of
-        Just [] -> if hasSpeakerNotes then 2 else 1
-        Just mInfos -> maximum $ map mInfoLocalId mInfos
-        Nothing -> if hasSpeakerNotes then 2 else 1
+  let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of
+        Just xs -> maximum xs
+        Nothing
+          | hasSpeakerNotes -> 2
+          | otherwise       -> 1
+      maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of
+        Just mInfos -> maximum $ fmap mInfoLocalId mInfos
+        Nothing
+          | hasSpeakerNotes -> 2
+          | otherwise       -> 1
       maxId = max maxLinkId maxMediaId
       slideLinks = case M.lookup curSlideId linkReg of
         Just mp -> M.insert (maxId + 1) link mp
@@ -495,20 +512,19 @@ registerMedia fp caption = do
   mediaReg <- gets stMediaIds
   globalIds <- gets stMediaGlobalIds
   hasSpeakerNotes <- curSlideHasSpeakerNotes
-  let maxLinkId = case M.lookup curSlideId linkReg of
-        Just mp -> case M.keys mp of
-          [] -> if hasSpeakerNotes then 2 else 1
-          ks -> maximum ks
-        Nothing -> if hasSpeakerNotes then 2 else 1
-      maxMediaId = case M.lookup curSlideId mediaReg of
-        Just [] -> if hasSpeakerNotes then 2 else 1
-        Just mInfos -> maximum $ map mInfoLocalId mInfos
-        Nothing -> if hasSpeakerNotes then 2 else 1
+  let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of
+          Just ks -> maximum ks
+          Nothing
+            | hasSpeakerNotes -> 2
+            | otherwise       -> 1
+      maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of
+          Just mInfos -> maximum $ fmap mInfoLocalId mInfos
+          Nothing
+            | hasSpeakerNotes -> 2
+            | otherwise       -> 1
       maxLocalId = max maxLinkId maxMediaId
 
-      maxGlobalId = case M.elems globalIds of
-        [] -> 0
-        ids -> maximum ids
+      maxGlobalId = maybe 0 maximum $ nonEmpty $ M.elems globalIds
 
   (imgBytes, mbMt) <- P.fetchItem $ T.pack fp
   let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ "." <> x))
@@ -521,6 +537,7 @@ registerMedia fp caption = do
                  Just Eps  -> Just ".eps"
                  Just Svg  -> Just ".svg"
                  Just Emf  -> Just ".emf"
+                 Just Tiff -> Just ".tiff"
                  Nothing   -> Nothing
 
   let newGlobalId = fromMaybe (maxGlobalId + 1) (M.lookup fp globalIds)
@@ -652,10 +669,10 @@ createCaption contentShapeDimensions paraElements = do
                        ]
                      , mknode "p:spPr" []
                        [ mknode "a:xfrm" []
-                         [ mknode "a:off" [("x", show $ 12700 * x),
-                                           ("y", show $ 12700 * (y + cy - captionHeight))] ()
-                         , mknode "a:ext" [("cx", show $ 12700 * cx),
-                                           ("cy", show $ 12700 * captionHeight)] ()
+                         [ mknode "a:off" [("x", tshow $ 12700 * x),
+                                           ("y", tshow $ 12700 * (y + cy - captionHeight))] ()
+                         , mknode "a:ext" [("cx", tshow $ 12700 * cx),
+                                           ("cy", tshow $ 12700 * captionHeight)] ()
                          ]
                        , mknode "a:prstGeom" [("prst", "rect")]
                          [ mknode "a:avLst" [] ()
@@ -704,11 +721,13 @@ makePicElements layout picProps mInfo alt = do
                                      ,("noChangeAspect","1")] ()
   -- cNvPr will contain the link information so we do that separately,
   -- and register the link if necessary.
-  let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")]
+  let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo),
+                   ("id","0"),
+                   ("name","Picture 1")]
   cNvPr <- case picPropLink picProps of
     Just link -> do idNum <- registerLink link
                     return $ mknode "p:cNvPr" cNvPrAttr $
-                      mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] ()
+                      mknode "a:hlinkClick" [("r:id", "rId" <> tshow idNum)] ()
     Nothing   -> return $ mknode "p:cNvPr" cNvPrAttr ()
   let nvPicPr  = mknode "p:nvPicPr" []
                  [ cNvPr
@@ -716,13 +735,13 @@ makePicElements layout picProps mInfo alt = do
                  , mknode "p:nvPr" [] ()]
   let blipFill = mknode "p:blipFill" []
                  [ mknode "a:blip" [("r:embed", "rId" <>
-          show (mInfoLocalId mInfo))] ()
+                     tshow (mInfoLocalId mInfo))] ()
                  , mknode "a:stretch" [] $
                    mknode "a:fillRect" [] () ]
   let xfrm =    mknode "a:xfrm" []
-                [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] ()
-                , mknode "a:ext" [("cx",show dimX')
-                                 ,("cy",show dimY')] () ]
+                [ mknode "a:off" [("x", tshow xoff'), ("y", tshow yoff')] ()
+                , mknode "a:ext" [("cx", tshow dimX')
+                                 ,("cy", tshow dimY')] () ]
   let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
                  mknode "a:avLst" [] ()
   let ln =      mknode "a:ln" [("w","9525")]
@@ -744,8 +763,8 @@ makePicElements layout picProps mInfo alt = do
     else return [picShape]
 
 
-paraElemToElements :: PandocMonad m => ParaElem -> P m [Element]
-paraElemToElements Break = return [mknode "a:br" [] ()]
+paraElemToElements :: PandocMonad m => ParaElem -> P m [Content]
+paraElemToElements Break = return [Elem $ mknode "a:br" [] ()]
 paraElemToElements (Run rpr s) = do
   sizeAttrs <- fontSizeAttributes rpr
   let attrs = sizeAttrs <>
@@ -761,7 +780,7 @@ paraElemToElements (Run rpr s) = do
             Just DoubleStrike -> [("strike", "dblStrike")]
             Nothing -> []) <>
         (case rBaseline rpr of
-            Just n -> [("baseline", show n)]
+            Just n -> [("baseline", tshow n)]
             Nothing -> []) <>
         (case rCap rpr of
             Just NoCapitals -> [("cap", "none")]
@@ -778,42 +797,44 @@ paraElemToElements (Run rpr s) = do
                    return $ case link of
                      InternalTarget _ ->
                        let linkAttrs =
-                             [ ("r:id", "rId" <> show idNum)
+                             [ ("r:id", "rId" <> tshow idNum)
                              , ("action", "ppaction://hlinksldjump")
                              ]
                        in [mknode "a:hlinkClick" linkAttrs ()]
                      -- external
                      ExternalTarget _ ->
                        let linkAttrs =
-                             [ ("r:id", "rId" <> show idNum)
+                             [ ("r:id", "rId" <> tshow idNum)
                              ]
                        in [mknode "a:hlinkClick" linkAttrs ()]
                  Nothing -> return []
   let colorContents = case rSolidFill rpr of
                         Just color ->
                           case fromColor color of
-                            '#':hx ->  [mknode "a:solidFill" []
-                                        [mknode "a:srgbClr" [("val", map toUpper hx)] ()]
-                                       ]
+                            '#':hx ->
+                              [mknode "a:solidFill" []
+                                [mknode "a:srgbClr"
+                                  [("val", T.toUpper $ T.pack hx)] ()]]
                             _ -> []
                         Nothing -> []
   codeFont <- monospaceFont
   let codeContents =
-        [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr]
+        [mknode "a:latin" [("typeface", codeFont)] () | rPropCode rpr]
   let propContents = linkProps <> colorContents <> codeContents
-  return [mknode "a:r" [] [ mknode "a:rPr" attrs propContents
-                          , mknode "a:t" [] $ T.unpack s
-                          ]]
+  return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents
+                                 , mknode "a:t" [] s
+                                 ]]
 paraElemToElements (MathElem mathType texStr) = do
   isInSpkrNotes <- asks envInSpeakerNotes
   if isInSpkrNotes
     then paraElemToElements $ Run def $ unTeXString texStr
     else do res <- convertMath writeOMML mathType (unTeXString texStr)
-            case res of
-              Right r -> return [mknode "a14:m" [] $ addMathInfo r]
+            case fromXLElement <$> res of
+              Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r]
               Left (Str s) -> paraElemToElements (Run def s)
               Left _       -> throwError $ PandocShouldNeverHappenError "non-string math fallback"
-paraElemToElements (RawOOXMLParaElem str) = return [ x | Elem x <- parseXML str ]
+paraElemToElements (RawOOXMLParaElem str) = return
+  [Text (CData CDataRaw str Nothing)]
 
 
 -- This is a bit of a kludge -- really requires adding an option to
@@ -821,9 +842,10 @@ paraElemToElements (RawOOXMLParaElem str) = return [ x | Elem x <- parseXML str
 -- step at a time.
 addMathInfo :: Element -> Element
 addMathInfo element =
-  let mathspace = Attr { attrKey = QName "m" Nothing (Just "xmlns")
-                       , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
-                       }
+  let mathspace =
+        Attr { attrKey = QName "m" Nothing (Just "xmlns")
+             , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
+             }
   in add_attr mathspace element
 
 -- We look through the element to see if it contains an a14:m
@@ -846,13 +868,13 @@ surroundWithMathAlternate element =
 paragraphToElement :: PandocMonad m => Paragraph -> P m Element
 paragraphToElement par = do
   let
-    attrs = [("lvl", show $ pPropLevel $ paraProps par)] <>
+    attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <>
             (case pPropMarginLeft (paraProps par) of
-               Just px -> [("marL", show $ pixelsToEmu px)]
+               Just px -> [("marL", tshow $ pixelsToEmu px)]
                Nothing -> []
             ) <>
             (case pPropIndent (paraProps par) of
-               Just px -> [("indent", show $ pixelsToEmu px)]
+               Just px -> [("indent", tshow $ pixelsToEmu px)]
                Nothing -> []
             ) <>
             (case pPropAlign (paraProps par) of
@@ -864,7 +886,7 @@ paragraphToElement par = do
     props = [] <>
             (case pPropSpaceBefore $ paraProps par of
                Just px -> [mknode "a:spcBef" [] [
-                              mknode "a:spcPts" [("val", show $ 100 * px)] ()
+                              mknode "a:spcPts" [("val", tshow $ 100 * px)] ()
                               ]
                           ]
                Nothing -> []
@@ -875,8 +897,9 @@ paragraphToElement par = do
                  [mknode "a:buAutoNum" (autoNumAttrs attrs') ()]
                Nothing -> [mknode "a:buNone" [] ()]
             )
-  paras <- concat <$> mapM paraElemToElements (paraElems par)
-  return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] <> paras
+  paras <- mapM paraElemToElements (paraElems par)
+  return $ mknode "a:p" [] $
+    [Elem $ mknode "a:pPr" attrs props] <> concat paras
 
 shapeToElement :: PandocMonad m => Element -> Shape -> P m Element
 shapeToElement layout (TextBox paras)
@@ -896,21 +919,22 @@ shapeToElement layout (TextBox paras)
 -- GraphicFrame and Pic should never reach this.
 shapeToElement _ _ = return $ mknode "p:sp" [] ()
 
-shapeToElements :: PandocMonad m => Element -> Shape -> P m [Element]
+shapeToElements :: PandocMonad m => Element -> Shape -> P m [Content]
 shapeToElements layout (Pic picProps fp alt) = do
   mInfo <- registerMedia fp alt
   case mInfoExt mInfo of
-    Just _ ->
+    Just _ -> map Elem <$>
       makePicElements layout picProps mInfo alt
     Nothing -> shapeToElements layout $ TextBox [Paragraph def alt]
-shapeToElements layout (GraphicFrame tbls cptn) =
+shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$>
   graphicFrameToElements layout tbls cptn
-shapeToElements _ (RawOOXMLShape str) = return [ x | Elem x <- parseXML str ]
+shapeToElements _ (RawOOXMLShape str) = return
+  [Text (CData CDataRaw str Nothing)]
 shapeToElements layout shp = do
   element <- shapeToElement layout shp
-  return [element]
+  return [Elem element]
 
-shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Element]
+shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Content]
 shapesToElements layout shps =
  concat <$> mapM (shapeToElements layout) shps
 
@@ -937,8 +961,10 @@ graphicFrameToElements layout tbls caption = do
             [mknode "p:ph" [("idx", "1")] ()]
           ]
         , mknode "p:xfrm" []
-          [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] ()
-          , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] ()
+          [ mknode "a:off" [("x", tshow $ 12700 * x),
+                            ("y", tshow $ 12700 * y)] ()
+          , mknode "a:ext" [("cx", tshow $ 12700 * cx),
+                            ("cy", tshow $ 12700 * cy)] ()
           ]
         ] <> elements
 
@@ -952,7 +978,7 @@ getDefaultTableStyle = do
   refArchive <- asks envRefArchive
   distArchive <- asks envDistArchive
   tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml"
-  return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst
+  return $ findAttr (QName "def" Nothing Nothing) tblStyleLst
 
 graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element
 graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
@@ -990,7 +1016,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
   let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells
 
   let mkgridcol w = mknode "a:gridCol"
-                       [("w", show ((12700 * w) :: Integer))] ()
+                       [("w", tshow ((12700 * w) :: Integer))] ()
   let hasHeader = not (all null hdrCells)
 
   mbDefTblStyle <- getDefaultTableStyle
@@ -999,7 +1025,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
                  , ("bandRow", if tblPrBandRow tblPr then "1" else "0")
                  ] (case mbDefTblStyle of
                       Nothing -> []
-                      Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty])
+                      Just sty -> [mknode "a:tableStyleId" [] sty])
 
   return $ mknode "a:graphic" []
     [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")]
@@ -1032,7 +1058,7 @@ findPHType ns spElem phType
         -- if it's a named PHType, we want to check that the attribute
         -- value matches.
         Just phElem | (PHType tp) <- phType ->
-                        case findAttrText (QName "type" Nothing Nothing) phElem of
+                        case findAttr (QName "type" Nothing Nothing) phElem of
                           Just tp' -> tp == tp'
                           Nothing -> False
         -- if it's an ObjType, we want to check that there is NO
@@ -1083,7 +1109,7 @@ contentToElement layout hdrShape shapes
   , Just cSld <- findChild (elemName ns "p" "cSld") layout
   , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
       element <- nonBodyTextToElement layout [PHType "title"] hdrShape
-      let hdrShapeElements = [element | not (null hdrShape)]
+      let hdrShapeElements = [Elem element | not (null hdrShape)]
       contentElements <- local
                          (\env -> env {envContentType = NormalContent})
                          (shapesToElements layout shapes)
@@ -1096,7 +1122,7 @@ twoColumnToElement layout hdrShape shapesL shapesR
   , Just cSld <- findChild (elemName ns "p" "cSld") layout
   , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
       element <- nonBodyTextToElement layout [PHType "title"] hdrShape
-      let hdrShapeElements = [element | not (null hdrShape)]
+      let hdrShapeElements = [Elem element | not (null hdrShape)]
       contentElementsL <- local
                           (\env -> env {envContentType =TwoColumnLeftContent})
                           (shapesToElements layout shapesL)
@@ -1105,7 +1131,8 @@ twoColumnToElement layout hdrShape shapesL shapesR
                           (shapesToElements layout shapesR)
       -- let contentElementsL' = map (setIdx ns "1") contentElementsL
       --     contentElementsR' = map (setIdx ns "2") contentElementsR
-      return $ buildSpTree ns spTree (hdrShapeElements <> contentElementsL <> contentElementsR)
+      return $ buildSpTree ns spTree $
+        hdrShapeElements <> contentElementsL <> contentElementsR
 twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] ()
 
 
@@ -1115,7 +1142,7 @@ titleToElement layout titleElems
   , Just cSld <- findChild (elemName ns "p" "cSld") layout
   , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
       element <- nonBodyTextToElement layout [PHType "title", PHType "ctrTitle"] titleElems
-      let titleShapeElements = [element | not (null titleElems)]
+      let titleShapeElements = [Elem element | not (null titleElems)]
       return $ buildSpTree ns spTree titleShapeElements
 titleToElement _ _ = return $ mknode "p:sp" [] ()
 
@@ -1135,7 +1162,8 @@ metadataToElement layout titleElems subtitleElems authorsElems dateElems
       dateShapeElements <- if null dateElems
                            then return []
                            else sequence [nonBodyTextToElement layout [PHType "dt"] dateElems]
-      return $ buildSpTree ns spTree (titleShapeElements <> subtitleShapeElements <> dateShapeElements)
+      return . buildSpTree ns spTree . map Elem $
+        (titleShapeElements <> subtitleShapeElements <> dateShapeElements)
 metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] ()
 
 slideToElement :: PandocMonad m => Slide -> P m Element
@@ -1197,7 +1225,7 @@ getSlideNumberFieldId notesMaster
   , Just txBody <- findChild (elemName ns "p" "txBody") sp
   , Just p <- findChild (elemName ns "a" "p") txBody
   , Just fld <- findChild (elemName ns "a" "fld") p
-  , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld =
+  , Just fldId <- findAttr (QName "id" Nothing Nothing) fld =
       return fldId
   | otherwise = throwError $
                 PandocSomeError
@@ -1276,11 +1304,11 @@ speakerNotesSlideNumber pgNum fieldId =
     [ mknode "a:bodyPr" [] ()
     , mknode "a:lstStyle" [] ()
     , mknode "a:p" []
-      [ mknode "a:fld" [ ("id", T.unpack fieldId)
+      [ mknode "a:fld" [ ("id", fieldId)
                        , ("type", "slidenum")
                        ]
         [ mknode "a:rPr" [("lang", "en-US")] ()
-        , mknode "a:t" [] (show pgNum)
+        , mknode "a:t" [] (tshow pgNum)
         ]
       , mknode "a:endParaRPr" [("lang", "en-US")] ()
       ]
@@ -1332,7 +1360,7 @@ getSlideIdNum sldId = do
     Just n -> return n
     Nothing -> throwError $
                PandocShouldNeverHappenError $
-               "Slide Id " <> T.pack (show sldId) <> " not found."
+               "Slide Id " <> tshow sldId <> " not found."
 
 slideNum :: PandocMonad m => Slide -> P m Int
 slideNum slide = getSlideIdNum $ slideId slide
@@ -1349,7 +1377,7 @@ slideToRelId :: PandocMonad m => Slide -> P m T.Text
 slideToRelId slide = do
   n <- slideNum slide
   offset <- asks envSlideIdOffset
-  return $ "rId" <> T.pack (show $ n + offset)
+  return $ "rId" <> tshow (n + offset)
 
 
 data Relationship = Relationship { relId :: Int
@@ -1361,13 +1389,11 @@ elementToRel :: Element -> Maybe Relationship
 elementToRel element
   | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing =
       do rId <- findAttr (QName "Id" Nothing Nothing) element
-         numStr <- stripPrefix "rId" rId
-         num <- case reads numStr :: [(Int, String)] of
-           (n, _) : _ -> Just n
-           []         -> Nothing
-         type' <- findAttrText (QName "Type" Nothing Nothing) element
+         numStr <- T.stripPrefix "rId" rId
+         num <- fromIntegral <$> readTextAsInteger numStr
+         type' <- findAttr (QName "Type" Nothing Nothing) element
          target <- findAttr (QName "Target" Nothing Nothing) element
-         return $ Relationship num type' target
+         return $ Relationship num type' (T.unpack target)
   | otherwise = Nothing
 
 slideToPresRel :: PandocMonad m => Slide -> P m Relationship
@@ -1416,11 +1442,8 @@ presentationToRels pres@(Presentation _ slides) = do
   -- all relWithoutSlide rels (unless they're 1)
   -- 3. If we have a notesmaster slide, we make space for that as well.
 
-  let minRelNotOne = case filter (1<) $ map relId relsWeKeep of
-        [] -> 0                 -- doesn't matter in this case, since
-                                -- there will be nothing to map the
-                                -- function over
-        l  -> minimum l
+  let minRelNotOne = maybe 0 minimum $ nonEmpty
+                                     $ filter (1 <) $ map relId relsWeKeep
 
       modifyRelNum :: Int -> Int
       modifyRelNum 1 = 1
@@ -1456,10 +1479,9 @@ topLevelRelsEntry :: PandocMonad m => P m Entry
 topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels
 
 relToElement :: Relationship -> Element
-relToElement rel = mknode "Relationship" [ ("Id", "rId" <>
-    show (relId rel))
-                                         , ("Type", T.unpack $ relType rel)
-                                         , ("Target", relTarget rel) ] ()
+relToElement rel = mknode "Relationship" [ ("Id", "rId" <> tshow (relId rel))
+                                         , ("Type", relType rel)
+                                         , ("Target", T.pack $ relTarget rel) ] ()
 
 relsToElement :: [Relationship] -> Element
 relsToElement rels = mknode "Relationships"
@@ -1494,7 +1516,8 @@ slideToSpeakerNotesEntry slide = do
       Just element | Just notesIdNum <- mbNotesIdNum ->
                        Just <$>
                        elemToEntry
-                       ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml")
+                       ("ppt/notesSlides/notesSlide" <> show notesIdNum <>
+                        ".xml")
                        element
       _ -> return Nothing
 
@@ -1507,7 +1530,7 @@ slideToSpeakerNotesRelElement slide@Slide{} = do
     [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
     [ mknode "Relationship" [ ("Id", "rId2")
                             , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
-                            , ("Target", "../slides/slide" <> show idNum <> ".xml")
+                            , ("Target", "../slides/slide" <> tshow idNum <> ".xml")
                             ] ()
     , mknode "Relationship" [ ("Id", "rId1")
                             , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster")
@@ -1540,15 +1563,15 @@ linkRelElement :: PandocMonad m => (Int, LinkTarget) -> P m Element
 linkRelElement (rIdNum, InternalTarget targetId) = do
   targetIdNum <- getSlideIdNum targetId
   return $
-    mknode "Relationship" [ ("Id", "rId" <> show rIdNum)
+    mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum)
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
-                          , ("Target", "slide" <> show targetIdNum <> ".xml")
+                          , ("Target", "slide" <> tshow targetIdNum <> ".xml")
                           ] ()
 linkRelElement (rIdNum, ExternalTarget (url, _)) =
   return $
-    mknode "Relationship" [ ("Id", "rId" <> show rIdNum)
+    mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum)
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink")
-                          , ("Target", T.unpack url)
+                          , ("Target", url)
                           , ("TargetMode", "External")
                           ] ()
 
@@ -1560,10 +1583,10 @@ mediaRelElement mInfo =
   let ext = fromMaybe "" (mInfoExt mInfo)
   in
     mknode "Relationship" [ ("Id", "rId" <>
-      show (mInfoLocalId mInfo))
+      tshow (mInfoLocalId mInfo))
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image")
                           , ("Target", "../media/image" <>
-      show (mInfoGlobalId mInfo) <> T.unpack ext)
+      tshow (mInfoGlobalId mInfo) <> ext)
                           ] ()
 
 speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element)
@@ -1573,7 +1596,7 @@ speakerNotesSlideRelElement slide = do
   return $ case M.lookup idNum mp of
     Nothing -> Nothing
     Just n ->
-      let target = "../notesSlides/notesSlide" <> show n <> ".xml"
+      let target = "../notesSlides/notesSlide" <> tshow n <> ".xml"
       in Just $
          mknode "Relationship" [ ("Id", "rId2")
                                , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide")
@@ -1612,9 +1635,9 @@ slideToSlideRelElement slide = do
 slideToSldIdElement :: PandocMonad m => Slide -> P m Element
 slideToSldIdElement slide = do
   n <- slideNum slide
-  let id' = show $ n + 255
+  let id' = tshow $ n + 255
   rId <- slideToRelId slide
-  return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] ()
+  return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] ()
 
 presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element
 presentationToSldIdLst (Presentation _ slides) = do
@@ -1639,7 +1662,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do
       notesMasterElem =  mknode "p:notesMasterIdLst" []
                          [ mknode
                            "p:NotesMasterId"
-                           [("r:id", "rId" <> show notesMasterRId)]
+                           [("r:id", "rId" <> tshow notesMasterRId)]
                            ()
                          ]
 
@@ -1695,17 +1718,17 @@ docPropsElement docProps = do
     ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
     ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
     $
-      mknode "dc:title" [] (maybe "" T.unpack $ dcTitle docProps)
+      mknode "dc:title" [] (fromMaybe "" $ dcTitle docProps)
     :
-      mknode "dc:creator" [] (maybe "" T.unpack $ dcCreator docProps)
+      mknode "dc:creator" [] (fromMaybe "" $ dcCreator docProps)
     :
-      mknode "cp:keywords" [] (T.unpack keywords)
-    : ( [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps | isJust (dcSubject docProps)])
-    <> ( [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps | isJust (dcDescription docProps)])
-    <> ( [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps | isJust (cpCategory docProps)])
+      mknode "cp:keywords" [] keywords
+    : ( [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps | isJust (dcSubject docProps)])
+    <> ( [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps | isJust (dcDescription docProps)])
+    <> ( [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps | isJust (cpCategory docProps)])
     <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
-             , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
-             ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+              , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
+              ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime)
 
 docPropsToEntry :: PandocMonad m => DocProps -> P m Entry
 docPropsToEntry docProps = docPropsElement docProps >>=
@@ -1716,8 +1739,8 @@ docCustomPropsElement :: PandocMonad m => DocProps -> P m Element
 docCustomPropsElement docProps = do
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
-         ,("pid", show pid)
-         ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v)
+         ,("pid", tshow pid)
+         ,("name", k)] $ mknode "vt:lpwstr" [] v
   return $ mknode "Properties"
           [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties")
           ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes")
@@ -1736,7 +1759,7 @@ viewPropsElement = do
   distArchive <- asks envDistArchive
   viewPrElement <- parseXml refArchive distArchive "ppt/viewProps.xml"
   -- remove  "lastView" if it exists:
-  let notLastView :: Text.XML.Light.Attr -> Bool
+  let notLastView :: XML.Attr -> Bool
       notLastView attr =
           qName (attrKey attr) /= "lastView"
   return $
@@ -1748,15 +1771,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml"
 defaultContentTypeToElem :: DefaultContentType -> Element
 defaultContentTypeToElem dct =
   mknode "Default"
-  [("Extension", T.unpack $ defContentTypesExt dct),
-    ("ContentType", T.unpack $ defContentTypesType dct)]
+  [("Extension", defContentTypesExt dct),
+    ("ContentType", defContentTypesType dct)]
   ()
 
 overrideContentTypeToElem :: OverrideContentType -> Element
 overrideContentTypeToElem oct =
   mknode "Override"
-  [("PartName", overrideContentTypesPart oct),
-    ("ContentType", T.unpack $ overrideContentTypesType oct)]
+  [("PartName", T.pack $ overrideContentTypesPart oct),
+   ("ContentType", overrideContentTypesType oct)]
   ()
 
 contentTypesToElement :: ContentTypes -> Element
@@ -1814,7 +1837,8 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath]
 getSpeakerNotesFilePaths = do
   mp <- asks envSpeakerNotesIdMap
   let notesIdNums = M.elems mp
-  return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums
+  return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml")
+               notesIdNums
 
 presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes
 presentationToContentTypes p@(Presentation _ slides) = do
@@ -1878,11 +1902,11 @@ getContentType fp
   | otherwise = Nothing
 
 -- Kept as String for XML.Light
-autoNumAttrs :: ListAttributes -> [(String, String)]
+autoNumAttrs :: ListAttributes -> [(Text, Text)]
 autoNumAttrs (startNum, numStyle, numDelim) =
   numAttr <> typeAttr
   where
-    numAttr = [("startAt", show startNum) | startNum /= 1]
+    numAttr = [("startAt", tshow startNum) | startNum /= 1]
     typeAttr = [("type", typeString <> delimString)]
     typeString = case numStyle of
       Decimal -> "arabic"
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
index affec38aa..9246a93e9 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
@@ -45,6 +45,7 @@ module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation
 import Control.Monad.Reader
 import Control.Monad.State
 import Data.List (intercalate)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Default
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -363,9 +364,7 @@ inlineToParElems (Note blks) = do
     then return []
     else do
     notes <- gets stNoteIds
-    let maxNoteId = case M.keys notes of
-          [] -> 0
-          lst -> maximum lst
+    let maxNoteId = maybe 0 maximum $ nonEmpty $ M.keys notes
         curNoteId = maxNoteId + 1
     modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes }
     local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 43bf382b7..983ef412a 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -16,7 +16,8 @@ reStructuredText:  <http://docutils.sourceforge.net/rst.html>
 module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
 import Control.Monad.State.Strict
 import Data.Char (isSpace)
-import Data.List (transpose, intersperse)
+import Data.List (transpose, intersperse, foldl')
+import qualified Data.List.NonEmpty as NE
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -143,9 +144,12 @@ pictToRST (label, (attr, src, _, mbtarget)) = do
   let (_, cls, _) = attr
       classes = case cls of
                    []               -> empty
-                   ["align-right"]  -> ":align: right"
-                   ["align-left"]   -> ":align: left"
-                   ["align-center"] -> ":align: center"
+                   ["align-top"]    -> ":align: top"
+                   ["align-middle"] -> ":align: middle"
+                   ["align-bottom"] -> ":align: bottom"
+                   ["align-center"] -> empty
+                   ["align-right"]  -> empty
+                   ["align-left"]   -> empty
                    _                -> ":class: " <> literal (T.unwords cls)
   return $ nowrap
          $ ".. |" <> label' <> "| image:: " <> literal src $$ hang 3 empty (classes $$ dims)
@@ -215,19 +219,28 @@ blockToRST (Div (ident,classes,_kvs) bs) = do
            nest 3 contents $$
            blankline
 blockToRST (Plain inlines) = inlineListToRST inlines
--- title beginning with fig: indicates that the image is a figure
-blockToRST (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do
-  capt <- inlineListToRST txt
+blockToRST (Para [Image attr txt (src, rawtit)]) = do
+  description <- inlineListToRST txt
   dims <- imageDimsToRST attr
-  let fig = "figure:: " <> literal src
-      alt = ":alt: " <> if T.null tit then capt else literal tit
+  -- title beginning with fig: indicates that the image is a figure
+  let (isfig, tit) = case T.stripPrefix "fig:" rawtit of
+                          Nothing   -> (False, rawtit)
+                          Just tit' -> (True, tit')
+  let fig | isfig = "figure:: " <> literal src
+          | otherwise = "image:: " <> literal src
+      alt | isfig = ":alt: " <> if T.null tit then description else literal tit
+          | null txt = empty
+          | otherwise = ":alt: " <> description
+      capt | isfig = description
+           | otherwise = empty
       (_,cls,_) = attr
       classes = case cls of
                    []               -> empty
                    ["align-right"]  -> ":align: right"
                    ["align-left"]   -> ":align: left"
                    ["align-center"] -> ":align: center"
-                   _                -> ":figclass: " <> literal (T.unwords cls)
+                   _ | isfig        -> ":figclass: " <> literal (T.unwords cls)
+                     | otherwise    -> ":class: " <> literal (T.unwords cls)
   return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline
 blockToRST (Para inlines)
   | LineBreak `elem` inlines =
@@ -323,7 +336,7 @@ blockToRST (OrderedList (start, style', delim) items) = do
                    then replicate (length items) "#."
                    else take (length items) $ orderedListMarkers
                                               (start, style', delim)
-  let maxMarkerLength = maximum $ map T.length markers
+  let maxMarkerLength = maybe 0 maximum $ NE.nonEmpty $ map T.length markers
   let markers' = map (\m -> let s = maxMarkerLength - T.length m
                             in  m <> T.replicate s " ") markers
   contents <- zipWithM orderedListItemToRST markers' items
@@ -497,7 +510,7 @@ flatten outer
   | null contents = [outer]
   | otherwise     = combineAll contents
   where contents = dropInlineParent outer
-        combineAll = foldl combine []
+        combineAll = foldl' combine []
 
         combine :: [Inline] -> Inline -> [Inline]
         combine f i =
@@ -507,8 +520,8 @@ flatten outer
           (Quoted _ _, _)          -> keep f i
           (_, Quoted _ _)          -> keep f i
           -- spans are not rendered using RST inlines, so we can keep them
-          (Span ("",[],[]) _, _)   -> keep f i
-          (_, Span ("",[],[]) _)   -> keep f i
+          (Span (_,_,[]) _, _)   -> keep f i
+          (_, Span (_,_,[]) _)   -> keep f i
           -- inlineToRST handles this case properly so it's safe to keep
           ( Link{}, Image{})       -> keep f i
           -- parent inlines would prevent links from being correctly
@@ -525,11 +538,15 @@ flatten outer
         collapse f i = appendToLast f $ dropInlineParent i
 
         appendToLast :: [Inline] -> [Inline] -> [Inline]
-        appendToLast [] toAppend = [setInlineChildren outer toAppend]
-        appendToLast flattened toAppend
-          | isOuter lastFlat = init flattened <> [appendTo lastFlat toAppend]
-          | otherwise =  flattened <> [setInlineChildren outer toAppend]
-          where lastFlat = last flattened
+        appendToLast flattened toAppend =
+          case NE.nonEmpty flattened of
+            Nothing -> [setInlineChildren outer toAppend]
+            Just xs ->
+              if isOuter lastFlat
+                 then NE.init xs <> [appendTo lastFlat toAppend]
+                 else flattened <> [setInlineChildren outer toAppend]
+               where
+                lastFlat = NE.last xs
                 appendTo o i = mapNested (<> i) o
                 isOuter i = emptyParent i == emptyParent outer
                 emptyParent i = setInlineChildren i []
@@ -749,8 +766,7 @@ simpleTable opts blocksToDoc headers rows = do
                    then return []
                    else fixEmpties <$> mapM (blocksToDoc opts) headers
   rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows
-  let numChars [] = 0
-      numChars xs = maximum . map offset $ xs
+  let numChars = maybe 0 maximum . NE.nonEmpty . map offset
   let colWidths = map numChars $ transpose (headerDocs : rowDocs)
   let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths
   let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths)
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index e3966ed07..3527949b4 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.RTF
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad
 import qualified Data.ByteString as B
-import Data.Char (chr, isDigit, ord)
+import Data.Char (chr, isDigit, ord, isAlphaNum)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c ->
 
 -- | Escape special characters.
 escapeSpecial :: Text -> Text
-escapeSpecial = escapeStringUsing $
-  [ ('\t',"\\tab ")
-  , ('\8216',"\\u8216'")
-  , ('\8217',"\\u8217'")
-  , ('\8220',"\\u8220\"")
-  , ('\8221',"\\u8221\"")
-  , ('\8211',"\\u8211-")
-  , ('\8212',"\\u8212-")
-  ] <> backslashEscapes "{\\}"
+escapeSpecial t
+  | T.all isAlphaNum t = t
+  | otherwise          = T.concatMap escChar t
+ where
+  escChar '\t' = "\\tab "
+  escChar '\8216' = "\\u8216'"
+  escChar '\8217' = "\\u8217'"
+  escChar '\8220' = "\\u8220\""
+  escChar '\8221' = "\\u8221\""
+  escChar '\8211' = "\\u8211-"
+  escChar '\8212' = "\\u8212-"
+  escChar '{'     = "\\{"
+  escChar '}'     = "\\}"
+  escChar '\\'    = "\\\\"
+  escChar c       = T.singleton c
 
 -- | Escape strings as needed for rich text format.
 stringToRTF :: Text -> Text
diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs
index 00b027cc9..6af56242f 100644
--- a/src/Text/Pandoc/Writers/Roff.hs
+++ b/src/Text/Pandoc/Writers/Roff.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Roff
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index b399afbf3..0b7c6bee0 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Shared
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -20,6 +20,7 @@ module Text.Pandoc.Writers.Shared (
                      , setField
                      , resetField
                      , defField
+                     , getLang
                      , tagWithAttrs
                      , isDisplayMath
                      , fixDisplayMath
@@ -44,6 +45,7 @@ import Control.Monad (zipWithM)
 import Data.Aeson (ToJSON (..), encode)
 import Data.Char (chr, ord, isSpace)
 import Data.List (groupBy, intersperse, transpose, foldl')
+import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
 import Data.Text.Conversions (FromText(..))
 import qualified Data.Map as M
 import qualified Data.Text as T
@@ -109,8 +111,7 @@ metaValueToVal blockWriter inlineWriter (MetaMap metamap) =
   MapVal . Context <$> mapM (metaValueToVal blockWriter inlineWriter) metamap
 metaValueToVal blockWriter inlineWriter (MetaList xs) = ListVal <$>
   mapM (metaValueToVal blockWriter inlineWriter) xs
-metaValueToVal _ _ (MetaBool True) = return $ SimpleVal "true"
-metaValueToVal _ _ (MetaBool False) = return NullVal
+metaValueToVal _ _ (MetaBool b) = return $ BoolVal b
 metaValueToVal _ inlineWriter (MetaString s) =
    SimpleVal <$> inlineWriter (Builder.toList (Builder.text s))
 metaValueToVal blockWriter _ (MetaBlocks bs) = SimpleVal <$> blockWriter bs
@@ -147,6 +148,19 @@ defField field val (Context m) =
   where
     f _newval oldval = oldval
 
+-- | Get the contents of the `lang` metadata field or variable.
+getLang :: WriterOptions -> Meta -> Maybe T.Text
+getLang opts meta =
+  case lookupContext "lang" (writerVariables opts) of
+        Just s -> Just s
+        _      ->
+          case lookupMeta "lang" meta of
+               Just (MetaBlocks [Para [Str s]])  -> Just s
+               Just (MetaBlocks [Plain [Str s]]) -> Just s
+               Just (MetaInlines [Str s])        -> Just s
+               Just (MetaString s)               -> Just s
+               _                                 -> Nothing
+
 -- | Produce an HTML tag with the given pandoc attributes.
 tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a
 tagWithAttrs tag (ident,classes,kvs) = hsep
@@ -225,7 +239,7 @@ gridTable :: (Monad m, HasChars a)
           -> m (Doc a)
 gridTable opts blocksToDoc headless aligns widths headers rows = do
   -- the number of columns will be used in case of even widths
-  let numcols = maximum (length aligns : length widths :
+  let numcols = maximum (length aligns :| length widths :
                            map length (headers:rows))
   let officialWidthsInChars widths' = map (
                         (\x -> if x < 1 then 1 else x) .
@@ -254,8 +268,7 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do
   let handleFullWidths widths' = do
         rawHeaders' <- mapM (blocksToDoc opts) headers
         rawRows' <- mapM (mapM (blocksToDoc opts)) rows
-        let numChars [] = 0
-            numChars xs = maximum . map offset $ xs
+        let numChars = maybe 0 maximum . nonEmpty . map offset
         let minWidthsInChars =
                 map numChars $ transpose (rawHeaders' : rawRows')
         let widthsInChars' = zipWith max
@@ -381,6 +394,7 @@ toSuperscript '2' = Just '\x00B2'
 toSuperscript '3' = Just '\x00B3'
 toSuperscript '+' = Just '\x207A'
 toSuperscript '-' = Just '\x207B'
+toSuperscript '\x2212' = Just '\x207B' -- unicode minus
 toSuperscript '=' = Just '\x207C'
 toSuperscript '(' = Just '\x207D'
 toSuperscript ')' = Just '\x207E'
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index a9ee5eece..18015259d 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE PatternGuards     #-}
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -146,16 +146,17 @@ blockToTEI opts (LineBlock lns) =
   blockToTEI opts $ linesToPara lns
 blockToTEI opts (BlockQuote blocks) =
   inTagsIndented "quote" <$> blocksToTEI opts blocks
-blockToTEI _ (CodeBlock (_,classes,_) str) =
+blockToTEI opts (CodeBlock (_,classes,_) str) =
   return $ literal ("<ab type='codeblock " <> lang <> "'>") <> cr <>
      flush (literal (escapeStringForXML str) <> cr <> text "</ab>")
     where lang  = if null langs
                      then ""
                      else escapeStringForXML (head langs)
-          isLang l    = T.toLower l `elem` map T.toLower languages
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 blockToTEI opts (BulletList lst) = do
   let attribs = [("type", "unordered")]
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index c6debd9ce..6a33b4283 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Texinfo
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
                                2012 Peter Wang
    License     : GNU GPL, version 2 or above
 
@@ -14,8 +14,9 @@ Conversion of 'Pandoc' format into Texinfo.
 module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
-import Data.Char (chr, ord)
-import Data.List (maximumBy, transpose)
+import Data.Char (chr, ord, isAlphaNum)
+import Data.List (maximumBy, transpose, foldl')
+import Data.List.NonEmpty (nonEmpty)
 import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
@@ -84,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do
 
 -- | Escape things as needed for Texinfo.
 stringToTexinfo :: Text -> Text
-stringToTexinfo = escapeStringUsing texinfoEscapes
-  where texinfoEscapes = [ ('{', "@{")
-                         , ('}', "@}")
-                         , ('@', "@@")
-                         , ('\160', "@ ")
-                         , ('\x2014', "---")
-                         , ('\x2013', "--")
-                         , ('\x2026', "@dots{}")
-                         , ('\x2019', "'")
-                         ]
+stringToTexinfo t
+  | T.all isAlphaNum t = t
+  | otherwise = T.concatMap escChar t
+  where escChar '{'      = "@{"
+        escChar '}'      = "@}"
+        escChar '@'      = "@@"
+        escChar '\160'   = "@ "
+        escChar '\x2014' = "---"
+        escChar '\x2013' = "--"
+        escChar '\x2026' = "@dots{}"
+        escChar '\x2019' = "'"
+        escChar c        = T.singleton c
 
 escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
 escapeCommas parser = do
@@ -238,9 +241,13 @@ blockToTexinfo (Table _ blkCapt specs thead tbody tfoot) = do
   colDescriptors <-
     if all (== 0) widths
        then do -- use longest entry instead of column widths
-            cols <- mapM (mapM (liftM (T.unpack . render Nothing . hcat) . mapM blockToTexinfo)) $
+            cols <- mapM (mapM (fmap (T.unpack . render Nothing . hcat) .
+                           mapM blockToTexinfo)) $
                         transpose $ heads : rows
-            return $ concatMap ((\x -> "{"++x++"} ") .  maximumBy (comparing length)) cols
+            return $ concatMap
+                ((\x -> "{"++x++"} ") .
+                        maybe "" (maximumBy (comparing length)) . nonEmpty)
+                cols
        else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths
   let tableBody = text ("@multitable " ++ colDescriptors) $$
                   headers $$
@@ -271,7 +278,7 @@ tableAnyRowToTexinfo :: PandocMonad m
                      -> [[Block]]
                      -> TI m (Doc Text)
 tableAnyRowToTexinfo itemtype aligns cols =
-  (literal itemtype $$) . foldl (\row item -> row $$
+  (literal itemtype $$) . foldl' (\row item -> row $$
   (if isEmpty row then empty else text " @tab ") <> item) empty <$> zipWithM alignedBlock aligns cols
 
 alignedBlock :: PandocMonad m
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 61ddb7497..03d030477 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Textile
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index 902b093d3..df914f590 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.ZimWiki
-   Copyright   : © 2008-2020 John MacFarlane,
+   Copyright   : © 2008-2021 John MacFarlane,
                    2017-2019 Alex Ivkin
    License     : GNU GPL, version 2 or above
 
@@ -20,6 +20,7 @@ import Control.Monad (zipWithM)
 import Control.Monad.State.Strict (StateT, evalStateT, gets, modify)
 import Data.Default (Default (..))
 import Data.List (transpose)
+import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Map as Map
 import Text.DocLayout (render, literal)
 import Data.Maybe (fromMaybe)
@@ -115,7 +116,7 @@ blockToZimWiki opts b@(RawBlock f str)
 blockToZimWiki _ HorizontalRule = return "\n----\n"
 
 blockToZimWiki opts (Header level _ inlines) = do
-  contents <- inlineListToZimWiki opts $ removeFormatting inlines   -- emphasis, links etc. not allowed in headers
+  contents <- inlineListToZimWiki opts inlines
   let eqs = T.replicate ( 7 - level ) "="
   return $ eqs <> " " <> contents <> " " <> eqs <> "\n"
 
@@ -143,7 +144,8 @@ blockToZimWiki opts (Table _ blkCapt specs thead tbody tfoot) = do
                  then zipWithM (tableItemToZimWiki opts) aligns (head rows)
                  else mapM (inlineListToZimWiki opts . removeFormatting)headers  -- emphasis, links etc. are not allowed in table headers
   rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows
-  let widths = map (maximum . map T.length) $ transpose (headers':rows')
+  let widths = map (maybe 0 maximum . nonEmpty . map T.length) $
+                  transpose (headers':rows')
   let padTo (width, al) s =
           case width - T.length s of
                x | x > 0 ->
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 4b71d7b69..79b4768ec 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -1,9 +1,8 @@
 {-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.XML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -14,6 +13,7 @@ Functions for escaping and formatting XML.
 -}
 module Text.Pandoc.XML ( escapeCharForXML,
                          escapeStringForXML,
+                         escapeNCName,
                          inTags,
                          selfClosingTag,
                          inTagsSimple,
@@ -25,7 +25,7 @@ module Text.Pandoc.XML ( escapeCharForXML,
                          html5Attributes,
                          rdfaAttributes ) where
 
-import Data.Char (isAscii, isSpace, ord)
+import Data.Char (isAscii, isSpace, ord, isLetter, isDigit)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities)
@@ -120,26 +120,48 @@ html5EntityMap = foldr go mempty htmlEntities
              where ent' = T.takeWhile (/=';') (T.pack ent)
            _   -> entmap
 
+-- | Converts a string into an NCName, i.e., an XML name without colons.
+-- Disallowed characters are escaped using @ux%x@, where @%x@ is the
+-- hexadecimal unicode identifier of the escaped character.
+escapeNCName :: Text -> Text
+escapeNCName t = case T.uncons t of
+  Nothing -> T.empty
+  Just (c, cs) -> escapeStartChar c <> T.concatMap escapeNCNameChar cs
+  where
+    escapeStartChar :: Char -> Text
+    escapeStartChar c = if isLetter c || c == '_'
+                        then T.singleton c
+                        else escapeChar c
 
--- Unescapes XML entities
+    escapeNCNameChar :: Char -> Text
+    escapeNCNameChar c = if isNCNameChar c
+                         then T.singleton c
+                         else escapeChar c
+
+    isNCNameChar :: Char -> Bool
+    isNCNameChar c = isLetter c || c `elem` ("_-.·" :: String) || isDigit c
+      || '\x0300' <= c && c <= '\x036f'
+      || '\x203f' <= c && c <= '\x2040'
+
+    escapeChar :: Char -> Text
+    escapeChar = T.pack . printf "U%04X" . ord
+
+-- | Unescapes XML entities
 fromEntities :: Text -> Text
-fromEntities = T.pack . fromEntities'
+fromEntities t
+  = let (x, y) = T.break (== '&') t
+     in if T.null y
+           then t
+           else x <>
+             let (ent, rest) = T.break (\c -> isSpace c || c == ';') y
+                 rest' = case T.uncons rest of
+                            Just (';',ys) -> ys
+                            _ -> rest
+                 ent' = T.drop 1 ent <> ";"
+              in case T.pack <$> lookupEntity (T.unpack ent') of
+                   Just c  -> c <> fromEntities rest'
+                   Nothing  -> ent <> fromEntities rest
 
-fromEntities' :: Text -> String
-fromEntities' (T.uncons -> Just ('&', xs)) =
-  case lookupEntity $ T.unpack ent' of
-        Just c  -> c <> fromEntities' rest
-        Nothing -> "&" <> fromEntities' xs
-    where (ent, rest) = case T.break (\c -> isSpace c || c == ';') xs of
-                          (zs,T.uncons -> Just (';',ys)) -> (zs,ys)
-                          (zs, ys) -> (zs,ys)
-          ent'
-            | Just ys <- T.stripPrefix "#X" ent = "#x" <> ys  -- workaround tagsoup bug
-            | Just ('#', _) <- T.uncons ent     = ent
-            | otherwise                         = ent <> ";"
-fromEntities' t = case T.uncons t of
-  Just (x, xs) -> x : fromEntities' xs
-  Nothing      -> ""
 
 html5Attributes :: Set.Set Text
 html5Attributes = Set.fromList
diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs
new file mode 100644
index 000000000..07113ea92
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light.hs
@@ -0,0 +1,89 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XML.Light
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+xml-light, which we used in pandoc's the XML-based readers, has
+some limitations: in particular, it produces nodes with String
+instead of Text, and the parser falls over on processing instructions
+(see #7091).
+
+This module exports much of the API of xml-light, but using Text instead
+of String. In addition, the xml-light parsers are replaced by xml-conduit's
+well-tested parser.  (The xml-conduit types are mapped to types
+isomorphic to xml-light's, to avoid the need for massive code modifications
+elsewhere.)  Bridge functions to map xml-light types to this module's
+types are also provided (since libraries like texmath still use xml-light).
+
+Another advantage of the xml-conduit parser is that it gives us
+detailed information on xml parse errors.
+
+In the future we may want to move to using xml-conduit or another
+xml library in the code base, but this change gives us
+better performance and accuracy without much change in the
+code that used xml-light.
+-}
+module Text.Pandoc.XML.Light
+  ( module Text.Pandoc.XML.Light.Types
+  , module Text.Pandoc.XML.Light.Proc
+  , module Text.Pandoc.XML.Light.Output
+    -- * Replacement for xml-light's Text.XML.Input
+  , parseXMLElement
+  , parseXMLContents
+  ) where
+
+import qualified Control.Exception as E
+import qualified Text.XML as Conduit
+import Text.XML.Unresolved (InvalidEventStream(..))
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Map as M
+import Data.Maybe (mapMaybe)
+import Text.Pandoc.XML.Light.Types
+import Text.Pandoc.XML.Light.Proc
+import Text.Pandoc.XML.Light.Output
+
+-- Drop in replacement for parseXMLDoc in xml-light.
+parseXMLElement :: TL.Text -> Either T.Text Element
+parseXMLElement t =
+  elementToElement .  Conduit.documentRoot <$>
+    either (Left . T.pack . E.displayException) Right
+    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
+
+parseXMLContents :: TL.Text -> Either T.Text [Content]
+parseXMLContents t =
+  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
+    Left e ->
+      case E.fromException e of
+        Just (ContentAfterRoot _) ->
+          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
+        _ -> Left . T.pack . E.displayException $ e
+    Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x]
+
+elementToElement :: Conduit.Element -> Element
+elementToElement (Conduit.Element name attribMap nodes) =
+  Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
+ where
+  attrs = map (\(n,v) -> Attr (nameToQname n) v) $
+              M.toList attribMap
+  nameToQname (Conduit.Name localName mbns mbpref) =
+    case mbpref of
+      Nothing ->
+        case T.stripPrefix "xmlns:" localName of
+          Just rest -> QName rest mbns (Just "xmlns")
+          Nothing   -> QName localName mbns mbpref
+      _ -> QName localName mbns mbpref
+
+nodeToContent :: Conduit.Node -> Maybe Content
+nodeToContent (Conduit.NodeElement el) =
+  Just (Elem (elementToElement el))
+nodeToContent (Conduit.NodeContent t) =
+  Just (Text (CData CDataText t Nothing))
+nodeToContent _ = Nothing
+
diff --git a/src/Text/Pandoc/XML/Light/Output.hs b/src/Text/Pandoc/XML/Light/Output.hs
new file mode 100644
index 000000000..8182ef2ec
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light/Output.hs
@@ -0,0 +1,234 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XML.Light.Output
+   Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+   This code is based on code from xml-light, released under the BSD3 license.
+   We use a text Builder instead of ShowS.
+-}
+module Text.Pandoc.XML.Light.Output
+  ( -- * Replacement for xml-light's Text.XML.Output
+    ppTopElement
+  , ppElement
+  , ppContent
+  , ppcElement
+  , ppcContent
+  , showTopElement
+  , showElement
+  , showContent
+  , useShortEmptyTags
+  , defaultConfigPP
+  , ConfigPP(..)
+  ) where
+
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText)
+import Text.Pandoc.XML.Light.Types
+
+--
+-- duplicates functinos from Text.XML.Output
+--
+
+-- | The XML 1.0 header
+xmlHeader :: Text
+xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+
+
+--------------------------------------------------------------------------------
+data ConfigPP = ConfigPP
+  { shortEmptyTag :: QName -> Bool
+  , prettify      :: Bool
+  }
+
+-- | Default pretty orinting configuration.
+--  * Always use abbreviate empty tags.
+defaultConfigPP :: ConfigPP
+defaultConfigPP = ConfigPP { shortEmptyTag = const True
+                           , prettify      = False
+                           }
+
+-- | The predicate specifies for which empty tags we should use XML's
+-- abbreviated notation <TAG />.  This is useful if we are working with
+-- some XML-ish standards (such as certain versions of HTML) where some
+-- empty tags should always be displayed in the <TAG></TAG> form.
+useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP
+useShortEmptyTags p c = c { shortEmptyTag = p }
+
+
+-- | Specify if we should use extra white-space to make document more readable.
+-- WARNING: This adds additional white-space to text elements,
+-- and so it may change the meaning of the document.
+useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP
+useExtraWhiteSpace p c  = c { prettify = p }
+
+-- | A configuration that tries to make things pretty
+-- (possibly at the cost of changing the semantics a bit
+-- through adding white space.)
+prettyConfigPP     :: ConfigPP
+prettyConfigPP      = useExtraWhiteSpace True defaultConfigPP
+
+
+--------------------------------------------------------------------------------
+
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppTopElement       :: Element -> Text
+ppTopElement        = ppcTopElement prettyConfigPP
+
+-- | Pretty printing elements
+ppElement          :: Element -> Text
+ppElement           = ppcElement prettyConfigPP
+
+-- | Pretty printing content
+ppContent          :: Content -> Text
+ppContent           = ppcContent prettyConfigPP
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppcTopElement      :: ConfigPP -> Element -> Text
+ppcTopElement c e   = T.unlines [xmlHeader,ppcElement c e]
+
+-- | Pretty printing elements
+ppcElement         :: ConfigPP -> Element -> Text
+ppcElement c        = TL.toStrict . toLazyText . ppElementS c mempty
+
+-- | Pretty printing content
+ppcContent         :: ConfigPP -> Content -> Text
+ppcContent c        = TL.toStrict . toLazyText . ppContentS c mempty
+
+ppcCData           :: ConfigPP -> CData -> Text
+ppcCData c         = TL.toStrict . toLazyText . ppCDataS c mempty
+
+type Indent = Builder
+
+-- | Pretty printing content using ShowT
+ppContentS         :: ConfigPP -> Indent -> Content -> Builder
+ppContentS c i x = case x of
+                     Elem e -> ppElementS c i e
+                     Text t -> ppCDataS c i t
+                     CRef r -> showCRefS r
+
+ppElementS         :: ConfigPP -> Indent -> Element -> Builder
+ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <>
+  (case elContent e of
+    [] | "?" `T.isPrefixOf` qName name -> fromText " ?>"
+       | shortEmptyTag c name  -> fromText " />"
+    [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name
+    cs -> singleton '>' <> nl <>
+          mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <>
+          i <> tagEnd name
+      where (nl,sp)  = if prettify c then ("\n","  ") else ("","")
+  )
+  where name = elName e
+
+ppCDataS           :: ConfigPP -> Indent -> CData -> Builder
+ppCDataS c i t     = i <> if cdVerbatim t /= CDataText || not (prettify c)
+                             then showCDataS t
+                             else foldr cons mempty (T.unpack (showCData t))
+  where cons         :: Char -> Builder -> Builder
+        cons '\n' ys  = singleton '\n' <> i <> ys
+        cons y ys     = singleton y <> ys
+
+
+
+--------------------------------------------------------------------------------
+
+-- | Adds the <?xml?> header.
+showTopElement     :: Element -> Text
+showTopElement c    = xmlHeader <> showElement c
+
+showContent        :: Content -> Text
+showContent         = ppcContent defaultConfigPP
+
+showElement        :: Element -> Text
+showElement         = ppcElement defaultConfigPP
+
+showCData          :: CData -> Text
+showCData           = ppcCData defaultConfigPP
+
+-- Note: crefs should not contain '&', ';', etc.
+showCRefS          :: Text -> Builder
+showCRefS r         = singleton '&' <> fromText r <> singleton ';'
+
+-- | Convert a text element to characters.
+showCDataS         :: CData -> Builder
+showCDataS cd =
+ case cdVerbatim cd of
+   CDataText     -> escStr (cdData cd)
+   CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <>
+                    fromText "]]>"
+   CDataRaw      -> fromText (cdData cd)
+
+--------------------------------------------------------------------------------
+escCData           :: Text -> Builder
+escCData t
+  | "]]>" `T.isPrefixOf` t =
+     fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t)
+escCData t
+  = case T.uncons t of
+      Nothing     -> mempty
+      Just (c,t') -> singleton c <> escCData t'
+
+escChar            :: Char -> Builder
+escChar c = case c of
+  '<'   -> fromText "&lt;"
+  '>'   -> fromText "&gt;"
+  '&'   -> fromText "&amp;"
+  '"'   -> fromText "&quot;"
+  -- we use &#39 instead of &apos; because IE apparently has difficulties
+  -- rendering &apos; in xhtml.
+  -- Reported by Rohan Drape <rohan.drape@gmail.com>.
+  '\''  -> fromText "&#39;"
+  _     -> singleton c
+
+  {- original xml-light version:
+  -- NOTE: We escape '\r' explicitly because otherwise they get lost
+  -- when parsed back in because of then end-of-line normalization rules.
+  _ | isPrint c || c == '\n' -> singleton c
+    | otherwise -> showText "&#" . showsT oc . singleton ';'
+      where oc = ord c
+  -}
+
+escStr             :: Text -> Builder
+escStr cs          = if T.any needsEscape cs
+                        then mconcat (map escChar (T.unpack cs))
+                        else fromText cs
+ where
+  needsEscape '<' = True
+  needsEscape '>' = True
+  needsEscape '&' = True
+  needsEscape '"' = True
+  needsEscape '\'' = True
+  needsEscape _ = False
+
+tagEnd             :: QName -> Builder
+tagEnd qn           = fromText "</" <> showQName qn <> singleton '>'
+
+tagStart           :: QName -> [Attr] -> Builder
+tagStart qn as      = singleton '<' <> showQName qn <> as_str
+ where as_str       = if null as
+                         then mempty
+                         else mconcat (map showAttr as)
+
+showAttr           :: Attr -> Builder
+showAttr (Attr qn v) = singleton ' ' <> showQName qn <>
+                       singleton '=' <>
+                       singleton '"' <> escStr v <> singleton '"'
+
+showQName          :: QName -> Builder
+showQName q         =
+  case qPrefix q of
+    Nothing -> fromText (qName q)
+    Just p  -> fromText p <> singleton ':' <> fromText (qName q)
diff --git a/src/Text/Pandoc/XML/Light/Proc.hs b/src/Text/Pandoc/XML/Light/Proc.hs
new file mode 100644
index 000000000..a1fb200ff
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light/Proc.hs
@@ -0,0 +1,139 @@
+{-# LANGUAGE FlexibleInstances #-}
+{- |
+   Module      : Text.Pandoc.XML.Light.Proc
+   Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+   This code is taken from xml-light, released under the BSD3 license.
+-}
+module Text.Pandoc.XML.Light.Proc
+  ( 
+    -- * Replacement for xml-light's Text.XML.Proc
+    strContent
+  , onlyElems
+  , elChildren
+  , onlyText
+  , findChildren
+  , filterChildren
+  , filterChildrenName
+  , findChild
+  , filterChild
+  , filterChildName
+  , findElement
+  , filterElement
+  , filterElementName
+  , findElements
+  , filterElements
+  , filterElementsName
+  , findAttr
+  , lookupAttr
+  , lookupAttrBy
+  , findAttrBy
+  ) where
+
+import Data.Text (Text)
+import Data.Maybe (listToMaybe)
+import Data.List(find)
+import Text.Pandoc.XML.Light.Types
+
+--
+-- copied from xml-light Text.XML.Proc
+--
+
+-- | Get the text value of an XML element.  This function
+-- ignores non-text elements, and concatenates all text elements.
+strContent         :: Element -> Text
+strContent          = mconcat . map cdData . onlyText . elContent
+
+-- | Select only the elements from a list of XML content.
+onlyElems          :: [Content] -> [Element]
+onlyElems xs        = [ x | Elem x <- xs ]
+
+-- | Select only the elements from a parent.
+elChildren         :: Element -> [Element]
+elChildren e        = [ x | Elem x <- elContent e ]
+
+-- | Select only the text from a list of XML content.
+onlyText           :: [Content] -> [CData]
+onlyText xs         = [ x | Text x <- xs ]
+
+-- | Find all immediate children with the given name.
+findChildren       :: QName -> Element -> [Element]
+findChildren q e    = filterChildren ((q ==) . elName) e
+
+-- | Filter all immediate children wrt a given predicate.
+filterChildren       :: (Element -> Bool) -> Element -> [Element]
+filterChildren p e    = filter p (onlyElems (elContent e))
+
+
+-- | Filter all immediate children wrt a given predicate over their names.
+filterChildrenName      :: (QName -> Bool) -> Element -> [Element]
+filterChildrenName p e   = filter (p.elName) (onlyElems (elContent e))
+
+
+-- | Find an immediate child with the given name.
+findChild          :: QName -> Element -> Maybe Element
+findChild q e       = listToMaybe (findChildren q e)
+
+-- | Find an immediate child with the given name.
+filterChild          :: (Element -> Bool) -> Element -> Maybe Element
+filterChild p e       = listToMaybe (filterChildren p e)
+
+-- | Find an immediate child with name matching a predicate.
+filterChildName      :: (QName -> Bool) -> Element -> Maybe Element
+filterChildName p e   = listToMaybe (filterChildrenName p e)
+
+-- | Find the left-most occurrence of an element matching given name.
+findElement        :: QName -> Element -> Maybe Element
+findElement q e     = listToMaybe (findElements q e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElement        :: (Element -> Bool) -> Element -> Maybe Element
+filterElement p e     = listToMaybe (filterElements p e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElementName     :: (QName -> Bool) -> Element -> Maybe Element
+filterElementName p e  = listToMaybe (filterElementsName p e)
+
+-- | Find all non-nested occurances of an element.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+findElements       :: QName -> Element -> [Element]
+findElements qn e = filterElementsName (qn==) e
+
+-- | Find all non-nested occurrences of an element wrt. given predicate.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElements       :: (Element -> Bool) -> Element -> [Element]
+filterElements p e
+ | p e        = [e]
+ | otherwise  = concatMap (filterElements p) $ onlyElems $ elContent e
+
+-- | Find all non-nested occurences of an element wrt a predicate over element names.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElementsName       :: (QName -> Bool) -> Element -> [Element]
+filterElementsName p e = filterElements (p.elName) e
+
+-- | Lookup the value of an attribute.
+findAttr          :: QName -> Element -> Maybe Text
+findAttr x e       = lookupAttr x (elAttribs e)
+
+-- | Lookup attribute name from list.
+lookupAttr        :: QName -> [Attr] -> Maybe Text
+lookupAttr x       = lookupAttrBy (x ==)
+
+-- | Lookup the first attribute whose name satisfies the given predicate.
+lookupAttrBy       :: (QName -> Bool) -> [Attr] -> Maybe Text
+lookupAttrBy p as   = attrVal `fmap` find (p . attrKey) as
+
+-- | Lookup the value of the first attribute whose name
+-- satisfies the given predicate.
+findAttrBy         :: (QName -> Bool) -> Element -> Maybe Text
+findAttrBy p e      = lookupAttrBy p (elAttribs e)
+
+
diff --git a/src/Text/Pandoc/XML/Light/Types.hs b/src/Text/Pandoc/XML/Light/Types.hs
new file mode 100644
index 000000000..ba602ac1f
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light/Types.hs
@@ -0,0 +1,193 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{- |
+   Module      : Text.Pandoc.XML.Light.Types
+   Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+   This code is taken from xml-light, released under the BSD3 license.
+   It has been modified to use Text instead of String, and the fromXL*
+   functions have been added.
+-}
+module Text.Pandoc.XML.Light.Types
+  ( -- * Basic types, duplicating those from xml-light but with Text
+    -- instead of String
+    Line
+  , Content(..)
+  , Element(..)
+  , Attr(..)
+  , CData(..)
+  , CDataKind(..)
+  , QName(..)
+  , Node(..)
+  , unode
+  , unqual
+  , add_attr
+  , add_attrs
+  -- * Conversion functions from xml-light types
+  , fromXLQName
+  , fromXLCData
+  , fromXLElement
+  , fromXLAttr
+  , fromXLContent
+  ) where
+
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import qualified Text.XML.Light as XL
+
+--
+-- type definitions lightly modified from xml-light
+--
+
+-- | A line is an Integer
+type Line     = Integer
+
+-- | XML content
+data Content  = Elem Element
+              | Text CData
+              | CRef Text
+                deriving (Show, Typeable, Data, Ord, Eq)
+
+-- | XML elements
+data Element  = Element {
+                  elName      :: QName,
+                  elAttribs   :: [Attr],
+                  elContent   :: [Content],
+                  elLine      :: Maybe Line
+                } deriving (Show, Typeable, Data, Ord, Eq)
+
+-- | XML attributes
+data Attr     = Attr {
+                  attrKey :: QName,
+                  attrVal :: Text
+                } deriving (Eq, Ord, Show, Typeable, Data)
+
+-- | XML CData
+data CData    = CData {
+                  cdVerbatim  :: CDataKind,
+                  cdData      :: Text,
+                  cdLine      :: Maybe Line
+                } deriving (Show, Typeable, Data, Ord, Eq)
+
+data CDataKind
+ = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
+ | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
+ | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
+   deriving ( Eq, Ord, Show, Typeable, Data )
+
+-- | XML qualified names
+data QName    = QName {
+                  qName   :: Text,
+                  qURI    :: Maybe Text,
+                  qPrefix :: Maybe Text
+                } deriving (Show, Typeable, Data)
+
+
+instance Eq QName where
+  q1 == q2  = compare q1 q2 == EQ
+
+instance Ord QName where
+  compare q1 q2 =
+    case compare (qName q1) (qName q2) of
+      EQ  -> case (qURI q1, qURI q2) of
+               (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
+               (u1,u2)           -> compare u1 u2
+      x   -> x
+
+class Node t where
+ node :: QName -> t -> Element
+
+instance Node ([Attr],[Content]) where
+  node n (attrs,cont) = Element { elName     = n
+                                , elAttribs  = attrs
+                                , elContent  = cont
+                                , elLine     = Nothing
+                                }
+
+instance Node [Attr]             where node n as   = node n (as,[]::[Content])
+instance Node Attr               where node n a    = node n [a]
+instance Node ()                 where node n ()   = node n ([]::[Attr])
+
+instance Node [Content]          where node n cs     = node n ([]::[Attr],cs)
+instance Node Content            where node n c      = node n [c]
+instance Node ([Attr],Content)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Content)     where node n (a,c)  = node n ([a],[c])
+
+instance Node ([Attr],[Element]) where
+  node n (as,cs) = node n (as,map Elem cs)
+
+instance Node ([Attr],Element)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Element)     where node n (a,c)  = node n ([a],c)
+instance Node [Element]          where node n es     = node n ([]::[Attr],es)
+instance Node Element            where node n e      = node n [e]
+
+instance Node ([Attr],[CData])   where
+  node n (as,cs) = node n (as,map Text cs)
+
+instance Node ([Attr],CData)     where node n (as,c) = node n (as,[c])
+instance Node (Attr,CData)       where node n (a,c)  = node n ([a],c)
+instance Node [CData]            where node n es     = node n ([]::[Attr],es)
+instance Node CData              where node n e      = node n [e]
+
+instance Node ([Attr],Text)      where
+  node n (as,t) = node n (as, CData { cdVerbatim = CDataText
+                                    , cdData = t
+                                    , cdLine = Nothing })
+
+instance Node (Attr,Text )       where node n (a,t)  = node n ([a],t)
+instance Node Text               where node n t      = node n ([]::[Attr],t)
+
+-- | Create node with unqualified name
+unode :: Node t => Text -> t -> Element
+unode = node . unqual
+
+unqual :: Text -> QName
+unqual x = QName x Nothing Nothing
+
+-- | Add an attribute to an element.
+add_attr :: Attr -> Element -> Element
+add_attr a e = add_attrs [a] e
+
+-- | Add some attributes to an element.
+add_attrs :: [Attr] -> Element -> Element
+add_attrs as e = e { elAttribs = as ++ elAttribs e }
+
+--
+-- conversion from xml-light
+--
+
+fromXLQName :: XL.QName -> QName
+fromXLQName qn = QName { qName = T.pack $ XL.qName qn
+                       , qURI = T.pack <$> XL.qURI qn
+                       , qPrefix = T.pack <$> XL.qPrefix qn }
+
+fromXLCData :: XL.CData -> CData
+fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of
+                                        XL.CDataText -> CDataText
+                                        XL.CDataVerbatim -> CDataVerbatim
+                                        XL.CDataRaw -> CDataRaw
+                       , cdData = T.pack $ XL.cdData cd
+                       , cdLine = XL.cdLine cd }
+
+fromXLElement :: XL.Element -> Element
+fromXLElement el = Element { elName = fromXLQName $ XL.elName el
+                           , elAttribs = map fromXLAttr $ XL.elAttribs el
+                           , elContent = map fromXLContent $ XL.elContent el
+                           , elLine = XL.elLine el }
+
+fromXLAttr :: XL.Attr -> Attr
+fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s)
+
+fromXLContent :: XL.Content -> Content
+fromXLContent (XL.Elem el) = Elem $ fromXLElement el
+fromXLContent (XL.Text cd) = Text $ fromXLCData cd
+fromXLContent (XL.CRef s)  = CRef (T.pack s)
+
+
author	Igor Pashev <pashev.igor@gmail.com>	2021-07-17 18:10:34 +0200
committer	Igor Pashev <pashev.igor@gmail.com>	2021-07-17 18:46:16 +0200
commit	48459559a13a20083fc9b31eb523b8ea2bf0a63f (patch)
tree	1c04e75709457403110a6f8c5c90099f22369de3 /src/Text/Pandoc
parent	0c39509d9b6a58958228cebf5d643598e5c98950 (diff)
parent	46099e79defe662e541b12548200caf29063c1c6 (diff)
download	pandoc-48459559a13a20083fc9b31eb523b8ea2bf0a63f.tar.gz