From 8c38390038edcebd55f9dec8359ef983f3813425 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 27 Nov 2020 21:21:25 +0100
Subject: HTML reader tests: improve test coverage of new features

---
 src/Text/Pandoc/Readers/HTML/Table.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 5a783988f..91639fa4c 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -214,7 +214,8 @@ normalize :: [ColWidth] -> TableHead -> [TableBody] -> TableFoot
           -> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
 normalize widths head' bodies foot = do
   let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot
-  let rowLength = length . rowCells
+  let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs
+  let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells
   let ncols = maximum (map rowLength rows)
   let tblType = tableType (map rowCells rows)
   -- fail on empty table
-- 
cgit v1.2.3


From 83d63b72e1b9eff9f2aa3b9f36b56d348f0909a2 Mon Sep 17 00:00:00 2001
From: Tassos Manganaris <tassos.manganaris@gmail.com>
Date: Fri, 27 Nov 2020 23:42:53 -0500
Subject: Fix a tiny Typo in the CSV reader module

Header comment in the CSV reader module says "RST" instead of "CSV".
---
 src/Text/Pandoc/Readers/CSV.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index f0edcaa16..45f4d88d4 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -2,7 +2,7 @@
 
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
-   Module      : Text.Pandoc.Readers.RST
+   Module      : Text.Pandoc.Readers.CSV
    Copyright   : Copyright (C) 2006-2020 John MacFarlane
    License     : GNU GPL, version 2 or above
 
-- 
cgit v1.2.3


From bff9c129c3579f928a0067759d0a784eb5c07d30 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 29 Nov 2020 10:29:51 -0800
Subject: LaTeX reader: don't parse `\rule` with width 0 as horizontal rule.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2a9bff746..15a1a19fc 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1225,6 +1225,16 @@ preamble = mconcat <$> many preambleBlock
                              anyTok
                              return mempty)
 
+rule :: PandocMonad m => LP m Blocks
+rule = do
+  skipopts
+  width <- T.takeWhile (\c -> isDigit c || c == '.') . stringify <$> tok
+  _thickness <- tok
+  -- 0-width rules are used to fix spacing issues:
+  case safeRead width of
+    Just (0 :: Double) -> return mempty
+    _ -> return horizontalRule
+
 paragraph :: PandocMonad m => LP m Blocks
 paragraph = do
   x <- trimInlines . mconcat <$> many1 inline
@@ -1595,7 +1605,7 @@ blockCommands = M.fromList
    --
    , ("hrule", pure horizontalRule)
    , ("strut", pure mempty)
-   , ("rule", skipopts *> tok *> tok $> horizontalRule)
+   , ("rule", rule)
    , ("item", looseItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
-- 
cgit v1.2.3


From ddb76cb356a82f6a9e51a6f3626dd154816e9205 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 5 Dec 2020 09:53:39 -0800
Subject: LaTeX reader: don't apply theorem default styling to a figure inside.

If we put an image in italics, then when rendering to Markdown
we no longer get an implicit figure.

Closes #6925.
---
 src/Text/Pandoc/Readers/LaTeX.hs |  1 +
 test/command/6925.md             | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 test/command/6925.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 15a1a19fc..afe960454 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1812,6 +1812,7 @@ theoremEnvironment name = do
                   _          -> bs
 
 italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
 italicize (Para ils) = Para [Emph ils]
 italicize (Plain ils) = Plain [Emph ils]
 italicize x = x
diff --git a/test/command/6925.md b/test/command/6925.md
new file mode 100644
index 000000000..458a0b91d
--- /dev/null
+++ b/test/command/6925.md
@@ -0,0 +1,34 @@
+```
+% pandoc -f latex -t markdown
+\documentclass{amsart}
+\newtheorem{thm}{Theorem}[section]
+\theoremstyle{definition}
+\newtheorem{thm2}[section]{Theorem}
+\begin{document}
+\begin{thm}
+a
+\begin{figure}
+\includegraphics[]{1.png}
+\end{figure}
+\end{thm}
+
+\begin{thm2}
+a
+\begin{figure}
+\includegraphics[]{1.png}
+\end{figure}
+\end{thm2}
+\end{document}
+^D
+::: {.thm}
+**Theorem 1**. *a*
+
+![image](1.png)
+:::
+
+::: {.thm2}
+**Theorem 1**. a
+
+![image](1.png)
+:::
+```
-- 
cgit v1.2.3


From acf932825bfe40d9a18046c9d304f4f14363a88a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 5 Dec 2020 22:05:37 +0100
Subject: Org reader: preserve targets of spurious links

Links with (internal) targets that the reader doesn't know about are
converted into emphasized text. Information on the link target is now
preserved by wrapping the text in a Span of class `spurious-link`, with
an attribute `target` set to the link's original target. This allows to
recover and fix broken or unknown links with filters.

See: #6916
---
 src/Text/Pandoc/Readers/Org/Inlines.hs | 9 ++++-----
 test/Tests/Readers/Org/Meta.hs         | 6 ++++--
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index b234bee58..0330cf55f 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -477,17 +477,17 @@ linkToInlinesF linkStr =
 
 internalLink :: Text -> Inlines -> F Inlines
 internalLink link title = do
-  anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
-  if anchorB
+  ids <- asksF orgStateAnchorIds
+  if link `elem` ids
     then return $ B.link ("#" <> link) "" title
-    else return $ B.emph title
+    else let attr' = ("", ["spurious-link"] , [("target", link)])
+         in return $ B.spanWith attr' (B.emph title)
 
 -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
 -- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
 -- @org-target-regexp@, which is fairly liberal.  Since no link is created if
 -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
 -- an anchor.
-
 anchor :: PandocMonad m => OrgParser m (F Inlines)
 anchor =  try $ do
   anchorId <- parseAnchor
@@ -501,7 +501,6 @@ anchor =  try $ do
 
 -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'.  This mirrors
 -- the org function @org-export-solidify-link-text@.
-
 solidify :: Text -> Text
 solidify = T.map replaceSpecialChar
  where replaceSpecialChar c
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index 041016f64..bc167f2a5 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -270,7 +270,8 @@ tests =
 
   , "Search links are read as emph" =:
       "[[Wally][Where's Wally?]]" =?>
-      para (emph $ "Where's" <> space <> "Wally?")
+      para (spanWith ("", ["spurious-link"], [("target", "Wally")])
+                     (emph $ "Where's" <> space <> "Wally?"))
 
   , "Link to nonexistent anchor" =:
       T.unlines [ "<<link-here>> Target."
@@ -278,5 +279,6 @@ tests =
                 , "[[link$here][See here!]]"
                 ] =?>
       (para (spanWith ("link-here", [], []) mempty <> "Target.") <>
-       para (emph ("See" <> space <> "here!")))
+       para (spanWith ("", ["spurious-link"], [("target", "link$here")])
+                      (emph ("See" <> space <> "here!"))))
   ]
-- 
cgit v1.2.3


From 501ea7f0c4735acdf1457da44fe04d811ac776d7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 7 Dec 2020 12:15:14 -0800
Subject: Dokuwiki reader:  handle unknown interwiki links better.

DokuWiki lets the user define his own Interwiki links.
Previously pandoc reacted to these by emitting a
google search link, which is not helpful. Instead,
we now just emit the full URL including the
wikilink prefix, e.g. `faquk>FAQ-mathml`.
This at least gives users the ability to
modify the links using filters.

Closes #6932.
---
 src/Text/Pandoc/Readers/DokuWiki.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index 336be09e5..dedc1f03f 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -317,7 +317,7 @@ interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page
 interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page
 interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page
 interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page
-interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky"
+interwikiToUrl unknown page = unknown <> ">" <> page
 
 linkText :: PandocMonad m => DWParser m B.Inlines
 linkText = parseLink fromRaw "[[" "]]"
-- 
cgit v1.2.3


From 8c9010864cd818031d7eff161a57459709751517 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 9 Dec 2020 21:05:40 -0800
Subject: Commonmark reader: refactor specFor, set input name to "".

---
 src/Text/Pandoc/Readers/CommonMark.hs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index c1773eaab..d32a38342 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CommonMark
@@ -27,15 +28,20 @@ import Text.Pandoc.Options
 import Text.Pandoc.Error
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
+import Data.Typeable
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
 readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readCommonMark opts s = do
-  let res = runIdentity $
-              commonmarkWith (foldr ($) defaultSyntaxSpec exts) "input" s
+  let res = runIdentity $ commonmarkWith (specFor opts) "" s
   case res of
     Left err -> throwError $ PandocParsecError s err
     Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+
+specFor :: (Monad m, Typeable m, Typeable a,
+            Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))
+        => ReaderOptions -> SyntaxSpec m (Cm a Inlines) (Cm a Blocks)
+specFor opts = foldr ($) defaultSyntaxSpec exts
  where
   exts = [ (hardLineBreaksSpec <>) | isEnabled Ext_hard_line_breaks opts ] ++
          [ (smartPunctuationSpec <>) | isEnabled Ext_smart opts ] ++
-- 
cgit v1.2.3


From a3eb87b2eab9def3e28364b43300043f5e13268d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 9 Dec 2020 21:14:11 -0800
Subject: Add sourcepos extension for commonmarke

* Add `Ext_sourcepos` constructor for `Extension`.
* Add `sourcepos` extension (only for commonmark).
* Bump to 2.11.3

With the `sourcepos` extension set set, `data-pos` attributes are added
to the AST by the commonmark reader. No other readers are affected.  The
`data-pos` attributes are put on elements that accept attributes; for
other elements, an enlosing Div or Span is added to hold the attributes.

Closes #4565.
---
 MANUAL.txt                            |  7 +++++++
 pandoc.cabal                          |  2 +-
 src/Text/Pandoc/Extensions.hs         |  2 ++
 src/Text/Pandoc/Options.hs            |  1 +
 src/Text/Pandoc/Readers/CommonMark.hs | 14 +++++++++-----
 5 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index 0a1300947..461ebf54d 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5127,6 +5127,13 @@ for regular emphasis, add extra blank space around headings.
 
   [Project Gutenberg]: https://www.gutenberg.org
 
+#### Extension: `sourcepos` ####
+
+Include source position attributes when parsing `commonmark`.
+For elements that accept attributes, a `data-pos` attribute
+is added; other elements are placed in a surrounding
+Div or Span elemnet with a `data-pos` attribute.
+
 ## Markdown variants
 
 In addition to pandoc's extended Markdown, the following Markdown
diff --git a/pandoc.cabal b/pandoc.cabal
index c02dfeb38..5829856da 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -1,6 +1,6 @@
 cabal-version:   2.2
 name:            pandoc
-version:         2.11.2
+version:         2.11.3
 build-type:      Simple
 license:         GPL-2.0-or-later
 license-file:    COPYING.md
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index a94e24e2c..9865f897b 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -154,6 +154,7 @@ data Extension =
     | Ext_yaml_metadata_block -- ^ YAML metadata block
     | Ext_gutenberg           -- ^ Use Project Gutenberg conventions for plain
     | Ext_attributes          -- ^ Generic attribute syntax
+    | Ext_sourcepos           -- ^ Include source position attributes
     deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
 
 -- | Extensions to be used with pandoc-flavored markdown.
@@ -503,6 +504,7 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_implicit_header_references
     , Ext_attributes
     , Ext_fenced_code_attributes
+    , Ext_sourcepos
     ]
   getAll "commonmark_x"    = getAll "commonmark"
   getAll "org"             = autoIdExtensions <>
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index c7f1a56fa..ecd65a54d 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -65,6 +65,7 @@ data ReaderOptions = ReaderOptions{
        , readerDefaultImageExtension :: Text -- ^ Default extension for images
        , readerTrackChanges          :: TrackChanges -- ^ Track changes setting for docx
        , readerStripComments         :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML
+                                             -- (only implemented in commonmark)
 } deriving (Show, Read, Data, Typeable, Generic)
 
 instance HasSyntaxExtensions ReaderOptions where
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index d32a38342..9eef498e1 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -32,11 +32,15 @@ import Data.Typeable
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
 readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMark opts s = do
-  let res = runIdentity $ commonmarkWith (specFor opts) "" s
-  case res of
-    Left err -> throwError $ PandocParsecError s err
-    Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+readCommonMark opts s
+  | isEnabled Ext_sourcepos opts =
+    case runIdentity (commonmarkWith (specFor opts) "" s) of
+      Left err -> throwError $ PandocParsecError s err
+      Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
+  | otherwise =
+    case runIdentity (commonmarkWith (specFor opts) "" s) of
+      Left err -> throwError $ PandocParsecError s err
+      Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
 
 specFor :: (Monad m, Typeable m, Typeable a,
             Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))
-- 
cgit v1.2.3


From 0a502e5ff52b251bbf3da69fd1f9a88d5e0fe92c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 10 Dec 2020 15:44:10 -0800
Subject: HTML reader:  retain attribute prefixes and avoid duplicates.

Previously we stripped attribute prefixes, reading
`xml:lang` as `lang` for example. This resulted in
two duplicate `lang` attributes when `xml:lang` and
`lang` were both used.  This commit causes the prefixes
to be retained, and also avoids invald duplicate
attributes.

Closes #6938.
---
 src/Text/Pandoc/Readers/HTML.hs         | 28 +++++++++++-----------------
 src/Text/Pandoc/Readers/HTML/Parsing.hs | 20 +++++++++++++-------
 test/command/5986.md                    |  2 +-
 test/epub/wasteland.native              |  8 ++++----
 4 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index eb78979a3..f870a241d 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -74,7 +74,7 @@ readHtml :: PandocMonad m
          -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
          -> m Pandoc
 readHtml opts inp = do
-  let tags = stripPrefixes . canonicalizeTags $
+  let tags = stripPrefixes $ canonicalizeTags $
              parseTagsOptions parseOptions{ optTagPosition = True }
              (crFilter inp)
       parseDoc = do
@@ -95,6 +95,15 @@ readHtml opts inp = do
     Right doc -> return doc
     Left  err -> throwError $ PandocParseError $ T.pack $ getError err
 
+-- Strip namespace prefixes on tags (not attributes)
+stripPrefixes :: [Tag Text] -> [Tag Text]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag Text -> Tag Text
+stripPrefix (TagOpen s as) = TagOpen (T.takeWhileEnd (/=':') s) as
+stripPrefix (TagClose s)   = TagClose (T.takeWhileEnd (/=':') s)
+stripPrefix x = x
+
 replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
 replaceNotes bs = do
   st <- getState
@@ -114,7 +123,7 @@ setInPlain = local (\s -> s {inPlain = True})
 pHtml :: PandocMonad m => TagParser m Blocks
 pHtml = try $ do
   (TagOpen "html" attr) <- lookAhead pAny
-  for_ (lookup "lang" attr) $
+  for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
     updateState . B.setMeta "lang" . B.text
   pInTags "html" block
 
@@ -1024,21 +1033,6 @@ htmlTag f = try $ do
          handleTag tagname
        _ -> mzero
 
--- Strip namespace prefixes
-stripPrefixes :: [Tag Text] -> [Tag Text]
-stripPrefixes = map stripPrefix
-
-stripPrefix :: Tag Text -> Tag Text
-stripPrefix (TagOpen s as) =
-    TagOpen (stripPrefix' s) (map (first stripPrefix') as)
-stripPrefix (TagClose s) = TagClose (stripPrefix' s)
-stripPrefix x = x
-
-stripPrefix' :: Text -> Text
-stripPrefix' s =
-  if T.null t then s else T.drop 1 t
-  where (_, t) = T.span (/= ':') s
-
 -- Utilities
 
 -- | Adjusts a url according to the document's base URL.
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index 2d58319da..e28ebe77b 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -193,14 +193,20 @@ t1 `closes` t2 |
 _ `closes` _ = False
 
 toStringAttr :: [(Text, Text)] -> [(Text, Text)]
-toStringAttr = map go
+toStringAttr = foldr go []
   where
-   go (x,y) =
-     case T.stripPrefix "data-" x of
-       Just x' | x' `Set.notMember` (html5Attributes <>
-                                     html4Attributes <> rdfaAttributes)
-         -> (x',y)
-       _ -> (x,y)
+   go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
+   -- treat xml:lang as lang
+   go ("xml:lang",y) ats = go ("lang",y) ats
+   -- prevent duplicate attributes
+   go (x,y) ats
+     | any (\(x',_) -> x == x') ats = ats
+     | otherwise      =
+        case T.stripPrefix "data-" x of
+          Just x' | x' `Set.notMember` (html5Attributes <>
+                                        html4Attributes <> rdfaAttributes)
+            -> go (x',y) ats
+          _ -> (x,y):ats
 
 -- Unlike fromAttrib from tagsoup, this distinguishes
 -- between a missing attribute and an attribute with empty content.
diff --git a/test/command/5986.md b/test/command/5986.md
index ea0ca70c1..ed8dd30c9 100644
--- a/test/command/5986.md
+++ b/test/command/5986.md
@@ -4,7 +4,7 @@
 ^D
 <p><span id="title_page.xhtml"></span></p>
 <p><span id="nav.xhtml"></span></p>
-<nav type="landmarks" id="landmarks" hidden="hidden">
+<nav epub:type="landmarks" id="landmarks" hidden="hidden">
 <ol>
 <li><a href="text/title_page.xhtml">Title Page</a></li>
 <li><a href="#nav.xhtml#toc">Table of Contents</a></li>
diff --git a/test/epub/wasteland.native b/test/epub/wasteland.native
index 0ed23eba2..ff59182a6 100644
--- a/test/epub/wasteland.native
+++ b/test/epub/wasteland.native
@@ -1,8 +1,8 @@
 [Para [Image ("",[],[]) [] ("wasteland-cover.jpg","")]
 ,Para [Span ("wasteland-content.xhtml",[],[]) []]
-,Div ("wasteland-content.xhtml#frontmatter",["section"],[("type","frontmatter")])
+,Div ("wasteland-content.xhtml#frontmatter",["section","frontmatter"],[])
  []
-,Div ("wasteland-content.xhtml#bodymatter",["section"],[("type","bodymatter")])
+,Div ("wasteland-content.xhtml#bodymatter",["section","bodymatter"],[])
  [Div ("wasteland-content.xhtml#ch1",["section"],[])
   [Header 2 ("",[],[]) [Str "I.",Space,Str "THE",Space,Str "BURIAL",Space,Str "OF",Space,Str "THE",Space,Str "DEAD"]
   ,Div ("",["linegroup"],[])
@@ -922,8 +922,8 @@
     [Plain [Str "Datta.",Space,Str "Dayadhvam.",Space,Str "Damyata."]]
    ,Div ("wasteland-content.xhtml#ln434",["linegroup","indent"],[])
     [Plain [Span ("",[],[("lang","sa")]) [Str "Shantih",Space,Str "shantih",Space,Str "shantih",Note [Para [Link ("",[],[]) [Str "434."] ("#wasteland-content.xhtml#ln434",""),Space,Str "Shantih.",Space,Str "Repeated",Space,Str "as",Space,Str "here,",Space,Str "a",Space,Str "formal",Space,Str "ending",Space,Str "to",Space,Str "an",Space,Str "Upanishad.",Space,Str "'The",SoftBreak,Str "Peace",Space,Str "which",Space,Str "passeth",Space,Str "understanding'",Space,Str "is",Space,Str "a",Space,Str "feeble",Space,Str "translation",Space,Str "of",Space,Str "the",SoftBreak,Str "content",Space,Str "of",Space,Str "this",Space,Str "word."]],SoftBreak]]]]]]
-,Div ("wasteland-content.xhtml#backmatter",["section"],[("type","backmatter")])
- [Div ("wasteland-content.xhtml#rearnotes",["section"],[("type","rearnotes")])
+,Div ("wasteland-content.xhtml#backmatter",["section","backmatter"],[])
+ [Div ("wasteland-content.xhtml#rearnotes",["section","rearnotes"],[])
   [Header 2 ("",[],[]) [Str "NOTES",Space,Str "ON",Space,Str "\"THE",Space,Str "WASTE",Space,Str "LAND\""]
   ,Para [Str "Not",Space,Str "only",Space,Str "the",Space,Str "title,",Space,Str "but",Space,Str "the",Space,Str "plan",Space,Str "and",Space,Str "a",Space,Str "good",Space,Str "deal",Space,Str "of",Space,Str "the",Space,Str "incidental",Space,Str "symbolism",Space,Str "of",SoftBreak,Str "the",Space,Str "poem",Space,Str "were",Space,Str "suggested",Space,Str "by",Space,Str "Miss",Space,Str "Jessie",Space,Str "L.",Space,Str "Weston's",Space,Str "book",Space,Str "on",Space,Str "the",Space,Str "Grail",Space,Str "legend:",SoftBreak,Str "From",Space,Str "Ritual",Space,Str "to",Space,Str "Romance"]
   ,Para [Str "Indeed,",Space,Str "so",Space,Str "deeply",Space,Str "am",Space,Str "I",Space,Str "indebted,",Space,Str "Miss",Space,Str "Weston's",Space,Str "book",Space,Str "will",Space,Str "elucidate",Space,Str "the",SoftBreak,Str "difficulties",Space,Str "of",Space,Str "the",Space,Str "poem",Space,Str "much",Space,Str "better",Space,Str "than",Space,Str "my",Space,Str "notes",Space,Str "can",Space,Str "do;",Space,Str "and",Space,Str "I",Space,Str "recommend",Space,Str "it",SoftBreak,Str "(apart",Space,Str "from",Space,Str "the",Space,Str "great",Space,Str "interest",Space,Str "of",Space,Str "the",Space,Str "book",Space,Str "itself)",Space,Str "to",Space,Str "any",Space,Str "who",Space,Str "think",Space,Str "such",SoftBreak,Str "elucidation",Space,Str "of",Space,Str "the",Space,Str "poem",Space,Str "worth",Space,Str "the",Space,Str "trouble.",Space,Str "To",Space,Str "another",Space,Str "work",Space,Str "of",Space,Str "anthropology",Space,Str "I",Space,Str "am",SoftBreak,Str "indebted",Space,Str "in",Space,Str "general,",Space,Str "one",Space,Str "which",Space,Str "has",Space,Str "influenced",Space,Str "our",Space,Str "generation",Space,Str "profoundly;",Space,Str "I",Space,Str "mean",SoftBreak,Str "The",Space,Str "Golden",Space,Str "Bough;",Space,Str "I",Space,Str "have",Space,Str "used",Space,Str "especially",Space,Str "the",Space,Str "two",Space,Str "volumes",Space,Str "Adonis,",Space,Str "Attis,",Space,Str "Osiris.",SoftBreak,Str "Anyone",Space,Str "who",Space,Str "is",Space,Str "acquainted",Space,Str "with",Space,Str "these",Space,Str "works",Space,Str "will",Space,Str "immediately",Space,Str "recognise",Space,Str "in",Space,Str "the",Space,Str "poem",SoftBreak,Str "certain",Space,Str "references",Space,Str "to",Space,Str "vegetation",Space,Str "ceremonies."]
-- 
cgit v1.2.3


From fcd065818901e57f01aca4c919f6102f9a047ba0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 10 Dec 2020 15:51:20 -0800
Subject: HTML reader: pay attention to lang attributes on body.

These (as well as lang attributes on html) should update
lang in metadata. See #6938.
---
 src/Text/Pandoc/Readers/HTML.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f870a241d..f8a17bb78 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -25,7 +25,6 @@ module Text.Pandoc.Readers.HTML ( readHtml
                                 ) where
 
 import Control.Applicative ((<|>))
-import Control.Arrow (first)
 import Control.Monad (guard, msum, mzero, unless, void)
 import Control.Monad.Except (throwError)
 import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
@@ -121,14 +120,18 @@ setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
 setInPlain = local (\s -> s {inPlain = True})
 
 pHtml :: PandocMonad m => TagParser m Blocks
-pHtml = try $ do
+pHtml = do
   (TagOpen "html" attr) <- lookAhead pAny
   for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
     updateState . B.setMeta "lang" . B.text
   pInTags "html" block
 
 pBody :: PandocMonad m => TagParser m Blocks
-pBody = pInTags "body" block
+pBody = do
+  (TagOpen "body" attr) <- lookAhead pAny
+  for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
+    updateState . B.setMeta "lang" . B.text
+  pInTags "body" block
 
 pHead :: PandocMonad m => TagParser m Blocks
 pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
-- 
cgit v1.2.3


From e837ed772efe14a1c05880ec9fbe025862bb303a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 28 Dec 2020 14:48:55 +0100
Subject: HTML reader: use renderTags' from Text.Pandoc.Shared.

The `renderTags'` function was duplicated when the reader used `Text` as
its string type. The duplication is no longer necessary.

A side effect of this change is that empty `<col>` elements are written
as self-closing tags in raw HTML blocks.
---
 src/Text/Pandoc/Readers/HTML.hs | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f8a17bb78..8e94a0812 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -61,8 +61,9 @@ import Text.Pandoc.Options (
     ReaderOptions (readerExtensions, readerStripComments),
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
-import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
-                           extractSpaces, htmlSpanLikeElements, safeRead, tshow)
+import Text.Pandoc.Shared (
+    addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
+    htmlSpanLikeElements, renderTags', safeRead, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
@@ -1045,26 +1046,3 @@ canonicalizeUrl url = do
   return $ case (parseURIReference (T.unpack url), mbBaseHref) of
                 (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
                 _                   -> url
-
--- For now we need a special version here; the one in Shared has String type
-renderTags' :: [Tag Text] -> Text
-renderTags' = renderTagsOptions
-               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
-                                                       "meta", "link"]
-                            , optRawTag   = matchTags ["script", "style"] }
-              where matchTags tags = flip elem tags . T.toLower
-
-
--- EPUB Specific
---
---
-{-
-
-types :: [(String, ([String], Int))]
-types =  -- Document divisions
-   map (\s -> (s, (["section", "body"], 0)))
-    ["volume", "part", "chapter", "division"]
-  <> -- Document section and components
-  [
-    ("abstract",  ([], 0))]
--}
-- 
cgit v1.2.3


From 419190213a7e6b07e2ac66f47d53e9f023af757d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Dec 2020 15:38:48 -0800
Subject: Hlint fixes

---
 src/Text/Pandoc/Readers/Txt2Tags.hs | 2 +-
 src/Text/Pandoc/Writers/Docx.hs     | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 474e4fac0..08083b177 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -564,7 +564,7 @@ getTarget = do
               _                               -> "html"
 
 atStart :: T2T ()
-atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+atStart = getPosition >>= guard . (== 1) . sourceColumn
 
 ignoreSpacesCap :: T2T Text -> T2T Text
 ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 65946ec88..1818ed81d 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -28,6 +28,7 @@ import Data.Char (isSpace, isLetter)
 import Data.List (intercalate, isPrefixOf, isSuffixOf)
 import Data.String (fromString)
 import qualified Data.Map as M
+import Data.Either (fromRight)
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
 import qualified Data.Set as Set
 import qualified Data.Text as T
@@ -1402,7 +1403,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
     generateImgElt (ident, _, _, img) =
       let
         (xpt,ypt) = desiredSizeInPoints opts attr
-               (either (const def) id (imageSize opts img))
+               (fromRight def (imageSize opts img))
         -- 12700 emu = 1 pt
         (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700)
                                 (pageWidth * 12700)
-- 
cgit v1.2.3


From 23f964b9076f9795b00e740d20feebeb1d6d91db Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Dec 2020 21:31:28 -0800
Subject: Mediawiki reader: allow space around storng/emph delimiters.

Closes #6993.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 10 ++++------
 test/command/6993.md                 | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)
 create mode 100644 test/command/6993.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e8985ab2c..d712b1120 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -678,19 +678,17 @@ url = do
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
 inlinesBetween start end =
-  trimInlines . mconcat <$> try (start >> many1Till inner end)
-    where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
-          innerSpace = try $ whitespace <* notFollowedBy' end
+  trimInlines . mconcat <$> try (start >> many1Till inline end)
 
 emph :: PandocMonad m => MWParser m Inlines
 emph = B.emph <$> nested (inlinesBetween start end)
-    where start = sym "''" >> lookAhead nonspaceChar
+    where start = sym "''"
           end   = try $ notFollowedBy' (() <$ strong) >> sym "''"
 
 strong :: PandocMonad m => MWParser m Inlines
 strong = B.strong <$> nested (inlinesBetween start end)
-    where start = sym "'''" >> lookAhead nonspaceChar
-          end   = try $ sym "'''"
+    where start = sym "'''"
+          end   = sym "'''"
 
 doubleQuotes :: PandocMonad m => MWParser m Inlines
 doubleQuotes = do
diff --git a/test/command/6993.md b/test/command/6993.md
new file mode 100644
index 000000000..b7f7f5384
--- /dev/null
+++ b/test/command/6993.md
@@ -0,0 +1,21 @@
+```
+% pandoc -f mediawiki -t native
+'''Should be bold '''
+^D
+[Para [Strong [Str "Should",Space,Str "be",Space,Str "bold"]]]
+```
+
+```
+% pandoc -f mediawiki -t native
+''' Should be bold'''
+^D
+[Para [Strong [Str "Should",Space,Str "be",Space,Str "bold"]]]
+```
+
+```
+% pandoc -f mediawiki -t native
+'' Should be emph ''
+^D
+[Para [Emph [Str "Should",Space,Str "be",Space,Str "emph"]]]
+```
+
-- 
cgit v1.2.3


From 17e3efc785fa8b0680ec6d4ebaac1ea6bdb57e1a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 1 Jan 2021 10:50:58 +0100
Subject: Org reader: restructure output of captioned code blocks

The Div wrapper of code blocks with captions now has the class
"captioned-content". The caption itself is added as a Plain block
inside a Div of class "caption". This makes it easier to write filters
which match on captioned code blocks. Existing filters will need to be
updated.

Closes: #6977
---
 src/Text/Pandoc/Readers/Org/Blocks.hs     | 26 ++++++++++++--------------
 test/Tests/Readers/Org/Block/CodeBlock.hs |  6 +++---
 2 files changed, 15 insertions(+), 17 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index f2e8b1ab6..17e3ff986 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -294,24 +294,22 @@ verseBlock blockType = try $ do
 codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
 codeBlock blockAttrs blockType = do
   skipSpaces
-  (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
-  content           <- rawBlockContent blockType
-  resultsContent    <- option mempty babelResultsBlock
-  let id'            = fromMaybe mempty $ blockAttrName blockAttrs
-  let codeBlck       = B.codeBlockWith ( id', classes, kv ) content
-  let labelledBlck   = maybe (pure codeBlck)
-                             (labelDiv codeBlck)
-                             (blockAttrCaption blockAttrs)
+  (classes, kv)  <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+  content        <- rawBlockContent blockType
+  resultsContent <- option mempty babelResultsBlock
+  let identifier = fromMaybe mempty $ blockAttrName blockAttrs
+  let codeBlk    = B.codeBlockWith (identifier, classes, kv) content
+  let wrap       = maybe pure addCaption (blockAttrCaption blockAttrs)
   return $
-    (if exportsCode kv    then labelledBlck   else mempty) <>
+    (if exportsCode kv    then wrap codeBlk   else mempty) <>
     (if exportsResults kv then resultsContent else mempty)
  where
-   labelDiv :: Blocks -> F Inlines -> F Blocks
-   labelDiv blk value =
-     B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
+   addCaption :: F Inlines -> Blocks -> F Blocks
+   addCaption caption blk = B.divWith ("", ["captioned-content"], [])
+                         <$> (mkCaptionBlock caption <> pure blk)
 
-   labelledBlock :: F Inlines -> F Blocks
-   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+   mkCaptionBlock :: F Inlines -> F Blocks
+   mkCaptionBlock = fmap (B.divWith ("", ["caption"], []) . B.plain)
 
    exportsResults :: [(Text, Text)] -> Bool
    exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs
index adf6661ca..2648a6e1f 100644
--- a/test/Tests/Readers/Org/Block/CodeBlock.hs
+++ b/test/Tests/Readers/Org/Block/CodeBlock.hs
@@ -185,10 +185,10 @@ tests =
                 , "#+end_src"
                 ] =?>
       divWith
-         nullAttr
+         ("", ["captioned-content"], [] )
          (mappend
-          (plain $ spanWith ("", ["label"], [])
-                            (spcSep [ "Functor", "laws", "in", "Haskell" ]))
+          (divWith ("", ["caption"], []) $
+           plain (spcSep [ "Functor", "laws", "in", "Haskell" ]))
           (codeBlockWith ("functor-laws", ["haskell"], [])
                          (T.unlines [ "fmap id = id"
                                     , "fmap (p . q) = (fmap p) . (fmap q)"
-- 
cgit v1.2.3


From 260aaaacc640cc458259e292c42e15c3d2f34b3f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 2 Jan 2021 08:19:00 -0800
Subject: LaTeX reader: put contents of unknown environments in a Div...

when `raw_tex` is not enabled. (When `raw_tex` is enabled,
the whole environment is parsed as a raw block.)
The class name is the name of the environment.
Previously, we just included the contents without the
surrounding Div, but having a record of the environment's
boundaries and name can be useful.

Closes #6997.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index afe960454..d9fe0f502 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1835,7 +1835,7 @@ rawEnv name = do
        report $ SkippedContent beginCommand pos1
        pos2 <- getPosition
        report $ SkippedContent ("\\end{" <> name <> "}") pos2
-       return bs
+       return $ divWith ("",[name],[]) bs
 
 rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
 rawVerbEnv name = do
-- 
cgit v1.2.3


From 57b10941521d9db808e3892ea9893878065b7c50 Mon Sep 17 00:00:00 2001
From: Dimitri Sabadie <dimitri.sabadie@gmail.com>
Date: Sun, 3 Jan 2021 08:57:47 +0100
Subject: Org reader: mark verbatim code with class "verbatim". (#6998)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Replace org-mode’s verbatim from code to codeWith.

This adds the `"verbatim"` class so that exporters can apply a specific
style on it. For instance, it will be possible for HTML to add a CSS
rule for code + verbatim class.

* Alter test for org-mode’s verbatim change.

See previous commit for further detail on the new implementation.
---
 src/Text/Pandoc/Readers/Org/Inlines.hs | 2 +-
 test/Tests/Readers/Org/Inline.hs       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 0330cf55f..b819a34a3 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -572,7 +572,7 @@ underline :: PandocMonad m => OrgParser m (F Inlines)
 underline = fmap B.underline    <$> emphasisBetween '_'
 
 verbatim  :: PandocMonad m => OrgParser m (F Inlines)
-verbatim  = return . B.code     <$> verbatimBetween '='
+verbatim  = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '='
 
 code      :: PandocMonad m => OrgParser m (F Inlines)
 code      = return . B.code     <$> verbatimBetween '~'
diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs
index 9edd328c3..b5d0caa26 100644
--- a/test/Tests/Readers/Org/Inline.hs
+++ b/test/Tests/Readers/Org/Inline.hs
@@ -56,7 +56,7 @@ tests =
 
   , "Verbatim" =:
       "=Robot.rock()=" =?>
-      para (code "Robot.rock()")
+      para (codeWith ("", ["verbatim"], []) "Robot.rock()")
 
   , "Code" =:
       "~word for word~" =?>
@@ -190,7 +190,7 @@ tests =
                   ])
   , "Verbatim text can contain equal signes (=)" =:
       "=is_subst = True=" =?>
-      para (code "is_subst = True")
+      para (codeWith ("", ["verbatim"], []) "is_subst = True")
 
   , testGroup "Images"
     [ "Image" =:
-- 
cgit v1.2.3


From ea479bf28a4031f408af12ea92d3e19f9a838820 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 4 Jan 2021 14:05:03 -0800
Subject: LaTeX reader: handle filecontents environment.

Closes #7003.
---
 src/Text/Pandoc/Readers/LaTeX.hs         | 32 +++++++++++++++++++++------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  2 ++
 test/command/7003.md                     | 37 ++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 6 deletions(-)
 create mode 100644 test/command/7003.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index d9fe0f502..14a41a911 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1219,6 +1219,7 @@ preamble :: PandocMonad m => LP m Blocks
 preamble = mconcat <$> many preambleBlock
   where preambleBlock =  (mempty <$ spaces1)
                      <|> macroDef (rawBlock "latex")
+                     <|> filecontents
                      <|> (mempty <$ blockCommand)
                      <|> (mempty <$ braced)
                      <|> (do notFollowedBy (begin_ "document")
@@ -1272,6 +1273,16 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
 insertIncluded :: PandocMonad m
                => FilePath
                -> FilePath
@@ -1281,13 +1292,12 @@ insertIncluded defaultExtension f' = do
                 ".tex" -> f'
                 ".sty" -> f'
                 _      -> addExtension f' defaultExtension
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
   pos <- getPosition
   containers <- getIncludeFiles <$> getState
   when (T.pack f `elem` containers) $
     throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
   updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromDirs dirs f
+  mbcontents <- readFileFromTexinputs f
   contents <- case mbcontents of
                    Just s -> return s
                    Nothing -> do
@@ -1695,6 +1705,18 @@ environments = M.fromList
    , ("iftoggle", try $ ifToggle >> block)
    ]
 
+filecontents :: PandocMonad m => LP m Blocks
+filecontents = try $ do
+  controlSeq "begin"
+  name <- untokenize <$> braced
+  guard $ name == "filecontents" || name == "filecontents*"
+  skipopts
+  fp <- untokenize <$> braced
+  txt <- verbEnv name
+  updateState $ \st ->
+    st{ sFileContents = M.insert fp txt (sFileContents st) }
+  return mempty
+
 theoremstyle :: PandocMonad m => LP m Blocks
 theoremstyle = do
   stylename <- untokenize <$> braced
@@ -1894,8 +1916,7 @@ inputMinted = do
   pos <- getPosition
   attr <- mintedAttr
   f <- T.filter (/='"') . untokenize <$> braced
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
-  mbCode <- readFileFromDirs dirs (T.unpack f)
+  mbCode <- readFileFromTexinputs (T.unpack f)
   rawcode <- case mbCode of
                   Just s -> return s
                   Nothing -> do
@@ -1981,8 +2002,7 @@ inputListing = do
   pos <- getPosition
   options <- option [] keyvals
   f <- T.filter (/='"') . untokenize <$> braced
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
-  mbCode <- readFileFromDirs dirs (T.unpack f)
+  mbCode <- readFileFromTexinputs (T.unpack f)
   codeLines <- case mbCode of
                       Just s -> return $ T.lines s
                       Nothing -> do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 563d32883..e92ed387c 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -152,6 +152,7 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sHasChapters   :: Bool
                             , sToggles       :: M.Map Text Bool
                             , sExpanded      :: Bool
+                            , sFileContents  :: M.Map Text Text
                             }
      deriving Show
 
@@ -177,6 +178,7 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sHasChapters   = False
                               , sToggles       = M.empty
                               , sExpanded      = False
+                              , sFileContents  = M.empty
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
diff --git a/test/command/7003.md b/test/command/7003.md
new file mode 100644
index 000000000..1f6275943
--- /dev/null
+++ b/test/command/7003.md
@@ -0,0 +1,37 @@
+```
+% pandoc -f latex -t native
+\documentclass{article}
+\usepackage{listings}
+
+\lstset{basicstyle=\ttfamily}
+
+\begin{filecontents*}[overwrite]{example.tex}
+\documentclass{article}
+\begin{document}
+\section{Bar}
+This a Bar section
+\end{document}
+\end{filecontents*}
+
+\begin{document}
+
+\section{With lstlisting environment}
+
+\begin{lstlisting}
+\documentclass{article}
+\begin{document}
+\section{Foo}
+This a Foo section
+\end{document}
+\end{lstlisting}
+
+\section{With lstinputlisting command}
+
+\lstinputlisting{example.tex}
+\end{document}
+^D
+[Header 1 ("with-lstlisting-environment",[],[]) [Str "With",Space,Str "lstlisting",Space,Str "environment"]
+,CodeBlock ("",[],[]) "\\documentclass{article}\n\\begin{document}\n\\section{Foo}\nThis a Foo section\n\\end{document}"
+,Header 1 ("with-lstinputlisting-command",[],[]) [Str "With",Space,Str "lstinputlisting",Space,Str "command"]
+,CodeBlock ("",["latex"],[]) "\\documentclass{article}\n\\begin{document}\n\\section{Bar}\nThis a Bar section\n\\end{document}"]
+```
-- 
cgit v1.2.3


From 4f3434586743afb69f00ca91fe6ec9b68b39ae7e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 8 Jan 2021 18:38:20 +0100
Subject: Update copyright notices for 2021 (#7012)

---
 COPYRIGHT                                       | 26 ++++++++++++-------------
 MANUAL.txt                                      |  2 +-
 README.md                                       |  2 +-
 README.template                                 |  2 +-
 app/pandoc.hs                                   |  2 +-
 benchmark/benchmark-pandoc.hs                   |  2 +-
 benchmark/weigh-pandoc.hs                       |  2 +-
 pandoc.cabal                                    |  2 +-
 src/Text/Pandoc.hs                              |  2 +-
 src/Text/Pandoc/App.hs                          |  2 +-
 src/Text/Pandoc/App/CommandLineOptions.hs       |  4 ++--
 src/Text/Pandoc/App/FormatHeuristics.hs         |  2 +-
 src/Text/Pandoc/App/Opt.hs                      |  2 +-
 src/Text/Pandoc/App/OutputSettings.hs           |  2 +-
 src/Text/Pandoc/Asciify.hs                      |  2 +-
 src/Text/Pandoc/BCP47.hs                        |  2 +-
 src/Text/Pandoc/CSS.hs                          |  2 +-
 src/Text/Pandoc/CSV.hs                          |  2 +-
 src/Text/Pandoc/Data.hs                         |  2 +-
 src/Text/Pandoc/Error.hs                        |  2 +-
 src/Text/Pandoc/Extensions.hs                   |  2 +-
 src/Text/Pandoc/Filter.hs                       |  2 +-
 src/Text/Pandoc/Filter/JSON.hs                  |  2 +-
 src/Text/Pandoc/Filter/Lua.hs                   |  2 +-
 src/Text/Pandoc/Filter/Path.hs                  |  2 +-
 src/Text/Pandoc/Highlighting.hs                 |  2 +-
 src/Text/Pandoc/Image.hs                        |  2 +-
 src/Text/Pandoc/ImageSize.hs                    |  2 +-
 src/Text/Pandoc/Logging.hs                      |  2 +-
 src/Text/Pandoc/Lua.hs                          |  2 +-
 src/Text/Pandoc/Lua/ErrorConversion.hs          |  2 +-
 src/Text/Pandoc/Lua/Filter.hs                   |  4 ++--
 src/Text/Pandoc/Lua/Global.hs                   |  2 +-
 src/Text/Pandoc/Lua/Init.hs                     |  2 +-
 src/Text/Pandoc/Lua/Marshaling.hs               |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/AST.hs           |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/AnyValue.hs      |  2 +-
 src/Text/Pandoc/Lua/Marshaling/CommonState.hs   |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/Context.hs       |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/List.hs          |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/MediaBag.hs      |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/PandocError.hs   |  2 +-
 src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs   |  2 +-
 src/Text/Pandoc/Lua/Marshaling/Version.hs       |  2 +-
 src/Text/Pandoc/Lua/Module/MediaBag.hs          |  2 +-
 src/Text/Pandoc/Lua/Module/Pandoc.hs            |  2 +-
 src/Text/Pandoc/Lua/Module/System.hs            |  2 +-
 src/Text/Pandoc/Lua/Module/Types.hs             |  2 +-
 src/Text/Pandoc/Lua/Module/Utils.hs             |  2 +-
 src/Text/Pandoc/Lua/Packages.hs                 |  2 +-
 src/Text/Pandoc/Lua/PandocLua.hs                |  2 +-
 src/Text/Pandoc/Lua/Util.hs                     |  4 ++--
 src/Text/Pandoc/Lua/Walk.hs                     |  4 ++--
 src/Text/Pandoc/MIME.hs                         |  2 +-
 src/Text/Pandoc/MediaBag.hs                     |  2 +-
 src/Text/Pandoc/Options.hs                      |  2 +-
 src/Text/Pandoc/PDF.hs                          |  2 +-
 src/Text/Pandoc/Parsing.hs                      |  2 +-
 src/Text/Pandoc/Process.hs                      |  2 +-
 src/Text/Pandoc/Readers.hs                      |  2 +-
 src/Text/Pandoc/Readers/BibTeX.hs               |  2 +-
 src/Text/Pandoc/Readers/CSV.hs                  |  2 +-
 src/Text/Pandoc/Readers/CommonMark.hs           |  2 +-
 src/Text/Pandoc/Readers/CslJson.hs              |  2 +-
 src/Text/Pandoc/Readers/DocBook.hs              |  2 +-
 src/Text/Pandoc/Readers/Docx/Combine.hs         |  2 +-
 src/Text/Pandoc/Readers/Docx/Util.hs            |  2 +-
 src/Text/Pandoc/Readers/HTML.hs                 |  2 +-
 src/Text/Pandoc/Readers/HTML/Parsing.hs         |  2 +-
 src/Text/Pandoc/Readers/HTML/Table.hs           |  4 ++--
 src/Text/Pandoc/Readers/HTML/TagCategories.hs   |  2 +-
 src/Text/Pandoc/Readers/HTML/Types.hs           |  2 +-
 src/Text/Pandoc/Readers/Ipynb.hs                |  2 +-
 src/Text/Pandoc/Readers/Jira.hs                 |  2 +-
 src/Text/Pandoc/Readers/LaTeX.hs                |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs           |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Types.hs          |  2 +-
 src/Text/Pandoc/Readers/Markdown.hs             |  2 +-
 src/Text/Pandoc/Readers/MediaWiki.hs            |  2 +-
 src/Text/Pandoc/Readers/Metadata.hs             |  2 +-
 src/Text/Pandoc/Readers/Native.hs               |  2 +-
 src/Text/Pandoc/Readers/OPML.hs                 |  2 +-
 src/Text/Pandoc/Readers/Org.hs                  |  2 +-
 src/Text/Pandoc/Readers/Org/BlockStarts.hs      |  2 +-
 src/Text/Pandoc/Readers/Org/Blocks.hs           |  2 +-
 src/Text/Pandoc/Readers/Org/DocumentTree.hs     |  2 +-
 src/Text/Pandoc/Readers/Org/ExportSettings.hs   |  2 +-
 src/Text/Pandoc/Readers/Org/Inlines.hs          |  2 +-
 src/Text/Pandoc/Readers/Org/Meta.hs             |  2 +-
 src/Text/Pandoc/Readers/Org/ParserState.hs      |  2 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs          |  2 +-
 src/Text/Pandoc/Readers/Org/Shared.hs           |  2 +-
 src/Text/Pandoc/Readers/RST.hs                  |  2 +-
 src/Text/Pandoc/Readers/Textile.hs              |  2 +-
 src/Text/Pandoc/RoffChar.hs                     |  2 +-
 src/Text/Pandoc/SelfContained.hs                |  2 +-
 src/Text/Pandoc/Shared.hs                       |  2 +-
 src/Text/Pandoc/Slides.hs                       |  2 +-
 src/Text/Pandoc/Templates.hs                    |  2 +-
 src/Text/Pandoc/Translations.hs                 |  2 +-
 src/Text/Pandoc/UTF8.hs                         |  2 +-
 src/Text/Pandoc/UUID.hs                         |  2 +-
 src/Text/Pandoc/Writers.hs                      |  2 +-
 src/Text/Pandoc/Writers/AsciiDoc.hs             |  2 +-
 src/Text/Pandoc/Writers/CommonMark.hs           |  2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs              |  2 +-
 src/Text/Pandoc/Writers/CslJson.hs              |  2 +-
 src/Text/Pandoc/Writers/Custom.hs               |  2 +-
 src/Text/Pandoc/Writers/Docbook.hs              |  2 +-
 src/Text/Pandoc/Writers/Docx.hs                 |  2 +-
 src/Text/Pandoc/Writers/Docx/StyleMap.hs        |  2 +-
 src/Text/Pandoc/Writers/DokuWiki.hs             |  2 +-
 src/Text/Pandoc/Writers/EPUB.hs                 |  2 +-
 src/Text/Pandoc/Writers/FB2.hs                  |  2 +-
 src/Text/Pandoc/Writers/HTML.hs                 |  2 +-
 src/Text/Pandoc/Writers/Ipynb.hs                |  2 +-
 src/Text/Pandoc/Writers/JATS.hs                 |  2 +-
 src/Text/Pandoc/Writers/JATS/Table.hs           |  2 +-
 src/Text/Pandoc/Writers/JATS/Types.hs           |  2 +-
 src/Text/Pandoc/Writers/Jira.hs                 |  2 +-
 src/Text/Pandoc/Writers/LaTeX.hs                |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Caption.hs        |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Notes.hs          |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Table.hs          |  2 +-
 src/Text/Pandoc/Writers/Man.hs                  |  2 +-
 src/Text/Pandoc/Writers/Markdown.hs             |  2 +-
 src/Text/Pandoc/Writers/MediaWiki.hs            |  2 +-
 src/Text/Pandoc/Writers/Ms.hs                   |  2 +-
 src/Text/Pandoc/Writers/Native.hs               |  2 +-
 src/Text/Pandoc/Writers/ODT.hs                  |  2 +-
 src/Text/Pandoc/Writers/OOXML.hs                |  2 +-
 src/Text/Pandoc/Writers/OPML.hs                 |  2 +-
 src/Text/Pandoc/Writers/Org.hs                  |  4 ++--
 src/Text/Pandoc/Writers/RST.hs                  |  2 +-
 src/Text/Pandoc/Writers/RTF.hs                  |  2 +-
 src/Text/Pandoc/Writers/Roff.hs                 |  2 +-
 src/Text/Pandoc/Writers/Shared.hs               |  2 +-
 src/Text/Pandoc/Writers/TEI.hs                  |  2 +-
 src/Text/Pandoc/Writers/Texinfo.hs              |  2 +-
 src/Text/Pandoc/Writers/Textile.hs              |  2 +-
 src/Text/Pandoc/Writers/ZimWiki.hs              |  2 +-
 src/Text/Pandoc/XML.hs                          |  2 +-
 test/Tests/Command.hs                           |  2 +-
 test/Tests/Helpers.hs                           |  2 +-
 test/Tests/Lua.hs                               |  2 +-
 test/Tests/Lua/Module.hs                        |  2 +-
 test/Tests/Old.hs                               |  2 +-
 test/Tests/Readers/Creole.hs                    |  2 +-
 test/Tests/Readers/EPUB.hs                      |  2 +-
 test/Tests/Readers/HTML.hs                      |  2 +-
 test/Tests/Readers/Jira.hs                      |  2 +-
 test/Tests/Readers/LaTeX.hs                     |  2 +-
 test/Tests/Readers/Man.hs                       |  2 +-
 test/Tests/Readers/Markdown.hs                  |  2 +-
 test/Tests/Readers/Odt.hs                       |  2 +-
 test/Tests/Readers/Org.hs                       |  2 +-
 test/Tests/Readers/Org/Block.hs                 |  2 +-
 test/Tests/Readers/Org/Block/CodeBlock.hs       |  2 +-
 test/Tests/Readers/Org/Block/Figure.hs          |  2 +-
 test/Tests/Readers/Org/Block/Header.hs          |  2 +-
 test/Tests/Readers/Org/Block/List.hs            |  2 +-
 test/Tests/Readers/Org/Block/Table.hs           |  2 +-
 test/Tests/Readers/Org/Directive.hs             |  2 +-
 test/Tests/Readers/Org/Inline.hs                |  2 +-
 test/Tests/Readers/Org/Inline/Citation.hs       |  2 +-
 test/Tests/Readers/Org/Inline/Note.hs           |  2 +-
 test/Tests/Readers/Org/Inline/Smart.hs          |  2 +-
 test/Tests/Readers/Org/Meta.hs                  |  2 +-
 test/Tests/Readers/Org/Shared.hs                |  2 +-
 test/Tests/Readers/RST.hs                       |  2 +-
 test/Tests/Readers/Txt2Tags.hs                  |  2 +-
 test/Tests/Shared.hs                            |  2 +-
 trypandoc/trypandoc.hs                          |  2 +-
 175 files changed, 200 insertions(+), 200 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/COPYRIGHT b/COPYRIGHT
index eb916cc54..9992e5680 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -1,5 +1,5 @@
 Pandoc
-Copyright (C) 2006-2020 John MacFarlane <jgm at berkeley dot edu>
+Copyright (C) 2006-2021 John MacFarlane <jgm at berkeley dot edu>
 
 With the exceptions noted below, this code is released under the [GPL],
 version 2 or later:
@@ -37,7 +37,7 @@ The modules in the `pandoc-types` repository (Text.Pandoc.Definition,
 Text.Pandoc.Builder, Text.Pandoc.Generics, Text.Pandoc.JSON,
 Text.Pandoc.Walk) are licensed under the BSD 3-clause license:
 
-Copyright (c) 2006-2020, John MacFarlane
+Copyright (c) 2006-2021, John MacFarlane
 
 All rights reserved.
 
@@ -73,7 +73,7 @@ Pandoc's templates (in `data/templates`) are dual-licensed as either
 GPL (v2 or higher, same as pandoc) or (at your option) the BSD
 3-clause license.
 
-Copyright (c) 2014--2020, John MacFarlane
+Copyright (c) 2014--2021, John MacFarlane
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Muse.hs
@@ -83,19 +83,19 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Texinfo.hs
-Copyright (C) 2008-2020 John MacFarlane and Peter Wang
+Copyright (C) 2008-2021 John MacFarlane and Peter Wang
 
 Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/OpenDocument.hs
-Copyright (C) 2008-2020 Andrea Rossato and John MacFarlane
+Copyright (C) 2008-2021 Andrea Rossato and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Org.hs
-Copyright (C) 2010-2020 Puneeth Chaganti, John MacFarlane, and
+Copyright (C) 2010-2021 Puneeth Chaganti, John MacFarlane, and
                         Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
@@ -115,7 +115,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Readers/Textile.hs
-Copyright (C) 2010-2020 Paul Rivier and John MacFarlane
+Copyright (C) 2010-2021 Paul Rivier and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -133,7 +133,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Readers/EPUB.hs
-Copyright (C) 2014-2020 Matthew Pickering and John MacFarlane
+Copyright (C) 2014-2021 Matthew Pickering and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -141,7 +141,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Readers/Org.hs
 src/Text/Pandoc/Readers/Org/*
 test/Tests/Readers/Org/*
-Copyright (C) 2014-2020 Albert Krewinkel
+Copyright (C) 2014-2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -149,7 +149,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Lua.hs
 src/Text/Pandoc/Lua/*
 test/lua/*
-Copyright (C) 2017--2020 Albert Krewinkel and John MacFarlane
+Copyright (C) 2017--2021 Albert Krewinkel and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -157,7 +157,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Readers/Jira.hs
 src/Text/Pandoc/Writers/Jira.hs
 test/Tests/Readers/Jira.hs
-Copyright (C) 2019--2020 Albert Krewinkel
+Copyright (C) 2019--2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -169,7 +169,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 data/pandoc.lua
-Copyright (C) 2017-2020 Albert Krewinkel
+Copyright (C) 2017-2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -183,7 +183,7 @@ Released under the Do What the Fuck You Want To Public License.
 ------------------------------------------------------------------------
 Pandoc embeds a lua interpreter (via hslua).
 
-Copyright © 1994–2019 Lua.org, PUC-Rio.
+Copyright © 1994–2020 Lua.org, PUC-Rio.
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/MANUAL.txt b/MANUAL.txt
index 831ffb0b0..aa75a32a9 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -6329,7 +6329,7 @@ application, here are some things to keep in mind:
 
 # Authors
 
-Copyright 2006--2020 John MacFarlane (jgm@berkeley.edu). Released
+Copyright 2006--2021 John MacFarlane (jgm@berkeley.edu). Released
 under the [GPL], version 2 or greater.  This software carries no
 warranty of any kind.  (See COPYRIGHT for full copyright and
 warranty notices.) For a full list of contributors, see the file
diff --git a/README.md b/README.md
index aea55b0c4..9317438fc 100644
--- a/README.md
+++ b/README.md
@@ -236,7 +236,7 @@ opening a new issue.
 
 ## License
 
-© 2006-2020 John MacFarlane (jgm@berkeley.edu). Released under the
+© 2006-2021 John MacFarlane (jgm@berkeley.edu). Released under the
 [GPL](http://www.gnu.org/copyleft/gpl.html "GNU General Public License"),
 version 2 or greater. This software carries no warranty of any kind.
 (See COPYRIGHT for full copyright and warranty notices.)
diff --git a/README.template b/README.template
index 5eeafb9e3..53d14b584 100644
--- a/README.template
+++ b/README.template
@@ -81,7 +81,7 @@ new issue.
 License
 -------
 
-© 2006-2020 John MacFarlane (jgm@berkeley.edu). Released under the
+© 2006-2021 John MacFarlane (jgm@berkeley.edu). Released under the
 [GPL], version 2 or greater.  This software carries no warranty of
 any kind.  (See COPYRIGHT for full copyright and warranty notices.)
 
diff --git a/app/pandoc.hs b/app/pandoc.hs
index 9ed3b9e9f..162570f18 100644
--- a/app/pandoc.hs
+++ b/app/pandoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Main
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs
index 3e7b663b4..b3e67bb14 100644
--- a/benchmark/benchmark-pandoc.hs
+++ b/benchmark/benchmark-pandoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2021 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
diff --git a/benchmark/weigh-pandoc.hs b/benchmark/weigh-pandoc.hs
index 4ba6feb03..ad4c83ad7 100644
--- a/benchmark/weigh-pandoc.hs
+++ b/benchmark/weigh-pandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Main
-   Copyright   : © 2016-2019 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : © 2016-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/pandoc.cabal b/pandoc.cabal
index cce3c1a58..bb68ea402 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -4,7 +4,7 @@ version:         2.11.3.2
 build-type:      Simple
 license:         GPL-2.0-or-later
 license-file:    COPYING.md
-copyright:       (c) 2006-2020 John MacFarlane
+copyright:       (c) 2006-2021 John MacFarlane
 author:          John MacFarlane <jgm@berkeley.edu>
 maintainer:      John MacFarlane <jgm@berkeley.edu>
 bug-reports:     https://github.com/jgm/pandoc/issues
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 549aeddfb..f09dfd8c7 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index e6d5c93d4..725c76424 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.App
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 21ee47b7b..307f28b5c 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE FlexibleContexts    #-}
 {- |
    Module      : Text.Pandoc.App.CommandLineOptions
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -968,7 +968,7 @@ usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]")
 
 copyrightMessage :: String
 copyrightMessage = intercalate "\n" [
- "Copyright (C) 2006-2020 John MacFarlane. Web:  https://pandoc.org",
+ "Copyright (C) 2006-2021 John MacFarlane. Web:  https://pandoc.org",
  "This is free software; see the source for copying conditions. There is no",
  "warranty, not even for merchantability or fitness for a particular purpose." ]
 
diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
index 155b7e586..17ed30fe9 100644
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ b/src/Text/Pandoc/App/FormatHeuristics.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.App.FormatHeuristics
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 6dd19758e..0d96ab67c 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -7,7 +7,7 @@
 {-# LANGUAGE FlexibleContexts    #-}
 {- |
    Module      : Text.Pandoc.App.Opt
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 139b408cb..53c7d82ef 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE TupleSections       #-}
 {- |
    Module      : Text.Pandoc.App
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 9e9cc8d9b..01a7b624a 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Asciify
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
index b41e93125..69824aa57 100644
--- a/src/Text/Pandoc/BCP47.hs
+++ b/src/Text/Pandoc/BCP47.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.BCP47
-   Copyright   : Copyright (C) 2017–2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
index d98c85147..625feadbb 100644
--- a/src/Text/Pandoc/CSS.hs
+++ b/src/Text/Pandoc/CSS.hs
@@ -1,6 +1,6 @@
 {- |
 Module      : Text.Pandoc.CSS
-Copyright   : © 2006-2020 John MacFarlane <jgm@berkeley.edu>,
+Copyright   : © 2006-2021 John MacFarlane <jgm@berkeley.edu>,
                 2015-2016 Mauro Bieg,
                 2015      Ophir Lifshitz <hangfromthefloor@gmail.com>
 License     : GNU GPL, version 2 or above
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
index 10812644f..ec212fa9a 100644
--- a/src/Text/Pandoc/CSV.hs
+++ b/src/Text/Pandoc/CSV.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.CSV
-   Copyright   : Copyright (C) 2017–2020 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Data.hs b/src/Text/Pandoc/Data.hs
index 38682b9f9..55ed3f5bf 100644
--- a/src/Text/Pandoc/Data.hs
+++ b/src/Text/Pandoc/Data.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE TemplateHaskell #-}
 {- |
 Module      : Text.Pandoc.Data
-Copyright   : Copyright (C) 2013-2020 John MacFarlane
+Copyright   : Copyright (C) 2013-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 2c311bb49..204cf15ca 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Error
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 9865f897b..39c2a0489 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Extensions
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs
index 6d4846f98..1209ceeb7 100644
--- a/src/Text/Pandoc/Filter.hs
+++ b/src/Text/Pandoc/Filter.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Filter
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs
index 83ec9a97c..d2323fac4 100644
--- a/src/Text/Pandoc/Filter/JSON.hs
+++ b/src/Text/Pandoc/Filter/JSON.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Filter
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs
index a76c8da2f..c238e53d9 100644
--- a/src/Text/Pandoc/Filter/Lua.hs
+++ b/src/Text/Pandoc/Filter/Lua.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Filter.Lua
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/Path.hs b/src/Text/Pandoc/Filter/Path.hs
index 9e5e8fa77..1d16c53b9 100644
--- a/src/Text/Pandoc/Filter/Path.hs
+++ b/src/Text/Pandoc/Filter/Path.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Filter.Path
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index ce8880f84..0bb6ed319 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Highlighting
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Image.hs b/src/Text/Pandoc/Image.hs
index e37de4e00..e0c938938 100644
--- a/src/Text/Pandoc/Image.hs
+++ b/src/Text/Pandoc/Image.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
 {- |
 Module      : Text.Pandoc.Image
-Copyright   : Copyright (C) 2020 John MacFarlane
+Copyright   : Copyright (C) 2020-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 9ce5c668d..098c16721 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -3,7 +3,7 @@
 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
 {- |
 Module      : Text.Pandoc.ImageSize
-Copyright   : Copyright (C) 2011-2020 John MacFarlane
+Copyright   : Copyright (C) 2011-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 825fdaadb..3a3acedc7 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Logging
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs
index 39db0074a..f0e9e076b 100644
--- a/src/Text/Pandoc/Lua.hs
+++ b/src/Text/Pandoc/Lua.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017–2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/ErrorConversion.hs b/src/Text/Pandoc/Lua/ErrorConversion.hs
index 59c962723..4e6880722 100644
--- a/src/Text/Pandoc/Lua/ErrorConversion.hs
+++ b/src/Text/Pandoc/Lua/ErrorConversion.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.ErrorConversion
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index 94d7adeb2..bffe01a34 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -1,8 +1,8 @@
 {-# LANGUAGE FlexibleContexts  #-}
 {- |
 Module      : Text.Pandoc.Lua.Filter
-Copyright   : © 2012–2020 John MacFarlane,
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane,
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs
index 4285be662..29b788f04 100644
--- a/src/Text/Pandoc/Lua/Global.hs
+++ b/src/Text/Pandoc/Lua/Global.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE DeriveDataTypeable #-}
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs
index e89e9d6e0..0a5ce85cb 100644
--- a/src/Text/Pandoc/Lua/Init.hs
+++ b/src/Text/Pandoc/Lua/Init.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling.hs b/src/Text/Pandoc/Lua/Marshaling.hs
index 1254402b6..f517c7c27 100644
--- a/src/Text/Pandoc/Lua/Marshaling.hs
+++ b/src/Text/Pandoc/Lua/Marshaling.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs
index c889618c4..6485da661 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AST.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AST.hs
@@ -2,8 +2,8 @@
 {-# LANGUAGE LambdaCase           #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AST
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
index c4720aedf..82e26b963 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AnyValue
-   Copyright   : © 2017-2020 Albert Krewinkel
+   Copyright   : © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
index 636650af3..147197c5d 100644
--- a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE OverloadedStrings    #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.CommonState
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/Context.hs b/src/Text/Pandoc/Lua/Marshaling/Context.hs
index c0e7aef60..606bdcfb2 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Context.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Context.hs
@@ -1,8 +1,8 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.Context
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/List.hs b/src/Text/Pandoc/Lua/Marshaling/List.hs
index e6614400d..0446302a1 100644
--- a/src/Text/Pandoc/Lua/Marshaling/List.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/List.hs
@@ -4,8 +4,8 @@
 {-# LANGUAGE UndecidableInstances #-}
 {- |
 Module      : Text.Pandoc.Lua.Marshaling.List
-Copyright   : © 2012-2020 John MacFarlane
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
index 2cf5b8893..70bd010a0 100644
--- a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.MediaBag
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
index 74537a1dd..f698704e0 100644
--- a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables  #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.PandocError
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
index 2e45affe4..dd7bf2e61 100644
--- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
@@ -4,8 +4,8 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.ReaderOptions
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
index 98fa1efa4..6d43039fa 100644
--- a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.SimpleTable
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/Version.hs b/src/Text/Pandoc/Lua/Marshaling/Version.hs
index 9adb1b763..4f4ffac51 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Version.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Version.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables  #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.Version
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs
index e5a10217a..715e53885 100644
--- a/src/Text/Pandoc/Lua/Module/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.MediaBag
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index 3886568b7..a9ce3866d 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.Pandoc
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/System.hs b/src/Text/Pandoc/Lua/Module/System.hs
index 04508e461..bd35babaf 100644
--- a/src/Text/Pandoc/Lua/Module/System.hs
+++ b/src/Text/Pandoc/Lua/Module/System.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Module.System
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Types.hs b/src/Text/Pandoc/Lua/Module/Types.hs
index 999f2e588..bb4f02c3c 100644
--- a/src/Text/Pandoc/Lua/Module/Types.hs
+++ b/src/Text/Pandoc/Lua/Module/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Module.Types
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs
index 7595b9c0f..1b04021a7 100644
--- a/src/Text/Pandoc/Lua/Module/Utils.hs
+++ b/src/Text/Pandoc/Lua/Module/Utils.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.Utils
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs
index 4c3b9d79d..d62fb725d 100644
--- a/src/Text/Pandoc/Lua/Packages.hs
+++ b/src/Text/Pandoc/Lua/Packages.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Packages
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs
index 6c3b410dd..4beac22b7 100644
--- a/src/Text/Pandoc/Lua/PandocLua.hs
+++ b/src/Text/Pandoc/Lua/PandocLua.hs
@@ -9,7 +9,7 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.PandocLua
-   Copyright   : Copyright © 2020 Albert Krewinkel
+   Copyright   : Copyright © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs
index fbd013801..70a8a6d47 100644
--- a/src/Text/Pandoc/Lua/Util.hs
+++ b/src/Text/Pandoc/Lua/Util.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Util
-   Copyright   : © 2012–2020 John MacFarlane,
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane,
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Walk.hs b/src/Text/Pandoc/Lua/Walk.hs
index 695c7b44e..d6d973496 100644
--- a/src/Text/Pandoc/Lua/Walk.hs
+++ b/src/Text/Pandoc/Lua/Walk.hs
@@ -4,8 +4,8 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
 Module      : Text.Pandoc.Lua.Walk
-Copyright   : © 2012–2020 John MacFarlane,
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane,
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 4fe25ebe1..53c5cd018 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.MIME
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index 26f44cef0..3249bcdeb 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {- |
    Module      : Text.Pandoc.MediaBag
-   Copyright   : Copyright (C) 2014-2015, 2017–2020 John MacFarlane
+   Copyright   : Copyright (C) 2014-2015, 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index ecd65a54d..92bda36b2 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE TemplateHaskell    #-}
 {- |
    Module      : Text.Pandoc.Options
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index c4080a227..7c0082c29 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.PDF
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index fd14341ad..4c4dd531d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -9,7 +9,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
index 866972e3f..b896feb7e 100644
--- a/src/Text/Pandoc/Process.hs
+++ b/src/Text/Pandoc/Process.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Process
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 9a069f7d0..ac70f7d4c 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Readers
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 6c96ab30a..956b9f1f7 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.BibTeX
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 45f4d88d4..2958d6180 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CSV
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 9eef498e1..150a837e4 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 377186b1e..30bb19483 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index b0846e345..d3b2dd4d3 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.DocBook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 46112af19..bcf26c4a3 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -2,7 +2,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Docx.Combine
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2020 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index a573344ff..f9c9a8e26 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Docx.StyleMaps
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 8e94a0812..afc7a3e25 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index e28ebe77b..4c069e2c3 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 91639fa4c..6179ea8e7 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
-   Copyright   : © 2006-2020 John MacFarlane,
-                   2020 Albert Krewinkel
+   Copyright   : © 2006-2021 John MacFarlane,
+                   2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index 4f82a1831..b7bd40fee 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.TagCategories
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs
index a94eeb828..12c519ad6 100644
--- a/src/Text/Pandoc/Readers/HTML/Types.hs
+++ b/src/Text/Pandoc/Readers/HTML/Types.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Types
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index a866e6ec3..70296bb6b 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 9266ce10d..5a5d0ee1e 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 14a41a911..f49323996 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 814b2fe79..5f634818e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Lang
-   Copyright   : Copyright (C) 2018-2020 John MacFarlane
+   Copyright   : Copyright (C) 2018-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index e92ed387c..12a3ba2f6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ScopedTypeVariables   #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index a017a2afb..f8c214318 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5888bf095..5c3a21bb7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index d712b1120..cdb746c67 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.MediaWiki
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index a64b130e5..927291776 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Metadata
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 42843381a..9c8bc0374 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Native
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 903cdf4a1..5b8996025 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 851aec103..afeb27a87 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 8f7cac6ea..14233569c 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.BlockStarts
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 17e3ff986..6bd046e04 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Blocks
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 3b363270c..2dcbecb1d 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.DocumentTree
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index 9399ebd54..401e1bd8f 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ExportSettings
-   Copyright   : © 2016–2020 Albert Krewinkel
+   Copyright   : © 2016-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index b819a34a3..68c2ba5e0 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Inlines
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 4864d9478..6621822a2 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Meta
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 1e4799e7b..abe8a9ebf 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ParserState
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index bce71c24d..d33920d47 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Readers.Org.Parsing
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 7f72077a4..ad7c65060 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Shared
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index eeb3d1389..707af905f 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 6691d8381..4991c6308 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -3,7 +3,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Textile
    Copyright   : Copyright (C) 2010-2012 Paul Rivier
-                               2010-2020 John MacFarlane
+                               2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Paul Rivier <paul*rivier#demotera*com>
diff --git a/src/Text/Pandoc/RoffChar.hs b/src/Text/Pandoc/RoffChar.hs
index 67e8b9cd5..d1c38204f 100644
--- a/src/Text/Pandoc/RoffChar.hs
+++ b/src/Text/Pandoc/RoffChar.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.RoffChar
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 061361aba..c9e20cad0 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.SelfContained
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 4853621c8..b908a0172 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Shared
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
index 9ea0389c9..a3e550b1f 100644
--- a/src/Text/Pandoc/Slides.hs
+++ b/src/Text/Pandoc/Slides.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Slides
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 0c10b258d..e83f26329 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Templates
-   Copyright   : Copyright (C) 2009-2020 John MacFarlane
+   Copyright   : Copyright (C) 2009-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs
index 200d756f6..0c7d7ab23 100644
--- a/src/Text/Pandoc/Translations.hs
+++ b/src/Text/Pandoc/Translations.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Translations
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 4621e1765..b583dbbdb 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.UTF8
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
index ca0df2d0b..12579be90 100644
--- a/src/Text/Pandoc/UUID.hs
+++ b/src/Text/Pandoc/UUID.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.UUID
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index 0654c2d85..49531d924 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 0a312d1d1..b4ef7c8b9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.AsciiDoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 66ded218f..8733b7149 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Writers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 0a6313513..4d44842e2 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.ConTeXt
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index 08310de65..13b95586b 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 8da611b61..58c4bb5be 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Writers.Custom
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index affa0de04..a6776608d 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 65946ec88..a99e13a85 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -7,7 +7,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.Docx
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
index c3c54c7e5..04868eaad 100644
--- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs
+++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
@@ -2,7 +2,7 @@
 {- |
    Module : Text.Pandoc.Writers.Docx.StyleMap
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015-2019 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 90ec6824f..7df47c912 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.DokuWiki
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Clare Macrae <clare.macrae@googlemail.com>
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index c8f5ff60d..3e3fd8fd6 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Writers.EPUB
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 701ff3d9b..25b1f28d1 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -3,7 +3,7 @@
 {- |
 Module      : Text.Pandoc.Writers.FB2
 Copyright   : Copyright (C) 2011-2012 Sergey Astanin
-                            2012-2020 John MacFarlane
+                            2012-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 0b051c8f0..26df0325e 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs
index d01d5a7e5..2613851c5 100644
--- a/src/Text/Pandoc/Writers/Ipynb.hs
+++ b/src/Text/Pandoc/Writers/Ipynb.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Writers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index 7058a4557..e8d93b8d5 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs
index a4d42832d..465480f59 100644
--- a/src/Text/Pandoc/Writers/JATS/Table.hs
+++ b/src/Text/Pandoc/Writers/JATS/Table.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS.Table
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs
index 8162f3bc0..54ed4a8bd 100644
--- a/src/Text/Pandoc/Writers/JATS/Types.hs
+++ b/src/Text/Pandoc/Writers/JATS/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Writers.JATS.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index 6bc048a61..c21085a4f 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE PatternGuards #-}
 {- |
    Module      : Text.Pandoc.Writers.Jira
-   Copyright   : © 2010-2020 Albert Krewinkel, John MacFarlane
+   Copyright   : © 2010-2021 Albert Krewinkel, John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 2281290c0..df922e17b 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
index 61ca41fb1..7b9ce186f 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Caption.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Caption
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
index 216a7bfc3..f225ef0c5 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Notes.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Notes
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index 8411d9f80..6f8386937 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Table
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 4eb0db042..edb70f53e 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Man
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 1b5c00468..d62727d90 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index d1912caa6..5029be69f 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.MediaWiki
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 0fc333bc2..48395c420 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Ms
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
index 4d4dfca15..9c2ce805d 100644
--- a/src/Text/Pandoc/Writers/Native.hs
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Native
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index e41fb7176..05dfad5eb 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.ODT
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index ac991b594..3ac007f4e 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.OOXML
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index 810a94775..8c9229fc0 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE CPP               #-}
 {- |
    Module      : Text.Pandoc.Writers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 2af93017d..43ebf1807 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -3,8 +3,8 @@
 {- |
    Module      : Text.Pandoc.Writers.Org
    Copyright   : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com>
-                   2010-2020 John MacFarlane <jgm@berkeley.edu>
-                   2016-2020 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+                   2010-2021 John MacFarlane <jgm@berkeley.edu>
+                   2016-2021 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 8beeef46a..a3be1d723 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index e3966ed07..cf27011c2 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.RTF
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs
index 00b027cc9..6af56242f 100644
--- a/src/Text/Pandoc/Writers/Roff.hs
+++ b/src/Text/Pandoc/Writers/Roff.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Roff
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 129e45e9d..fc3f8ff3a 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Shared
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index a9ee5eece..b926c48a1 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE PatternGuards     #-}
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index c6debd9ce..53da70f84 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Texinfo
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
                                2012 Peter Wang
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 61ddb7497..03d030477 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Textile
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index 902b093d3..9e45f0417 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.ZimWiki
-   Copyright   : © 2008-2020 John MacFarlane,
+   Copyright   : © 2008-2021 John MacFarlane,
                    2017-2019 Alex Ivkin
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 4b71d7b69..c4e3ed1e7 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.XML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs
index 522c4b3a1..b3e2a0509 100644
--- a/test/Tests/Command.hs
+++ b/test/Tests/Command.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Command
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs
index c9ee6d206..21898d10e 100644
--- a/test/Tests/Helpers.hs
+++ b/test/Tests/Helpers.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE FlexibleInstances    #-}
 {- |
    Module      : Tests.Helpers
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs
index 853375327..1dfbbd053 100644
--- a/test/Tests/Lua.hs
+++ b/test/Tests/Lua.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Lua
-   Copyright   : © 2017-2020 Albert Krewinkel
+   Copyright   : © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Lua/Module.hs b/test/Tests/Lua/Module.hs
index e2570e87f..d88633cf8 100644
--- a/test/Tests/Lua/Module.hs
+++ b/test/Tests/Lua/Module.hs
@@ -1,6 +1,6 @@
 {- |
 Module      : Tests.Lua.Module
-Copyright   : © 2019-2020 Albert Krewinkel
+Copyright   : © 2019-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 
 Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index cf0396d0a..638620a36 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Old
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Readers/Creole.hs b/test/Tests/Readers/Creole.hs
index 15b826460..1fc0e62d7 100644
--- a/test/Tests/Readers/Creole.hs
+++ b/test/Tests/Readers/Creole.hs
@@ -3,7 +3,7 @@
 {- |
    Module      : Tests.Readers.Creole
    Copyright   : © 2017 Sascha Wilde
-                   2017-2020 John MacFarlane
+                   2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Sascha Wilde <wilde@sha-bang.de>
diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs
index 700d6723d..3c75dd08d 100644
--- a/test/Tests/Readers/EPUB.hs
+++ b/test/Tests/Readers/EPUB.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Readers.EPUB
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.eu>
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index e4c681421..578c76860 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.HTML
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs
index 32b8ecb7c..bf78fe1fe 100644
--- a/test/Tests/Readers/Jira.hs
+++ b/test/Tests/Readers/Jira.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Readers.Jira
-   Copyright   : © 2019-2020 Albert Krewinel
+   Copyright   : © 2019-2021 Albert Krewinel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index c50c91ca1..2a52ffd18 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.LaTeX
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs
index f591aa00d..4f3ab5a28 100644
--- a/test/Tests/Readers/Man.hs
+++ b/test/Tests/Readers/Man.hs
@@ -2,7 +2,7 @@
 {- |
    Module      : Tests.Readers.Man
    Copyright   : © 2018-2019 Yan Pas <yanp.bugz@gmail.com>,
-                   2018-2020 John MacFarlane
+                   2018-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 3af5e2a94..18f909583 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Markdown
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs
index 14062c884..f5e427ba2 100644
--- a/test/Tests/Readers/Odt.hs
+++ b/test/Tests/Readers/Odt.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Odt
-   Copyright   : © 2015-2020 John MacFarlane
+   Copyright   : © 2015-2021 John MacFarlane
                    2015 Martin Linnemann
    License     : GNU GPL, version 2 or above
 
diff --git a/test/Tests/Readers/Org.hs b/test/Tests/Readers/Org.hs
index d4f7bb6dc..290bb603e 100644
--- a/test/Tests/Readers/Org.hs
+++ b/test/Tests/Readers/Org.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Shared
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block.hs b/test/Tests/Readers/Org/Block.hs
index 995bd0316..2ce07c4bb 100644
--- a/test/Tests/Readers/Org/Block.hs
+++ b/test/Tests/Readers/Org/Block.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs
index 2648a6e1f..d40c3bc1d 100644
--- a/test/Tests/Readers/Org/Block/CodeBlock.hs
+++ b/test/Tests/Readers/Org/Block/CodeBlock.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.CodeBlock
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Figure.hs b/test/Tests/Readers/Org/Block/Figure.hs
index 56ddde9d8..8822f5b03 100644
--- a/test/Tests/Readers/Org/Block/Figure.hs
+++ b/test/Tests/Readers/Org/Block/Figure.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Figure
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Header.hs b/test/Tests/Readers/Org/Block/Header.hs
index d38d26efb..887055451 100644
--- a/test/Tests/Readers/Org/Block/Header.hs
+++ b/test/Tests/Readers/Org/Block/Header.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Header
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs
index 15373b3b3..ac03c583b 100644
--- a/test/Tests/Readers/Org/Block/List.hs
+++ b/test/Tests/Readers/Org/Block/List.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Header
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Table.hs b/test/Tests/Readers/Org/Block/Table.hs
index 31c994d3f..cb38fcc12 100644
--- a/test/Tests/Readers/Org/Block/Table.hs
+++ b/test/Tests/Readers/Org/Block/Table.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Table
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Directive.hs b/test/Tests/Readers/Org/Directive.hs
index 727a29658..00cb9762b 100644
--- a/test/Tests/Readers/Org/Directive.hs
+++ b/test/Tests/Readers/Org/Directive.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Directive
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs
index b5d0caa26..13e9fef21 100644
--- a/test/Tests/Readers/Org/Inline.hs
+++ b/test/Tests/Readers/Org/Inline.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs
index 792e4559c..87bb3ca75 100644
--- a/test/Tests/Readers/Org/Inline/Citation.hs
+++ b/test/Tests/Readers/Org/Inline/Citation.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Citation
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Note.hs b/test/Tests/Readers/Org/Inline/Note.hs
index 5924e69cc..20157d2ae 100644
--- a/test/Tests/Readers/Org/Inline/Note.hs
+++ b/test/Tests/Readers/Org/Inline/Note.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Note
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Smart.hs b/test/Tests/Readers/Org/Inline/Smart.hs
index e9309108e..7fde380af 100644
--- a/test/Tests/Readers/Org/Inline/Smart.hs
+++ b/test/Tests/Readers/Org/Inline/Smart.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Smart
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index bc167f2a5..b30b8949a 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Meta
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Shared.hs b/test/Tests/Readers/Org/Shared.hs
index 184adee44..4d0848575 100644
--- a/test/Tests/Readers/Org/Shared.hs
+++ b/test/Tests/Readers/Org/Shared.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Shared
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs
index bd9897ebc..68241b7f9 100644
--- a/test/Tests/Readers/RST.hs
+++ b/test/Tests/Readers/RST.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Readers.RST
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Txt2Tags.hs b/test/Tests/Readers/Txt2Tags.hs
index 989b7f673..62f336690 100644
--- a/test/Tests/Readers/Txt2Tags.hs
+++ b/test/Tests/Readers/Txt2Tags.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Txt2Tags
-   Copyright   : © 2014-2020 John MacFarlane,
+   Copyright   : © 2014-2021 John MacFarlane,
                  © 2014 Matthew Pickering
    License     : GNU GPL, version 2 or above
 
diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs
index a23edf452..72a59fec0 100644
--- a/test/Tests/Shared.hs
+++ b/test/Tests/Shared.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Shared
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs
index 9383c92ce..9a53aa18c 100644
--- a/trypandoc/trypandoc.hs
+++ b/trypandoc/trypandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Main
-   Copyright   : © 2014-2020 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : © 2014-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
-- 
cgit v1.2.3


From fe1378227b24fa6a8661b2e0d377b808eb270c52 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 9 Jan 2021 13:35:07 +0100
Subject: Org reader: allow multiple pipe chars in todo sequences

Additional pipe chars, used to separate "action" state from "no further
action" states, are ignored. E.g., for the following sequence, both
`DONE` and `FINISHED` are states with no further action required.

    #+TODO: UNFINISHED | DONE | FINISHED

Previously, parsing of the todo sequence failed if multiple pipe chars
were included.

Closes: #7014
---
 src/Text/Pandoc/Readers/Org/Meta.hs | 14 ++++++++++----
 test/Tests/Readers/Org/Meta.hs      | 10 ++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 6621822a2..a1b21046a 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -239,7 +239,7 @@ lineOfInlines = do
 todoSequence :: Monad m => OrgParser m TodoSequence
 todoSequence = try $ do
   todoKws <- todoKeywords
-  doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
+  doneKws <- optionMaybe $ todoDoneSep *> doneKeywords
   newline
   -- There must be at least one DONE keyword. The last TODO keyword is
   -- taken if necessary.
@@ -250,11 +250,17 @@ todoSequence = try $ do
                     (x:xs) -> return $ keywordsToSequence (reverse xs) [x]
 
  where
+   todoKeyword :: Monad m => OrgParser m Text
+   todoKeyword = many1Char nonspaceChar <* skipSpaces
+
    todoKeywords :: Monad m => OrgParser m [Text]
    todoKeywords = try $
-     let keyword = many1Char nonspaceChar <* skipSpaces
-         endOfKeywords = todoDoneSep <|> void newline
-     in manyTill keyword (lookAhead endOfKeywords)
+     let endOfKeywords = todoDoneSep <|> void newline
+     in manyTill todoKeyword (lookAhead endOfKeywords)
+
+   doneKeywords :: Monad m => OrgParser m [Text]
+   doneKeywords = try $
+     manyTill (todoKeyword <* optional todoDoneSep) (lookAhead newline)
 
    todoDoneSep :: Monad m => OrgParser m ()
    todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index b30b8949a..3c50f891b 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -116,6 +116,16 @@ tests =
       "#+LANGUAGE: de-DE" =?>
       Pandoc (setMeta "lang" (MetaString "de-DE") nullMeta) mempty
 
+    , testGroup "Todo sequences"
+      [ "not included in document" =:
+        "#+todo: WAITING | FINISHED" =?>
+        Pandoc mempty mempty
+
+      , "can contain multiple pipe characters" =:
+        "#+todo: UNFINISHED | RESEARCH | NOTES | CHART\n" =?>
+        Pandoc mempty mempty
+      ]
+
     , testGroup "LaTeX"
       [ "LATEX_HEADER" =:
         "#+latex_header: \\usepackage{tikz}" =?>
-- 
cgit v1.2.3


From 6efd3460a776620fdb93812daa4f6831e6c332ce Mon Sep 17 00:00:00 2001
From: Gautier DI FOLCO <gautier.difolco@gmail.com>
Date: Sun, 17 Jan 2021 01:15:33 +0100
Subject: Markdown reader: support GitHub wiki's internal links (#2923) (#6458)

Canges overview:

 * Add a `Ext_markdown_github_wikilink` constructor to `Extension` [API change].
 * Add the parser `githubWikiLink` in `Text.Pandoc.Readers.Markdown`
 * Add tests.
---
 src/Text/Pandoc/Extensions.hs       |  3 +++
 src/Text/Pandoc/Readers/Markdown.hs | 25 +++++++++++++++++++++++++
 test/Tests/Readers/Markdown.hs      | 30 ++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 39c2a0489..69257ecc8 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -118,6 +118,7 @@ data Extension =
     | Ext_literate_haskell    -- ^ Enable literate Haskell conventions
     | Ext_markdown_attribute      -- ^ Interpret text inside HTML as markdown iff
                                   --   container has attribute 'markdown'
+    | Ext_wikilinks -- ^ Interpret a markdown wiki link
     | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
     | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
     | Ext_mmd_link_attributes     -- ^ MMD style reference link attributes
@@ -258,6 +259,7 @@ githubMarkdownExtensions = extensionsFromList
   , Ext_emoji
   , Ext_fenced_code_blocks
   , Ext_backtick_code_blocks
+  , Ext_wikilinks
   ]
 
 -- | Extensions to be used with multimarkdown.
@@ -444,6 +446,7 @@ getAllExtensions f = universalExtensions <> getAll f
        , Ext_tex_math_single_backslash
        , Ext_tex_math_double_backslash
        , Ext_markdown_attribute
+       , Ext_wikilinks
        , Ext_mmd_title_block
        , Ext_abbreviations
        , Ext_autolink_bare_uris
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5c3a21bb7..8fd0b68e2 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
+import Data.Functor (($>))
 import Data.List (transpose, elemIndex, sortOn)
 import qualified Data.Map as M
 import Data.Maybe
@@ -1485,6 +1486,7 @@ inline = choice [ whitespace
                 , note
                 , cite
                 , bracketedSpan
+                , githubWikiLink
                 , link
                 , image
                 , math
@@ -1780,6 +1782,29 @@ source = do
 linkTitle :: PandocMonad m => MarkdownParser m Text
 linkTitle = quotedTitle '"' <|> quotedTitle '\''
 
+-- Github wiki style link, with optional title
+-- syntax documented under https://help.github.com/en/github/building-a-strong-community/editing-wiki-content
+githubWikiLink :: PandocMonad m => MarkdownParser m (F Inlines)
+githubWikiLink = try $ guardEnabled Ext_wikilinks >> wikilink
+  where
+    wikilink = try $ do
+      string "[["
+      firstPart <- fmap mconcat . sequence <$> wikiText
+      (char '|' *> complexWikilink firstPart)
+        <|> (string "]]" $> (B.link
+                               <$> (stringify <$> firstPart)
+                               <*> return "wikilink"
+                               <*> firstPart))
+
+    complexWikilink firstPart = do
+      url <- fmap stringify . sequence <$> wikiText
+      string "]]"
+      return $ B.link <$> url
+                      <*> return "wikilink"
+                      <*> firstPart
+
+    wikiText = many (whitespace <|> bareURL <|> str <|> endline <|> escapedChar)
+
 link :: PandocMonad m => MarkdownParser m (F Inlines)
 link = try $ do
   st <- getState
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 18f909583..a2abcb143 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -307,6 +307,36 @@ tests = [ testGroup "inline code"
             "[https://example.org(](url)" =?>
             para (link "url" "" (text "https://example.org("))
           ]
+        , testGroup "Github wiki links"
+          [ test markdownGH "autolink" $
+            "[[https://example.org]]" =?>
+            para (link "https://example.org" "wikilink" (text "https://example.org"))
+          , test markdownGH "link with title" $
+            "[[title|https://example.org]]" =?>
+            para (link "https://example.org" "wikilink" (text "title"))
+          , test markdownGH "bad link with title" $
+            "[[title|random string]]" =?>
+            para (link "random-string" "wikilink" (text "title"))
+          , test markdownGH "autolink not being a link" $
+            "[[Name of page]]" =?>
+            para (link "Name-of-page" "wikilink" (text "Name of page"))
+          , test markdownGH "autolink not being a link with a square bracket" $
+            "[[Name of ]page]]" =?>
+            para (link "Name-of-]page" "wikilink" (text "Name of ]page"))
+          , test markdownGH "formatting (strong and emphasis) should not be interpreted" $
+             "[[***a**b **c**d*|https://example.org]]" =?>
+             para (text "[[" <> emph (strong (str "a") <> str "b" <> space
+                   <> strong (str "c") <> str "d") <> text "|https://example.org]]")
+          , test markdownGH "inlined code should not make a link" $
+            "[[ti`|`le|https://example.org]]" =?>
+            para (text "[[ti" <> code "|" <> text "le|https://example.org]]")
+          , test markdownGH "link with title and a cut should take the middle part as link" $
+            "[[tit|le|https://example.org]]" =?>
+            para (link "le" "wikilink" (text "tit"))
+          , test markdownGH "link with inline start should be a link" $
+            "[[t`i*t_le|https://example.org]]" =?>
+            para (link "https://example.org" "wikilink" (text "t`i*t_le"))
+          ]
         , testGroup "Headers"
           [ "blank line before header" =:
             "\n# Header\n"
-- 
cgit v1.2.3


From c841bcf3b01548b2f9b462b39d8edda4c10be534 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 16 Jan 2021 16:22:04 -0800
Subject: Revert "Markdown reader: support GitHub wiki's internal links (#2923)
 (#6458)"

This reverts commit 6efd3460a776620fdb93812daa4f6831e6c332ce.

Since this extension is designed to be used with
GitHub markdown (gfm), we need to implement the parser
as a commonmark extension (commonmark-extensions),
rather than in pandoc's markdown reader.  When that is
done, we can add it here.
---
 src/Text/Pandoc/Extensions.hs       |  3 ---
 src/Text/Pandoc/Readers/Markdown.hs | 25 -------------------------
 test/Tests/Readers/Markdown.hs      | 30 ------------------------------
 3 files changed, 58 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 69257ecc8..39c2a0489 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -118,7 +118,6 @@ data Extension =
     | Ext_literate_haskell    -- ^ Enable literate Haskell conventions
     | Ext_markdown_attribute      -- ^ Interpret text inside HTML as markdown iff
                                   --   container has attribute 'markdown'
-    | Ext_wikilinks -- ^ Interpret a markdown wiki link
     | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
     | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
     | Ext_mmd_link_attributes     -- ^ MMD style reference link attributes
@@ -259,7 +258,6 @@ githubMarkdownExtensions = extensionsFromList
   , Ext_emoji
   , Ext_fenced_code_blocks
   , Ext_backtick_code_blocks
-  , Ext_wikilinks
   ]
 
 -- | Extensions to be used with multimarkdown.
@@ -446,7 +444,6 @@ getAllExtensions f = universalExtensions <> getAll f
        , Ext_tex_math_single_backslash
        , Ext_tex_math_double_backslash
        , Ext_markdown_attribute
-       , Ext_wikilinks
        , Ext_mmd_title_block
        , Ext_abbreviations
        , Ext_autolink_bare_uris
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8fd0b68e2..5c3a21bb7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -21,7 +21,6 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
-import Data.Functor (($>))
 import Data.List (transpose, elemIndex, sortOn)
 import qualified Data.Map as M
 import Data.Maybe
@@ -1486,7 +1485,6 @@ inline = choice [ whitespace
                 , note
                 , cite
                 , bracketedSpan
-                , githubWikiLink
                 , link
                 , image
                 , math
@@ -1782,29 +1780,6 @@ source = do
 linkTitle :: PandocMonad m => MarkdownParser m Text
 linkTitle = quotedTitle '"' <|> quotedTitle '\''
 
--- Github wiki style link, with optional title
--- syntax documented under https://help.github.com/en/github/building-a-strong-community/editing-wiki-content
-githubWikiLink :: PandocMonad m => MarkdownParser m (F Inlines)
-githubWikiLink = try $ guardEnabled Ext_wikilinks >> wikilink
-  where
-    wikilink = try $ do
-      string "[["
-      firstPart <- fmap mconcat . sequence <$> wikiText
-      (char '|' *> complexWikilink firstPart)
-        <|> (string "]]" $> (B.link
-                               <$> (stringify <$> firstPart)
-                               <*> return "wikilink"
-                               <*> firstPart))
-
-    complexWikilink firstPart = do
-      url <- fmap stringify . sequence <$> wikiText
-      string "]]"
-      return $ B.link <$> url
-                      <*> return "wikilink"
-                      <*> firstPart
-
-    wikiText = many (whitespace <|> bareURL <|> str <|> endline <|> escapedChar)
-
 link :: PandocMonad m => MarkdownParser m (F Inlines)
 link = try $ do
   st <- getState
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index a2abcb143..18f909583 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -307,36 +307,6 @@ tests = [ testGroup "inline code"
             "[https://example.org(](url)" =?>
             para (link "url" "" (text "https://example.org("))
           ]
-        , testGroup "Github wiki links"
-          [ test markdownGH "autolink" $
-            "[[https://example.org]]" =?>
-            para (link "https://example.org" "wikilink" (text "https://example.org"))
-          , test markdownGH "link with title" $
-            "[[title|https://example.org]]" =?>
-            para (link "https://example.org" "wikilink" (text "title"))
-          , test markdownGH "bad link with title" $
-            "[[title|random string]]" =?>
-            para (link "random-string" "wikilink" (text "title"))
-          , test markdownGH "autolink not being a link" $
-            "[[Name of page]]" =?>
-            para (link "Name-of-page" "wikilink" (text "Name of page"))
-          , test markdownGH "autolink not being a link with a square bracket" $
-            "[[Name of ]page]]" =?>
-            para (link "Name-of-]page" "wikilink" (text "Name of ]page"))
-          , test markdownGH "formatting (strong and emphasis) should not be interpreted" $
-             "[[***a**b **c**d*|https://example.org]]" =?>
-             para (text "[[" <> emph (strong (str "a") <> str "b" <> space
-                   <> strong (str "c") <> str "d") <> text "|https://example.org]]")
-          , test markdownGH "inlined code should not make a link" $
-            "[[ti`|`le|https://example.org]]" =?>
-            para (text "[[ti" <> code "|" <> text "le|https://example.org]]")
-          , test markdownGH "link with title and a cut should take the middle part as link" $
-            "[[tit|le|https://example.org]]" =?>
-            para (link "le" "wikilink" (text "tit"))
-          , test markdownGH "link with inline start should be a link" $
-            "[[t`i*t_le|https://example.org]]" =?>
-            para (link "https://example.org" "wikilink" (text "t`i*t_le"))
-          ]
         , testGroup "Headers"
           [ "blank line before header" =:
             "\n# Header\n"
-- 
cgit v1.2.3


From 98c2a52b4ee6c833c0a2f2652386cec024e377eb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 26 Jan 2021 22:45:57 -0800
Subject: Clean up BibTeX parsing.

Previously there was a messy code path that gave strange
results in some cases, not passing through raw tex but
trying to extract a string content.  This was an artefact
of trying to handle some special bibtex-specific commands
in the BibTeX reader. Now we just handle these in the
LaTeX reader and simplify parsing in the BibTeX reader.
This does mean that more raw tex will be passed through
(and currently this is not sensitive to the `raw_tex`
extension; this should be fixed).

Closes #7049.
---
 src/Text/Pandoc/Citeproc/BibTeX.hs | 33 +--------------------------------
 src/Text/Pandoc/Readers/LaTeX.hs   | 18 ++++++++++++++++++
 test/command/biblatex-cotton.md    |  6 +++---
 test/command/biblatex-murray.md    |  5 +++--
 4 files changed, 25 insertions(+), 37 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 10730a1e9..5b9068378 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -750,41 +750,10 @@ blocksToInlines bs =
        _          -> B.fromList $ Walk.query (:[]) bs
 
 adjustSpans :: Lang -> Inline -> Inline
-adjustSpans lang (RawInline (Format "latex") s)
-  | s == "\\hyphen" || s == "\\hyphen " = Str "-"
-  | otherwise = parseRawLaTeX lang s
+adjustSpans lang (Span ("",[],[("bibstring",s)]) _) = Str $ resolveKey' lang s
 adjustSpans _ SoftBreak = Space
 adjustSpans _ x = x
 
-parseRawLaTeX :: Lang -> Text -> Inline
-parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
-  case parseLaTeX lang contents of
-          Right [Para ys]  -> f command ys
-          Right [Plain ys] -> f command ys
-          Right []         -> f command []
-          _                -> RawInline (Format "latex") t
-   where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
-         command  = T.strip command'
-         contents = T.drop 1 $ T.dropEnd 1 contents'
-         f "mkbibquote"    ils = Span nullAttr [Quoted DoubleQuote ils]
-         f "mkbibemph"     ils = Span nullAttr [Emph ils]
-         f "mkbibitalic"   ils = Span nullAttr [Emph ils]
-         f "mkbibbold"     ils = Span nullAttr [Strong ils]
-         f "mkbibparens"   ils = Span nullAttr $
-                                  [Str "("] ++ ils ++ [Str ")"]
-         f "mkbibbrackets" ils = Span nullAttr $
-                                  [Str "["] ++ ils ++ [Str "]"]
-         -- ... both should be nestable & should work in year fields
-         f "autocap"    ils    = Span nullAttr ils
-           -- TODO: should work in year fields
-         f "textnormal" ils    = Span ("",["nodecor"],[]) ils
-         f "bibstring" [Str s] = Str $ resolveKey' lang s
-         f "adddot"    []      = Str "."
-         f "adddotspace" []    = Span nullAttr [Str ".", Space]
-         f "addabbrvspace" []  = Space
-         f _            ils    = Span nullAttr ils
-parseRawLaTeX _ t = RawInline (Format "latex") t
-
 latex' :: Text -> Bib [Block]
 latex' t = do
   lang <- gets localeLang
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f49323996..91c71c000 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -940,6 +940,24 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("uline", underline <$> tok)
   -- plain tex stuff that should just be passed through as raw tex
   , ("ifdim", ifdim)
+  -- bibtex
+  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+  , ("mkbibparens",
+       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+  , ("mkbibbrackets",
+       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+  , ("autocap", spanWith nullAttr <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+  , ("bibstring",
+       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+         <$> braced)
+  , ("adddot", pure (str "."))
+  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+  , ("addabbrvspace", pure space)
+  , ("hyphen", pure (str "-"))
   ]
 
 accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
diff --git a/test/command/biblatex-cotton.md b/test/command/biblatex-cotton.md
index 51c3ee2f4..7a7a2e296 100644
--- a/test/command/biblatex-cotton.md
+++ b/test/command/biblatex-cotton.md
@@ -43,9 +43,9 @@ Cotton, F. A., Wilkinson, G., Murillio, C. A., & Bochmann, M. (1999).
 ---
 nocite: "[@*]"
 references:
-- annote: A book entry with author authors and an edition field. By
-    default, long author and editor lists are automatically truncated.
-    This is configurable
+- annote: A book entry with `\arabic{author}`{=latex} authors and an
+    edition field. By default, long author and editor lists are
+    automatically truncated. This is configurable
   author:
   - family: Cotton
     given: Frank Albert
diff --git a/test/command/biblatex-murray.md b/test/command/biblatex-murray.md
index 78081c6fe..8242e4822 100644
--- a/test/command/biblatex-murray.md
+++ b/test/command/biblatex-murray.md
@@ -55,8 +55,9 @@ properties as a function of core size. *Langmuir*, *14*(1), 17–30.
 ---
 nocite: "[@*]"
 references:
-- annote: An article entry with author authors. By default, long author
-    and editor lists are automatically truncated. This is configurable
+- annote: An article entry with `\arabic{author}`{=latex} authors. By
+    default, long author and editor lists are automatically truncated.
+    This is configurable
   author:
   - family: Hostetler
     given: Michael J.
-- 
cgit v1.2.3


From d1875b69ec136c19d9b2336ff57e360ff7b7bef6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 31 Jan 2021 12:05:46 -0800
Subject: RST reader: fix handling of header in CSV tables.

The interpretation of this line is not affected
by the delim option. Closes #7064.
---
 src/Text/Pandoc/Readers/RST.hs |  9 +++++----
 test/command/7064.md           | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 4 deletions(-)
 create mode 100644 test/command/7064.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 707af905f..0f32d993c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -877,10 +877,11 @@ csvTableDirective top fields rawcsv = do
                     (bs, _) <- fetchItem u
                     return $ UTF8.toText bs
                   Nothing -> return rawcsv
-  let res = parseCSV opts (case explicitHeader of
-                              Just h  -> h <> "\n" <> rawcsv'
-                              Nothing -> rawcsv')
-  case res of
+  let header' = case explicitHeader of
+                  Just h  -> parseCSV defaultCSVOptions h
+                  Nothing -> Right []
+  let res = parseCSV opts rawcsv'
+  case (<>) <$> header' <*> res of
        Left e  ->
          throwError $ PandocParsecError "csv table" e
        Right rawrows -> do
diff --git a/test/command/7064.md b/test/command/7064.md
new file mode 100644
index 000000000..58b72e363
--- /dev/null
+++ b/test/command/7064.md
@@ -0,0 +1,32 @@
+```
+% pandoc -f rst -t html
+.. csv-table:: Changes
+    :header: "Version", "Date", "Description"
+    :widths: 15, 15, 70
+    :delim: $
+
+    0.1.0 $ 18/02/2013 $ Initial Release
+^D
+<table>
+<caption>Changes</caption>
+<colgroup>
+<col style="width: 15%" />
+<col style="width: 15%" />
+<col style="width: 70%" />
+</colgroup>
+<thead>
+<tr class="header">
+<th>Version</th>
+<th>Date</th>
+<th>Description</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>0.1.0</td>
+<td>18/02/2013</td>
+<td>Initial Release</td>
+</tr>
+</tbody>
+</table>
+```
-- 
cgit v1.2.3


From 8e9131db4edfda6deafdf94939b907e4dce3cbaa Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 6 Feb 2021 21:52:12 -0800
Subject: Markdown reader: improved handling of mmd link attributes in
 references.

Previously they only worked for links that had titles.  Closes #7080.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 ++
 test/command/7080.md                | 8 ++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 test/command/7080.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5c3a21bb7..e46553dd8 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -334,6 +334,8 @@ referenceKey = try $ do
                     skipMany spaceChar
                     notFollowedBy' referenceTitle
                     notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
+                    notFollowedBy' $ guardEnabled Ext_mmd_link_attributes >>
+                                     try (spnl <* keyValAttr)
                     notFollowedBy' (() <$ reference)
                     many1Char $ notFollowedBy space >> litChar
   let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
diff --git a/test/command/7080.md b/test/command/7080.md
new file mode 100644
index 000000000..0727c1083
--- /dev/null
+++ b/test/command/7080.md
@@ -0,0 +1,8 @@
+```
+% pandoc -f markdown_mmd -t native
+![][image]
+
+[image]: image.png width=100px height=150px
+^D
+[Para [Image ("",[],[("width","100px"),("height","150px")]) [] ("image.png","")]]
+```
-- 
cgit v1.2.3


From 69b7401e31216ae52164950970cf88b778d8d198 Mon Sep 17 00:00:00 2001
From: Nils Carlson <nils@nilscarlson.se>
Date: Mon, 8 Feb 2021 17:36:58 +0000
Subject: DocBook reader: Support informalfigure (#7079)

Add support for informalfigure.
---
 src/Text/Pandoc/Readers/DocBook.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index d3b2dd4d3..ada3e98ec 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -206,7 +206,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] info - A wrapper for information about a component or other block. (DocBook v5)
 [x] informalequation - A displayed mathematical equation without a title
 [x] informalexample - A displayed example without a title
-[ ] informalfigure - A untitled figure
+[x] informalfigure - An untitled figure
 [ ] informaltable - A table without a title
 [ ] initializer - The initializer for a FieldSynopsis
 [x] inlineequation - A mathematical equation or expression occurring inline
@@ -669,6 +669,7 @@ blockTags =
   , "index"
   , "info"
   , "informalexample"
+  , "informalfigure"
   , "informaltable"
   , "itemizedlist"
   , "linegroup"
@@ -855,6 +856,7 @@ parseBlock (Elem e) =
         "variablelist" -> definitionList <$> deflistitems
         "procedure" -> bulletList <$> steps
         "figure" -> getFigure e
+        "informalfigure" -> getFigure e
         "mediaobject" -> para <$> getMediaobject e
         "caption" -> skip
         "info" -> addMetadataFromElement e
-- 
cgit v1.2.3


From 5cd1c1001fbfc768fdb146f14ae056a9a7ba94eb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 8 Feb 2021 09:15:43 -0800
Subject: ODT reader: give more information if zip can't be unpacked.

---
 src/Text/Pandoc/Readers/Odt.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 7b8bfd4b5..9b66b60ec 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -23,6 +23,8 @@ import System.FilePath
 
 import Control.Monad.Except (throwError)
 
+import qualified Data.Text as T
+
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
@@ -60,7 +62,8 @@ readOdt' _ bytes = bytesToOdt bytes-- of
 bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
 bytesToOdt bytes = case toArchiveOrFail bytes of
   Right archive -> archiveToOdt archive
-  Left _        -> Left $ PandocParseError "Couldn't parse odt file."
+  Left err      -> Left $ PandocParseError
+                        $ "Could not unzip ODT: " <> T.pack err
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-- 
cgit v1.2.3


From f70795dc5e431c0132acc8bb2d20bb5dec942de7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 8 Feb 2021 09:39:29 -0800
Subject: ODT reader: finer-grained errors on parse failure.

See #7091.
---
 src/Text/Pandoc/Readers/Odt.hs | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 9b66b60ec..9943d3147 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE PatternGuards     #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt
@@ -67,29 +66,27 @@ bytesToOdt bytes = case toArchiveOrFail bytes of
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive
-  | Just contentEntry      <- findEntryByPath "content.xml" archive
-  , Just stylesEntry       <- findEntryByPath "styles.xml"  archive
-  , Just contentElem       <- entryToXmlElem contentEntry
-  , Just stylesElem        <- entryToXmlElem stylesEntry
-  , Right styles           <- chooseMax (readStylesAt stylesElem )
-                                        (readStylesAt contentElem)
-  , media                  <- filteredFilesFromArchive archive filePathIsOdtMedia
-  , startState             <- readerState styles media
-  , Right pandocWithMedia  <- runConverter' read_body
-                                            startState
-                                            contentElem
-
-  = Right pandocWithMedia
-
-  | otherwise
-    -- Not very detailed, but I don't think more information would be helpful
-  = Left $ PandocParseError "Couldn't parse odt file."
-    where
-      filePathIsOdtMedia :: FilePath -> Bool
+archiveToOdt archive = either (Left. PandocParseError) Right $ do
+  let onFailure msg Nothing = Left msg
+      onFailure _   (Just x) = Right x
+  contentEntry <- onFailure "Could not find content.xml"
+                   (findEntryByPath "content.xml" archive)
+  stylesEntry <- onFailure "Could not find styles.xml"
+                   (findEntryByPath "styles.xml" archive)
+  contentElem <- onFailure "Could not find content element"
+                   (entryToXmlElem contentEntry)
+  stylesElem <- onFailure "Could not find styles element"
+                   (entryToXmlElem stylesEntry)
+  styles <- either (\_ -> Left "Could not read styles") Right
+               (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
+  let filePathIsOdtMedia :: FilePath -> Bool
       filePathIsOdtMedia fp =
         let (dir, name) = splitFileName fp
         in  (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
+  let media = filteredFilesFromArchive archive filePathIsOdtMedia
+  let startState = readerState styles media
+  either (\_ -> Left "Could not convert opendocument") Right
+    (runConverter' read_body startState contentElem)
 
 
 --
-- 
cgit v1.2.3


From 8ca191604dcd13af27c11d2da225da646ebce6fc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 8 Feb 2021 23:35:19 -0800
Subject: Add new unexported module T.P.XMLParser.

This exports functions that uses xml-conduit's parser to
produce an xml-light Element or [Content].  This allows
existing pandoc code to use a better parser without
much modification.

The new parser is used in all places where xml-light's
parser was previously used.  Benchmarks show a significant
performance improvement in parsing XML-based formats
(especially ODT and FB2).

Note that the xml-light types use String, so the
conversion from xml-conduit types involves a lot
of extra allocation.  It would be desirable to
avoid that in the future by gradually switching
to using xml-conduit directly. This can be done
module by module.

The new parser also reports errors, which we report
when possible.

A new constructor PandocXMLError has been added to
PandocError in T.P.Error [API change].

Closes #7091, which was the main stimulus.

These changes revealed the need for some changes
in the tests.  The docbook-reader.docbook test
lacked definitions for the entities it used; these
have been added. And the docx golden tests have been
updated, because the new parser does not preserve
the order of attributes.

Add entity defs to docbook-reader.docbook.

Update golden tests for docx.
---
 MANUAL.txt                                         |   1 +
 pandoc.cabal                                       |   2 +
 src/Text/Pandoc/Error.hs                           |   3 +
 src/Text/Pandoc/ImageSize.hs                       |   5 +-
 src/Text/Pandoc/Readers/DocBook.hs                 |  52 ++++++++++------
 src/Text/Pandoc/Readers/Docx/Parse.hs              |  21 ++++---
 src/Text/Pandoc/Readers/Docx/Parse/Styles.hs       |  28 +++++----
 src/Text/Pandoc/Readers/EPUB.hs                    |  17 ++++--
 src/Text/Pandoc/Readers/FB2.hs                     |  10 ++--
 src/Text/Pandoc/Readers/JATS.hs                    |   9 ++-
 src/Text/Pandoc/Readers/OPML.hs                    |  10 +++-
 src/Text/Pandoc/Readers/Odt.hs                     |  24 ++++----
 src/Text/Pandoc/Writers/EPUB.hs                    |  13 +++-
 src/Text/Pandoc/Writers/FB2.hs                     |  11 +++-
 src/Text/Pandoc/Writers/ODT.hs                     |  29 +++++----
 src/Text/Pandoc/Writers/OOXML.hs                   |   9 +--
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       |   4 +-
 src/Text/Pandoc/XMLParser.hs                       |  66 +++++++++++++++++++++
 test/Tests/Readers/JATS.hs                         |   1 +
 test/command/5321.md                               |   4 +-
 test/docbook-reader.docbook                        |   7 ++-
 test/docx/golden/block_quotes.docx                 | Bin 10092 -> 10098 bytes
 test/docx/golden/codeblock.docx                    | Bin 9944 -> 9950 bytes
 test/docx/golden/comments.docx                     | Bin 10279 -> 10285 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 10042 -> 10048 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10666 -> 10673 bytes
 test/docx/golden/custom_style_reference.docx       | Bin 12434 -> 12434 bytes
 test/docx/golden/definition_list.docx              | Bin 9941 -> 9947 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9947 -> 9953 bytes
 test/docx/golden/document-properties.docx          | Bin 10423 -> 10429 bytes
 test/docx/golden/headers.docx                      | Bin 10080 -> 10086 bytes
 test/docx/golden/image.docx                        | Bin 26758 -> 26764 bytes
 test/docx/golden/inline_code.docx                  | Bin 9880 -> 9886 bytes
 test/docx/golden/inline_formatting.docx            | Bin 10060 -> 10066 bytes
 test/docx/golden/inline_images.docx                | Bin 26816 -> 26822 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10101 -> 10107 bytes
 test/docx/golden/links.docx                        | Bin 10276 -> 10282 bytes
 test/docx/golden/lists.docx                        | Bin 10352 -> 10358 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10143 -> 10149 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10232 -> 10238 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10144 -> 10150 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10239 -> 10245 bytes
 test/docx/golden/notes.docx                        | Bin 10046 -> 10052 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9980 -> 9986 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10115 -> 10121 bytes
 test/docx/golden/table_one_row.docx                | Bin 9932 -> 9938 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10249 -> 10255 bytes
 test/docx/golden/tables.docx                       | Bin 10266 -> 10272 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9924 -> 9930 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9907 -> 9913 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9941 -> 9947 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 10053 -> 10059 bytes
 test/docx/golden/unicode.docx                      | Bin 9865 -> 9871 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9913 -> 9919 bytes
 test/jats-reader.native                            |   2 +-
 test/jats-reader.xml                               |   1 +
 test/pptx/code-custom.pptx                         | Bin 28230 -> 28221 bytes
 test/pptx/code-custom_templated.pptx               | Bin 395524 -> 395516 bytes
 test/pptx/code.pptx                                | Bin 28229 -> 28220 bytes
 test/pptx/code_templated.pptx                      | Bin 395522 -> 395514 bytes
 test/pptx/document-properties-short-desc.pptx      | Bin 27012 -> 27004 bytes
 .../document-properties-short-desc_templated.pptx  | Bin 394298 -> 394288 bytes
 test/pptx/document-properties.pptx                 | Bin 27417 -> 27408 bytes
 test/pptx/document-properties_templated.pptx       | Bin 394701 -> 394691 bytes
 test/pptx/endnotes.pptx                            | Bin 26969 -> 26962 bytes
 test/pptx/endnotes_templated.pptx                  | Bin 394262 -> 394253 bytes
 test/pptx/endnotes_toc.pptx                        | Bin 27892 -> 27789 bytes
 test/pptx/endnotes_toc_templated.pptx              | Bin 395186 -> 395083 bytes
 test/pptx/images.pptx                              | Bin 44626 -> 44619 bytes
 test/pptx/images_templated.pptx                    | Bin 411916 -> 411909 bytes
 test/pptx/inline_formatting.pptx                   | Bin 26156 -> 26148 bytes
 test/pptx/inline_formatting_templated.pptx         | Bin 393447 -> 393438 bytes
 test/pptx/lists.pptx                               | Bin 27056 -> 27049 bytes
 test/pptx/lists_templated.pptx                     | Bin 394349 -> 394340 bytes
 test/pptx/raw_ooxml.pptx                           | Bin 26948 -> 26940 bytes
 test/pptx/raw_ooxml_templated.pptx                 | Bin 394240 -> 394231 bytes
 test/pptx/remove_empty_slides.pptx                 | Bin 44073 -> 44065 bytes
 test/pptx/remove_empty_slides_templated.pptx       | Bin 411359 -> 411352 bytes
 test/pptx/slide_breaks.pptx                        | Bin 28582 -> 28575 bytes
 test/pptx/slide_breaks_slide_level_1.pptx          | Bin 27751 -> 27744 bytes
 .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395045 -> 395038 bytes
 test/pptx/slide_breaks_templated.pptx              | Bin 395875 -> 395868 bytes
 test/pptx/slide_breaks_toc.pptx                    | Bin 29539 -> 29532 bytes
 test/pptx/slide_breaks_toc_templated.pptx          | Bin 396833 -> 396826 bytes
 test/pptx/speaker_notes.pptx                       | Bin 35444 -> 35436 bytes
 test/pptx/speaker_notes_after_metadata.pptx        | Bin 31683 -> 31675 bytes
 .../speaker_notes_after_metadata_templated.pptx    | Bin 398964 -> 398955 bytes
 test/pptx/speaker_notes_afterheader.pptx           | Bin 30700 -> 30691 bytes
 test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397988 -> 397979 bytes
 test/pptx/speaker_notes_afterseps.pptx             | Bin 51612 -> 51604 bytes
 test/pptx/speaker_notes_afterseps_templated.pptx   | Bin 418903 -> 418896 bytes
 test/pptx/speaker_notes_templated.pptx             | Bin 402736 -> 402728 bytes
 test/pptx/start_numbering_at.pptx                  | Bin 27031 -> 27023 bytes
 test/pptx/start_numbering_at_templated.pptx        | Bin 394323 -> 394314 bytes
 test/pptx/tables.pptx                              | Bin 27573 -> 27566 bytes
 test/pptx/tables_templated.pptx                    | Bin 394868 -> 394859 bytes
 test/pptx/two_column.pptx                          | Bin 26075 -> 26065 bytes
 test/pptx/two_column_templated.pptx                | Bin 393366 -> 393355 bytes
 98 files changed, 238 insertions(+), 91 deletions(-)
 create mode 100644 src/Text/Pandoc/XMLParser.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index 5b90d039a..dc3b4ca77 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -1464,6 +1464,7 @@ Nonzero exit codes have the following meanings:
    24 PandocCiteprocError
    31 PandocEpubSubdirectoryError
    43 PandocPDFError
+   44 PandocXMLError
    47 PandocPDFProgramNotFoundError
    61 PandocHttpError
    62 PandocShouldNeverHappenError
diff --git a/pandoc.cabal b/pandoc.cabal
index 72e7c2da5..e56456c68 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -493,6 +493,7 @@ library
                  unicode-transforms    >= 0.3      && < 0.4,
                  unordered-containers  >= 0.2      && < 0.3,
                  xml                   >= 1.3.12   && < 1.4,
+                 xml-conduit           >= 1.7      && < 1.10,
                  zip-archive           >= 0.2.3.4  && < 0.5,
                  zlib                  >= 0.5      && < 0.7
   if os(windows) && arch(i386)
@@ -686,6 +687,7 @@ library
                    Text.Pandoc.Lua.PandocLua,
                    Text.Pandoc.Lua.Util,
                    Text.Pandoc.Lua.Walk,
+                   Text.Pandoc.XMLParser,
                    Text.Pandoc.CSS,
                    Text.Pandoc.CSV,
                    Text.Pandoc.RoffChar,
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 204cf15ca..831405f42 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -48,6 +48,7 @@ data PandocError = PandocIOError Text IOError
                  | PandocFailOnWarningError
                  | PandocPDFProgramNotFoundError Text
                  | PandocPDFError Text
+                 | PandocXMLError Text Text
                  | PandocFilterError Text Text
                  | PandocLuaError Text
                  | PandocCouldNotFindDataFileError Text
@@ -103,6 +104,8 @@ handleError (Left e) =
     PandocPDFProgramNotFoundError pdfprog -> err 47 $
         pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog
     PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg
+    PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <>
+        (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg
     PandocFilterError filtername msg -> err 83 $ "Error running filter " <>
         filtername <> ":\n" <> msg
     PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index e19958f6a..e0a1af8e8 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -45,7 +45,9 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Text.XML.Light as Xml
+import Text.Pandoc.XMLParser (parseXMLElement)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Control.Applicative
 import qualified Data.Attoparsec.ByteString.Char8 as A
@@ -327,7 +329,8 @@ getSize img =
 
 svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
 svgSize opts img = do
-  doc <- Xml.parseXMLDoc $ UTF8.toString img
+  doc <- either (const mzero) return $ parseXMLElement
+                                     $ TL.fromStrict $ UTF8.toText img
   let viewboxSize = do
         vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
         [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ada3e98ec..ad0108843 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
 import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper)
+import Data.Char (isSpace, toUpper, isLetter)
 import Data.Default
 import Data.Either (rights)
 import Data.Foldable (asum)
@@ -21,7 +21,10 @@ import Data.List (intersperse,elemIndex)
 import Data.Maybe (fromMaybe,mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Control.Monad.Except (throwError)
 import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
@@ -29,6 +32,7 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
 
 {-
 
@@ -537,22 +541,25 @@ instance Default DBState where
 
 readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readDocBook _ inp = do
-  let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
+  tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $
+            parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
--- We treat <?asciidoc-br?> specially (issue #1236), converting it
--- to <br/>, since xml-light doesn't parse the instruction correctly.
--- Other xml instructions are simply removed from the input stream.
+-- We treat certain processing instructions by converting them to tags
+-- beginning "pi-".
 handleInstructions :: Text -> Text
-handleInstructions = T.pack . handleInstructions' . T.unpack
-
-handleInstructions' :: String -> String
-handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
-handleInstructions' xs = case break (=='<') xs of
-                             (ys, [])     -> ys
-                             ([], '<':zs) -> '<' : handleInstructions' zs
-                             (ys, zs)     -> ys ++ handleInstructions' zs
+handleInstructions t =
+  let (x,y) = T.breakOn "<?" t
+   in if T.null y
+         then x
+         else
+           let (w,z) = T.breakOn "?>" y
+            in (if T.takeWhile (\c -> isLetter c || c == '-')
+                    (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"]
+                   then x <> "<pi-" <> T.drop 2 w <> "/>"
+                   else x <> w <> T.take 2 z) <>
+               handleInstructions (T.drop 2 z)
 
 getFigure :: PandocMonad m => Element -> DB m Blocks
 getFigure e = do
@@ -892,7 +899,11 @@ parseBlock (Elem e) =
         "subtitle" -> return mempty  -- handled in parent element
         _       -> skip >> getBlocks e
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = T.pack $ qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          codeBlockWithLang = do
@@ -964,7 +975,7 @@ parseBlock (Elem e) =
                                      cs -> map toAlignment cs
                       let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9')
                                                                    || x == '.') s)
-                      let textWidth = case filterChild (named "?dbfo") e of
+                      let textWidth = case filterChild (named "pi-dbfo") e of
                                         Just d  -> case attrValue "table-width" d of
                                                      "" -> 1.0
                                                      w  -> fromMaybe 100.0 (parseWidth w) / 100.0
@@ -1165,12 +1176,15 @@ parseInline (Elem e) =
         "title" -> return mempty
         "affiliation" -> skip
         -- Note: this isn't a real docbook tag; it's what we convert
-        -- <?asciidor-br?> to in handleInstructions, above.  A kludge to
-        -- work around xml-light's inability to parse an instruction.
-        "br" -> return linebreak
+        -- <?asciidor-br?> to in handleInstructions, above.
+        "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = T.pack $ qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          innerInlines f = extractSpaces f . mconcat <$>
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index fdcffcc3f..056dab6c2 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -74,6 +74,7 @@ import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
 import Text.XML.Light
 import qualified Text.XML.Light.Cursor as XMLC
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            , envComments      :: Comments
@@ -343,10 +344,16 @@ archiveToDocxWithWarnings archive = do
     Right doc -> Right (Docx doc, stateWarnings st)
     Left e    -> Left e
 
+parseXMLFromEntry :: Entry -> Maybe Element
+parseXMLFromEntry entry =
+  case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
+    Left _   -> Nothing
+    Right el -> Just el
+
 getDocumentXmlPath :: Archive -> Maybe FilePath
 getDocumentXmlPath zf = do
   entry <- findEntryByPath "_rels/.rels" zf
-  relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  relsElem <- parseXMLFromEntry entry
   let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem
   rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e ==
                        Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
@@ -362,7 +369,7 @@ archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
   docPath <- asks envDocXmlPath
   entry <- maybeToD $ findEntryByPath docPath zf
-  docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  docElem <- maybeToD $ parseXMLFromEntry entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
   let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
@@ -401,9 +408,9 @@ constructBogusParStyleData stName = ParStyle
 archiveToNotes :: Archive -> Notes
 archiveToNotes zf =
   let fnElem = findEntryByPath "word/footnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       enElem = findEntryByPath "word/endnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       fn_namespaces = maybe [] elemToNameSpaces fnElem
       en_namespaces = maybe [] elemToNameSpaces enElem
       ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
@@ -415,7 +422,7 @@ archiveToNotes zf =
 archiveToComments :: Archive -> Comments
 archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
       cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
   in
@@ -445,7 +452,7 @@ filePathToRelationships :: Archive -> FilePath -> FilePath ->  [Relationship]
 filePathToRelationships ar docXmlPath fp
   | Just relType <- filePathToRelType fp docXmlPath
   , Just entry <- findEntryByPath fp ar
-  , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
+  , Just relElems <- parseXMLFromEntry entry =
   mapMaybe (relElemToRelationship relType) $ elChildren relElems
 filePathToRelationships _ _ _ = []
 
@@ -527,7 +534,7 @@ archiveToNumbering' zf =
   case findEntryByPath "word/numbering.xml" zf of
     Nothing -> Just $ Numbering [] [] []
     Just entry -> do
-      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      numberingElem <- parseXMLFromEntry entry
       let namespaces = elemToNameSpaces numberingElem
           numElems = findChildrenByName namespaces "w" "num" numberingElem
           absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index 236167187..edade8654 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -53,6 +53,7 @@ import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 newtype CharStyleId   = CharStyleId T.Text
   deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -135,19 +136,22 @@ defaultRunStyle = RunStyle { isBold = Nothing
                            , rParentStyle = Nothing
                            }
 
-archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
-                    (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles'
+  :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2)
+  => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
 archiveToStyles' conv1 conv2 zf =
-  let stylesElem = findEntryByPath "word/styles.xml" zf >>=
-                   (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-  in
-   case stylesElem of
-     Nothing -> (M.empty, M.empty)
-     Just styElem ->
-       let namespaces = elemToNameSpaces styElem
-       in
-        ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
-          M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+  case findEntryByPath "word/styles.xml" zf of
+    Nothing -> (M.empty, M.empty)
+    Just entry ->
+      case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+        Left _ -> (M.empty, M.empty)
+        Right styElem ->
+          let namespaces = elemToNameSpaces styElem
+          in
+           ( M.fromList $ map (\r -> (conv1 r, r)) $
+               buildBasedOnList namespaces styElem Nothing,
+             M.fromList $ map (\p -> (conv2 p, p)) $
+               buildBasedOnList namespaces styElem Nothing)
 
 isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
 isBasedOnStyle ns element parentStyle
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 5e3326e6d..369c4f0c9 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -17,7 +17,7 @@ module Text.Pandoc.Readers.EPUB
   (readEPUB)
   where
 
-import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
+import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                           toArchiveOrFail)
 import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
@@ -41,9 +41,10 @@ import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
-import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 type Items = M.Map String (FilePath, MimeType)
 
@@ -181,7 +182,7 @@ renameMeta s         = T.pack s
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
   metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
-  docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+  docElem <- parseXMLDocE metaEntry
   let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
   ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
   as <- fmap (map attrToPair . elAttribs)
@@ -190,7 +191,7 @@ getManifest archive = do
   let rootdir = dropFileName manifestFile
   --mime <- lookup "media-type" as
   manifest <- findEntryByPathE manifestFile archive
-  (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+  (rootdir,) <$> parseXMLDocE manifest
 
 -- Fixup
 
@@ -284,8 +285,12 @@ findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
   mkE ("No entry on path: " ++ path) $ findEntryByPath path a
 
-parseXMLDocE :: PandocMonad m => String -> m Element
-parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+parseXMLDocE :: PandocMonad m => Entry -> m Element
+parseXMLDocE entry =
+  either (throwError . PandocXMLError fp) return $ parseXMLElement doc
+ where
+  doc = UTF8.toTextLazy . fromEntry $ entry
+  fp  = T.pack $ eRelativePath entry
 
 findElementE :: PandocMonad m => QName -> Element -> m Element
 findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b0d2f092b..b804eab4f 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -32,6 +32,7 @@ import Data.List (intersperse)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Data.Default
 import Data.Maybe
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -42,6 +43,7 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 type FB2 m = StateT FB2State m
 
@@ -64,10 +66,10 @@ instance HasMeta FB2State where
 
 readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readFB2 _ inp =
-  case parseXMLDoc $ crFilter inp of
-    Nothing -> throwError $ PandocParseError "Not an XML document"
-    Just e ->  do
-      (bs, st) <- runStateT (parseRootElement e) def
+  case parseXMLElement $ TL.fromStrict $ crFilter inp of
+    Left msg -> throwError $ PandocXMLError "" msg
+    Right el ->  do
+      (bs, st) <- runStateT (parseRootElement el) def
       let authors = if null $ fb2Authors st
                     then id
                     else setMeta "author" (map text $ reverse $ fb2Authors st)
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c638da519..dfd343b7a 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -14,6 +14,8 @@ Conversion of JATS XML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(..))
 import Data.Char (isDigit, isSpace, toUpper)
 import Data.Default
 import Data.Generics
@@ -22,6 +24,7 @@ import qualified Data.Map as Map
 import Data.Maybe (maybeToList, fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -29,6 +32,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
 
@@ -51,8 +55,9 @@ instance Default JATSState where
 
 readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readJATS _ inp = do
-  let tree = normalizeTree . parseXML
-               $ T.unpack $ crFilter inp
+  tree <- either (throwError . PandocXMLError "")
+                 (return . normalizeTree) $
+            parseXMLContents (TL.fromStrict $ crFilter inp)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5b8996025..bdadc4dd9 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -19,14 +19,18 @@ import Data.Generics
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.Pandoc.Shared (crFilter, blocksToInlines')
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
+import Control.Monad.Except (throwError)
 
 type OPML m = StateT OPMLState m
 
@@ -49,8 +53,10 @@ instance Default OPMLState where
 readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readOPML opts inp  = do
   (bs, st') <- runStateT
-                 (mapM parseBlock $ normalizeTree $
-                    parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
+                 (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+                     Left msg -> throwError $ PandocXMLError "" msg
+                     Right ns -> mapM parseBlock $ normalizeTree ns)
+                 def{ opmlOptions = opts }
   return $
     setTitle (opmlDocTitle st') $
     setAuthors (opmlDocAuthors st') $
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 9943d3147..85308deb1 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.Odt ( readOdt ) where
 
 import Codec.Archive.Zip
 import qualified Text.XML.Light as XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 import qualified Data.ByteString.Lazy as B
 
@@ -66,18 +67,18 @@ bytesToOdt bytes = case toArchiveOrFail bytes of
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive = either (Left. PandocParseError) Right $ do
-  let onFailure msg Nothing = Left msg
+archiveToOdt archive = do
+  let onFailure msg Nothing = Left $ PandocParseError msg
       onFailure _   (Just x) = Right x
   contentEntry <- onFailure "Could not find content.xml"
                    (findEntryByPath "content.xml" archive)
   stylesEntry <- onFailure "Could not find styles.xml"
                    (findEntryByPath "styles.xml" archive)
-  contentElem <- onFailure "Could not find content element"
-                   (entryToXmlElem contentEntry)
-  stylesElem <- onFailure "Could not find styles element"
-                   (entryToXmlElem stylesEntry)
-  styles <- either (\_ -> Left "Could not read styles") Right
+  contentElem <- entryToXmlElem contentEntry
+  stylesElem <- entryToXmlElem stylesEntry
+  styles <- either
+               (\_ -> Left $ PandocParseError "Could not read styles")
+               Right
                (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
   let filePathIsOdtMedia :: FilePath -> Bool
       filePathIsOdtMedia fp =
@@ -85,10 +86,13 @@ archiveToOdt archive = either (Left. PandocParseError) Right $ do
         in  (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
   let media = filteredFilesFromArchive archive filePathIsOdtMedia
   let startState = readerState styles media
-  either (\_ -> Left "Could not convert opendocument") Right
+  either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right
     (runConverter' read_body startState contentElem)
 
 
 --
-entryToXmlElem :: Entry -> Maybe XML.Element
-entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
+entryToXmlElem :: Entry -> Either PandocError XML.Element
+entryToXmlElem entry =
+  case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+    Right x  -> Right x
+    Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 1f16f6772..e99fa2567 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -55,8 +55,9 @@ import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
 import Text.Printf (printf)
 import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
-                       add_attrs, lookupAttr, node, onlyElems, parseXML,
+                       add_attrs, lookupAttr, node, onlyElems,
                        ppElement, showElement, strContent, unode, unqual)
+import Text.Pandoc.XMLParser (parseXMLContents)
 import Text.Pandoc.XML (escapeStringForXML)
 import Text.DocTemplates (FromContext(lookupContext), Context(..),
                           ToContext(toVal), Val(..))
@@ -160,7 +161,12 @@ mkEntry path content = do
 getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
 getEPUBMetadata opts meta = do
   let md = metadataFromMeta opts meta
-  let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
+  elts <- case writerEpubMetadata opts of
+            Nothing -> return []
+            Just t -> case parseXMLContents (TL.fromStrict t) of
+                          Left msg -> throwError $
+                            PandocXMLError "epub metadata" msg
+                          Right ns -> return (onlyElems ns)
   let md' = foldr addMetadataFromXML md elts
   let addIdentifier m =
        if null (epubIdentifier m)
@@ -836,7 +842,8 @@ pandocToEPUB version opts doc = do
                                             : case subs of
                                                  []    -> []
                                                  (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
-          where titElements = parseXML titRendered
+          where titElements = either (const []) id $
+                                parseXMLContents (TL.fromStrict titRendered)
                 titRendered = case P.runPure
                                (writeHtmlStringForEPUB version
                                  opts{ writerTemplate = Nothing
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 25b1f28d1..9334d6e9a 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -19,7 +19,7 @@ FictionBook is an XML-based e-book format. For more information see:
 module Text.Pandoc.Writers.FB2 (writeFB2)  where
 
 import Control.Monad (zipWithM)
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAscii, isControl, isSpace)
@@ -27,16 +27,18 @@ import Data.Either (lefts, rights)
 import Data.List (intercalate)
 import Data.Text (Text, pack)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Network.HTTP (urlEncode)
 import Text.XML.Light
 import qualified Text.XML.Light as X
 import qualified Text.XML.Light.Cursor as XC
-import qualified Text.XML.Light.Input as XI
+import Text.Pandoc.XMLParser (parseXMLContents)
 
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
 import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
@@ -307,7 +309,10 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
                              map (el "p" . el "code" . T.unpack) . T.lines $ s
 blockToXml (RawBlock f str) =
   if f == Format "fb2"
-    then return $ XI.parseXML str
+    then
+      case parseXMLContents (TL.fromStrict str) of
+         Left msg  -> throwError $ PandocXMLError "" msg
+         Right nds -> return nds
     else return []
 blockToXml (Div _ bs) = cMapM blockToXml bs
 blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 05dfad5eb..a32ff618c 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -13,7 +13,7 @@ Conversion of 'Pandoc' documents to ODT.
 -}
 module Text.Pandoc.Writers.ODT ( writeODT ) where
 import Codec.Archive.Zip
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
 import Data.Generics (everywhere', mkT)
@@ -27,6 +27,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
 import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
@@ -35,10 +36,11 @@ import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
                                    fixDisplayMath)
-import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
+import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 import Text.TeXMath
 import Text.XML.Light
 
@@ -172,17 +174,18 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive
 updateStyleWithLang Nothing arch = return arch
 updateStyleWithLang (Just lang) arch = do
   epochtime <- floor `fmap` lift P.getPOSIXTime
-  return arch{ zEntries = [if eRelativePath e == "styles.xml"
-                              then case parseXMLDoc
-                                      (toStringLazy (fromEntry e)) of
-                                      Nothing -> e
-                                      Just d ->
-                                        toEntry "styles.xml" epochtime
-                                        ( fromStringLazy
-                                        . ppTopElement
-                                        . addLang lang $ d )
-                              else e
-                            | e <- zEntries arch] }
+  entries <- mapM (\e -> if eRelativePath e == "styles.xml"
+                            then case parseXMLElement
+                                    (toTextLazy (fromEntry e)) of
+                                    Left msg -> throwError $
+                                        PandocXMLError "styles.xml" msg
+                                    Right d -> return $
+                                      toEntry "styles.xml" epochtime
+                                      ( fromStringLazy
+                                      . ppTopElement
+                                      . addLang lang $ d )
+                            else return e) (zEntries arch)
+  return arch{ zEntries = entries }
 
 addLang :: Lang -> Element -> Element
 addLang lang = everywhere' (mkT updateLangAttr)
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index 3ac007f4e..8f60e70d5 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -35,6 +35,7 @@ import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.XML.Light as XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 mknode :: Node t => String -> [(String,String)] -> t -> Element
 mknode s attrs =
@@ -62,10 +63,10 @@ parseXml refArchive distArchive relpath =
          findEntryByPath relpath distArchive of
             Nothing -> throwError $ PandocSomeError $
                         T.pack relpath <> " missing in reference file"
-            Just e  -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
-                       Nothing -> throwError $ PandocSomeError $
-                                   T.pack relpath <> " corrupt in reference file"
-                       Just d  -> return d
+            Just e  -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of
+                       Left msg ->
+                         throwError $ PandocXMLError (T.pack relpath) msg
+                       Right d  -> return d
 
 -- Copied from Util
 
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 8554db622..cd092969b 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -29,6 +29,7 @@ import Data.Time.Clock (UTCTime)
 import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
 import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -77,7 +78,8 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer)
 getPresentationSize refArchive distArchive = do
   entry <- findEntryByPath "ppt/presentation.xml" refArchive  `mplus`
            findEntryByPath "ppt/presentation.xml" distArchive
-  presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry
+  presElement <- either (const Nothing) return $
+                   parseXMLElement $ UTF8.toTextLazy $ fromEntry entry
   let ns = elemToNameSpaces presElement
   sldSize <- findChild (elemName ns "p" "sldSz") presElement
   cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs
new file mode 100644
index 000000000..8ad22a66a
--- /dev/null
+++ b/src/Text/Pandoc/XMLParser.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XMLParser
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Bridge to allow using xml-conduit's parser with xml-light's types.
+-}
+module Text.Pandoc.XMLParser
+  ( parseXMLElement
+  , parseXMLContents
+  , module Text.XML.Light.Types
+  ) where
+
+import qualified Control.Exception as E
+import qualified Text.XML as Conduit
+import Text.XML.Unresolved (InvalidEventStream(..))
+import qualified Text.XML.Light as Light
+import Text.XML.Light.Types
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Map as M
+import Data.Maybe (mapMaybe)
+
+-- Drop in replacement for parseXMLDoc in xml-light.
+parseXMLElement :: TL.Text -> Either T.Text Light.Element
+parseXMLElement t =
+  elementToElement .  Conduit.documentRoot <$>
+    either (Left . T.pack . E.displayException) Right
+    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
+
+parseXMLContents :: TL.Text -> Either T.Text [Light.Content]
+parseXMLContents t =
+  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
+    Left e ->
+      case E.fromException e of
+        Just (ContentAfterRoot _) ->
+          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
+        _ -> Left . T.pack . E.displayException $ e
+    Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x]
+
+elementToElement :: Conduit.Element -> Light.Element
+elementToElement (Conduit.Element name attribMap nodes) =
+  Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
+ where
+  attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $
+              M.toList attribMap
+  nameToQname (Conduit.Name localName mbns mbpref) =
+    case mbpref of
+      Nothing | "xmlns:" `T.isPrefixOf` localName ->
+           Light.QName (T.unpack $ T.drop 6 localName)  (T.unpack <$> mbns)
+                       (Just "xmlns")
+      _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns)
+                       (T.unpack <$> mbpref)
+
+nodeToContent :: Conduit.Node -> Maybe Light.Content
+nodeToContent (Conduit.NodeElement el) =
+  Just (Light.Elem (elementToElement el))
+nodeToContent (Conduit.NodeContent t) =
+  Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing))
+nodeToContent _ = Nothing
+
diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs
index 525499c86..a9c9a0586 100644
--- a/test/Tests/Readers/JATS.hs
+++ b/test/Tests/Readers/JATS.hs
@@ -88,6 +88,7 @@ tests = [ testGroup "inline code"
             "<p>\n\
             \  <inline-formula><alternatives>\n\
             \  <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
+            \  </alternatives></inline-formula>\n\
             \</p>"
             =?> para (math "\\sigma|_{\\{x\\}}")
           , test jats "math ml only" $
diff --git a/test/command/5321.md b/test/command/5321.md
index 081abe2a0..83404632a 100644
--- a/test/command/5321.md
+++ b/test/command/5321.md
@@ -4,7 +4,7 @@
   <caption>
     <p>bar</p>
   </caption>
-  <graphic xlink:href="foo.png" xlink:alt-text="baz">
+  <graphic xlink:href="foo.png" xlink:alt-text="baz" />
 </fig>
 ^D
 [Para [Image ("fig-1",[],[]) [Str "bar"] ("foo.png","fig:")]]
@@ -17,7 +17,7 @@
     <title>foo</title>
     <p>bar</p>
   </caption>
-  <graphic xlink:href="foo.png" xlink:alt-text="baz">
+  <graphic xlink:href="foo.png" xlink:alt-text="baz" />
 </fig>
 ^D
 [Para [Image ("fig-1",[],[]) [Str "foo",LineBreak,Str "bar"] ("foo.png","fig:")]]
diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook
index 02568d8de..5717d78d0 100644
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@@ -1,6 +1,11 @@
 <?xml version="1.0" encoding="utf-8" ?>
 <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
-                  "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"
+[
+<!ENTITY GHC "GHC" >
+<!ENTITY let "LET" >
+<!ENTITY case "CASE" >
+]>
 <article>
   <articleinfo>
     <title>Pandoc Test Suite</title>
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index 3e1bf16e7..d3b16d0f2 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 66f055063..6293ef493 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index fb3a02a0a..4205a1516 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index bc6c2702a..adb3f23db 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 8c555a5bd..92c8137fe 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index 5f96cc911..f53470617 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index c21b3a5b3..d6af90a72 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 92ce144e9..e18dbe853 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index d21b67309..820299043 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index 3558a47bf..ae0f41d12 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 606df92a3..94cd35dfa 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 759269cac..879f2a25b 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index c37777080..93f86478f 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 9450b1a73..967d297f2 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 6f0b830e6..c5614e2fa 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index e53889cfb..0f39a831f 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 5dbe298b7..07046f223 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 194181288..3656618e6 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 6e0b634f7..8798253d5 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 477178e77..0a24d1840 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index 51110356e..52bb7a217 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index b6206cdf5..182c06c64 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 07b576080..7b69a56a3 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index d46095eb7..3d3a35701 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 7caba4e93..5ae37b406 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 6aaa6da61..c29aa6716 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 5746c5ad0..664493246 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 5f22dccc6..b6d15340e 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index ab5c4f56d..f8e1092d2 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 085f33162..b4cda82f2 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 1ac86d5c8..ee222efa0 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index c2c443b19..c6f8d9c96 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index 5ea18d32e..ea8146690 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
diff --git a/test/jats-reader.native b/test/jats-reader.native
index ab77dd1a0..0715ea8cc 100644
--- a/test/jats-reader.native
+++ b/test/jats-reader.native
@@ -1,4 +1,4 @@
-Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
+Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
 [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
 ,Header 1 ("headers",[],[]) [Str "Headers"]
 ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")]
diff --git a/test/jats-reader.xml b/test/jats-reader.xml
index f75b3e95a..f33cb9ab3 100644
--- a/test/jats-reader.xml
+++ b/test/jats-reader.xml
@@ -20,6 +20,7 @@
       <surname>MacFarlane</surname>
       <given-names>John</given-names>
     </name>
+  </contrib>
   <contrib contrib-type="author">
     <name>
       <surname>Anonymous</surname>
diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx
index aa9b7692a..58070eb3f 100644
Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ
diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx
index 9aaef4cb5..db9b7e371 100644
Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ
diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx
index 1737ec757..c7b1ed7d5 100644
Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ
diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx
index 87fb560ef..6944d92bf 100644
Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ
diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx
index 961c31020..ae0d28429 100644
Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ
diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx
index 894738ef7..37c74c69a 100644
Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ
diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx
index 188e8d826..324e443a1 100644
Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ
diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx
index 253e8c0a7..c81b983e3 100644
Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ
diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx
index e230420d2..30ce33db6 100644
Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ
diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx
index 49384fd65..d6c604968 100644
Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ
diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx
index cdf1be4ad..000e17ecd 100644
Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ
diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx
index c4fcbad45..fdcd2e29b 100644
Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ
diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx
index 4a13b5b7f..e73126376 100644
Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ
diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx
index 7a6e9700e..e3f968e9e 100644
Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ
diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx
index 926c8ff3f..eadb9372e 100644
Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ
diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx
index 16f48e182..8ca6bab2b 100644
Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ
diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx
index f47b17a74..ae188ee68 100644
Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ
diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx
index 88109a95e..60301fa50 100644
Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ
diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx
index 84020708f..17124a50d 100644
Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ
diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx
index a2f77e945..19ae7dd4e 100644
Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ
diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx
index 48bf7bc8a..b650b7585 100644
Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ
diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx
index 23b134a5f..0ab029614 100644
Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ
diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx
index d6eebeffb..2a6e35080 100644
Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx
index a6c76a187..a7bcf6a4b 100644
Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx
index 1fbde815b..21b018c25 100644
Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ
diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx
index cb3af4aa1..4ec4772a4 100644
Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ
diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx
index dff386885..5983657b6 100644
Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ
diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx
index 43b125f5e..dd54c7082 100644
Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ
diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx
index 3314a1c65..b3e5ed5b9 100644
Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx
index 27a136838..1078854bb 100644
Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx
index 7aa3b6a87..5116c6c4e 100644
Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx
index d43709ca7..0c8e49bd9 100644
Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx
index 793ea10f6..68695939d 100644
Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx
index 2f4d3b820..7ed9b946d 100644
Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx
index 94a221398..79fc82345 100644
Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ
diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx
index 22040c88c..9f943c279 100644
Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ
diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx
index 18477380b..ac72d8ced 100644
Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ
diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx
index 4b9d0ba4d..15c7b5469 100644
Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ
diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx
index 1c5b54185..926c5e699 100644
Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ
diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx
index 1314f4de4..a37e72d2c 100644
Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ
diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx
index 9018be36e..7f86533fe 100644
Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ
diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx
index 35e93af67..89e3db0ab 100644
Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ
-- 
cgit v1.2.3


From 390d5e65b2d66078b2f9b9db142dbe2167ea29a9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 10 Feb 2021 23:13:33 -0800
Subject: Use getTimestamp instead of getCurrentTime in writers.

Setting SOURCE_DATE_EPOCH will allow reproducible builds.

Partially addresses #7093.  This does not suffice to fully enable
reproducible in EPUB, since a unique id is being generated for each
build.
---
 src/Text/Pandoc/Readers/RST.hs               | 4 ++--
 src/Text/Pandoc/Writers/Docx.hs              | 2 +-
 src/Text/Pandoc/Writers/EPUB.hs              | 4 ++--
 src/Text/Pandoc/Writers/ODT.hs               | 2 +-
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 0f32d993c..29f81b046 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -28,7 +28,7 @@ import qualified Data.Text as T
 import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem,
-                                      readFileFromDirs, getCurrentTime)
+                                      readFileFromDirs, getTimestamp)
 import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -685,7 +685,7 @@ directive' = do
         "replace" -> B.para <$>  -- consumed by substKey
                    parseInlineFromText (trim top)
         "date" -> B.para <$> do  -- consumed by substKey
-                     t <- getCurrentTime
+                     t <- getTimestamp
                      let format = case T.unpack (T.strip top) of
                                     [] -> "%Y-%m-%d"
                                     x  -> x
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 8f498775d..da990e4d3 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -217,7 +217,7 @@ writeDocx opts doc = do
   let doc' = Pandoc meta blocks'
 
   username <- P.lookupEnv "USERNAME"
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
   oldUserDataDir <- P.getUserDataDir
   P.setUserDataDir Nothing
   res <- P.readDefaultDataFile "reference.docx"
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index e99fa2567..171ffe582 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -191,7 +191,7 @@ getEPUBMetadata opts meta = do
   let fixDate m =
        if null (epubDate m)
           then do
-            currentTime <- lift P.getCurrentTime
+            currentTime <- lift P.getTimestamp
             return $ m{ epubDate = [ Date{
                              dateText = showDateTimeISO8601 currentTime
                            , dateEvent = Nothing } ] }
@@ -709,7 +709,7 @@ pandocToEPUB version opts doc = do
   uuid <- case epubIdentifier metadata of
             (x:_) -> return $ identifierText x  -- use first identifier as UUID
             []    -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null"  -- shouldn't happen
-  currentTime <- lift P.getCurrentTime
+  currentTime <- lift P.getTimestamp
   let contentsData = UTF8.fromStringLazy $ ppTopElement $
         unode "package" !
           ([("version", case version of
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index a32ff618c..06369b4db 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -68,7 +68,7 @@ pandocToODT :: PandocMonad m
 pandocToODT opts doc@(Pandoc meta _) = do
   let title = docTitle meta
   let authors = docAuthors meta
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
   lang <- toLang (getLang opts meta)
   refArchive <-
        case writerReferenceDoc opts of
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index cd092969b..0a7060895 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -312,7 +312,7 @@ presentationToArchive opts pres = do
                      Nothing -> toArchive . BL.fromStrict <$>
                         P.readDataFile "reference.pptx"
 
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
 
   presSize <- case getPresentationSize refArchive distArchive of
                 Just sz -> return sz
-- 
cgit v1.2.3


From d9322629a36ad50035912ee56df876c345039225 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 12 Feb 2021 17:45:05 -0800
Subject: LaTeX reader improvements.

* Rewrote `withRaw` so it doesn't rely on fragile assumptions
  about token positions (which break when macros are expanded).
  This requires the addition of `sEnableWithRaw` and `sRawTokens`
  in `LaTeXState`, and a new combinator `disablingWithRaw` to
  disable collecting of raw tokens in certain contexts.
* Add `parseFromToks` to T.P.Readers.LaTeX.Parsing.
* Fix parsing of single character tokens so it doesn't mess
  up the new raw token collecting.
* These changes slightly increase allocations and have a small
  performance impact, but it's minor.

Closes #7092.
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  6 +--
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 84 +++++++++++++++++++++++++-------
 2 files changed, 68 insertions(+), 22 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 91c71c000..0a66b7f39 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1054,7 +1054,7 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
       symbol '}'
       return res
     parser = do
-      Tok _ Word s <- satisfyTok isWordTok
+      s <- untokenize <$> many1 (satisfyTok isWordTok)
       let (digits, rest) = T.span isDigit s
       unless (T.null rest) $
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
@@ -2208,9 +2208,7 @@ parseTableRow envname prefsufs = do
         option [] (count 1 amp)
         return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
   rawcells <- mapM celltoks prefsufs
-  oldInput <- getInput
-  cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
-  setInput oldInput
+  cells <- mapM (parseFromToks parseTableCell) rawcells
   spaces
   return $ Row nullAttr cells
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 12a3ba2f6..313aa6c51 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -32,6 +32,8 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , totoks
   , toksToString
   , satisfyTok
+  , parseFromToks
+  , disablingWithRaw
   , doMacros
   , doMacros'
   , setpos
@@ -87,13 +89,15 @@ import Control.Monad.Trans (lift)
 import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
 import Data.Default
 import Data.List (intercalate)
+import qualified Data.IntMap as IntMap
 import qualified Data.Map as M
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Error
+         (PandocError (PandocMacroLoop,PandocShouldNeverHappenError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
@@ -153,6 +157,8 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sToggles       :: M.Map Text Bool
                             , sExpanded      :: Bool
                             , sFileContents  :: M.Map Text Text
+                            , sEnableWithRaw :: Bool
+                            , sRawTokens     :: IntMap.IntMap [Tok]
                             }
      deriving Show
 
@@ -179,6 +185,8 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sToggles       = M.empty
                               , sExpanded      = False
                               , sFileContents  = M.empty
+                              , sEnableWithRaw = True
+                              , sRawTokens     = IntMap.empty
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
@@ -404,11 +412,31 @@ untoken t = untokenAccum t mempty
 toksToString :: [Tok] -> String
 toksToString = T.unpack . untokenize
 
+parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
+parseFromToks parser toks = do
+  oldInput <- getInput
+  setInput toks
+  result <- disablingWithRaw parser
+  setInput oldInput
+  return result
+
+disablingWithRaw :: PandocMonad m => LP m a -> LP m a
+disablingWithRaw parser = do
+  oldEnableWithRaw <- sEnableWithRaw <$> getState
+  updateState $ \st -> st{ sEnableWithRaw = False }
+  result <- parser
+  updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw }
+  return result
+
 satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sExpanded = False }
+    updateState $ \st -> st{ sExpanded = False
+                           , sRawTokens =
+                              if sEnableWithRaw st
+                                 then IntMap.map (res:) $ sRawTokens st
+                                 else sRawTokens st }
     return res
   where matcher t | f t       = Just t
                   | otherwise = Nothing
@@ -594,18 +622,22 @@ isCommentTok _                 = False
 anyTok :: PandocMonad m => LP m Tok
 anyTok = satisfyTok (const True)
 
+singleCharTok :: PandocMonad m => LP m Tok
+singleCharTok =
+  satisfyTok $ \case
+     Tok _ Word  t   -> T.length t == 1
+     Tok _ Symbol t  -> not (T.any (`Set.member` specialChars) t)
+     _               -> False
+
 singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
-  Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
-  guard $ not $ toktype == Symbol &&
-                T.any (`Set.member` specialChars) t
-  if T.length t > 1
-     then do
-       let (t1, t2) = (T.take 1 t, T.drop 1 t)
-       inp <- getInput
-       setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
-       return $ Tok pos toktype t1
-     else return $ Tok pos toktype t
+singleChar = singleCharTok <|> singleCharFromWord
+ where
+  singleCharFromWord = do
+    Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok
+    let (t1, t2) = (T.take 1 t, T.drop 1 t)
+    inp <- getInput
+    setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
+    anyTok
 
 specialChars :: Set.Set Char
 specialChars = Set.fromList "#$%&~_^\\{}"
@@ -725,11 +757,23 @@ ignore raw = do
 
 withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
 withRaw parser = do
-  inp <- getInput
+  rawTokensMap <- sRawTokens <$> getState
+  let key = case IntMap.lookupMax rawTokensMap of
+               Nothing     -> 0
+               Just (n,_)  -> n + 1
+  -- insert empty list at key
+  updateState $ \st -> st{ sRawTokens =
+                             IntMap.insert key [] $ sRawTokens st }
   result <- parser
-  nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok)
-  let raw = takeWhile (\(Tok pos _ _) -> maybe True
-                  (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp
+  mbRevToks <- IntMap.lookup key . sRawTokens <$> getState
+  raw <- case mbRevToks of
+           Just revtoks -> do
+             updateState $ \st -> st{ sRawTokens =
+                                        IntMap.delete key $ sRawTokens st}
+             return $ reverse revtoks
+           Nothing      ->
+             throwError $ PandocShouldNeverHappenError $
+                "sRawTokens has nothing at key " <> T.pack (show key)
   return (result, raw)
 
 keyval :: PandocMonad m => LP m (Text, Text)
@@ -794,7 +838,7 @@ getRawCommand name txt = do
   (_, rawargs) <- withRaw $
       case name of
            "write" -> do
-             void $ satisfyTok isWordTok -- digits
+             void $ many $ satisfyTok isDigitTok -- digits
              void braced
            "titleformat" -> do
              void braced
@@ -809,6 +853,10 @@ getRawCommand name txt = do
                void $ many braced
   return $ txt <> untokenize rawargs
 
+isDigitTok :: Tok -> Bool
+isDigitTok (Tok _ Word t) = T.all isDigit t
+isDigitTok _              = False
+
 skipopts :: PandocMonad m => LP m ()
 skipopts = skipMany (void overlaySpecification <|> void rawopt)
 
-- 
cgit v1.2.3


From eb0c63b00263263e3dc0f74c0bf19e93c7afeb13 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 12 Feb 2021 19:29:48 -0800
Subject: Avoid an unnecessary withRaw.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 0a66b7f39..2a949cbdc 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1869,9 +1869,12 @@ rawEnv name = do
   pos1 <- getPosition
   (bs, raw) <- withRaw $ env name blocks
   if parseRaw
-     then return $ rawBlock "latex"
+     then do
+       (_, raw) <- withRaw $ env name blocks
+       return $ rawBlock "latex"
                  $ beginCommand <> untokenize raw
      else do
+       bs <- env name blocks
        report $ SkippedContent beginCommand pos1
        pos2 <- getPosition
        report $ SkippedContent ("\\end{" <> name <> "}") pos2
-- 
cgit v1.2.3


From 6323250bad5e0bf9ef0ec007e16c869d99828080 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Feb 2021 00:22:22 -0800
Subject: LaTeX reader: remove unnecessary line

---
 src/Text/Pandoc/Readers/LaTeX.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2a949cbdc..7d8dfab0e 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1867,7 +1867,6 @@ rawEnv name = do
   rawOptions <- mconcat <$> many rawopt
   let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
   pos1 <- getPosition
-  (bs, raw) <- withRaw $ env name blocks
   if parseRaw
      then do
        (_, raw) <- withRaw $ env name blocks
-- 
cgit v1.2.3


From a3beed9db874517fa57b55380658f4e019e809b2 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 13 Feb 2021 09:37:43 +0100
Subject: Org: support task_lists extension

The tasks lists extension is now supported by the org reader and writer;
the extension is turned on by default.

Closes: #6336
---
 src/Text/Pandoc/Extensions.hs         |  2 ++
 src/Text/Pandoc/Readers/Org/Blocks.hs | 41 +++++++++++++++++++++++--
 src/Text/Pandoc/Writers/Org.hs        | 16 ++++++++--
 test/Tests/Readers/Org/Block/List.hs  | 13 ++++++++
 test/Tests/Writers/Org.hs             | 57 ++++++++++++++++++++++++++++-------
 5 files changed, 113 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 82eb0e957..3b96f9e04 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -381,6 +381,7 @@ getDefaultExtensions "commonmark_x"    = extensionsFromList
   ]
 getDefaultExtensions "org"             = extensionsFromList
                                           [Ext_citations,
+                                           Ext_task_lists,
                                            Ext_auto_identifiers]
 getDefaultExtensions "html"            = extensionsFromList
                                           [Ext_auto_identifiers,
@@ -515,6 +516,7 @@ getAllExtensions f = universalExtensions <> getAll f
     extensionsFromList
     [ Ext_citations
     , Ext_smart
+    , Ext_task_lists
     ]
   getAll "html"            = autoIdExtensions <>
     extensionsFromList
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 6bd046e04..d1aff701e 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE FlexibleContexts  #-}
-{-# LANGUAGE RecordWildCards   #-}
+{-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards   #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Blocks
    Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
@@ -850,16 +851,52 @@ definitionListItem parseIndentedMarker = try $ do
    definitionMarker =
      spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
 
+-- | Checkbox for tasks.
+data Checkbox
+  = UncheckedBox
+  | CheckedBox
+  | SemicheckedBox
+
+-- | Parses a checkbox in a plain list.
+checkbox :: PandocMonad m
+         => OrgParser m Checkbox
+checkbox = do
+  guardEnabled Ext_task_lists
+  try (char '[' *> status <* char ']') <?> "checkbox"
+  where
+    status = choice
+      [ UncheckedBox   <$ char ' '
+      , CheckedBox     <$ char 'X'
+      , SemicheckedBox <$ char '-'
+      ]
+
+checkboxToInlines :: Checkbox -> Inline
+checkboxToInlines = B.Str . \case
+  UncheckedBox   -> "☐"
+  SemicheckedBox -> "☐"
+  CheckedBox     -> "☒"
+
 -- | parse raw text for one list item
 listItem :: PandocMonad m
          => OrgParser m Int
          -> OrgParser m (F Blocks)
 listItem parseIndentedMarker = try . withContext ListItemState $ do
   markerLength <- try parseIndentedMarker
+  box <- optionMaybe checkbox
   firstLine <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
   rest <- T.concat <$> many (listContinuation markerLength)
-  parseFromString blocks $ firstLine <> blank <> rest
+  contents <- parseFromString blocks $ firstLine <> blank <> rest
+  return (maybe id (prependInlines . checkboxToInlines) box <$> contents)
+
+-- | Prepend inlines to blocks, adding them to the first paragraph or
+-- creating a new Plain element if necessary.
+prependInlines :: Inline -> Blocks -> Blocks
+prependInlines inlns = B.fromList . prepend . B.toList
+  where
+    prepend (Plain is : bs) = Plain (inlns : Space : is) : bs
+    prepend (Para  is : bs) = Para  (inlns : Space : is) : bs
+    prepend bs              = Plain [inlns, Space] : bs
 
 -- continuation of a list item - indented and separated by blankline or endline.
 -- Note: nested lists are parsed as continuations.
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 43ebf1807..8dfc2749c 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -213,25 +213,35 @@ blockToOrg (DefinitionList items) = do
 -- | Convert bullet list item (list of blocks) to Org.
 bulletListItemToOrg :: PandocMonad m => [Block] -> Org m (Doc Text)
 bulletListItemToOrg items = do
-  contents <- blockListToOrg items
+  exts <- gets $ writerExtensions . stOptions
+  contents <- blockListToOrg (taskListItemToOrg exts items)
   return $ hang 2 "- " contents $$
           if endsWithPlain items
              then cr
              else blankline
 
-
 -- | Convert ordered list item (a list of blocks) to Org.
 orderedListItemToOrg :: PandocMonad m
                      => Text   -- ^ marker for list item
                      -> [Block]  -- ^ list item (list of blocks)
                      -> Org m (Doc Text)
 orderedListItemToOrg marker items = do
-  contents <- blockListToOrg items
+  exts <- gets $ writerExtensions . stOptions
+  contents <- blockListToOrg (taskListItemToOrg exts items)
   return $ hang (T.length marker + 1) (literal marker <> space) contents $$
           if endsWithPlain items
              then cr
              else blankline
 
+-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@
+-- or @U+2612 BALLOT BOX WITH X@ to org checkbox syntax (e.g. @[X]@).
+taskListItemToOrg :: Extensions -> [Block] -> [Block]
+taskListItemToOrg = handleTaskListItem toOrg
+  where
+    toOrg (Str "☐" : Space : is) = Str "[ ]" : Space : is
+    toOrg (Str "☒" : Space : is) = Str "[X]" : Space : is
+    toOrg is = is
+
 -- | Convert definition list item (label, list of blocks) to Org.
 definitionListItemToOrg :: PandocMonad m
                         => ([Inline], [[Block]]) -> Org m (Doc Text)
diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs
index 9686b5148..2ee37081e 100644
--- a/test/Tests/Readers/Org/Block/List.hs
+++ b/test/Tests/Readers/Org/Block/List.hs
@@ -118,6 +118,19 @@ tests =
                 ] =?>
       bulletList [ plain "", plain "" ]
 
+  , "Task list" =:
+    T.unlines [ "- [ ] nope"
+              , "- [X] yup"
+              , "- [-] started"
+              , "  1. [X] sure"
+              , "  2. [ ] nuh-uh"
+              ] =?>
+    bulletList [ plain "☐ nope", plain "☒ yup"
+               , mconcat [ plain "☐ started"
+                         , orderedList [plain "☒ sure", plain "☐ nuh-uh"]
+                         ]
+               ]
+
   , "Simple Ordered List" =:
       ("1. Item1\n" <>
        "2. Item2\n") =?>
diff --git a/test/Tests/Writers/Org.hs b/test/Tests/Writers/Org.hs
index 9cbe360da..bd6c9b7ab 100644
--- a/test/Tests/Writers/Org.hs
+++ b/test/Tests/Writers/Org.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.Org (tests) where
 
+import Data.Text as T
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
@@ -9,17 +10,51 @@ import Text.Pandoc.Builder
 
 infix 4 =:
 (=:) :: (ToString a, ToPandoc a)
-     => String -> (a, String) -> TestTree
-(=:) = test (purely (writeOrg def . toPandoc))
+     => String -> (a, Text) -> TestTree
+(=:) = test org
+
+defopts :: WriterOptions
+defopts = def
+  { writerExtensions = getDefaultExtensions "org"
+  }
+
+org :: (ToPandoc a) => a -> Text
+org = orgWithOpts defopts
+
+orgWithOpts :: (ToPandoc a) => WriterOptions -> a -> Text
+orgWithOpts opts x = purely (writeOrg opts) $ toPandoc x
+
 
 tests :: [TestTree]
-tests = [ testGroup "links"
-          -- See http://orgmode.org/manual/Internal-links.html#Internal-links
-          [ "simple link"
-              =: link "/url" "" "foo"
-              =?> "[[/url][foo]]"
-          , "internal link to anchor"
-              =: link "#my-custom-id" "" "#my-custom-id"
-              =?> "[[#my-custom-id]]"
+tests =
+  [ testGroup "links"
+    -- See http://orgmode.org/manual/Internal-links.html#Internal-links
+    [ "simple link"
+      =: link "/url" "" "foo"
+      =?> "[[/url][foo]]"
+    , "internal link to anchor"
+      =: link "#my-custom-id" "" "#my-custom-id"
+      =?> "[[#my-custom-id]]"
+    ]
+
+  , testGroup "lists"
+    [ "bullet task list"
+      =: bulletList [plain "☐ a", plain "☒ b"]
+      =?> T.unlines
+          [ "- [ ] a"
+          , "- [X] b"
+          ]
+    , "ordered task list"
+      =: orderedList [plain ("☐" <> space <> "a"), plain "☒ b"]
+      =?> T.unlines
+          [ "1. [ ] a"
+          , "2. [X] b"
+          ]
+    , test (orgWithOpts def) "bullet without task_lists" $
+      bulletList [plain "☐ a", plain "☒ b"]
+      =?> T.unlines
+          [ "- ☐ a"
+          , "- ☒ b"
           ]
-        ]
+    ]
+  ]
-- 
cgit v1.2.3


From d84a6041e12547331f3a252ea4e8b4d229ba1159 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Feb 2021 13:06:22 -0800
Subject: HTML reader: fix bad handling of empty src attribute in iframe.

- If src is empty, we simply skip the iframe.
- If src is invalid or cannot be fetched, we issue a warning
  and skip instead of failing with an error.
- Closes #7099.
---
 src/Text/Pandoc/Readers/HTML.hs | 18 ++++++++++++------
 test/command/7099.md            | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index afc7a3e25..cc60b5501 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
 
 import Control.Applicative ((<|>))
 import Control.Monad (guard, msum, mzero, unless, void)
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAlphaNum, isLetter)
@@ -393,11 +393,17 @@ pIframe = try $ do
   tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
   pCloses "iframe" <|> eof
   url <- canonicalizeUrl $ fromAttrib "src" tag
-  (bs, _) <- openURL url
-  let inp = UTF8.toText bs
-  opts <- readerOpts <$> getState
-  Pandoc _ contents <- readHtml opts inp
-  return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+  if T.null url
+     then ignore $ renderTags' [tag, TagClose "iframe"]
+     else catchError
+       (do (bs, _) <- openURL url
+           let inp = UTF8.toText bs
+           opts <- readerOpts <$> getState
+           Pandoc _ contents <- readHtml opts inp
+           return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
+       (\e -> do
+         logMessage $ CouldNotFetchResource url (renderError e)
+         ignore $ renderTags' [tag, TagClose "iframe"])
 
 pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
 pRawHtmlBlock = do
diff --git a/test/command/7099.md b/test/command/7099.md
index d9ff8e5ff..33ac8aea1 100644
--- a/test/command/7099.md
+++ b/test/command/7099.md
@@ -2,7 +2,17 @@
 % pandoc -f html -t native --verbose
 <iframe src=""></iframe>
 ^D
-[INFO] Fetching ...
-[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1
+[INFO] Skipped '<iframe src></iframe>' at input line 1 column 16
+[]
+```
+
+```
+% pandoc -f html -t native --verbose
+<iframe src="h:invalid@url"></iframe>
+^D
+[INFO] Fetching h:invalid@url...
+[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
+  InvalidUrlException "h:invalid@url" "Invalid scheme"
+[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
 []
 ```
-- 
cgit v1.2.3


From 967e7f5fb990b29de48b37be1db40fb149a8cf55 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 14 Feb 2021 22:29:21 -0800
Subject: Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light...

..and add new definitions isomorphic to xml-light's, but with
Text instead of String.  This allows us to keep most of the code in
existing readers that use xml-light, but avoid lots of unnecessary
allocation.

We also add versions of the functions from xml-light's
Text.XML.Light.Output and Text.XML.Light.Proc that operate
on our modified XML types, and functions that convert
xml-light types to our types (since some of our dependencies,
like texmath, use xml-light).

Update golden tests for docx and pptx.

OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`.

Docx: Do a manual traversal to unwrap sdt and smartTag.
This is faster, and needed to pass the tests.

Benchmarks:

A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8)
B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8)
C = this commit

| Reader  |  A    | B      | C     |
| ------- | ----- | ------ | ----- |
| docbook | 18 ms | 12 ms  | 10 ms |
| opml    | 65 ms | 62 ms  | 35 ms |
| jats    | 15 ms | 11 ms  |  9 ms |
| docx    | 72 ms | 69 ms  | 44 ms |
| odt     | 78 ms | 41 ms  | 28 ms |
| epub    | 64 ms | 61 ms  | 56 ms |
| fb2     | 14 ms | 5  ms  | 4 ms  |
---
 .hlint.yaml                                        |   1 +
 pandoc.cabal                                       |   2 +-
 src/Text/Pandoc/ImageSize.hs                       |   9 +-
 src/Text/Pandoc/Readers/DocBook.hs                 |  76 +--
 src/Text/Pandoc/Readers/Docx/Parse.hs              | 163 +++---
 src/Text/Pandoc/Readers/Docx/Parse/Styles.hs       |  31 +-
 src/Text/Pandoc/Readers/Docx/Util.hs               |  27 +-
 src/Text/Pandoc/Readers/EPUB.hs                    |  65 +--
 src/Text/Pandoc/Readers/FB2.hs                     |  93 ++--
 src/Text/Pandoc/Readers/JATS.hs                    |  58 +-
 src/Text/Pandoc/Readers/OPML.hs                    |  29 +-
 src/Text/Pandoc/Readers/Odt.hs                     |   5 +-
 src/Text/Pandoc/Readers/Odt/ContentReader.hs       |  13 +-
 src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs  |   3 +-
 src/Text/Pandoc/Readers/Odt/Generic/Utils.hs       |  33 +-
 .../Pandoc/Readers/Odt/Generic/XMLConverter.hs     |  23 +-
 src/Text/Pandoc/Readers/Odt/Namespaces.hs          |  11 +-
 src/Text/Pandoc/Readers/Odt/StyleReader.hs         |  23 +-
 src/Text/Pandoc/Writers/Docx.hs                    | 263 +++++----
 src/Text/Pandoc/Writers/EPUB.hs                    | 356 ++++++-------
 src/Text/Pandoc/Writers/FB2.hs                     | 127 ++---
 src/Text/Pandoc/Writers/ODT.hs                     |  16 +-
 src/Text/Pandoc/Writers/OOXML.hs                   |  35 +-
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       | 201 +++----
 src/Text/Pandoc/XML/Light.hs                       | 586 +++++++++++++++++++++
 src/Text/Pandoc/XMLParser.hs                       |  66 ---
 test/Tests/Writers/OOXML.hs                        |   3 +-
 test/docx/golden/block_quotes.docx                 | Bin 10098 -> 10071 bytes
 test/docx/golden/codeblock.docx                    | Bin 9950 -> 9920 bytes
 test/docx/golden/comments.docx                     | Bin 10285 -> 10258 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 10048 -> 10021 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10673 -> 10650 bytes
 test/docx/golden/custom_style_reference.docx       | Bin 12434 -> 12403 bytes
 test/docx/golden/definition_list.docx              | Bin 9947 -> 9920 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9953 -> 9925 bytes
 test/docx/golden/document-properties.docx          | Bin 10429 -> 10404 bytes
 test/docx/golden/headers.docx                      | Bin 10086 -> 10059 bytes
 test/docx/golden/image.docx                        | Bin 26764 -> 26736 bytes
 test/docx/golden/inline_code.docx                  | Bin 9886 -> 9859 bytes
 test/docx/golden/inline_formatting.docx            | Bin 10066 -> 10038 bytes
 test/docx/golden/inline_images.docx                | Bin 26822 -> 26793 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10107 -> 10081 bytes
 test/docx/golden/links.docx                        | Bin 10282 -> 10251 bytes
 test/docx/golden/lists.docx                        | Bin 10358 -> 10332 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10149 -> 10123 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10238 -> 10210 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10150 -> 10122 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10245 -> 10216 bytes
 test/docx/golden/notes.docx                        | Bin 10052 -> 10028 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9986 -> 9960 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10121 -> 10094 bytes
 test/docx/golden/table_one_row.docx                | Bin 9938 -> 9908 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10255 -> 10227 bytes
 test/docx/golden/tables.docx                       | Bin 10272 -> 10244 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9930 -> 9903 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9913 -> 9886 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9947 -> 9920 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 10059 -> 10032 bytes
 test/docx/golden/unicode.docx                      | Bin 9871 -> 9845 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9919 -> 9892 bytes
 test/pptx/code-custom.pptx                         | Bin 28221 -> 28184 bytes
 test/pptx/code-custom_templated.pptx               | Bin 395516 -> 395477 bytes
 test/pptx/code.pptx                                | Bin 28220 -> 28183 bytes
 test/pptx/code_templated.pptx                      | Bin 395514 -> 395477 bytes
 test/pptx/document-properties-short-desc.pptx      | Bin 27004 -> 26967 bytes
 .../document-properties-short-desc_templated.pptx  | Bin 394288 -> 394253 bytes
 test/pptx/document-properties.pptx                 | Bin 27408 -> 27375 bytes
 test/pptx/document-properties_templated.pptx       | Bin 394691 -> 394656 bytes
 test/pptx/endnotes.pptx                            | Bin 26962 -> 26928 bytes
 test/pptx/endnotes_templated.pptx                  | Bin 394253 -> 394219 bytes
 test/pptx/endnotes_toc.pptx                        | Bin 27789 -> 27747 bytes
 test/pptx/endnotes_toc_templated.pptx              | Bin 395083 -> 395041 bytes
 test/pptx/images.pptx                              | Bin 44619 -> 44579 bytes
 test/pptx/images_templated.pptx                    | Bin 411909 -> 411870 bytes
 test/pptx/inline_formatting.pptx                   | Bin 26148 -> 26121 bytes
 test/pptx/inline_formatting_templated.pptx         | Bin 393438 -> 393412 bytes
 test/pptx/lists.pptx                               | Bin 27049 -> 27015 bytes
 test/pptx/lists_templated.pptx                     | Bin 394340 -> 394307 bytes
 test/pptx/raw_ooxml.pptx                           | Bin 26940 -> 26908 bytes
 test/pptx/raw_ooxml_templated.pptx                 | Bin 394231 -> 394198 bytes
 test/pptx/remove_empty_slides.pptx                 | Bin 44065 -> 44025 bytes
 test/pptx/remove_empty_slides_templated.pptx       | Bin 411352 -> 411311 bytes
 test/pptx/slide_breaks.pptx                        | Bin 28575 -> 28531 bytes
 test/pptx/slide_breaks_slide_level_1.pptx          | Bin 27744 -> 27705 bytes
 .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395038 -> 395000 bytes
 test/pptx/slide_breaks_templated.pptx              | Bin 395868 -> 395825 bytes
 test/pptx/slide_breaks_toc.pptx                    | Bin 29532 -> 29481 bytes
 test/pptx/slide_breaks_toc_templated.pptx          | Bin 396826 -> 396776 bytes
 test/pptx/speaker_notes.pptx                       | Bin 35436 -> 35360 bytes
 test/pptx/speaker_notes_after_metadata.pptx        | Bin 31675 -> 31636 bytes
 .../speaker_notes_after_metadata_templated.pptx    | Bin 398955 -> 398915 bytes
 test/pptx/speaker_notes_afterheader.pptx           | Bin 30691 -> 30657 bytes
 test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397979 -> 397943 bytes
 test/pptx/speaker_notes_afterseps.pptx             | Bin 51604 -> 51548 bytes
 test/pptx/speaker_notes_afterseps_templated.pptx   | Bin 418896 -> 418834 bytes
 test/pptx/speaker_notes_templated.pptx             | Bin 402728 -> 402650 bytes
 test/pptx/start_numbering_at.pptx                  | Bin 27023 -> 26991 bytes
 test/pptx/start_numbering_at_templated.pptx        | Bin 394314 -> 394283 bytes
 test/pptx/tables.pptx                              | Bin 27566 -> 27532 bytes
 test/pptx/tables_templated.pptx                    | Bin 394859 -> 394827 bytes
 test/pptx/two_column.pptx                          | Bin 26065 -> 26038 bytes
 test/pptx/two_column_templated.pptx                | Bin 393355 -> 393327 bytes
 102 files changed, 1388 insertions(+), 930 deletions(-)
 create mode 100644 src/Text/Pandoc/XML/Light.hs
 delete mode 100644 src/Text/Pandoc/XMLParser.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/.hlint.yaml b/.hlint.yaml
index d5ebffd34..e482b2b37 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -14,6 +14,7 @@
 - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained
 - ignore: {name: "Use &&&"}
 - ignore: {name: "Use String"}
+- ignore: {name: "Use camelCase"}
 - ignore: {name: "Use fmap"}  # specific for GHC 7.8 compat
 - ignore: {name: "Use isDigit"}
 
diff --git a/pandoc.cabal b/pandoc.cabal
index 3c7063f6c..22aebd55e 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -687,7 +687,7 @@ library
                    Text.Pandoc.Lua.PandocLua,
                    Text.Pandoc.Lua.Util,
                    Text.Pandoc.Lua.Walk,
-                   Text.Pandoc.XMLParser,
+                   Text.Pandoc.XML.Light,
                    Text.Pandoc.CSS,
                    Text.Pandoc.CSV,
                    Text.Pandoc.RoffChar,
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index e0a1af8e8..bb1aa6351 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -44,8 +44,7 @@ import Numeric (showFFloat)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
-import qualified Text.XML.Light as Xml
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light hiding (Attr)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
@@ -332,12 +331,12 @@ svgSize opts img = do
   doc <- either (const mzero) return $ parseXMLElement
                                      $ TL.fromStrict $ UTF8.toText img
   let viewboxSize = do
-        vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
-        [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
+        vb <- findAttrBy (== QName "viewBox" Nothing Nothing) doc
+        [_,_,w,h] <- mapM safeRead (T.words vb)
         return (w,h)
   let dpi = fromIntegral $ writerDpi opts
   let dirToInt dir = do
-        dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack
+        dim <- findAttrBy (== QName dir Nothing Nothing) doc >>= lengthToDim
         return $ inPixel opts dim
   w <- dirToInt "width" <|> (fst <$> viewboxSize)
   h <- dirToInt "height" <|> (snd <$> viewboxSize)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ad0108843..e201b54fe 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -31,8 +31,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 
 {-
 
@@ -578,26 +577,27 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <>
+                                    convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e)
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- convenience function
 named :: Text -> Element -> Bool
-named s e = qName (elName e) == T.unpack s
+named s e = qName (elName e) == s
 
 --
 
@@ -634,7 +634,7 @@ isBlockElement :: Content -> Bool
 isBlockElement (Elem e) = qName (elName e) `elem` blockTags
 isBlockElement _ = False
 
-blockTags :: [String]
+blockTags :: [Text]
 blockTags =
   [ "abstract"
   , "ackno"
@@ -721,7 +721,7 @@ blockTags =
   , "variablelist"
   ] ++ admonitionTags
 
-admonitionTags :: [String]
+admonitionTags :: [Text]
 admonitionTags = ["important","caution","note","tip","warning"]
 
 -- Trim leading and trailing newline characters
@@ -779,10 +779,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> DB m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "toc"   -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -837,7 +837,7 @@ parseBlock (Elem e) =
         "refsect2" -> sect 2
         "refsect3" -> sect 3
         "refsection" -> gets dbSectionLevel >>= sect . (+1)
-        l | l `elem` admonitionTags -> parseAdmonition $ T.pack l
+        l | l `elem` admonitionTags -> parseAdmonition l
         "area" -> skip
         "areaset" -> skip
         "areaspec" -> skip
@@ -899,7 +899,7 @@ parseBlock (Elem e) =
         "subtitle" -> return mempty  -- handled in parent element
         _       -> skip >> getBlocks e
    where skip = do
-           let qn = T.pack $ qName $ elName e
+           let qn = qName $ elName e
            let name = if "pi-" `T.isPrefixOf` qn
                          then "<?" <> qn <> "?>"
                          else qn
@@ -911,7 +911,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ T.pack $ strContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -965,7 +965,7 @@ parseBlock (Elem e) =
                             w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x ->
                                                      (x >= '0' && x <= '9')
-                                                      || x == '.') (T.pack w)
+                                                      || x == '.') w
                             if n > 0 then Just n else Nothing
                       let numrows = case bodyrows of
                                          [] -> 0
@@ -1048,12 +1048,12 @@ parseMixed container conts = do
       x <- parseMixed container rs
       return $ p <> b <> x
 
-parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell]
+parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell]
 parseRow cn = do
   let isEntry x  = named "entry" x || named "td" x || named "th" x
   mapM (parseEntry cn) . filterChildren isEntry
 
-parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell
+parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell
 parseEntry cn el = do
   let colDistance sa ea = do
         let iStrt = elemIndex sa cn
@@ -1075,7 +1075,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines
 getInlines e' = trimInlines . mconcat <$>
                  mapM parseInline (elContent e')
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -1084,9 +1084,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
+parseInline (Text (CData _ s _)) = return $ text s
 parseInline (CRef ref) =
-  return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+  return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "anchor" -> do
@@ -1138,7 +1138,7 @@ parseInline (Elem e) =
         "userinput" -> codeWithLang
         "systemitem" -> codeWithLang
         "varargs" -> return $ code "(...)"
-        "keycap" -> return (str $ T.pack $ strContent e)
+        "keycap" -> return (str $ strContent e)
         "keycombo" -> keycombo <$>
                          mapM parseInline (elContent e)
         "menuchoice" -> menuchoice <$>
@@ -1150,17 +1150,17 @@ parseInline (Elem e) =
             let title = case attrValue "endterm" e of
                             ""      -> maybe "???" xrefTitleByElem
                                          (findElementById linkend content)
-                            endterm -> maybe "???" (T.pack . strContent)
+                            endterm -> maybe "???" strContent
                                          (findElementById endterm content)
             return $ link ("#" <> linkend) "" (text title)
-        "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
-                          $ str $ T.pack $ strContent e
-        "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
+        "email" -> return $ link ("mailto:" <> strContent e) ""
+                          $ str $ strContent e
+        "uri" -> return $ link (strContent e) "" $ str $ strContent e
         "ulink" -> innerInlines (link (attrValue "url" e) "")
         "link" -> do
              ils <- innerInlines id
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "linkend" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
@@ -1180,7 +1180,7 @@ parseInline (Elem e) =
         "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
    where skip = do
-           let qn = T.pack $ qName $ elName e
+           let qn = qName $ elName e
            let name = if "pi-" `T.isPrefixOf` qn
                          then "<?" <> qn <> "?>"
                          else qn
@@ -1193,7 +1193,7 @@ parseInline (Elem e) =
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
          simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines
                          (filterChildren (named "member") e)
          segmentedList = do
@@ -1234,10 +1234,10 @@ parseInline (Elem e) =
                   "sect5"        -> descendantContent "title" el
                   "cmdsynopsis"  -> descendantContent "command" el
                   "funcsynopsis" -> descendantContent "function" el
-                  _              -> T.pack $ qName (elName el) ++ "_title"
+                  _              -> qName (elName el) <> "_title"
           where
             xrefLabel = attrValue "xreflabel" el
-            descendantContent name = maybe "???" (T.pack . strContent)
+            descendantContent name = maybe "???" strContent
                                    . filterElementName (\n -> qName n == name)
 
 -- | Extract a math equation from an element
@@ -1258,7 +1258,7 @@ equation e constructor =
     mathMLEquations :: [Text]
     mathMLEquations = map writeTeX $ rights $ readMath
       (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
-      (readMathML . T.pack . showElement)
+      (readMathML . showElement)
 
     latexEquations :: [Text]
     latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
@@ -1272,8 +1272,8 @@ equation e constructor =
 -- | Get the actual text stored in a CData block. 'showContent'
 -- returns the text still surrounded by the [[CDATA]] tags.
 showVerbatimCData :: Content -> Text
-showVerbatimCData (Text (CData _ d _)) = T.pack d
-showVerbatimCData c = T.pack $ showContent c
+showVerbatimCData (Text (CData _ d _)) = d
+showVerbatimCData c = showContent c
 
 
 -- | Set the prefix of a name to 'Nothing'
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 056dab6c2..c76f3c171 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -63,6 +63,7 @@ import Data.Char (chr, ord, readLitChar)
 import Data.List
 import qualified Data.Map as M
 import qualified Data.Text as T
+import Data.Text (Text)
 import Data.Maybe
 import System.FilePath
 import Text.Pandoc.Readers.Docx.Util
@@ -72,9 +73,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import Text.TeXMath (Exp)
 import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
-import Text.XML.Light
-import qualified Text.XML.Light.Cursor as XMLC
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            , envComments      :: Comments
@@ -128,37 +127,23 @@ mapD f xs =
   in
    concatMapM handler xs
 
-unwrap :: NameSpaces -> Content -> [Content]
-unwrap ns (Elem element)
+unwrapElement :: NameSpaces -> Element -> [Element]
+unwrapElement ns element
   | isElem ns "w" "sdt" element
   , Just sdtContent <- findChildByName ns "w" "sdtContent" element
-  = concatMap (unwrap ns . Elem) (elChildren sdtContent)
+  = concatMap (unwrapElement ns) (elChildren sdtContent)
   | isElem ns "w" "smartTag" element
-  = concatMap (unwrap ns . Elem) (elChildren element)
-unwrap _ content = [content]
+  = concatMap (unwrapElement ns) (elChildren element)
+  | otherwise
+  = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }]
 
-unwrapChild :: NameSpaces -> Content -> Content
-unwrapChild ns (Elem element) =
-  Elem $ element { elContent = concatMap (unwrap ns) (elContent element) }
-unwrapChild _ content = content
+unwrapContent :: NameSpaces -> Content -> [Content]
+unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element
+unwrapContent _ content = [content]
 
-walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
-walkDocument' ns cur =
-  let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur
-  in
-    case XMLC.nextDF modifiedCur of
-      Just cur' -> walkDocument' ns cur'
-      Nothing   -> XMLC.root modifiedCur
-
-walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument :: NameSpaces -> Element -> Element
 walkDocument ns element =
-  let cur = XMLC.fromContent (Elem element)
-      cur' = walkDocument' ns cur
-  in
-    case XMLC.toTree cur' of
-      Elem element' -> Just element'
-      _             -> Nothing
-
+  element{ elContent = concatMap (unwrapContent ns) (elContent element) }
 
 newtype Docx = Docx Document
           deriving Show
@@ -361,9 +346,9 @@ getDocumentXmlPath zf = do
   fp <- findAttr (QName "Target" Nothing Nothing) rel
   -- sometimes there will be a leading slash, which windows seems to
   -- have trouble with.
-  return $ case fp of
+  return $ case T.unpack fp of
     '/' : fp' -> fp'
-    _         -> fp
+    fp'       -> fp'
 
 archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
@@ -372,7 +357,7 @@ archiveToDocument zf = do
   docElem <- maybeToD $ parseXMLFromEntry entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
-  let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+  let bodyElem' = walkDocument namespaces bodyElem
   body <- elemToBody namespaces bodyElem'
   return $ Document namespaces body
 
@@ -414,8 +399,8 @@ archiveToNotes zf =
       fn_namespaces = maybe [] elemToNameSpaces fnElem
       en_namespaces = maybe [] elemToNameSpaces enElem
       ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
-      fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote"
-      en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote"
+      fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
+      en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
   in
    Notes ns fn en
 
@@ -424,7 +409,8 @@ archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
                >>= parseXMLFromEntry
       cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
-      cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
+      cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
+               cmtsElem
   in
     case cmts of
       Just c  -> Comments cmts_namespaces c
@@ -443,8 +429,8 @@ filePathToRelType path docXmlPath =
 relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
 relElemToRelationship relType element | qName (elName element) == "Relationship" =
   do
-    relId <- findAttrText (QName "Id" Nothing Nothing) element
-    target <- findAttrText (QName "Target" Nothing Nothing) element
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
     return $ Relationship relType relId target
 relElemToRelationship _ _ = Nothing
 
@@ -485,10 +471,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
 loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
 loElemToLevelOverride ns element
   | isElem ns "w" "lvlOverride" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       let startOverride = findChildByName ns "w" "startOverride" element
                           >>= findAttrByName ns "w" "val"
-                          >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                          >>= stringToInteger
           lvl = findChildByName ns "w" "lvl" element
                 >>= levelElemToLevel ns
       return $ LevelOverride ilvl startOverride lvl
@@ -497,9 +483,9 @@ loElemToLevelOverride _ _ = Nothing
 numElemToNum :: NameSpaces -> Element -> Maybe Numb
 numElemToNum ns element
   | isElem ns "w" "num" element = do
-      numId <- findAttrTextByName ns "w" "numId" element
+      numId <- findAttrByName ns "w" "numId" element
       absNumId <- findChildByName ns "w" "abstractNumId" element
-                  >>= findAttrTextByName ns "w" "val"
+                  >>= findAttrByName ns "w" "val"
       let lvlOverrides = mapMaybe
                          (loElemToLevelOverride ns)
                          (findChildrenByName ns "w" "lvlOverride" element)
@@ -509,7 +495,7 @@ numElemToNum _ _ = Nothing
 absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
 absNumElemToAbsNum ns element
   | isElem ns "w" "abstractNum" element = do
-      absNumId <- findAttrTextByName ns "w" "abstractNumId" element
+      absNumId <- findAttrByName ns "w" "abstractNumId" element
       let levelElems = findChildrenByName ns "w" "lvl" element
           levels = mapMaybe (levelElemToLevel ns) levelElems
       return $ AbstractNumb absNumId levels
@@ -518,14 +504,14 @@ absNumElemToAbsNum _ _ = Nothing
 levelElemToLevel :: NameSpaces -> Element -> Maybe Level
 levelElemToLevel ns element
   | isElem ns "w" "lvl" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       fmt <- findChildByName ns "w" "numFmt" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       txt <- findChildByName ns "w" "lvlText" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       let start = findChildByName ns "w" "start" element
                   >>= findAttrByName ns "w" "val"
-                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                  >>= stringToInteger
       return (Level ilvl fmt txt start)
 levelElemToLevel _ _ = Nothing
 
@@ -546,11 +532,11 @@ archiveToNumbering :: Archive -> Numbering
 archiveToNumbering archive =
   fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
 
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
+elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
 elemToNotes ns notetype element
   | isElem ns "w" (notetype <> "s") element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" notetype element)
       in
@@ -562,7 +548,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
 elemToComments ns element
   | isElem ns "w" "comments" element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" "comment" element)
       in
@@ -622,12 +608,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
       stringToInteger
     , hangingParIndent =
       findAttrByName ns "w" "hanging" element >>=
-      stringToInteger}
+      stringToInteger }
 elemToParIndentation _ _ = Nothing
 
-testBitMask :: String -> Int -> Bool
+testBitMask :: Text -> Int -> Bool
 testBitMask bitMaskS n =
-  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+  case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
     []            -> False
     ((n', _) : _) -> (n' .|. n) /= 0
 
@@ -642,7 +628,7 @@ elemToBodyPart ns element
   | isElem ns "w" "p" element
   , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
       do
-        expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
+        expsLst <- eitherToD $ readOMML $ showElement c
         return $ OMathPara expsLst
 elemToBodyPart ns element
   | isElem ns "w" "p" element
@@ -666,7 +652,7 @@ elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
     let caption' = findChildByName ns "w" "tblPr" element
                    >>= findChildByName ns "w" "tblCaption"
-                   >>= findAttrTextByName ns "w" "val"
+                   >>= findAttrByName ns "w" "val"
         caption = fromMaybe "" caption'
         grid' = case findChildByName ns "w" "tblGrid" element of
           Just g  -> elemToTblGrid ns g
@@ -705,8 +691,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
 getTitleAndAlt ns element =
   let mbDocPr = findChildByName ns "wp" "inline" element >>=
                 findChildByName ns "wp" "docPr"
-      title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
-      alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
+      title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+      alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
   in (title, alt)
 
 elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -718,7 +704,7 @@ elemToParPart ns element
   = let (title, alt) = getTitleAndAlt ns drawingElem
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrTextByName ns "r" "embed"
+                  >>= findAttrByName ns "r" "embed"
     in
      case drawing of
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -728,7 +714,7 @@ elemToParPart ns element
   | isElem ns "w" "r" element
   , Just _ <- findChildByName ns "w" "pict" element =
     let drawing = findElement (elemName ns "v" "imagedata") element
-                  >>= findAttrTextByName ns "r" "id"
+                  >>= findAttrByName ns "r" "id"
     in
      case drawing of
        -- Todo: check out title and attr for deprecated format.
@@ -797,7 +783,7 @@ elemToParPart ns element
       fldCharState <- gets stateFldCharState
       case fldCharState of
         FldCharOpen -> do
-          info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
+          info <- eitherToD $ parseFieldInfo $ strContent instrText
           modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
           return NullParPart
         _ -> return NullParPart
@@ -818,48 +804,48 @@ elemToParPart ns element
       return $ ChangedRuns change runs
 elemToParPart ns element
   | isElem ns "w" "bookmarkStart" element
-  , Just bmId <- findAttrTextByName ns "w" "id" element
-  , Just bmName <- findAttrTextByName ns "w" "name" element =
+  , Just bmId <- findAttrByName ns "w" "id" element
+  , Just bmName <- findAttrByName ns "w" "name" element =
     return $ BookMark bmId bmName
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just relId <- findAttrTextByName ns "r" "id" element = do
+  , Just relId <- findAttrByName ns "r" "id" element = do
     location <- asks envLocation
     runs <- mapD (elemToRun ns) (elChildren element)
     rels <- asks envRelationships
     case lookupRelationship location relId rels of
       Just target ->
-         case findAttrTextByName ns "w" "anchor" element of
+         case findAttrByName ns "w" "anchor" element of
              Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
              Nothing -> return $ ExternalHyperLink target runs
       Nothing     -> return $ ExternalHyperLink "" runs
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
+  , Just anchor <- findAttrByName ns "w" "anchor" element = do
     runs <- mapD (elemToRun ns) (elChildren element)
     return $ InternalHyperLink anchor runs
 elemToParPart ns element
   | isElem ns "w" "commentRangeStart" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element = do
       (Comments _ commentMap) <- asks envComments
       case M.lookup cmtId commentMap of
         Just cmtElem -> elemToCommentStart ns cmtElem
         Nothing      -> throwError WrongElem
 elemToParPart ns element
   | isElem ns "w" "commentRangeEnd" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element =
+  , Just cmtId <- findAttrByName ns "w" "id" element =
     return $ CommentEnd cmtId
 elemToParPart ns element
   | isElem ns "m" "oMath" element =
-    fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
+    fmap PlainOMath (eitherToD $ readOMML $ showElement element)
 elemToParPart _ _ = throwError WrongElem
 
 elemToCommentStart :: NameSpaces -> Element -> D ParPart
 elemToCommentStart ns element
   | isElem ns "w" "comment" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element
-  , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
-  , cmtDate <- findAttrTextByName ns "w" "date" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element
+  , Just cmtAuthor <- findAttrByName ns "w" "author" element
+  , cmtDate <- findAttrByName ns "w" "date" element = do
       bps <- mapD (elemToBodyPart ns) (elChildren element)
       return $ CommentStart cmtId cmtAuthor cmtDate bps
 elemToCommentStart _ _ = throwError WrongElem
@@ -878,7 +864,7 @@ elemToExtent drawingElem =
     where
       wp_ns  = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
       getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
-                    >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
+                    >>= findAttr (QName at Nothing Nothing) >>= safeRead
 
 
 childElemToRun :: NameSpaces -> Element -> D Run
@@ -889,7 +875,7 @@ childElemToRun ns element
   = let (title, alt) = getTitleAndAlt ns element
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
+                  >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
     in
      case drawing of
        Just s -> expandDrawingId s >>=
@@ -902,7 +888,7 @@ childElemToRun ns element
   = return InlineChart
 childElemToRun ns element
   | isElem ns "w" "footnoteReference" element
-  , Just fnId <- findAttrTextByName ns "w" "id" element = do
+  , Just fnId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupFootnote fnId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -910,7 +896,7 @@ childElemToRun ns element
       Nothing  -> return $ Footnote []
 childElemToRun ns element
   | isElem ns "w" "endnoteReference" element
-  , Just enId <- findAttrTextByName ns "w" "id" element = do
+  , Just enId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupEndnote enId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -963,15 +949,15 @@ getParStyleField _ _ = Nothing
 getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
 getTrackedChange ns element
   | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange ns element
   | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange _ _ = Nothing
 
@@ -980,7 +966,7 @@ elemToParagraphStyle ns element sty
   | Just pPr <- findChildByName ns "w" "pPr" element =
     let style =
           mapMaybe
-          (fmap ParaStyleId . findAttrTextByName ns "w" "val")
+          (fmap ParaStyleId . findAttrByName ns "w" "val")
           (findChildrenByName ns "w" "pStyle" pPr)
     in ParagraphStyle
       {pStyle = mapMaybe (`M.lookup` sty) style
@@ -1012,7 +998,7 @@ elemToRunStyleD ns element
     charStyles <- asks envCharStyles
     let parentSty =
           findChildByName ns "w" "rStyle" rPr >>=
-          findAttrTextByName ns "w" "val" >>=
+          findAttrByName ns "w" "val" >>=
           flip M.lookup charStyles . CharStyleId
     return $ elemToRunStyle ns element parentSty
 elemToRunStyleD _ _ = return defaultRunStyle
@@ -1022,7 +1008,7 @@ elemToRunElem ns element
   | isElem ns "w" "t" element
     || isElem ns "w" "delText" element
     || isElem ns "m" "t" element = do
-    let str = T.pack $ strContent element
+    let str = strContent element
     font <- asks envFont
     case font of
       Nothing -> return $ TextRun str
@@ -1044,14 +1030,14 @@ getSymChar :: NameSpaces -> Element -> RunElem
 getSymChar ns element
   | Just s <- lowerFromPrivate <$> getCodepoint
   , Just font <- getFont =
-    case readLitChar ("\\x" ++ s) of
+    case readLitChar ("\\x" ++ T.unpack s) of
          [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
          _           -> TextRun ""
   where
     getCodepoint = findAttrByName ns "w" "char" element
-    getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
-    lowerFromPrivate ('F':xs) = '0':xs
-    lowerFromPrivate xs       = xs
+    getFont = textToFont =<< findAttrByName ns "w" "font" element
+    lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t
+                       | otherwise             = t
 getSymChar _ _ = TextRun ""
 
 elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1061,8 +1047,9 @@ elemToRunElems ns element
        let qualName = elemName ns "w"
        let font = do
                     fontElem <- findElement (qualName "rFonts") element
-                    textToFont . T.pack =<<
-                       foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
+                    textToFont =<<
+                       foldr ((<|>) . (flip findAttr fontElem . qualName))
+                         Nothing ["ascii", "hAnsi"]
        local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
 elemToRunElems _ _ = throwError WrongElem
 
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index edade8654..0d7271d6a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -48,12 +48,13 @@ import Data.Function (on)
 import Data.String (IsString(..))
 import qualified Data.Map as M
 import qualified Data.Text as T
+import qualified Data.Text.Read
+import Data.Text (Text)
 import Data.Maybe
 import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 newtype CharStyleId   = CharStyleId T.Text
   deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -109,7 +110,7 @@ data RunStyle = RunStyle { isBold       :: Maybe Bool
                          , isRTL        :: Maybe Bool
                          , isForceCTL   :: Maybe Bool
                          , rVertAlign   :: Maybe VertAlign
-                         , rUnderline   :: Maybe String
+                         , rUnderline   :: Maybe Text
                          , rParentStyle :: Maybe CharStyle
                          }
                 deriving Show
@@ -159,7 +160,7 @@ isBasedOnStyle ns element parentStyle
   , Just styleType <- findAttrByName ns "w" "type" element
   , styleType == cStyleType parentStyle
   , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
-                       findAttrTextByName ns "w" "val"
+                       findAttrByName ns "w" "val"
   , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
   | isElem ns "w" "style" element
   , Just styleType <- findAttrByName ns "w" "type" element
@@ -169,7 +170,7 @@ isBasedOnStyle ns element parentStyle
   | otherwise = False
 
 class HasStyleId a => ElemToStyle a where
-  cStyleType  :: Maybe a -> String
+  cStyleType  :: Maybe a -> Text
   elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
 
 class FromStyleId (StyleId a) => HasStyleId a where
@@ -226,8 +227,10 @@ buildBasedOnList ns element rootStyle =
     stys -> stys ++
             concatMap (buildBasedOnList ns element . Just) stys
 
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+stringToInteger :: Text -> Maybe Integer
+stringToInteger s = case Data.Text.Read.decimal s of
+                      Right (x,_) -> Just x
+                      Left _      -> Nothing
 
 checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
 checkOnOff ns rPr tag
@@ -247,7 +250,7 @@ checkOnOff _ _ _ = Nothing
 elemToCharStyle :: NameSpaces
                 -> Element -> Maybe CharStyle -> Maybe CharStyle
 elemToCharStyle ns element parentStyle
-  = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
+  = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
               <*> getElementStyleName ns element
               <*> Just (elemToRunStyle ns element parentStyle)
 
@@ -281,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle
 getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
 getHeaderLevel ns element
   | Just styleName <- getElementStyleName ns element
-  , Just n <- stringToInteger . T.unpack =<<
+  , Just n <- stringToInteger =<<
               (T.stripPrefix "heading " . T.toLower $
                 fromStyleName styleName)
   , n > 0 = Just (styleName, fromInteger n)
@@ -289,8 +292,8 @@ getHeaderLevel _ _ = Nothing
 
 getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
 getElementStyleName ns el = coerce <$>
-  ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
-  <|> findAttrTextByName ns "w" "styleId" el)
+  ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+  <|> findAttrByName ns "w" "styleId" el)
 
 getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
 getNumInfo ns element = do
@@ -298,15 +301,15 @@ getNumInfo ns element = do
               findChildByName ns "w" "numPr"
       lvl = fromMaybe "0" (numPr >>=
                            findChildByName ns "w" "ilvl" >>=
-                           findAttrTextByName ns "w" "val")
+                           findAttrByName ns "w" "val")
   numId <- numPr >>=
            findChildByName ns "w" "numId" >>=
-           findAttrTextByName ns "w" "val"
+           findAttrByName ns "w" "val"
   return (numId, lvl)
 
 elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
 elemToParStyleData ns element parentStyle
-  | Just styleId <- findAttrTextByName ns "w" "styleId" element
+  | Just styleId <- findAttrByName ns "w" "styleId" element
   , Just styleName <- getElementStyleName ns element
   = Just $ ParStyle
       {
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index f9c9a8e26..21df03d9e 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Docx.StyleMaps
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
@@ -18,51 +19,45 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , elemToNameSpaces
                                       , findChildByName
                                       , findChildrenByName
-                                      , findAttrText
                                       , findAttrByName
-                                      , findAttrTextByName
                                       ) where
 
 import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
-import Text.XML.Light
+import Data.Text (Text)
+import Text.Pandoc.XML.Light
 
-type NameSpaces = [(String, String)]
+type NameSpaces = [(Text, Text)]
 
 elemToNameSpaces :: Element -> NameSpaces
 elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
 
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
 attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
 attrToNSPair _                                     = Nothing
 
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
   let ns' = ns ++ elemToNameSpaces element
   in qName (elName element) == name &&
      qURI (elName element) == lookup prefix ns'
 
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
 findChildByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findChild (elemName ns' pref name) el
 
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
 findChildrenByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText x = fmap T.pack . findAttr x
-
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findAttr (elemName ns' pref name) el
 
-findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
-findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 369c4f0c9..eb8d2405d 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -23,8 +23,8 @@ import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
 import Control.Monad.Except (throwError)
 import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf)
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Map as M (Map, elems, fromList, lookup)
 import Data.Maybe (mapMaybe)
 import qualified Data.Text.Lazy as TL
@@ -40,13 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
+import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
-type Items = M.Map String (FilePath, MimeType)
+type Items = M.Map Text (FilePath, MimeType)
 
 readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
 readEPUB opts bytes = case toArchiveOrFail bytes of
@@ -126,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
 imageMimes :: [MimeType]
 imageMimes = ["image/gif", "image/jpeg", "image/png"]
 
-type CoverId = String
+type CoverId = Text
 
 type CoverImage = FilePath
 
-parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
+parseManifest :: (PandocMonad m)
+              => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
 parseManifest content coverId = do
   manifest <- findElementE (dfName "manifest") content
   let items = findChildren (dfName "item") manifest
   r <- mapM parseItem items
   let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
-  return (cover `mplus` coverId, M.fromList r)
+  return (T.unpack <$> (cover `mplus` coverId), M.fromList r)
   where
-    findCover e = maybe False (isInfixOf "cover-image")
+    findCover e = maybe False (T.isInfixOf "cover-image")
                   (findAttr (emptyName "properties") e)
                || Just True == liftM2 (==) coverId (findAttr (emptyName "id") e)
     parseItem e = do
       uid <- findAttrE (emptyName "id") e
       href <- findAttrE (emptyName "href") e
       mime <- findAttrE (emptyName "media-type") e
-      return (uid, (href, T.pack mime))
+      return (uid, (T.unpack href, mime))
 
 parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
 parseSpine is e = do
@@ -173,11 +173,11 @@ parseMeta content = do
 -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
 parseMetaItem :: Element -> Meta -> Meta
 parseMetaItem e@(stripNamespace . elName -> field) meta =
-  addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
+  addMetaField (renameMeta field) (B.str $ strContent e) meta
 
-renameMeta :: String -> T.Text
+renameMeta :: Text -> Text
 renameMeta "creator" = "author"
-renameMeta s         = T.pack s
+renameMeta s         = s
 
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
@@ -187,7 +187,7 @@ getManifest archive = do
   ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
   as <- fmap (map attrToPair . elAttribs)
     (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
-  manifestFile <- mkE "Root not found" (lookup "full-path" as)
+  manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
   let rootdir = dropFileName manifestFile
   --mime <- lookup "media-type" as
   manifest <- findEntryByPathE manifestFile archive
@@ -201,7 +201,8 @@ fixInternalReferences pathToFile =
   . walk (fixBlockIRs filename)
   . walk (fixInlineIRs filename)
   where
-    (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
+    (root, T.unpack . escapeURI . T.pack -> filename) =
+      splitFileName pathToFile
 
 fixInlineIRs :: String -> Inline -> Inline
 fixInlineIRs s (Span as v) =
@@ -214,7 +215,7 @@ fixInlineIRs s (Link as is t) =
   Link (fixAttrs s as) is t
 fixInlineIRs _ v = v
 
-prependHash :: [T.Text] -> Inline -> Inline
+prependHash :: [Text] -> Inline -> Inline
 prependHash ps l@(Link attr is (url, tit))
   | or [s `T.isPrefixOf` url | s <- ps] =
     Link attr is ("#" <> url, tit)
@@ -231,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) =
 fixBlockIRs _ b = b
 
 fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) =
+  (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
 
-addHash :: String -> T.Text -> T.Text
+addHash :: FilePath -> Text -> Text
 addHash _ ""    = ""
 addHash s ident = T.pack (takeFileName s) <> "#" <> ident
 
-removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
 removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
 
-isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr :: (Text, a) -> Bool
 isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
 
 -- Library
@@ -257,33 +259,33 @@ uncurry3 f (a, b, c) = f a b c
 
 -- Utility
 
-stripNamespace :: QName -> String
+stripNamespace :: QName -> Text
 stripNamespace (QName v _ _) = v
 
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
 attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
 attrToNSPair _                              = Nothing
 
-attrToPair :: Attr -> (String, String)
+attrToPair :: Attr -> (Text, Text)
 attrToPair (Attr (QName name _ _) val) = (name, val)
 
-defaultNameSpace :: Maybe String
+defaultNameSpace :: Maybe Text
 defaultNameSpace = Just "http://www.idpf.org/2007/opf"
 
-dfName :: String -> QName
+dfName :: Text -> QName
 dfName s = QName s defaultNameSpace Nothing
 
-emptyName :: String -> QName
+emptyName :: Text -> QName
 emptyName s = QName s Nothing Nothing
 
 -- Convert Maybe interface to Either
 
-findAttrE :: PandocMonad m => QName -> Element -> m String
+findAttrE :: PandocMonad m => QName -> Element -> m Text
 findAttrE q e = mkE "findAttr" $ findAttr q e
 
 findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
-  mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+  mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a
 
 parseXMLDocE :: PandocMonad m => Entry -> m Element
 parseXMLDocE entry =
@@ -293,7 +295,8 @@ parseXMLDocE entry =
   fp  = T.pack $ eRelativePath entry
 
 findElementE :: PandocMonad m => QName -> Element -> m Element
-findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+findElementE e x =
+  mkE ("Unable to find element: " <> tshow e) $ findElement e x
 
-mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ T.pack s) return
+mkE :: PandocMonad m => Text -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b804eab4f..66e390bd7 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -25,7 +25,6 @@ TODO:
 module Text.Pandoc.Readers.FB2 ( readFB2 ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
-import Data.ByteString.Lazy.Char8 ( pack )
 import Data.ByteString.Base64.Lazy
 import Data.Functor
 import Data.List (intersperse)
@@ -42,8 +41,8 @@ import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
+import qualified Text.Pandoc.UTF8 as UTF8
 
 type FB2 m = StateT FB2State m
 
@@ -85,12 +84,12 @@ removeHash t = case T.uncons t of
   Just ('#', xs) -> xs
   _              -> t
 
-convertEntity :: String -> Text
-convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e)
 
 parseInline :: PandocMonad m => Content -> FB2 m Inlines
 parseInline (Elem e) =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -98,12 +97,12 @@ parseInline (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement name
       pure mempty
-parseInline (Text x) = pure $ text $ T.pack $ cdData x
+parseInline (Text x) = pure $ text $ cdData x
 parseInline (CRef r) = pure $ str $ convertEntity r
 
 parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -113,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
 
 parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
 parseRootElement e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "FictionBook" -> do
       -- Parse notes before parsing the rest of the content.
       case filterChild isNotesBody e of
@@ -146,7 +145,7 @@ parseNote e =
     Just sectionId -> do
       content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
       oldNotes <- gets fb2Notes
-      modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
+      modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes }
       pure ()
   where
     isTitle x = qName (elName x) == "title"
@@ -158,7 +157,7 @@ parseNote e =
 -- | Parse a child of @\<FictionBook>@ element.
 parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
 parseFictionBookChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "stylesheet" -> pure mempty -- stylesheet is ignored
     "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
     "body" -> if isNotesBody e
@@ -170,7 +169,7 @@ parseFictionBookChild e =
 -- | Parse a child of @\<description>@ element.
 parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
 parseDescriptionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title-info" -> mapM_ parseTitleInfoChild (elChildren e)
     "src-title-info" -> pure () -- ignore
     "document-info" -> pure ()
@@ -184,7 +183,7 @@ parseDescriptionChild e =
 -- | Parse a child of @\<body>@ element.
 parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
 parseBodyChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "image" -> parseImageElement e
     "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
     "epigraph" -> parseEpigraph e
@@ -198,7 +197,10 @@ parseBinaryElement e =
     (Nothing, _) -> report $ IgnoredElement "binary without id attribute"
     (Just _, Nothing) ->
       report $ IgnoredElement "binary without content-type attribute"
-    (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
+    (Just filename, contentType) ->
+      insertMedia (T.unpack filename) contentType
+                    (decodeLenient
+                      (UTF8.fromTextLazy . TL.fromStrict . strContent $ e))
 
 -- * Type parsers
 
@@ -208,13 +210,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
 
 parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
 parseAuthorChild e =
-  case T.pack $ qName $ elName e of
-    "first-name" -> pure $ Just $ T.pack $ strContent e
-    "middle-name" -> pure $ Just $ T.pack $ strContent e
-    "last-name" -> pure $ Just $ T.pack $ strContent e
-    "nickname" -> pure $ Just $ T.pack $ strContent e
-    "home-page" -> pure $ Just $ T.pack $ strContent e
-    "email" -> pure $ Just $ T.pack $ strContent e
+  case qName $ elName e of
+    "first-name" -> pure $ Just $ strContent e
+    "middle-name" -> pure $ Just $ strContent e
+    "last-name" -> pure $ Just $ strContent e
+    "nickname" -> pure $ Just $ strContent e
+    "home-page" -> pure $ Just $ strContent e
+    "email" -> pure $ Just $ strContent e
     name -> do
       report $ IgnoredElement $ name <> " in author"
       pure Nothing
@@ -238,13 +240,13 @@ parseTitleContent _ = pure Nothing
 parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
 parseImageElement e =
   case href of
-    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
+    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
     Nothing -> do
       report $ IgnoredElement " image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        title = maybe "" T.pack $ findAttr (unqual "title") e
-        imgId = maybe "" T.pack $ findAttr (unqual "id") e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        title = fromMaybe "" $ findAttr (unqual "title") e
+        imgId = fromMaybe "" $ findAttr (unqual "id") e
         href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
 
 -- | Parse @pType@
@@ -258,7 +260,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
 -- | Parse @citeType@ child
 parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
 parseCiteChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "empty-line" -> pure horizontalRule
@@ -273,13 +275,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
 
 parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
 parsePoemChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "epigraph" -> parseEpigraph e
     "stanza" -> parseStanza e
     "text-author" -> para <$> parsePType e
-    "date" -> pure $ para $ text $ T.pack $ strContent e
+    "date" -> pure $ para $ text $ strContent e
     name -> report (UnexpectedXmlElement name "poem") $> mempty
 
 parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -292,7 +294,7 @@ joinLineBlocks [] = []
 
 parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
 parseStanzaChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "v" -> lineBlock . (:[]) <$> parsePType e
@@ -302,11 +304,11 @@ parseStanzaChild e =
 parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraph e =
   divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
-  where divId = maybe "" T.pack $ findAttr (unqual "id") e
+  where divId = fromMaybe "" $ findAttr (unqual "id") e
 
 parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraphChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -320,7 +322,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
 
 parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
 parseAnnotationChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -334,14 +336,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
 parseSection e = do
   n <- gets fb2SectionLevel
   modify $ \st -> st{ fb2SectionLevel = n + 1 }
-  let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
+  let sectionId = fromMaybe "" $ findAttr (unqual "id") e
   bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
   modify $ \st -> st{ fb2SectionLevel = n }
   pure bs
 
 parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
 parseSectionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseBodyChild e
     "epigraph" -> parseEpigraph e
     "image" -> parseImageElement e
@@ -363,16 +365,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
 parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
 parseNamedStyle e = do
   content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
-  let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
+  let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
   case findAttr (unqual "name") e of
-    Just name -> pure $ spanWith ("", [T.pack name], lang) content
+    Just name -> pure $ spanWith ("", [name], lang) content
     Nothing -> do
       report $ IgnoredElement "link without required name"
       pure mempty
 
 parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
 parseNamedStyleChild (Elem e) =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -380,7 +382,7 @@ parseNamedStyleChild (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement $ name <> " in style"
@@ -392,7 +394,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
 parseLinkType e = do
   content <- mconcat <$> mapM parseStyleLinkType (elContent e)
   notes <- gets fb2Notes
-  case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+  case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
     Just href -> case findAttr (QName "type" Nothing Nothing) e of
                    Just "note" -> case M.lookup href notes of
                                     Nothing -> pure $ link href "" content
@@ -419,15 +421,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
 -- | Parse @title-infoType@
 parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
 parseTitleInfoChild e =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "genre" -> pure ()
     "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
-    "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
+    "book-title" -> modify (setMeta "title" (text $ strContent e))
     "annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
     "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
-                                                                      $ T.pack
                                                                       $ strContent e))
-    "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
+    "date" -> modify (setMeta "date" (text $ strContent e))
     "coverpage" -> parseCoverPage e
     "lang" -> pure ()
     "src-lang" -> pure ()
@@ -441,7 +442,7 @@ parseCoverPage e =
     Just img -> case href of
                   Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
                   Nothing -> pure ()
-                where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+                where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
     Nothing -> pure ()
 
 -- | Parse @inlineImageType@ element
@@ -454,5 +455,5 @@ parseInlineImageElement e =
     Nothing -> do
       report $ IgnoredElement "inline image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index dfd343b7a..5353f2001 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -16,7 +16,7 @@ module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Error (PandocError(..))
-import Data.Char (isDigit, isSpace, toUpper)
+import Data.Char (isDigit, isSpace)
 import Data.Default
 import Data.Generics
 import Data.List (foldl', intersperse)
@@ -31,8 +31,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
 
@@ -67,29 +66,29 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e)
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr =
   fromMaybe "" . maybeAttrValue attr
 
-maybeAttrValue :: String -> Element -> Maybe Text
+maybeAttrValue :: Text -> Element -> Maybe Text
 maybeAttrValue attr elt =
-  T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+  lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
 
 -- convenience function
-named :: String -> Element -> Bool
+named :: Text -> Element -> Bool
 named s e = qName (elName e) == s
 
 --
@@ -155,10 +154,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> JATS m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "p" -> parseMixed para (elContent e)
@@ -207,7 +206,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ textContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -271,7 +270,7 @@ parseBlock (Elem e) =
                                                 Just "center" -> AlignCenter
                                                 _             -> AlignDefault
                       let toWidth c = do
-                            w <- findAttrText (unqual "colwidth") c
+                            w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
                             if n > 0 then Just n else Nothing
                       let numrows = foldl' max 0 $ map length bodyrows
@@ -442,16 +441,10 @@ parseRef e = do
        Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
        -- TODO handle mixed-citation
 
-findAttrText :: QName -> Element -> Maybe Text
-findAttrText x = fmap T.pack . findAttr x
-
 textContent :: Element -> Text
-textContent = T.pack . strContent
-
-textContentRecursive :: Element -> Text
-textContentRecursive = T.pack . strContentRecursive
+textContent = strContent
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -460,9 +453,8 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
-parseInline (CRef ref) =
-  return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) = return . text . convertEntity $ ref
 parseInline (Elem e) =
   case qName (elName e) of
         "italic" -> innerInlines emph
@@ -507,9 +499,9 @@ parseInline (Elem e) =
                         else linkWith attr ("#" <> rid) "" ils
         "ext-link" -> do
              ils <- innerInlines id
-             let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+             let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "rid" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, [], [])
@@ -529,7 +521,7 @@ parseInline (Elem e) =
    where innerInlines f = extractSpaces f . mconcat <$>
                           mapM parseInline (elContent e)
          mathML x =
-            case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
+            case readMathML . showElement $ everywhere (mkT removePrefix) x of
                 Left _ -> mempty
                 Right m -> writeTeX m
          formula constructor = do
@@ -547,4 +539,4 @@ parseInline (Elem e) =
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index bdadc4dd9..184d5a63f 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -13,7 +13,6 @@ Conversion of OPML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.OPML ( readOPML ) where
 import Control.Monad.State.Strict
-import Data.Char (toUpper)
 import Data.Default
 import Data.Generics
 import Data.Maybe (fromMaybe)
@@ -28,8 +27,7 @@ import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.Pandoc.Shared (crFilter, blocksToInlines')
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import Control.Monad.Except (throwError)
 
 type OPML m = StateT OPMLState m
@@ -69,25 +67,22 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e))
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-
-textContent :: Element -> Text
-textContent = T.pack . strContent
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
 -- exceptT = either throwError return
@@ -111,11 +106,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
 parseBlock (Elem e) =
   case qName (elName e) of
         "ownerName"    -> mempty <$ modify (\st ->
-                              st{opmlDocAuthors = [text $ textContent e]})
+                              st{opmlDocAuthors = [text $ strContent e]})
         "dateModified" -> mempty <$ modify (\st ->
-                              st{opmlDocDate = text $ textContent e})
+                              st{opmlDocDate = text $ strContent e})
         "title"        -> mempty <$ modify (\st ->
-                              st{opmlDocTitle = text $ textContent e})
+                              st{opmlDocTitle = text $ strContent e})
         "outline" -> gets opmlSectionLevel >>= sect . (+1)
         "?xml"  -> return mempty
         _       -> getBlocks e
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 85308deb1..c274b6fd4 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -14,8 +14,7 @@ Entry point to the odt reader.
 module Text.Pandoc.Readers.Odt ( readOdt ) where
 
 import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 import qualified Data.ByteString.Lazy as B
 
@@ -91,7 +90,7 @@ archiveToOdt archive = do
 
 
 --
-entryToXmlElem :: Entry -> Either PandocError XML.Element
+entryToXmlElem :: Entry -> Either PandocError Element
 entryToXmlElem entry =
   case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
     Right x  -> Right x
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 43c44e7e9..df90880fa 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -29,14 +29,14 @@ import Control.Monad ((<=<))
 
 import qualified Data.ByteString.Lazy as B
 import Data.Foldable (fold)
-import Data.List (find, stripPrefix)
+import Data.List (find)
 import qualified Data.Map as M
 import qualified Data.Text as T
 import Data.Maybe
 import Data.Semigroup (First(..), Option(..))
 
 import Text.TeXMath (readMathML, writeTeX)
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import Text.Pandoc.Builder hiding (underline)
 import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -557,7 +557,7 @@ read_plain_text =  fst ^&&& read_plain_text' >>% recover
                        >>?% mappend
     --
     extractText     :: XML.Content -> Fallible T.Text
-    extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
+    extractText (XML.Text cData) = succeedWith (XML.cdData cData)
     extractText         _        = failEmpty
 
 read_text_seq :: InlineMatcher
@@ -777,14 +777,14 @@ read_frame_img =
       ""   -> returnV mempty -< ()
       src' -> do
         let exts = extensionsFromList [Ext_auto_identifiers]
-        resource   <- lookupResource                          -< src'
+        resource   <- lookupResource                          -< T.unpack src'
         _          <- updateMediaWithResource                 -< resource
         w          <- findAttrText' NsSVG "width"             -< ()
         h          <- findAttrText' NsSVG "height"            -< ()
         titleNodes <- matchChildContent' [ read_frame_title ] -< ()
         alt        <- matchChildContent [] read_plain_text    -< ()
         arr (firstMatch . uncurry4 imageWith)                 -<
-          (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
+          (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
 
 read_frame_title :: InlineMatcher
 read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
@@ -804,7 +804,8 @@ read_frame_mathml =
     case fold src of
       ""   -> returnV mempty -< ()
       src' -> do
-        let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
+        let path = T.unpack $
+                    fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml"
         (_, mathml) <- lookupResource -< path
         case readMathML (UTF8.toText $ B.toStrict mathml) of
           Left _     -> returnV mempty -< ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
 module Text.Pandoc.Readers.Odt.Generic.Namespaces where
 
 import qualified Data.Map as M
+import Data.Text (Text)
 
 --
-type NameSpaceIRI          = String
+type NameSpaceIRI          = Text
 
 --
 type NameSpaceIRIs     nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 , reverseComposition
 , tryToRead
 , Lookupable(..)
-, readLookupables
 , readLookupable
 , readPercent
 , findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 
 import Control.Category (Category, (<<<), (>>>))
 import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
 import qualified Data.Foldable as F (Foldable, foldr)
 import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
 
 -- | Equivalent to
 -- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
 -- (nobody wants that) while the latter returns "to much" for simple purposes.
 -- This function instead applies 'reads' and returns the first match (if any)
 -- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
 
 -- | A version of 'reads' that requires a '%' sign after the number
 readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i   , r ) <- reads s
 -- | Data that can be looked up.
 -- This is mostly a utility to read data with kind *.
 class Lookupable a where
-  lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- >   readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
-                                 a <- maybeToList (lookup word lookupTable)
-                    ]
+  lookupTable :: [(Text, a)]
 
 -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
-                 $ map ((`lookup` lookupTable).fst)
-                 $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+  lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
 
 uncurry3 :: (a->b->c                -> z) -> (a,b,c          ) -> z
 uncurry4 :: (a->b->c->d             -> z) -> (a,b,c,d        ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..0d921e23b 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE TupleSections   #-}
 {-# LANGUAGE GADTs           #-}
 {-# LANGUAGE LambdaCase      #-}
@@ -60,11 +61,11 @@ import           Control.Arrow
 import           Data.Bool ( bool )
 import           Data.Either ( rights )
 import qualified Data.Map             as M
-import qualified Data.Text            as T
+import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
 
-import qualified Text.XML.Light       as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -78,13 +79,13 @@ import           Text.Pandoc.Readers.Odt.Generic.Fallible
 --------------------------------------------------------------------------------
 
 --
-type ElementName           = String
-type AttributeName         = String
-type AttributeValue        = String
-type TextAttributeValue    = T.Text
+type ElementName           = Text
+type AttributeName         = Text
+type AttributeValue        = Text
+type TextAttributeValue    = Text
 
 --
-type NameSpacePrefix       = String
+type NameSpacePrefix       = Text
 
 --
 type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -461,7 +462,7 @@ lookupDefaultingAttr     :: (NameSpaceID nsID, Lookupable a, Default a)
 lookupDefaultingAttr nsID attrName
                          = lookupAttrWithDefault nsID attrName def
 
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
 findAttr'               :: (NameSpaceID nsID)
                         => nsID -> AttributeName
                         -> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
                         =         qualifyName nsID attrName
                               &&& getCurrentElement
                           >>% XML.findAttr
-                          >>^ fmap T.pack
 
 -- | Return value as string or fail
 findAttr               :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText           :: (NameSpaceID nsID)
                        -> FallibleXMLConverter nsID extraState x TextAttributeValue
 findAttrText nsID attrName
                        = findAttr' nsID attrName
-                         >>^ fmap T.pack
                          >>> maybeToChoice
 
 -- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
                         -> XMLConverter nsID extraState x TextAttributeValue
 findAttrTextWithDefault nsID attrName deflt
                        = findAttr' nsID attrName
-                         >>^ maybe deflt T.pack
+                         >>^ fromMaybe deflt
 
 -- | Read and return value or fail
 readAttr               :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
 -- Internals
 --------------------------------------------------------------------------------
 
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
 stringToBool' val | val `elem` trueValues  = Just True
                   | val `elem` falseValues = Just False
                   | otherwise              = Nothing
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index 3a24a1162..70741c28d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt.Namespaces
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -13,10 +14,10 @@ Namespaces used in odt files.
 module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
                                           ) where
 
-import Data.List (isPrefixOf)
 import qualified Data.Map as M (empty, insert)
 import Data.Maybe (fromMaybe, listToMaybe)
-
+import Data.Text (Text)
+import qualified Data.Text as T
 import Text.Pandoc.Readers.Odt.Generic.Namespaces
 
 
@@ -30,7 +31,7 @@ instance NameSpaceID Namespace where
 
 
 findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri]
 
 nsIDmap :: NameSpaceIRIs Namespace
 nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
@@ -54,12 +55,12 @@ data Namespace = -- Open Document core
                  -- Core XML (basically only for the 'id'-attribute)
                | NsXML
                  -- Fallback
-               | NsOther String
+               | NsOther Text
   deriving ( Eq, Ord, Show )
 
 -- | Not the actual iri's, but large prefixes of them - this way there are
 -- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
+nsIDs :: [(Text, Namespace)]
 nsIDs = [
   ("urn:oasis:names:tc:opendocument:xmlns:animation"        , NsAnim         ),
   ("urn:oasis:names:tc:opendocument:xmlns:chart"            , NsChart        ),
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 46a777df1..5e10f896c 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -2,6 +2,7 @@
 {-# LANGUAGE Arrows          #-}
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE TupleSections   #-}
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Odt.StyleReader
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -46,11 +47,13 @@ import qualified Data.Foldable as F
 import Data.List (unfoldr)
 import qualified Data.Map as M
 import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
 import qualified Data.Set as S
 
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, tshow)
 
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 
@@ -90,7 +93,7 @@ instance Default FontPitch where
 --
 -- Thus, we want
 
-type FontFaceName = String
+type FontFaceName = Text
 
 type FontPitches = M.Map FontFaceName FontPitch
 
@@ -151,7 +154,7 @@ findPitch =     ( lookupAttr NsStyle "font-pitch"
 -- Definitions of main data
 --------------------------------------------------------------------------------
 
-type StyleName        = String
+type StyleName        = Text
 
 -- | There are two types of styles: named styles with a style family and an
 -- optional style parent, and default styles for each style family,
@@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} =
   -- \^ simpler, but in general less efficient
 
 data ListLevelStyle = ListLevelStyle { listLevelType  :: ListLevelType
-                                     , listItemPrefix :: Maybe String
-                                     , listItemSuffix :: Maybe String
+                                     , listItemPrefix :: Maybe Text
+                                     , listItemSuffix :: Maybe Text
                                      , listItemFormat :: ListItemNumberFormat
                                      , listItemStart  :: Int
                                      }
@@ -366,9 +369,9 @@ instance Show ListLevelStyle where
   show ListLevelStyle{..} =    "<LLS|"
                             ++ show listLevelType
                             ++ "|"
-                            ++ maybeToString listItemPrefix
+                            ++ maybeToString (T.unpack <$> listItemPrefix)
                             ++ show listItemFormat
-                            ++ maybeToString listItemSuffix
+                            ++ maybeToString (T.unpack <$> listItemSuffix)
                             ++ ">"
     where maybeToString = fromMaybe ""
 
@@ -471,7 +474,7 @@ readTextProperties =
      )
   where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
         isFontBold = ("normal",False):("bold",True)
-                    :map ((,True).show) ([100,200..900]::[Int])
+                    :map ((,True) . tshow) ([100,200..900]::[Int])
 
 readUnderlineMode     :: StyleReaderSafe _x (Maybe UnderlineMode)
 readUnderlineMode     = readLineMode "text-underline-mode"
@@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
 readStrikeThroughMode = readLineMode "text-line-through-mode"
                                      "text-line-through-style"
 
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode)
 readLineMode modeAttr styleAttr = proc x -> do
   isUL <- searchAttr  NsStyle styleAttr False isLinePresent -< x
   mode <- lookupAttr' NsStyle  modeAttr                     -< x
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index da990e4d3..89c71d773 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -31,6 +31,7 @@ import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
 import qualified Data.Set as Set
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
@@ -57,19 +58,19 @@ import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
 import Text.Printf (printf)
 import Text.TeXMath
-import Text.XML.Light as XML
-import Text.XML.Light.Cursor as XMLC
 import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML
+import Data.Generics (mkT, everywhere)
 
 data ListMarker = NoMarker
                 | BulletMarker
                 | NumberMarker ListNumberStyle ListNumberDelim Int
                 deriving (Show, Read, Eq, Ord)
 
-listMarkerToId :: ListMarker -> String
+listMarkerToId :: ListMarker -> Text
 listMarkerToId NoMarker = "990"
 listMarkerToId BulletMarker = "991"
-listMarkerToId (NumberMarker sty delim n) =
+listMarkerToId (NumberMarker sty delim n) = T.pack $
   '9' : '9' : styNum : delimNum : show n
   where styNum = case sty of
                       DefaultStyle -> '2'
@@ -106,8 +107,8 @@ data WriterEnv = WriterEnv{ envTextProperties :: EnvProps
                           , envListLevel      :: Int
                           , envListNumId      :: Int
                           , envInDel          :: Bool
-                          , envChangesAuthor  :: T.Text
-                          , envChangesDate    :: T.Text
+                          , envChangesAuthor  :: Text
+                          , envChangesDate    :: Text
                           , envPrintWidth     :: Integer
                           }
 
@@ -125,9 +126,9 @@ defaultWriterEnv = WriterEnv{ envTextProperties = mempty
 
 data WriterState = WriterState{
          stFootnotes      :: [Element]
-       , stComments       :: [([(T.Text, T.Text)], [Inline])]
-       , stSectionIds     :: Set.Set T.Text
-       , stExternalLinks  :: M.Map String String
+       , stComments       :: [([(Text, Text)], [Inline])]
+       , stSectionIds     :: Set.Set Text
+       , stExternalLinks  :: M.Map Text Text
        , stImages         :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
        , stLists          :: [ListMarker]
        , stInsId          :: Int
@@ -164,18 +165,18 @@ defaultWriterState = WriterState{
 
 type WS m = ReaderT WriterEnv (StateT WriterState m)
 
-renumIdMap :: Int -> [Element] -> M.Map String String
+renumIdMap :: Int -> [Element] -> M.Map Text Text
 renumIdMap _ [] = M.empty
 renumIdMap n (e:es)
   | Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
-      M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es)
+      M.insert oldId ("rId" <> tshow n) (renumIdMap (n+1) es)
   | otherwise = renumIdMap n es
 
-replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
+replaceAttr :: (QName -> Bool) -> Text -> [XML.Attr] -> [XML.Attr]
 replaceAttr f val = map $
     \a -> if f (attrKey a) then XML.Attr (attrKey a) val else a
 
-renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element
+renumId :: (QName -> Bool) -> M.Map Text Text -> Element -> Element
 renumId f renumMap e
   | Just oldId <- findAttrBy f e
   , Just newId <- M.lookup oldId renumMap =
@@ -184,18 +185,12 @@ renumId f renumMap e
      e { elAttribs = attrs' }
   | otherwise = e
 
-renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element]
+renumIds :: (QName -> Bool) -> M.Map Text Text -> [Element] -> [Element]
 renumIds f renumMap = map (renumId f renumMap)
 
-findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text
-findAttrTextBy x = fmap T.pack . findAttrBy x
-
-lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text
-lookupAttrTextBy x = fmap T.pack . lookupAttrBy x
-
 -- | Certain characters are invalid in XML even if escaped.
 -- See #1992
-stripInvalidChars :: T.Text -> T.Text
+stripInvalidChars :: Text -> Text
 stripInvalidChars = T.filter isValidChar
 
 -- | See XML reference
@@ -234,11 +229,11 @@ writeDocx opts doc = do
 
   -- Gets the template size
   let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz"))
-  let mbAttrSzWidth = mbpgsz >>= lookupAttrTextBy ((=="w") . qName) . elAttribs
+  let mbAttrSzWidth = mbpgsz >>= lookupAttrBy ((=="w") . qName) . elAttribs
 
   let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar"))
-  let mbAttrMarLeft = mbpgmar >>= lookupAttrTextBy ((=="left") . qName) . elAttribs
-  let mbAttrMarRight = mbpgmar >>= lookupAttrTextBy ((=="right") . qName) . elAttribs
+  let mbAttrMarLeft = mbpgmar >>= lookupAttrBy ((=="left") . qName) . elAttribs
+  let mbAttrMarRight = mbpgmar >>= lookupAttrBy ((=="right") . qName) . elAttribs
 
   -- Get the available area (converting the size and the margins to int and
   -- doing the difference
@@ -250,24 +245,21 @@ writeDocx opts doc = do
 
   -- styles
   mblang <- toLang $ getLang opts meta
+  -- TODO FIXME avoid this generic traversal!
+  -- lang is in w:docDefaults /  w:rPr  /  w:lang
   let addLang :: Element -> Element
-      addLang e = case (\l -> XMLC.toTree . go (T.unpack $ renderLang l) $
-                                 XMLC.fromElement e) <$> mblang of
-                    Just (Elem e') -> e'
-                    _              -> e -- return original
-        where go :: String -> Cursor -> Cursor
-              go l cursor = case XMLC.findRec (isLangElt . current) cursor of
-                              Nothing -> cursor
-                              Just t  -> XMLC.modifyContent (setval l) t
-              setval :: String -> Content -> Content
-              setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $
-                                               elAttribs e' }
-              setval _ x         = x
-              setvalattr :: String -> XML.Attr -> XML.Attr
-              setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l
-              setvalattr _ x                                 = x
-              isLangElt (Elem e') = qName (elName e') == "lang"
-              isLangElt _         = False
+      addLang = case mblang of
+                  Nothing -> id
+                  Just l  -> everywhere (mkT (go (renderLang l)))
+        where
+          go :: Text -> Element -> Element
+          go l e'
+            | qName (elName e') == "lang"
+                = e'{ elAttribs = map (setvalattr l) $ elAttribs e' }
+            | otherwise = e'
+
+          setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l
+          setvalattr _ x                                 = x
 
   let stylepath = "word/styles.xml"
   styledoc <- addLang <$> parseXml refArchive distArchive stylepath
@@ -337,12 +329,13 @@ writeDocx opts doc = do
 
   -- [Content_Types].xml
   let mkOverrideNode (part', contentType') = mknode "Override"
-               [("PartName",part'),("ContentType",contentType')] ()
+               [("PartName", T.pack part')
+               ,("ContentType", contentType')] ()
   let mkImageOverride (_, imgpath, mbMimeType, _) =
-          mkOverrideNode ("/word/" ++ imgpath,
-                          maybe "application/octet-stream" T.unpack mbMimeType)
+          mkOverrideNode ("/word/" <> imgpath,
+                          fromMaybe "application/octet-stream" mbMimeType)
   let mkMediaOverride imgpath =
-          mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath)
+          mkOverrideNode ("/" <> imgpath, getMimeTypeDef imgpath)
   let overrides = map mkOverrideNode (
                   [("/word/webSettings.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
@@ -369,13 +362,14 @@ writeDocx opts doc = do
                   ,("/word/footnotes.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
                   ] ++
-                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                  map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x),
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
-                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                  map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x),
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
                     map mkImageOverride imgs ++
-                    [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive
-                                        , "word/media/" `isPrefixOf` eRelativePath e ]
+                    [ mkMediaOverride (eRelativePath e)
+                        | e <- zEntries refArchive
+                        , "word/media/" `isPrefixOf` eRelativePath e ]
 
   let defaultnodes = [mknode "Default"
               [("Extension","xml"),("ContentType","application/xml")] (),
@@ -421,7 +415,7 @@ writeDocx opts doc = do
   let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
   let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
   let baserels = baserels' ++ renumHeaders ++ renumFooters
-  let toImgRel (ident,path,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
+  let toImgRel (ident,path,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",T.pack ident),("Target",T.pack path)] ()
   let imgrels = map toImgRel imgs
   let toLinkRel (src,ident) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
   let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
@@ -489,10 +483,10 @@ writeDocx opts doc = do
   numbering <- parseXml refArchive distArchive numpath
   let newNumElts = mkNumbering (stLists st)
   let pandocAdded e =
-       case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of
+       case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of
          Just numid -> numid >= (990 :: Int)
          Nothing    ->
-           case findAttrTextBy ((== "numId") . qName) e >>= safeRead of
+           case findAttrBy ((== "numId") . qName) e >>= safeRead of
              Just numid -> numid >= (1000 :: Int)
              Nothing    -> False
   let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering)
@@ -514,7 +508,7 @@ writeDocx opts doc = do
   let extraCoreProps = ["subject","lang","category","description"]
   let extraCorePropsMap = M.fromList $ zip extraCoreProps
                        ["dc:subject","dc:language","cp:category","dc:description"]
-  let lookupMetaString' :: T.Text -> Meta -> T.Text
+  let lookupMetaString' :: Text -> Meta -> Text
       lookupMetaString' key' meta' =
         case key' of
              "description"    -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta')
@@ -530,21 +524,21 @@ writeDocx opts doc = do
           : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta))
           : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta)
             | k <- M.keys (unMeta meta), k `elem` extraCoreProps]
-          ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords)
+          ++ mknode "cp:keywords" [] (T.intercalate ", " keywords)
           : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
                    , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
-                   ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+                   ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime)
   let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
 
   -- docProps/custom.xml
-  let customProperties :: [(String, String)]
-      customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta)
+  let customProperties :: [(Text, Text)]
+      customProperties = [ (k, lookupMetaString k meta)
                          | k <- M.keys (unMeta meta)
                          , k `notElem` (["title", "author", "keywords"]
                                        ++ extraCoreProps)]
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
-         ,("pid", show pid)
+         ,("pid", tshow pid)
          ,("name", k)] $ mknode "vt:lpwstr" [] v
   let customPropsPath = "docProps/custom.xml"
   let customProps = mknode "Properties"
@@ -594,7 +588,8 @@ writeDocx opts doc = do
   fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
   webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
   headerFooterEntries <- mapM (entryFromArchive refArchive . ("word/" ++)) $
-                     mapMaybe extractTarget (headers ++ footers)
+                         mapMaybe (fmap T.unpack . extractTarget)
+                         (headers ++ footers)
   let miscRelEntries = [ e | e <- zEntries refArchive
                        , "word/_rels/" `isPrefixOf` eRelativePath e
                        , ".xml.rels" `isSuffixOf` eRelativePath e
@@ -620,8 +615,8 @@ newParaPropToOpenXml (fromStyleName -> s) =
   let styleId = T.filter (not . isSpace) s
   in mknode "w:style" [ ("w:type", "paragraph")
                       , ("w:customStyle", "1")
-                      , ("w:styleId", T.unpack styleId)]
-     [ mknode "w:name" [("w:val", T.unpack s)] ()
+                      , ("w:styleId", styleId)]
+     [ mknode "w:name" [("w:val", s)] ()
      , mknode "w:basedOn" [("w:val","BodyText")] ()
      , mknode "w:qFormat" [] ()
      ]
@@ -631,8 +626,8 @@ newTextPropToOpenXml (fromStyleName -> s) =
   let styleId = T.filter (not . isSpace) s
   in mknode "w:style" [ ("w:type", "character")
                       , ("w:customStyle", "1")
-                      , ("w:styleId", T.unpack styleId)]
-     [ mknode "w:name" [("w:val", T.unpack s)] ()
+                      , ("w:styleId", styleId)]
+     [ mknode "w:name" [("w:val", s)] ()
      , mknode "w:basedOn" [("w:val","BodyTextChar")] ()
      ]
 
@@ -643,13 +638,14 @@ styleToOpenXml sm style =
         toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing
                         | otherwise = Just $
                           mknode "w:style" [("w:type","character"),
-                           ("w:customStyle","1"),("w:styleId",show toktype)]
-                             [ mknode "w:name" [("w:val",show toktype)] ()
+                           ("w:customStyle","1"),("w:styleId", tshow toktype)]
+                             [ mknode "w:name" [("w:val", tshow toktype)] ()
                              , mknode "w:basedOn" [("w:val","VerbatimChar")] ()
                              , mknode "w:rPr" [] $
-                               [ mknode "w:color" [("w:val",tokCol toktype)] ()
+                               [ mknode "w:color" [("w:val", tokCol toktype)] ()
                                  | tokCol toktype /= "auto" ] ++
-                               [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
+                               [ mknode "w:shd" [("w:val","clear")
+                                                ,("w:fill",tokBg toktype)] ()
                                  | tokBg toktype /= "auto" ] ++
                                [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
                                [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
@@ -657,10 +653,10 @@ styleToOpenXml sm style =
                              ]
         tokStyles = tokenStyles style
         tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles
-        tokCol toktype = maybe "auto" (drop 1 . fromColor)
+        tokCol toktype = maybe "auto" (T.pack . drop 1 . fromColor)
                          $ (tokenColor =<< M.lookup toktype tokStyles)
                            `mplus` defaultColor style
-        tokBg toktype = maybe "auto" (drop 1 . fromColor)
+        tokBg toktype = maybe "auto" (T.pack . drop 1 . fromColor)
                          $ (tokenBackground =<< M.lookup toktype tokStyles)
                            `mplus` backgroundColor style
         parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing
@@ -673,10 +669,11 @@ styleToOpenXml sm style =
                              , mknode "w:pPr" []
                                $ mknode "w:wordWrap" [("w:val","off")] ()
                                :
-                         maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style)
+                         maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill", T.pack $ drop 1 $ fromColor col)] ()]) (backgroundColor style)
                              ]
 
-copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry
+copyChildren :: (PandocMonad m)
+             => Archive -> Archive -> String -> Integer -> [Text] -> m Entry
 copyChildren refArchive distArchive path timestamp elNames = do
   ref  <- parseXml refArchive distArchive path
   dist <- parseXml distArchive distArchive path
@@ -685,7 +682,7 @@ copyChildren refArchive distArchive path timestamp elNames = do
     }
   where
     strName QName{qName=name, qPrefix=prefix}
-      | Just p <- prefix = p++":"++name
+      | Just p <- prefix = p <> ":" <> name
       | otherwise        = name
     shouldCopy = (`elem` elNames) . strName
     cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
@@ -706,35 +703,35 @@ maxListLevel = 8
 
 mkNum :: ListMarker -> Int -> Element
 mkNum marker numid =
-  mknode "w:num" [("w:numId",show numid)]
+  mknode "w:num" [("w:numId",tshow numid)]
    $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] ()
    : case marker of
        NoMarker     -> []
        BulletMarker -> []
        NumberMarker _ _ start ->
-          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
-              $ mknode "w:startOverride" [("w:val",show start)] ())
+          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",tshow (lvl :: Int))]
+              $ mknode "w:startOverride" [("w:val",tshow start)] ())
                 [0..maxListLevel]
 
 mkAbstractNum :: ListMarker -> Integer -> Element
 mkAbstractNum marker nsid =
   mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
-    $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
+    $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] ()
     : mknode "w:multiLevelType" [("w:val","multilevel")] ()
     : map (mkLvl marker)
       [0..maxListLevel]
 
 mkLvl :: ListMarker -> Int -> Element
 mkLvl marker lvl =
-  mknode "w:lvl" [("w:ilvl",show lvl)] $
+  mknode "w:lvl" [("w:ilvl",tshow lvl)] $
     [ mknode "w:start" [("w:val",start)] ()
       | marker /= NoMarker && marker /= BulletMarker ] ++
     [ mknode "w:numFmt" [("w:val",fmt)] ()
-    , mknode "w:lvlText" [("w:val",lvltxt)] ()
+    , mknode "w:lvlText" [("w:val", lvltxt)] ()
     , mknode "w:lvlJc" [("w:val","left")] ()
     , mknode "w:pPr" []
-      [ mknode "w:ind" [ ("w:left",show $ lvl * step + step)
-                       , ("w:hanging",show (hang :: Int))
+      [ mknode "w:ind" [ ("w:left",tshow $ lvl * step + step)
+                       , ("w:hanging",tshow (hang :: Int))
                        ] ()
       ]
     ]
@@ -743,8 +740,8 @@ mkLvl marker lvl =
                  NoMarker             -> ("bullet"," ","1")
                  BulletMarker         -> ("bullet",bulletFor lvl,"1")
                  NumberMarker st de n -> (styleFor st lvl
-                                         ,patternFor de ("%" ++ show (lvl + 1))
-                                         ,show n)
+                                         ,patternFor de ("%" <> tshow (lvl + 1))
+                                         ,tshow n)
           step = 720
           hang = 480
           bulletFor 0 = "\x2022"  -- filled circle
@@ -767,9 +764,9 @@ mkLvl marker lvl =
           styleFor DefaultStyle 5 = "lowerRoman"
           styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6)
           styleFor _ _            = "decimal"
-          patternFor OneParen s  = s ++ ")"
-          patternFor TwoParens s = "(" ++ s ++ ")"
-          patternFor _ s         = s ++ "."
+          patternFor OneParen s  = s <> ")"
+          patternFor TwoParens s = "(" <> s <> ")"
+          patternFor _ s         = s <> "."
 
 getNumId :: (PandocMonad m) => WS m Int
 getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
@@ -777,8 +774,8 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
 
 makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element]
 makeTOC opts = do
-  let depth = "1-"++show (writerTOCDepth opts)
-  let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
+  let depth = "1-" <> tshow (writerTOCDepth opts)
+  let tocCmd = "TOC \\o \"" <> depth <> "\" \\h \\z \\u"
   tocTitle <- gets stTocTitle
   title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
   return
@@ -831,7 +828,7 @@ writeOpenXML opts (Pandoc meta blocks) = do
   let toComment (kvs, ils) = do
         annotation <- inlinesToOpenXML opts ils
         return $
-          mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs]
+          mknode "w:comment" [("w:" <> k, v) | (k,v) <- kvs]
             [ mknode "w:p" [] $
               map Elem
               [ mknode "w:pPr" []
@@ -867,24 +864,24 @@ pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
 pStyleM styleName = do
   pStyleMap <- gets (smParaStyle . stStyleMaps)
   let sty' = getStyleIdFromName styleName pStyleMap
-  return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] ()
+  return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
 
 rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element
 rStyleM styleName = do
   cStyleMap <- gets (smCharStyle . stStyleMaps)
   let sty' = getStyleIdFromName styleName cStyleMap
-  return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] ()
+  return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] ()
 
-getUniqueId :: (PandocMonad m) => WS m String
+getUniqueId :: (PandocMonad m) => WS m Text
 -- the + 20 is to ensure that there are no clashes with the rIds
 -- already in word/document.xml.rel
 getUniqueId = do
   n <- gets stCurId
   modify $ \st -> st{stCurId = n + 1}
-  return $ show n
+  return $ tshow n
 
 -- | Key for specifying user-defined docx styles.
-dynamicStyleKey :: T.Text
+dynamicStyleKey :: Text
 dynamicStyleKey = "custom-style"
 
 -- | Convert a Pandoc block element to OpenXML.
@@ -979,7 +976,7 @@ blockToOpenXML' opts (Para lst)
 blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
 blockToOpenXML' _ b@(RawBlock format str)
   | format == Format "openxml" = return [
-        Text (CData CDataRaw (T.unpack str) Nothing)
+        Text (CData CDataRaw str Nothing)
       ]
   | otherwise                  = do
       report $ BlockNotRendered b
@@ -1036,7 +1033,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
   let fullrow = 5000 -- 100% specified in pct
   let rowwidth = fullrow * sum widths
   let mkgridcol w = mknode "w:gridCol"
-                       [("w:w", show (floor (textwidth * w) :: Integer))] ()
+                       [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
   let hasHeader = not $ all null headers
   modify $ \s -> s { stInTable = False }
   -- for compatibility with Word <= 2007, we include a val with a bitmask
@@ -1054,16 +1051,16 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
      mknode "w:tbl" []
       ( mknode "w:tblPr" []
         (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
+            mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
                                ,("w:lastRow","0")
                                ,("w:firstColumn","0")
                                ,("w:lastColumn","0")
                                ,("w:noHBand","0")
                                ,("w:noVBand","0")
-                               ,("w:val", printf "%04x" tblLookVal)
+                               ,("w:val", T.pack $ printf "%04x" tblLookVal)
                                ] () :
-          [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] ()
+          [ mknode "w:tblCaption" [("w:val", captionStr)] ()
           | not (null caption) ] )
       : mknode "w:tblGrid" []
         (if all (==0) widths
@@ -1126,7 +1123,7 @@ listItemToOpenXML opts numid (first:rest) = do
   modify $ \st -> st{ stInList = oldInList }
   return $ first'' ++ rest''
 
-alignmentToString :: Alignment -> [Char]
+alignmentToString :: Alignment -> Text
 alignmentToString alignment = case alignment of
                                  AlignLeft    -> "left"
                                  AlignRight   -> "right"
@@ -1169,8 +1166,8 @@ getParaProps displayMathPara = do
   listLevel <- asks envListLevel
   numid <- asks envListNumId
   let listPr = [mknode "w:numPr" []
-                [ mknode "w:ilvl" [("w:val",show listLevel)] ()
-                , mknode "w:numId" [("w:val",show numid)] () ] | listLevel >= 0 && not displayMathPara]
+                [ mknode "w:ilvl" [("w:val",tshow listLevel)] ()
+                , mknode "w:numId" [("w:val",tshow numid)] () ] | listLevel >= 0 && not displayMathPara]
   return $ case listPr ++ squashProps props of
                 [] -> []
                 ps -> [mknode "w:pPr" [] ps]
@@ -1185,7 +1182,7 @@ withParaPropM md p = do
   d <- md
   withParaProp d p
 
-formattedString :: PandocMonad m => T.Text -> WS m [Element]
+formattedString :: PandocMonad m => Text -> WS m [Element]
 formattedString str =
   -- properly handle soft hyphens
   case splitTextBy (=='\173') str of
@@ -1194,7 +1191,7 @@ formattedString str =
          sh <- formattedRun [mknode "w:softHyphen" [] ()]
          intercalate sh <$> mapM formattedString' ws
 
-formattedString' :: PandocMonad m => T.Text -> WS m [Element]
+formattedString' :: PandocMonad m => Text -> WS m [Element]
 formattedString' str = do
   inDel <- asks envInDel
   formattedRun [ mktnode (if inDel then "w:delText" else "w:t")
@@ -1226,7 +1223,7 @@ inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) =
      mknode "w:r" []
      (mknode "w:t"
        [("xml:space","preserve")]
-       ("\t" :: String))] ++)
+       ("\t" :: Text))] ++)
     <$> inlinesToOpenXML opts ils
 inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) =
   inlinesToOpenXML opts ils
@@ -1236,17 +1233,17 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
   let ident' = fromMaybe ident (lookup "id" kvs)
       kvs' = filter (("id" /=) . fst) kvs
   modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st }
-  return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ]
+  return [ Elem $ mknode "w:commentRangeStart" [("w:id", ident')] () ]
 inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) =
   -- prefer the "id" in kvs, since that is the one produced by the docx
   -- reader.
   let ident' = fromMaybe ident (lookup "id" kvs)
   in return . map Elem $
-     [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] ()
+     [ mknode "w:commentRangeEnd" [("w:id", ident')] ()
      , mknode "w:r" []
        [ mknode "w:rPr" []
          [ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
-       , mknode "w:commentReference" [("w:id", T.unpack ident')] () ]
+       , mknode "w:commentReference" [("w:id", ident')] () ]
      ]
 inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
   stylemod <- case lookup dynamicStyleKey kvs of
@@ -1270,8 +1267,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
         defaultAuthor <- asks envChangesAuthor
         let author = fromMaybe defaultAuthor (lookup "author" kvs)
         let mdate = lookup "date" kvs
-        return $ ("w:author", T.unpack author) :
-                   maybe [] (\date -> [("w:date", T.unpack date)]) mdate
+        return $ ("w:author", author) :
+                   maybe [] (\date -> [("w:date", date)]) mdate
   insmod <- if "insertion" `elem` classes
                then do
                  changeAuthorDate <- getChangeAuthorDate
@@ -1281,7 +1278,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                    x <- f
                    return [Elem $
                            mknode "w:ins"
-                             (("w:id", show insId) : changeAuthorDate) x]
+                             (("w:id", tshow insId) : changeAuthorDate) x]
                else return id
   delmod <- if "deletion" `elem` classes
                then do
@@ -1291,7 +1288,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                  return $ \f -> local (\env->env{envInDel=True}) $ do
                    x <- f
                    return [Elem $ mknode "w:del"
-                             (("w:id", show delId) : changeAuthorDate) x]
+                             (("w:id", tshow delId) : changeAuthorDate) x]
                else return id
   contents <- insmod $ delmod $ dirmod $ stylemod $ pmod
                      $ inlinesToOpenXML opts ils
@@ -1322,7 +1319,7 @@ inlineToOpenXML' opts (Strikeout lst) =
 inlineToOpenXML' _ LineBreak = return [Elem br]
 inlineToOpenXML' _ il@(RawInline f str)
   | f == Format "openxml" = return
-                            [Text (CData CDataRaw (T.unpack str) Nothing)]
+                            [Text (CData CDataRaw str Nothing)]
   | otherwise             = do
       report $ InlineNotRendered il
       return []
@@ -1335,7 +1332,7 @@ inlineToOpenXML' opts (Math mathType str) = do
   when (mathType == DisplayMath) setFirstPara
   res <- (lift . lift) (convertMath writeOMML mathType str)
   case res of
-       Right r -> return [Elem r]
+       Right r -> return [Elem $ fromXLElement r]
        Left il -> inlineToOpenXML' opts il
 inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Code attrs str) = do
@@ -1348,7 +1345,7 @@ inlineToOpenXML' opts (Code attrs str) = do
         mknode "w:r" []
           [ mknode "w:rPr" [] $
             maybeToList (lookup toktype tokTypesMap)
-            , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
+            , mknode "w:t" [("xml:space","preserve")] tok ]
   withTextPropM (rStyleM "Verbatim Char")
     $ if isNothing (writerHighlightStyle opts)
           then unhighlighted
@@ -1365,7 +1362,7 @@ inlineToOpenXML' opts (Note bs) = do
   let notemarker = mknode "w:r" []
                    [ mknode "w:rPr" [] footnoteStyle
                    , mknode "w:footnoteRef" [] () ]
-  let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker
+  let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
   let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs
       insertNoteRef (Para ils  : xs) = Para  (notemarkerXml : Space : ils) : xs
       insertNoteRef xs               = Para [notemarkerXml] : xs
@@ -1384,17 +1381,17 @@ inlineToOpenXML' opts (Note bs) = do
 inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   return
-    [ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ]
+    [ Elem $ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ]
 -- external link:
 inlineToOpenXML' opts (Link _ txt (src,_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   extlinks <- gets stExternalLinks
-  id' <- case M.lookup (T.unpack src) extlinks of
+  id' <- case M.lookup src extlinks of
             Just i   -> return i
             Nothing  -> do
-              i <- ("rId"++) `fmap` getUniqueId
+              i <- ("rId" <>) <$> getUniqueId
               modify $ \st -> st{ stExternalLinks =
-                        M.insert (T.unpack src) i extlinks }
+                        M.insert src i extlinks }
               return i
   return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ]
 inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
@@ -1414,17 +1411,17 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
                                              ,("noChangeAspect","1")] ()
         nvPicPr  = mknode "pic:nvPicPr" []
                         [ mknode "pic:cNvPr"
-                            [("descr",T.unpack src),("id","0"),("name","Picture")] ()
+                            [("descr",src),("id","0"),("name","Picture")] ()
                         , cNvPicPr ]
         blipFill = mknode "pic:blipFill" []
-          [ mknode "a:blip" [("r:embed",ident)] ()
+          [ mknode "a:blip" [("r:embed",T.pack ident)] ()
           , mknode "a:stretch" [] $
               mknode "a:fillRect" [] ()
           ]
         xfrm =    mknode "a:xfrm" []
                         [ mknode "a:off" [("x","0"),("y","0")] ()
-                        , mknode "a:ext" [("cx",show xemu)
-                                         ,("cy",show yemu)] () ]
+                        , mknode "a:ext" [("cx",tshow xemu)
+                                         ,("cy",tshow yemu)] () ]
         prstGeom = mknode "a:prstGeom" [("prst","rect")] $
                          mknode "a:avLst" [] ()
         ln =      mknode "a:ln" [("w","9525")]
@@ -1445,12 +1442,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
         imgElt = mknode "w:r" [] $
           mknode "w:drawing" [] $
             mknode "wp:inline" []
-              [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
+              [ mknode "wp:extent" [("cx",tshow xemu),("cy",tshow yemu)] ()
               , mknode "wp:effectExtent"
                 [("b","0"),("l","0"),("r","0"),("t","0")] ()
               , mknode "wp:docPr"
-                [ ("descr", T.unpack $ stringify alt)
-                , ("title", T.unpack title)
+                [ ("descr", stringify alt)
+                , ("title", title)
                 , ("id","1")
                 , ("name","Picture")
                 ] ()
@@ -1463,7 +1460,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
     Just imgData -> return [Elem $ generateImgElt imgData]
     Nothing -> ( do --try
       (img, mt) <- P.fetchItem src
-      ident <- ("rId"++) `fmap` getUniqueId
+      ident <- ("rId" <>) <$> getUniqueId
 
       let
         imgext = case mt >>= extensionFromMimeType of
@@ -1477,10 +1474,10 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
             Just Svg  -> ".svg"
             Just Emf  -> ".emf"
             Nothing   -> ""
-        imgpath = "media/" <> ident <> T.unpack imgext
-        mbMimeType = mt <|> getMimeType imgpath
+        imgpath = "media/" <> ident <> imgext
+        mbMimeType = mt <|> getMimeType (T.unpack imgpath)
 
-        imgData = (ident, imgpath, mbMimeType, img)
+        imgData = (T.unpack ident, T.unpack imgpath, mbMimeType, img)
 
       if T.null imgext
          then -- without an extension there is no rule for content type
@@ -1538,20 +1535,20 @@ withDirection x = do
                                     , envTextProperties = EnvProps textStyle textProps'
                                     }
 
-wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content]
+wrapBookmark :: (PandocMonad m) => Text -> [Content] -> WS m [Content]
 wrapBookmark "" contents = return contents
 wrapBookmark ident contents = do
   id' <- getUniqueId
   let bookmarkStart = mknode "w:bookmarkStart"
                        [("w:id", id')
-                       ,("w:name", T.unpack $ toBookmarkName ident)] ()
+                       ,("w:name", toBookmarkName ident)] ()
       bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
   return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd]
 
 -- Word imposes a 40 character limit on bookmark names and requires
 -- that they begin with a letter.  So we just use a hash of the
 -- identifier when otherwise we'd have an illegal bookmark name.
-toBookmarkName :: T.Text -> T.Text
+toBookmarkName :: Text -> Text
 toBookmarkName s
   | Just (c, _) <- T.uncons s
   , isLetter c
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 171ffe582..3f10cb437 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -24,12 +24,13 @@ import Control.Monad.State.Strict (StateT, evalState, evalStateT, get,
                                    gets, lift, modify)
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as B8
-import Data.Char (isAlphaNum, isAscii, isDigit, toLower)
+import Data.Char (isAlphaNum, isAscii, isDigit)
 import Data.List (isInfixOf, isPrefixOf)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust)
 import qualified Data.Set as Set
-import qualified Data.Text as TS
+import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import Network.HTTP (urlEncode)
 import System.FilePath (takeExtension, takeFileName, makeRelative)
@@ -48,16 +49,13 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..),
                             ObfuscationMethod (NoObfuscation), WrapOption (..),
                             WriterOptions (..))
 import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags',
-                           safeRead, stringify, trim, uniqueIdent, tshow)
+                           stringify, uniqueIdent, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.UUID (getRandomUUID)
 import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
 import Text.Printf (printf)
-import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
-                       add_attrs, lookupAttr, node, onlyElems,
-                       ppElement, showElement, strContent, unode, unqual)
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import Text.Pandoc.XML (escapeStringForXML)
 import Text.DocTemplates (FromContext(lookupContext), Context(..),
                           ToContext(toVal), Val(..))
@@ -69,7 +67,7 @@ newtype Chapter = Chapter [Block]
 data EPUBState = EPUBState {
         stMediaPaths  :: [(FilePath, (FilePath, Maybe Entry))]
       , stMediaNextId :: Int
-      , stEpubSubdir  :: String
+      , stEpubSubdir  :: FilePath
       }
 
 type E m = StateT EPUBState m
@@ -78,62 +76,63 @@ data EPUBMetadata = EPUBMetadata{
     epubIdentifier          :: [Identifier]
   , epubTitle               :: [Title]
   , epubDate                :: [Date]
-  , epubLanguage            :: String
+  , epubLanguage            :: Text
   , epubCreator             :: [Creator]
   , epubContributor         :: [Creator]
-  , epubSubject             :: [String]
-  , epubDescription         :: Maybe String
-  , epubType                :: Maybe String
-  , epubFormat              :: Maybe String
-  , epubPublisher           :: Maybe String
-  , epubSource              :: Maybe String
-  , epubRelation            :: Maybe String
-  , epubCoverage            :: Maybe String
-  , epubRights              :: Maybe String
-  , epubBelongsToCollection :: Maybe String
-  , epubGroupPosition       :: Maybe String
-  , epubCoverImage          :: Maybe String
+  , epubSubject             :: [Text]
+  , epubDescription         :: Maybe Text
+  , epubType                :: Maybe Text
+  , epubFormat              :: Maybe Text
+  , epubPublisher           :: Maybe Text
+  , epubSource              :: Maybe Text
+  , epubRelation            :: Maybe Text
+  , epubCoverage            :: Maybe Text
+  , epubRights              :: Maybe Text
+  , epubBelongsToCollection :: Maybe Text
+  , epubGroupPosition       :: Maybe Text
+  , epubCoverImage          :: Maybe FilePath
   , epubStylesheets         :: [FilePath]
   , epubPageDirection       :: Maybe ProgressionDirection
-  , epubIbooksFields        :: [(String, String)]
-  , epubCalibreFields       :: [(String, String)]
+  , epubIbooksFields        :: [(Text, Text)]
+  , epubCalibreFields       :: [(Text, Text)]
   } deriving Show
 
 data Date = Date{
-    dateText  :: String
-  , dateEvent :: Maybe String
+    dateText  :: Text
+  , dateEvent :: Maybe Text
   } deriving Show
 
 data Creator = Creator{
-    creatorText   :: String
-  , creatorRole   :: Maybe String
-  , creatorFileAs :: Maybe String
+    creatorText   :: Text
+  , creatorRole   :: Maybe Text
+  , creatorFileAs :: Maybe Text
   } deriving Show
 
 data Identifier = Identifier{
-    identifierText   :: String
-  , identifierScheme :: Maybe String
+    identifierText   :: Text
+  , identifierScheme :: Maybe Text
   } deriving Show
 
 data Title = Title{
-    titleText   :: String
-  , titleFileAs :: Maybe String
-  , titleType   :: Maybe String
+    titleText   :: Text
+  , titleFileAs :: Maybe Text
+  , titleType   :: Maybe Text
   } deriving Show
 
 data ProgressionDirection = LTR | RTL deriving Show
 
-dcName :: String -> QName
+dcName :: Text -> QName
 dcName n = QName n Nothing (Just "dc")
 
-dcNode :: Node t => String -> t -> Element
+dcNode :: Node t => Text -> t -> Element
 dcNode = node . dcName
 
-opfName :: String -> QName
+opfName :: Text -> QName
 opfName n = QName n Nothing (Just "opf")
 
-toId :: FilePath -> String
-toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
+toId :: FilePath -> Text
+toId = T.pack .
+       map (\x -> if isAlphaNum x || x == '-' || x == '_'
                      then x
                      else '_') . takeFileName
 
@@ -141,8 +140,8 @@ removeNote :: Inline -> Inline
 removeNote (Note _) = Str ""
 removeNote x        = x
 
-toVal' :: String -> Val TS.Text
-toVal' = toVal . TS.pack
+toVal' :: Text -> Val T.Text
+toVal' = toVal
 
 mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry
 mkEntry path content = do
@@ -172,21 +171,21 @@ getEPUBMetadata opts meta = do
        if null (epubIdentifier m)
           then do
             randomId <- getRandomUUID
-            return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] }
+            return $ m{ epubIdentifier = [Identifier (tshow randomId) Nothing] }
           else return m
   let addLanguage m =
-       if null (epubLanguage m)
+       if T.null (epubLanguage m)
           then case lookupContext "lang" (writerVariables opts) of
-                     Just x  -> return m{ epubLanguage = TS.unpack x }
+                     Just x  -> return m{ epubLanguage = x }
                      Nothing -> do
                        mLang <- lift $ P.lookupEnv "LANG"
                        let localeLang =
                              case mLang of
                                Just lang ->
-                                 TS.map (\c -> if c == '_' then '-' else c) $
-                                 TS.takeWhile (/='.') lang
+                                 T.map (\c -> if c == '_' then '-' else c) $
+                                 T.takeWhile (/='.') lang
                                Nothing -> "en-US"
-                       return m{ epubLanguage = TS.unpack localeLang }
+                       return m{ epubLanguage = localeLang }
           else return m
   let fixDate m =
        if null (epubDate m)
@@ -201,7 +200,7 @@ getEPUBMetadata opts meta = do
           then return m
           else do
             let authors' = map stringify $ docAuthors meta
-            let toAuthor name = Creator{ creatorText = TS.unpack name
+            let toAuthor name = Creator{ creatorText = name
                                        , creatorRole = Just "aut"
                                        , creatorFileAs = Nothing }
             return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
@@ -249,31 +248,31 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
   where getAttr n = lookupAttr (opfName n) attrs
 addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md =
   case getAttr "property" of
-       Just s | "ibooks:" `isPrefixOf` s ->
-                md{ epubIbooksFields = (drop 7 s, strContent e) :
+       Just s | "ibooks:" `T.isPrefixOf` s ->
+                md{ epubIbooksFields = (T.drop 7 s, strContent e) :
                        epubIbooksFields md }
        _ -> case getAttr "name" of
-                 Just s | "calibre:" `isPrefixOf` s ->
+                 Just s | "calibre:" `T.isPrefixOf` s ->
                    md{ epubCalibreFields =
-                         (drop 8 s, fromMaybe "" $ getAttr "content") :
+                         (T.drop 8 s, fromMaybe "" $ getAttr "content") :
                           epubCalibreFields md }
                  _ -> md
   where getAttr n = lookupAttr (unqual n) attrs
 addMetadataFromXML _ md = md
 
-metaValueToString :: MetaValue -> String
-metaValueToString (MetaString s)    = TS.unpack s
-metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils
-metaValueToString (MetaBlocks bs)   = TS.unpack $ stringify bs
+metaValueToString :: MetaValue -> Text
+metaValueToString (MetaString s)    = s
+metaValueToString (MetaInlines ils) = stringify ils
+metaValueToString (MetaBlocks bs)   = stringify bs
 metaValueToString (MetaBool True)   = "true"
 metaValueToString (MetaBool False)  = "false"
 metaValueToString _                 = ""
 
 metaValueToPaths :: MetaValue -> [FilePath]
-metaValueToPaths (MetaList xs) = map metaValueToString xs
-metaValueToPaths x             = [metaValueToString x]
+metaValueToPaths (MetaList xs) = map (T.unpack . metaValueToString) xs
+metaValueToPaths x             = [T.unpack $ metaValueToString x]
 
-getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a]
+getList :: T.Text -> Meta -> (MetaValue -> a) -> [a]
 getList s meta handleMetaValue =
   case lookupMeta s meta of
        Just (MetaList xs) -> map handleMetaValue xs
@@ -297,7 +296,7 @@ getTitle meta = getList "title" meta handleMetaValue
                 , titleType = metaValueToString <$> M.lookup "type" m }
         handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
 
-getCreator :: TS.Text -> Meta -> [Creator]
+getCreator :: T.Text -> Meta -> [Creator]
 getCreator s meta = getList s meta handleMetaValue
   where handleMetaValue (MetaMap m) =
            Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
@@ -305,7 +304,7 @@ getCreator s meta = getList s meta handleMetaValue
                   , creatorRole = metaValueToString <$> M.lookup "role" m }
         handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
 
-getDate :: TS.Text -> Meta -> [Date]
+getDate :: T.Text -> Meta -> [Date]
 getDate s meta = getList s meta handleMetaValue
   where handleMetaValue (MetaMap m) =
            Date{ dateText = fromMaybe "" $
@@ -314,7 +313,7 @@ getDate s meta = getList s meta handleMetaValue
         handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv
                                   , dateEvent = Nothing }
 
-simpleList :: TS.Text -> Meta -> [String]
+simpleList :: T.Text -> Meta -> [Text]
 simpleList s meta =
   case lookupMeta s meta of
        Just (MetaList xs) -> map metaValueToString xs
@@ -339,7 +338,7 @@ metadataFromMeta opts meta = EPUBMetadata{
     , epubCoverage             = coverage
     , epubRights               = rights
     , epubBelongsToCollection  = belongsToCollection
-    , epubGroupPosition        = groupPosition 
+    , epubGroupPosition        = groupPosition
     , epubCoverImage           = coverImage
     , epubStylesheets          = stylesheets
     , epubPageDirection        = pageDirection
@@ -363,31 +362,30 @@ metadataFromMeta opts meta = EPUBMetadata{
         coverage = metaValueToString <$> lookupMeta "coverage" meta
         rights = metaValueToString <$> lookupMeta "rights" meta
         belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta
-        groupPosition = metaValueToString <$> lookupMeta "group-position" meta       
-        coverImage =
-            (TS.unpack <$> lookupContext "epub-cover-image"
-                              (writerVariables opts))
+        groupPosition = metaValueToString <$> lookupMeta "group-position" meta
+        coverImage = T.unpack <$>
+            lookupContext "epub-cover-image" (writerVariables opts)
             `mplus` (metaValueToString <$> lookupMeta "cover-image" meta)
         mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta
         stylesheets = maybe [] metaValueToPaths mCss ++
                       case lookupContext "css" (writerVariables opts) of
-                         Just xs -> map TS.unpack xs
+                         Just xs -> map T.unpack xs
                          Nothing ->
                            case lookupContext "css" (writerVariables opts) of
-                             Just x  -> [TS.unpack x]
+                             Just x  -> [T.unpack x]
                              Nothing -> []
-        pageDirection = case map toLower . metaValueToString <$>
+        pageDirection = case T.toLower . metaValueToString <$>
                              lookupMeta "page-progression-direction" meta of
                               Just "ltr" -> Just LTR
                               Just "rtl" -> Just RTL
                               _          -> Nothing
         ibooksFields = case lookupMeta "ibooks" meta of
                             Just (MetaMap mp)
-                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
+                               -> M.toList $ M.map metaValueToString mp
                             _  -> []
         calibreFields = case lookupMeta "calibre" meta of
                             Just (MetaMap mp)
-                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
+                               -> M.toList $ M.map metaValueToString mp
                             _  -> []
 
 -- | Produce an EPUB2 file from a Pandoc document.
@@ -413,9 +411,11 @@ writeEPUB :: PandocMonad m
 writeEPUB epubVersion opts doc = do
   let epubSubdir = writerEpubSubdirectory opts
   -- sanity check on epubSubdir
-  unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
+  unless (T.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
     throwError $ PandocEpubSubdirectoryError epubSubdir
-  let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir }
+  let initState = EPUBState { stMediaPaths = []
+                            , stMediaNextId = 0
+                            , stEpubSubdir = T.unpack epubSubdir }
   evalStateT (pandocToEPUB epubVersion opts doc) initState
 
 pandocToEPUB :: PandocMonad m
@@ -439,7 +439,7 @@ pandocToEPUB version opts doc = do
                         [] -> case epubTitle metadata of
                                    []    -> "UNTITLED"
                                    (x:_) -> titleText x
-                        x  -> TS.unpack $ stringify x
+                        x  -> stringify x
 
   -- stylesheet
   stylesheets <- case epubStylesheets metadata of
@@ -461,7 +461,8 @@ pandocToEPUB version opts doc = do
                            (ListVal $ map
                              (\e -> toVal' $
                                 (if useprefix then "../" else "") <>
-                                makeRelative epubSubdir (eRelativePath e))
+                                T.pack
+                                 (makeRelative epubSubdir (eRelativePath e)))
                              stylesheetEntries)
                              mempty
 
@@ -490,18 +491,19 @@ pandocToEPUB version opts doc = do
                              case imageSize opts' (B.toStrict imgContent) of
                                Right sz  -> return $ sizeInPixels sz
                                Left err' -> (0, 0) <$ report
-                                 (CouldNotDetermineImageSize (TS.pack img) err')
+                                 (CouldNotDetermineImageSize (T.pack img) err')
                        cpContent <- lift $ writeHtml
                             opts'{ writerVariables =
                                    Context (M.fromList [
                                     ("coverpage", toVal' "true"),
                                     ("pagetitle", toVal $
-                                      escapeStringForXML $ TS.pack plainTitle),
-                                    ("cover-image", toVal' coverImageName),
+                                      escapeStringForXML plainTitle),
+                                    ("cover-image",
+                                       toVal' $ T.pack coverImageName),
                                     ("cover-image-width", toVal' $
-                                       show coverImageWidth),
+                                       tshow coverImageWidth),
                                     ("cover-image-height", toVal' $
-                                       show coverImageHeight)]) <>
+                                       tshow coverImageHeight)]) <>
                                      cssvars True <> vars }
                             (Pandoc meta [])
                        coverEntry <- mkEntry "text/cover.xhtml" cpContent
@@ -517,7 +519,7 @@ pandocToEPUB version opts doc = do
                                         ("titlepage", toVal' "true"),
                                         ("body-type",  toVal' "frontmatter"),
                                         ("pagetitle", toVal $
-                                            escapeStringForXML $ TS.pack plainTitle)])
+                                            escapeStringForXML plainTitle)])
                                       <> cssvars True <> vars }
                                (Pandoc meta [])
   tpEntry <- mkEntry "text/title_page.xhtml" tpContent
@@ -526,7 +528,7 @@ pandocToEPUB version opts doc = do
   let matchingGlob f = do
         xs <- lift $ P.glob f
         when (null xs) $
-          report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files"
+          report $ CouldNotFetchResource (T.pack f) "glob did not match any font files"
         return xs
   let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<<
                         lift (P.readFileLazy f)
@@ -573,13 +575,13 @@ pandocToEPUB version opts doc = do
 
   let chapters' = secsToChapters secs
 
-  let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)]
+  let extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)]
       extractLinkURL' num (Span (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (Link (ident, _, _) _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (Image (ident, _, _) _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (RawInline fmt raw)
         | isHtmlFormat fmt
         = foldr (\tag ->
@@ -587,18 +589,18 @@ pandocToEPUB version opts doc = do
                      TagOpen{} ->
                        case fromAttrib "id" tag of
                          "" -> id
-                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
+                         x  -> ((x, showChapter num <> "#" <> x):)
                      _ -> id)
             [] (parseTags raw)
       extractLinkURL' _ _ = []
 
-  let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)]
+  let extractLinkURL :: Int -> Block -> [(T.Text, T.Text)]
       extractLinkURL num (Div (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (Header _ (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (Table (ident,_,_) _ _ _ _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (RawBlock fmt raw)
         | isHtmlFormat fmt
         = foldr (\tag ->
@@ -606,7 +608,7 @@ pandocToEPUB version opts doc = do
                      TagOpen{} ->
                        case fromAttrib "id" tag of
                          "" -> id
-                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
+                         x  -> ((x, showChapter num <> "#" <> x):)
                      _ -> id)
             [] (parseTags raw)
       extractLinkURL num b = query (extractLinkURL' num) b
@@ -617,7 +619,7 @@ pandocToEPUB version opts doc = do
 
   let fixInternalReferences :: Inline -> Inline
       fixInternalReferences (Link attr lab (src, tit))
-        | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of
+        | Just ('#', xs) <- T.uncons src = case lookup xs reftable of
              Just ys -> Link attr lab (ys, tit)
              Nothing -> Link attr lab (src, tit)
       fixInternalReferences x = x
@@ -630,7 +632,7 @@ pandocToEPUB version opts doc = do
                  chapters'
 
   let chapToEntry num (Chapter bs) =
-        mkEntry ("text/" ++ showChapter num) =<<
+        mkEntry ("text/" ++ T.unpack (showChapter num)) =<<
         writeHtml opts'{ writerVariables =
                             Context (M.fromList
                                      [("body-type", toVal' bodyType),
@@ -677,12 +679,12 @@ pandocToEPUB version opts doc = do
   let chapterNode ent = unode "item" !
                            ([("id", toId $ makeRelative epubSubdir
                                          $ eRelativePath ent),
-                             ("href", makeRelative epubSubdir
+                             ("href", T.pack $ makeRelative epubSubdir
                                       $ eRelativePath ent),
                              ("media-type", "application/xhtml+xml")]
                             ++ case props ent of
                                     [] -> []
-                                    xs -> [("properties", unwords xs)])
+                                    xs -> [("properties", T.unwords xs)])
                         $ ()
 
   let chapterRefNode ent = unode "itemref" !
@@ -691,17 +693,17 @@ pandocToEPUB version opts doc = do
   let pictureNode ent = unode "item" !
                            [("id", toId $ makeRelative epubSubdir
                                         $ eRelativePath ent),
-                            ("href", makeRelative epubSubdir
+                            ("href", T.pack $ makeRelative epubSubdir
                                      $ eRelativePath ent),
                             ("media-type",
-                               maybe "application/octet-stream" TS.unpack
+                               fromMaybe "application/octet-stream"
                                $ mediaTypeOf $ eRelativePath ent)] $ ()
   let fontNode ent = unode "item" !
                            [("id", toId $ makeRelative epubSubdir
                                         $ eRelativePath ent),
-                            ("href", makeRelative epubSubdir
+                            ("href", T.pack $ makeRelative epubSubdir
                                      $ eRelativePath ent),
-                            ("media-type", maybe "" TS.unpack $
+                            ("media-type", fromMaybe "" $
                                   getMimeType $ eRelativePath ent)] $ ()
 
   let tocTitle = maybe plainTitle
@@ -710,7 +712,7 @@ pandocToEPUB version opts doc = do
             (x:_) -> return $ identifierText x  -- use first identifier as UUID
             []    -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null"  -- shouldn't happen
   currentTime <- lift P.getTimestamp
-  let contentsData = UTF8.fromStringLazy $ ppTopElement $
+  let contentsData = UTF8.fromTextLazy $ TL.fromStrict $ ppTopElement $
         unode "package" !
           ([("version", case version of
                              EPUB2 -> "2.0"
@@ -728,7 +730,8 @@ pandocToEPUB version opts doc = do
                                ,("media-type","application/xhtml+xml")] ++
                                [("properties","nav") | epub3 ]) $ ()
              ] ++
-             [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp)
+             [ unode "item" ! [("id","stylesheet" <> tshow n)
+                              , ("href", T.pack fp)
                               ,("media-type","text/css")] $ () |
                              (n :: Int, fp) <- zip [1..] (map
                                (makeRelative epubSubdir . eRelativePath)
@@ -773,7 +776,7 @@ pandocToEPUB version opts doc = do
   let tocLevel = writerTOCDepth opts
 
   let navPointNode :: PandocMonad m
-                   => (Int -> [Inline] -> TS.Text -> [Element] -> Element)
+                   => (Int -> [Inline] -> T.Text -> [Element] -> Element)
                    -> Block -> StateT Int m [Element]
       navPointNode formatter (Div (ident,_,_)
                                 (Header lvl (_,_,kvs) ils : children)) =
@@ -783,7 +786,7 @@ pandocToEPUB version opts doc = do
              n <- get
              modify (+1)
              let num = fromMaybe "" $ lookup "number" kvs
-             let tit = if writerNumberSections opts && not (TS.null num)
+             let tit = if writerNumberSections opts && not (T.null num)
                           then Span ("", ["section-header-number"], [])
                                 [Str num] : Space : ils
                           else ils
@@ -797,21 +800,21 @@ pandocToEPUB version opts doc = do
         concat <$> mapM (navPointNode formatter) bs
       navPointNode _ _ = return []
 
-  let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
+  let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
       navMapFormatter n tit src subs = unode "navPoint" !
-               [("id", "navPoint-" ++ show n)] $
-                  [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit
-                  , unode "content" ! [("src", "text/" <> TS.unpack src)] $ ()
+               [("id", "navPoint-" <> tshow n)] $
+                  [ unode "navLabel" $ unode "text" $ stringify tit
+                  , unode "content" ! [("src", "text/" <> src)] $ ()
                   ] ++ subs
 
   let tpNode = unode "navPoint" !  [("id", "navPoint-0")] $
-                  [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta)
+                  [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
                   , unode "content" ! [("src", "text/title_page.xhtml")]
                   $ () ]
 
   navMap <- lift $ evalStateT
              (concat <$> mapM (navPointNode navMapFormatter) secs) 1
-  let tocData = UTF8.fromStringLazy $ ppTopElement $
+  let tocData = B.fromStrict $ UTF8.fromText $ ppTopElement $
         unode "ncx" ! [("version","2005-1")
                        ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
           [ unode "head" $
@@ -833,11 +836,11 @@ pandocToEPUB version opts doc = do
           ]
   tocEntry <- mkEntry "toc.ncx" tocData
 
-  let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
+  let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
       navXhtmlFormatter n tit src subs = unode "li" !
-                                       [("id", "toc-li-" ++ show n)] $
+                                       [("id", "toc-li-" <> tshow n)] $
                                             (unode "a" !
-                                                [("href", "text/" <> TS.unpack src)]
+                                                [("href", "text/" <> src)]
                                              $ titElements)
                                             : case subs of
                                                  []    -> []
@@ -850,7 +853,7 @@ pandocToEPUB version opts doc = do
                                      , writerVariables =
                                          Context (M.fromList
                                            [("pagetitle", toVal $
-                                             escapeStringForXML $ TS.pack plainTitle)])
+                                             escapeStringForXML plainTitle)])
                                        <> writerVariables opts}
                                  (Pandoc nullMeta
                                    [Plain $ walk clean tit])) of
@@ -865,7 +868,7 @@ pandocToEPUB version opts doc = do
   tocBlocks <- lift $ evalStateT
                  (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1
   let navBlocks = [RawBlock (Format "html")
-                  $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces
+                  $ showElement $ -- prettyprinting introduces bad spaces
                    unode navtag ! ([("epub:type","toc") | epub3] ++
                                    [("id","toc")]) $
                     [ unode "h1" ! [("id","toc-title")] $ tocTitle
@@ -875,21 +878,21 @@ pandocToEPUB version opts doc = do
                                 [ unode "a" ! [("href",
                                                   "text/title_page.xhtml")
                                                ,("epub:type", "titlepage")] $
-                                  ("Title Page" :: String) ] :
+                                  ("Title Page" :: Text) ] :
                               [ unode "li"
                                 [ unode "a" ! [("href", "text/cover.xhtml")
                                               ,("epub:type", "cover")] $
-                                  ("Cover" :: String)] |
+                                  ("Cover" :: Text)] |
                                   isJust (epubCoverImage metadata)
                               ] ++
                               [ unode "li"
                                 [ unode "a" ! [("href", "#toc")
                                               ,("epub:type", "toc")] $
-                                    ("Table of Contents" :: String)
+                                    ("Table of Contents" :: Text)
                                 ] | writerTableOfContents opts
                               ]
                          else []
-  let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $
+  let landmarks = [RawBlock (Format "html") $ ppElement $
                     unode "nav" ! [("epub:type","landmarks")
                                   ,("id","landmarks")
                                   ,("hidden","hidden")] $
@@ -910,22 +913,22 @@ pandocToEPUB version opts doc = do
                         UTF8.fromStringLazy "application/epub+zip"
 
   -- container.xml
-  let containerData = UTF8.fromStringLazy $ ppTopElement $
+  let containerData = B.fromStrict $ UTF8.fromText $ ppTopElement $
        unode "container" ! [("version","1.0")
               ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
          unode "rootfiles" $
            unode "rootfile" ! [("full-path",
                     (if null epubSubdir
                         then ""
-                        else epubSubdir ++ "/") ++ "content.opf")
+                        else T.pack epubSubdir <> "/") <> "content.opf")
                ,("media-type","application/oebps-package+xml")] $ ()
   containerEntry <- mkEntry "META-INF/container.xml" containerData
 
   -- com.apple.ibooks.display-options.xml
-  let apple = UTF8.fromStringLazy $ ppTopElement $
+  let apple = B.fromStrict $ UTF8.fromText $ ppTopElement $
         unode "display_options" $
           unode "platform" ! [("name","*")] $
-            unode "option" ! [("name","specified-fonts")] $ ("true" :: String)
+            unode "option" ! [("name","specified-fonts")] $ ("true" :: Text)
   appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
 
   -- construct archive
@@ -947,7 +950,8 @@ metadataElement version md currentTime =
                   ++ publisherNodes ++ sourceNodes ++ relationNodes
                   ++ coverageNodes ++ rightsNodes ++ coverImageNodes
                   ++ modifiedNodes ++ belongsToCollectionNodes
-        withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
+        withIds base f = concat . zipWith f (map (\x -> base <>
+                                                        T.cons '-' (tshow x))
                          ([1..] :: [Int]))
         identifierNodes = withIds "epub-id" toIdentifierNode $
                           epubIdentifier md
@@ -961,9 +965,9 @@ metadataElement version md currentTime =
                                  (x:_) -> [dcNode "date" ! [("id","epub-date")]
                                             $ dateText x]
         ibooksNodes = map ibooksNode (epubIbooksFields md)
-        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v
+        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" <> k)] $ v
         calibreNodes = map calibreNode (epubCalibreFields md)
-        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k),
+        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" <> k),
                                              ("content", v)] $ ()
         languageNodes = [dcTag "language" $ epubLanguage md]
         creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
@@ -989,12 +993,12 @@ metadataElement version md currentTime =
             maybe []
                 (\belongsToCollection -> (unode "meta" !  [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection )
                 :
-                [unode "meta" !  [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: String) ])
+                [unode "meta" !  [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: Text) ])
                 (epubBelongsToCollection md)++
             maybe []
                 (\groupPosition -> [unode "meta" !  [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ])
                 (epubGroupPosition md)
-        dcTag n s = unode ("dc:" ++ n) s
+        dcTag n s = unode ("dc:" <> n) s
         dcTag' n s = [dcTag n s]
         toIdentifierNode id' (Identifier txt scheme)
           | version == EPUB2 = [dcNode "identifier" !
@@ -1002,7 +1006,7 @@ metadataElement version md currentTime =
               txt]
           | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) :
               maybe [] ((\x -> [unode "meta" !
-                                [ ("refines",'#':id')
+                                [ ("refines","#" <> id')
                                 , ("property","identifier-type")
                                 , ("scheme","onix:codelist5")
                                 ]
@@ -1018,10 +1022,10 @@ metadataElement version md currentTime =
                (creatorRole creator >>= toRelator)) $ creatorText creator]
           | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","file-as")] $ x])
+                   [("refines","#" <> id'),("property","file-as")] $ x])
                    (creatorFileAs creator) ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","role"),
+                   [("refines","#" <> id'),("property","role"),
                      ("scheme","marc:relators")] $ x])
                    (creatorRole creator >>= toRelator)
         toTitleNode id' title
@@ -1033,16 +1037,16 @@ metadataElement version md currentTime =
           | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
               ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","file-as")] $ x])
+                   [("refines","#" <> id'),("property","file-as")] $ x])
                    (titleFileAs title) ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","title-type")] $ x])
+                   [("refines","#" <> id'),("property","title-type")] $ x])
                    (titleType title)
         toDateNode id' date = [dcNode "date" !
              (("id",id') :
                 maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
                  dateText date]
-        schemeToOnix :: String -> String
+        schemeToOnix :: Text -> Text
         schemeToOnix "ISBN-10"              = "02"
         schemeToOnix "GTIN-13"              = "03"
         schemeToOnix "UPC"                  = "04"
@@ -1060,48 +1064,48 @@ metadataElement version md currentTime =
         schemeToOnix "OLCC"                 = "28"
         schemeToOnix _                      = "01"
 
-showDateTimeISO8601 :: UTCTime -> String
-showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
+showDateTimeISO8601 :: UTCTime -> Text
+showDateTimeISO8601 = T.pack . formatTime defaultTimeLocale "%FT%TZ"
 
 transformTag :: PandocMonad m
-             => Tag TS.Text
-             -> E m (Tag TS.Text)
+             => Tag T.Text
+             -> E m (Tag T.Text)
 transformTag tag@(TagOpen name attr)
   | name `elem` ["video", "source", "img", "audio"] &&
     isNothing (lookup "data-external" attr) = do
   let src = fromAttrib "src" tag
   let poster = fromAttrib "poster" tag
-  newsrc <- modifyMediaRef $ TS.unpack src
-  newposter <- modifyMediaRef $ TS.unpack poster
+  newsrc <- modifyMediaRef $ T.unpack src
+  newposter <- modifyMediaRef $ T.unpack poster
   let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
-              [("src", "../" <> newsrc) | not (TS.null newsrc)] ++
-              [("poster", "../" <> newposter) | not (TS.null newposter)]
+              [("src", "../" <> newsrc) | not (T.null newsrc)] ++
+              [("poster", "../" <> newposter) | not (T.null newposter)]
   return $ TagOpen name attr'
 transformTag tag = return tag
 
 modifyMediaRef :: PandocMonad m
                => FilePath
-               -> E m TS.Text
+               -> E m T.Text
 modifyMediaRef "" = return ""
 modifyMediaRef oldsrc = do
   media <- gets stMediaPaths
   case lookup oldsrc media of
-         Just (n,_) -> return $ TS.pack n
+         Just (n,_) -> return $ T.pack n
          Nothing    -> catchError
-           (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc
-               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack
+           (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc
+               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack
                          (("." <>) <$> (mbMime >>= extensionFromMimeType))
                newName <- getMediaNextNewName ext
                let newPath = "media/" ++ newName
                entry <- mkEntry newPath (B.fromChunks . (:[]) $ img)
                modify $ \st -> st{ stMediaPaths =
                             (oldsrc, (newPath, Just entry)):media}
-               return $ TS.pack newPath)
+               return $ T.pack newPath)
            (\e -> do
-                report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e)
-                return $ TS.pack oldsrc)
+                report $ CouldNotFetchResource (T.pack oldsrc) (tshow e)
+                return $ T.pack oldsrc)
 
-getMediaNextNewName :: PandocMonad m => String -> E m String
+getMediaNextNewName :: PandocMonad m => FilePath -> E m FilePath
 getMediaNextNewName ext = do
   nextId <- gets stMediaNextId
   modify $ \st -> st { stMediaNextId = nextId + 1 }
@@ -1128,11 +1132,11 @@ transformInline  :: PandocMonad m
                  -> Inline
                  -> E m Inline
 transformInline _opts (Image attr lab (src,tit)) = do
-    newsrc <- modifyMediaRef $ TS.unpack src
+    newsrc <- modifyMediaRef $ T.unpack src
     return $ Image attr lab ("../" <> newsrc, tit)
 transformInline opts x@(Math t m)
   | WebTeX url <- writerHTMLMathMethod opts = do
-    newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m))
+    newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m))
     let mathclass = if t == DisplayMath then "display" else "inline"
     return $ Span ("",["math",mathclass],[])
                 [Image nullAttr [x] ("../" <> newsrc, "")]
@@ -1143,40 +1147,26 @@ transformInline _opts (RawInline fmt raw)
   return $ RawInline fmt (renderTags' tags')
 transformInline _ x = return x
 
-(!) :: (t -> Element) -> [(String, String)] -> t -> Element
+(!) :: (t -> Element) -> [(Text, Text)] -> t -> Element
 (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
 
--- | Version of 'ppTopElement' that specifies UTF-8 encoding.
-ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
-  -- unEntity removes numeric  entities introduced by ppElement
-  -- (kindlegen seems to choke on these).
-  where unEntity [] = ""
-        unEntity ('&':'#':xs) =
-                   let (ds,ys) = break (==';') xs
-                       rest = drop 1 ys
-                   in  case safeRead (TS.pack $ "'\\" <> ds <> "'") of
-                          Just x  -> x : unEntity rest
-                          Nothing -> '&':'#':unEntity xs
-        unEntity (x:xs) = x : unEntity xs
-
 mediaTypeOf :: FilePath -> Maybe MimeType
 mediaTypeOf x =
   let mediaPrefixes = ["image", "video", "audio"] in
   case getMimeType x of
-    Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y
+    Just y | any (`T.isPrefixOf` y) mediaPrefixes -> Just y
     _      -> Nothing
 
 -- Returns filename for chapter number.
-showChapter :: Int -> String
-showChapter = printf "ch%03d.xhtml"
+showChapter :: Int -> Text
+showChapter = T.pack . printf "ch%03d.xhtml"
 
 -- Add identifiers to any headers without them.
 addIdentifiers :: WriterOptions -> [Block] -> [Block]
 addIdentifiers opts bs = evalState (mapM go bs) Set.empty
  where go (Header n (ident,classes,kvs) ils) = do
          ids <- get
-         let ident' = if TS.null ident
+         let ident' = if T.null ident
                          then uniqueIdent (writerExtensions opts) ils ids
                          else ident
          modify $ Set.insert ident'
@@ -1184,27 +1174,27 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty
        go x = return x
 
 -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
-normalizeDate' :: String -> Maybe String
-normalizeDate' = fmap TS.unpack . go . trim . TS.pack
+normalizeDate' :: Text -> Maybe Text
+normalizeDate' = go . T.strip
   where
     go xs
-      | TS.length xs == 4            -- YYY
-      , TS.all isDigit xs = Just xs
-      | (y, s) <- TS.splitAt 4 xs    -- YYY-MM
-      , Just ('-', m) <- TS.uncons s
-      , TS.length m == 2
-      , TS.all isDigit y && TS.all isDigit m = Just xs
+      | T.length xs == 4            -- YYY
+      , T.all isDigit xs = Just xs
+      | (y, s) <- T.splitAt 4 xs    -- YYY-MM
+      , Just ('-', m) <- T.uncons s
+      , T.length m == 2
+      , T.all isDigit y && T.all isDigit m = Just xs
       | otherwise = normalizeDate xs
 
-toRelator :: String -> Maybe String
+toRelator :: Text -> Maybe Text
 toRelator x
   | x `elem` relators = Just x
-  | otherwise         = lookup (map toLower x) relatorMap
+  | otherwise         = lookup (T.toLower x) relatorMap
 
-relators :: [String]
+relators :: [Text]
 relators = map snd relatorMap
 
-relatorMap :: [(String, String)]
+relatorMap :: [(Text, Text)]
 relatorMap =
            [("abridger", "abr")
            ,("actor", "act")
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 9334d6e9a..3b5d04427 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -25,15 +25,12 @@ import Data.ByteString.Base64 (encode)
 import Data.Char (isAscii, isControl, isSpace)
 import Data.Either (lefts, rights)
 import Data.List (intercalate)
-import Data.Text (Text, pack)
+import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Network.HTTP (urlEncode)
-import Text.XML.Light
-import qualified Text.XML.Light as X
-import qualified Text.XML.Light.Cursor as XC
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light as X
 
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import qualified Text.Pandoc.Class.PandocMonad as P
@@ -44,6 +41,7 @@ import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
 import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
                            makeSections, tshow, stringify)
 import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable)
+import Data.Generics (everywhere, mkT)
 
 -- | Data to be written at the end of the document:
 -- (foot)notes, URLs, references, images.
@@ -88,7 +86,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do
      (imgs,missing) <- get >>= (lift . fetchImages . imagesToFetch)
      let body' = replaceImagesWithAlt missing body
      let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
-     return $ pack $ xml_head ++ showContent fb2_xml ++ "\n"
+     return $ xml_head <> showContent fb2_xml <> "\n"
   where
   xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
   fb2_attrs =
@@ -100,8 +98,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do
 description :: PandocMonad m => Meta -> FBM m Content
 description meta' = do
   let genre = case lookupMetaString "genre" meta' of
-                "" -> el "genre" ("unrecognised" :: String)
-                s  -> el "genre" (T.unpack s)
+                "" -> el "genre" ("unrecognised" :: Text)
+                s  -> el "genre" s
   bt <- booktitle meta'
   let as = authors meta'
   dd <- docdate meta'
@@ -112,7 +110,7 @@ description meta' = do
                Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s]
                Just (MetaString s)        -> [el "lang" $ iso639 s]
                _                          -> []
-             where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639
+             where iso639 = T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639
   let coverimage url = do
         let img = Image nullAttr mempty (url, "")
         im <- insertImage InlineImage img
@@ -124,7 +122,7 @@ description meta' = do
   return $ el "description"
     [ el "title-info" (genre :
                       (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang))
-    , el "document-info" [el "program-used" ("pandoc" :: String)]
+    , el "document-info" [el "program-used" ("pandoc" :: Text)]
     ]
 
 booktitle :: PandocMonad m => Meta -> FBM m [Content]
@@ -137,15 +135,15 @@ authors meta' = cMap author (docAuthors meta')
 
 author :: [Inline] -> [Content]
 author ss =
-  let ws = words . cMap plain $ ss
-      email = el "email" <$> take 1 (filter ('@' `elem`) ws)
-      ws' = filter ('@' `notElem`) ws
+  let ws = T.words $ mconcat $ map plain ss
+      email = el "email" <$> take 1 (filter (T.any (=='@')) ws)
+      ws' = filter (not . T.any (== '@')) ws
       names = case ws' of
                 [nickname] -> [ el "nickname" nickname ]
                 [fname, lname] -> [ el "first-name" fname
                                     , el "last-name" lname ]
                 (fname:rest) -> [ el "first-name" fname
-                                , el "middle-name" (concat . init $ rest)
+                                , el "middle-name" (T.concat . init $ rest)
                                 , el "last-name" (last rest) ]
                 [] -> []
   in  list $ el "author" (names ++ email)
@@ -206,7 +204,7 @@ renderFootnotes = do
          el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
   where
     renderFN (n, idstr, cs) =
-        let fn_texts = el "title" (el "p" (show n)) : cs
+        let fn_texts = el "title" (el "p" (tshow n)) : cs
         in  el "section" ([uattr "id" idstr], fn_texts)
 
 -- | Fetch images and encode them for the FictionBook XML.
@@ -282,7 +280,7 @@ isMimeType s =
  where
    types =  ["text","image","audio","video","application","message","multipart"]
    valid c = isAscii c && not (isControl c) && not (isSpace c) &&
-             c `notElem` ("()<>@,;:\\\"/[]?=" :: String)
+             c `notElem` ("()<>@,;:\\\"/[]?=" :: [Char])
 
 footnoteID :: Int -> Text
 footnoteID i = "n" <> tshow i
@@ -306,7 +304,7 @@ blockToXml (Para [Image atr alt (src,tgt)])
   = insertImage NormalImage (Image atr alt (src,tit))
 blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss
 blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
-                             map (el "p" . el "code" . T.unpack) . T.lines $ s
+                             map (el "p" . el "code") . T.lines $ s
 blockToXml (RawBlock f str) =
   if f == Format "fb2"
     then
@@ -346,11 +344,11 @@ blockToXml (Table _ blkCapt specs thead tbody tfoot) = do
     c <- el "emphasis" <$> cMapM toXml caption
     return [el "table" (hd <> bd), el "p" c]
     where
-      mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content
+      mkrow :: PandocMonad m => Text -> [[Block]] -> [Alignment] -> FBM m Content
       mkrow tag cells aligns' =
         el "tr" <$> mapM (mkcell tag) (zip cells aligns')
       --
-      mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content
+      mkcell :: PandocMonad m => Text -> ([Block], Alignment) -> FBM m Content
       mkcell tag (cell, align) = do
         cblocks <- cMapM blockToXml cell
         return $ el tag ([align_attr align], cblocks)
@@ -424,7 +422,7 @@ toXml (Quoted DoubleQuote ss) = do
   inner <- cMapM toXml ss
   return $ [txt "“"] ++ inner ++ [txt "”"]
 toXml (Cite _ ss) = cMapM toXml ss  -- FIXME: support citation styles
-toXml (Code _ s) = return [el "code" $ T.unpack s]
+toXml (Code _ s) = return [el "code" s]
 toXml Space = return [txt " "]
 toXml SoftBreak = return [txt "\n"]
 toXml LineBreak = return [txt "\n"]
@@ -456,7 +454,7 @@ insertMath immode formula = do
        let imgurl = url <> T.pack (urlEncode $ T.unpack formula)
        let img = Image nullAttr alt (imgurl, "")
        insertImage immode img
-    _ -> return [el "code" $ T.unpack formula]
+    _ -> return [el "code" formula]
 
 insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content]
 insertImage immode (Image _ alt (url,ttl)) = do
@@ -471,31 +469,16 @@ insertImage immode (Image _ alt (url,ttl)) = do
          el "image" $
             [ attr ("l","href") ("#" <> fname)
             , attr ("l","type") (tshow immode)
-            , uattr "alt" (T.pack $ cMap plain alt) ]
+            , uattr "alt" (mconcat $ map plain alt) ]
             ++ ttlattr
 insertImage _ _ = error "unexpected inline instead of image"
 
 replaceImagesWithAlt :: [Text] -> Content -> Content
-replaceImagesWithAlt missingHrefs body =
-  let cur = XC.fromContent body
-      cur' = replaceAll cur
-  in  XC.toTree . XC.root $ cur'
+replaceImagesWithAlt missingHrefs = everywhere (mkT go)
   where
-  --
-    replaceAll :: XC.Cursor -> XC.Cursor
-    replaceAll c =
-        let n = XC.current c
-            c' = if isImage n && isMissing n
-                 then XC.modifyContent replaceNode c
-                 else c
-        in  case XC.nextDF c' of
-              (Just cnext) -> replaceAll cnext
-              Nothing      -> c'  -- end of document
-  --
-    isImage :: Content -> Bool
-    isImage (Elem e) = elName e == uname "image"
-    isImage _        = False
-  --
+    go c = if isMissing c
+              then replaceNode c
+              else c
     isMissing (Elem img@Element{}) =
         let imgAttrs = elAttribs img
             badAttrs = map (attr ("l","href")) missingHrefs
@@ -505,18 +488,18 @@ replaceImagesWithAlt missingHrefs body =
     replaceNode :: Content -> Content
     replaceNode n@(Elem img@Element{}) =
         let attrs = elAttribs img
-            alt = getAttrVal attrs (uname "alt")
+            alt = getAttrVal attrs (unqual "alt")
             imtype = getAttrVal attrs (qname "l" "type")
         in case (alt, imtype) of
              (Just alt', Just imtype') ->
-                 if imtype' == show NormalImage
+                 if imtype' == tshow NormalImage
                  then el "p" alt'
-                 else txt $ T.pack alt'
-             (Just alt', Nothing) -> txt $ T.pack alt'  -- no type attribute
+                 else txt alt'
+             (Just alt', Nothing) -> txt alt'  -- no type attribute
              _ -> n   -- don't replace if alt text is not found
     replaceNode n = n
   --
-    getAttrVal :: [X.Attr] -> QName -> Maybe String
+    getAttrVal :: [X.Attr] -> QName -> Maybe Text
     getAttrVal attrs name =
         case filter ((name ==) . attrKey) attrs of
            (a:_) -> Just (attrVal a)
@@ -524,7 +507,7 @@ replaceImagesWithAlt missingHrefs body =
 
 
 -- | Wrap all inlines with an XML tag (given its unqualified name).
-wrap :: PandocMonad m => String -> [Inline] -> FBM m Content
+wrap :: PandocMonad m => Text -> [Inline] -> FBM m Content
 wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
 
 -- " Create a singleton list.
@@ -532,31 +515,31 @@ list :: a -> [a]
 list = (:[])
 
 -- | Convert an 'Inline' to plaintext.
-plain :: Inline -> String
-plain (Str s)               = T.unpack s
-plain (Emph ss)             = cMap plain ss
-plain (Underline ss)        = cMap plain ss
-plain (Span _ ss)           = cMap plain ss
-plain (Strong ss)           = cMap plain ss
-plain (Strikeout ss)        = cMap plain ss
-plain (Superscript ss)      = cMap plain ss
-plain (Subscript ss)        = cMap plain ss
-plain (SmallCaps ss)        = cMap plain ss
-plain (Quoted _ ss)         = cMap plain ss
-plain (Cite _ ss)           = cMap plain ss  -- FIXME
-plain (Code _ s)            = T.unpack s
+plain :: Inline -> Text
+plain (Str s)               = s
+plain (Emph ss)             = mconcat $ map plain ss
+plain (Underline ss)        = mconcat $ map plain ss
+plain (Span _ ss)           = mconcat $ map plain ss
+plain (Strong ss)           = mconcat $ map plain ss
+plain (Strikeout ss)        = mconcat $ map plain ss
+plain (Superscript ss)      = mconcat $ map plain ss
+plain (Subscript ss)        = mconcat $ map plain ss
+plain (SmallCaps ss)        = mconcat $ map plain ss
+plain (Quoted _ ss)         = mconcat $ map plain ss
+plain (Cite _ ss)           = mconcat $ map plain ss  -- FIXME
+plain (Code _ s)            = s
 plain Space                 = " "
 plain SoftBreak             = " "
 plain LineBreak             = "\n"
-plain (Math _ s)            = T.unpack s
+plain (Math _ s)            = s
 plain (RawInline _ _)       = ""
-plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"])
-plain (Image _ alt _)       = cMap plain alt
+plain (Link _ text (url,_)) = mconcat (map plain text ++ [" <", url, ">"])
+plain (Image _ alt _)       = mconcat $ map plain alt
 plain (Note _)              = ""  -- FIXME
 
 -- | Create an XML element.
 el :: (Node t)
-   => String   -- ^ unqualified element name
+   => Text     -- ^ unqualified element name
    -> t        -- ^ node contents
    -> Content  -- ^ XML content
 el name cs = Elem $ unode name cs
@@ -569,22 +552,18 @@ spaceBeforeAfter cs =
 
 -- | Create a plain-text XML content.
 txt :: Text -> Content
-txt s = Text $ CData CDataText (T.unpack s) Nothing
+txt s = Text $ CData CDataText s Nothing
 
 -- | Create an XML attribute with an unqualified name.
-uattr :: String -> Text -> Text.XML.Light.Attr
-uattr name = Attr (uname name) . T.unpack
+uattr :: Text -> Text -> X.Attr
+uattr name = Attr (unqual name)
 
 -- | Create an XML attribute with a qualified name from given namespace.
-attr :: (String, String) -> Text -> Text.XML.Light.Attr
-attr (ns, name) = Attr (qname ns name) . T.unpack
-
--- | Unqualified name
-uname :: String -> QName
-uname name = QName name Nothing Nothing
+attr :: (Text, Text) -> Text -> X.Attr
+attr (ns, name) = Attr (qname ns name)
 
 -- | Qualified name
-qname :: String -> String -> QName
+qname :: Text -> Text -> QName
 qname ns name = QName name Nothing (Just ns)
 
 -- | Abbreviation for 'concatMap'.
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 06369b4db..101b236aa 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 import Text.TeXMath
-import Text.XML.Light
+import qualified Text.XML.Light as XL
 
 newtype ODTState = ODTState { stEntries :: [Entry]
                          }
@@ -181,18 +181,20 @@ updateStyleWithLang (Just lang) arch = do
                                         PandocXMLError "styles.xml" msg
                                     Right d -> return $
                                       toEntry "styles.xml" epochtime
-                                      ( fromStringLazy
+                                      ( fromTextLazy
+                                      . TL.fromStrict
                                       . ppTopElement
                                       . addLang lang $ d )
                             else return e) (zEntries arch)
   return arch{ zEntries = entries }
 
+-- TODO FIXME avoid this generic traversal!
 addLang :: Lang -> Element -> Element
 addLang lang = everywhere' (mkT updateLangAttr)
     where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
-                           = Attr n (T.unpack $ langLanguage lang)
+                           = Attr n (langLanguage lang)
           updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
-                           = Attr n (T.unpack $ langRegion lang)
+                           = Attr n (langRegion lang)
           updateLangAttr x = x
 
 -- | transform both Image and Math elements
@@ -238,8 +240,8 @@ transformPicMath _ (Math t math) = do
   case writeMathML dt <$> readTeX math of
        Left  _ -> return $ Math t math
        Right r -> do
-         let conf = useShortEmptyTags (const False) defaultConfigPP
-         let mathml = ppcTopElement conf r
+         let conf = XL.useShortEmptyTags (const False) XL.defaultConfigPP
+         let mathml = XL.ppcTopElement conf r
          epochtime <- floor `fmap` lift P.getPOSIXTime
          let dirname = "Formula-" ++ show (length entries) ++ "/"
          let fname = dirname ++ "content.xml"
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index 8f60e70d5..0533d6c12 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -29,33 +29,32 @@ import Control.Monad.Except (throwError)
 import Text.Pandoc.Error
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Char8 as BL8
 import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
+import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light as XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
-mknode :: Node t => String -> [(String,String)] -> t -> Element
+mknode :: Node t => Text -> [(Text,Text)] -> t -> Element
 mknode s attrs =
   add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) .  node (nodename s)
 
-mktnode :: String -> [(String,String)] -> T.Text -> Element
-mktnode s attrs = mknode s attrs . T.unpack
+mktnode :: Text -> [(Text,Text)] -> T.Text -> Element
+mktnode s attrs = mknode s attrs
 
-nodename :: String -> QName
+nodename :: Text -> QName
 nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
- where (name, prefix) = case break (==':') s of
-                             (xs,[])    -> (xs, Nothing)
-                             (ys, _:zs) -> (zs, Just ys)
+ where (name, prefix) = case T.break (==':') s of
+                          (xs,ys) -> case T.uncons ys of
+                                       Nothing     -> (xs, Nothing)
+                                       Just (_,zs) -> (zs, Just xs)
 
 toLazy :: B.ByteString -> BL.ByteString
 toLazy = BL.fromChunks . (:[])
 
 renderXml :: Element -> BL.ByteString
-renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
-  UTF8.fromStringLazy (showElement elt)
+renderXml elt = BL.fromStrict (UTF8.fromText (showTopElement elt))
 
 parseXml :: PandocMonad m => Archive -> Archive -> String -> m Element
 parseXml refArchive distArchive relpath =
@@ -70,25 +69,25 @@ parseXml refArchive distArchive relpath =
 
 -- Copied from Util
 
-attrToNSPair :: XML.Attr -> Maybe (String, String)
-attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair :: Attr -> Maybe (Text, Text)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
 attrToNSPair _                                     = Nothing
 
 
 elemToNameSpaces :: Element -> NameSpaces
 elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
 
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
   let ns' = ns ++ elemToNameSpaces element
   in qName (elName element) == name &&
      qURI (elName element) == lookup prefix ns'
 
-type NameSpaces = [(String, String)]
+type NameSpaces = [(Text, Text)]
 
 -- | Scales the image to fit the page
 -- sizes are passed in emu
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 0a7060895..5caeb0753 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -20,16 +20,16 @@ import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader
 import Control.Monad.State
 import Codec.Archive.Zip
-import Data.Char (toUpper)
 import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse)
 import Data.Default
+import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Read
 import Data.Time (formatTime, defaultTimeLocale)
 import Data.Time.Clock (UTCTime)
 import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
 import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light as XML
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -48,6 +48,7 @@ import Text.DocTemplates (FromContext(lookupContext))
 import Text.TeXMath
 import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
+import Text.Pandoc.Shared (tshow)
 import Skylighting (fromColor)
 
 -- |The 'EMU' type is used to specify sizes in English Metric Units.
@@ -84,10 +85,13 @@ getPresentationSize refArchive distArchive = do
   sldSize <- findChild (elemName ns "p" "sldSz") presElement
   cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
   cyS <- findAttr (QName "cy" Nothing Nothing) sldSize
-  (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String)
-  (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String)
+  cx <- readTextAsInteger cxS
+  cy <- readTextAsInteger cyS
   return (cx `div` 12700, cy `div` 12700)
 
+readTextAsInteger :: Text -> Maybe Integer
+readTextAsInteger = either (const Nothing) (Just . fst) . Data.Text.Read.decimal
+
 data WriterEnv = WriterEnv { envRefArchive :: Archive
                            , envDistArchive :: Archive
                            , envUTCTime :: UTCTime
@@ -161,9 +165,6 @@ runP env st p = evalStateT (runReaderT p env) st
 
 --------------------------------------------------------------------
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText n = fmap T.pack . findAttr n
-
 monospaceFont :: Monad m => P m T.Text
 monospaceFont = do
   vars <- writerVariables <$> asks envOpts
@@ -171,10 +172,9 @@ monospaceFont = do
     Just s -> return s
     Nothing -> return "Courier"
 
--- Kept as string for XML.Light
-fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)]
+fontSizeAttributes :: Monad m => RunProps -> P m [(Text, Text)]
 fontSizeAttributes RunProps { rPropForceSize = Just sz } =
-  return [("sz", show $ sz * 100)]
+  return [("sz", tshow $ sz * 100)]
 fontSizeAttributes _ = return []
 
 copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive
@@ -365,7 +365,7 @@ shapeHasId :: NameSpaces -> T.Text -> Element -> Bool
 shapeHasId ns ident element
   | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element
   , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr
-  , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr =
+  , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr =
       nm == ident
   | otherwise = False
 
@@ -396,10 +396,10 @@ getShapeDimensions ns element
       ext <- findChild (elemName ns "a" "ext") xfrm
       cxS <- findAttr (QName "cx" Nothing Nothing) ext
       cyS <- findAttr (QName "cy" Nothing Nothing) ext
-      (x, _) <- listToMaybe $ reads xS
-      (y, _) <- listToMaybe $ reads yS
-      (cx, _) <- listToMaybe $ reads cxS
-      (cy, _) <- listToMaybe $ reads cyS
+      x <- readTextAsInteger xS
+      y <- readTextAsInteger yS
+      cx <- readTextAsInteger cxS
+      cy <- readTextAsInteger cyS
       return ((x `div` 12700, y `div` 12700),
               (cx `div` 12700, cy `div` 12700))
   | otherwise = Nothing
@@ -430,7 +430,7 @@ getContentShapeSize ns layout master
         Nothing -> do let mbSz =
                             findChild (elemName ns "p" "nvSpPr") sp >>=
                             findChild (elemName ns "p" "cNvPr") >>=
-                            findAttrText (QName "id" Nothing Nothing) >>=
+                            findAttr (QName "id" Nothing Nothing) >>=
                             flip getMasterShapeDimensionsById master
                       case mbSz of
                         Just sz' -> return sz'
@@ -450,8 +450,8 @@ buildSpTree ns spTreeElem newShapes =
         fn _        = True
 
 replaceNamedChildren :: NameSpaces
-                     -> String
-                     -> String
+                     -> Text
+                     -> Text
                      -> [Element]
                      -> Element
                      -> Element
@@ -654,10 +654,10 @@ createCaption contentShapeDimensions paraElements = do
                        ]
                      , mknode "p:spPr" []
                        [ mknode "a:xfrm" []
-                         [ mknode "a:off" [("x", show $ 12700 * x),
-                                           ("y", show $ 12700 * (y + cy - captionHeight))] ()
-                         , mknode "a:ext" [("cx", show $ 12700 * cx),
-                                           ("cy", show $ 12700 * captionHeight)] ()
+                         [ mknode "a:off" [("x", tshow $ 12700 * x),
+                                           ("y", tshow $ 12700 * (y + cy - captionHeight))] ()
+                         , mknode "a:ext" [("cx", tshow $ 12700 * cx),
+                                           ("cy", tshow $ 12700 * captionHeight)] ()
                          ]
                        , mknode "a:prstGeom" [("prst", "rect")]
                          [ mknode "a:avLst" [] ()
@@ -706,11 +706,13 @@ makePicElements layout picProps mInfo alt = do
                                      ,("noChangeAspect","1")] ()
   -- cNvPr will contain the link information so we do that separately,
   -- and register the link if necessary.
-  let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")]
+  let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo),
+                   ("id","0"),
+                   ("name","Picture 1")]
   cNvPr <- case picPropLink picProps of
     Just link -> do idNum <- registerLink link
                     return $ mknode "p:cNvPr" cNvPrAttr $
-                      mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] ()
+                      mknode "a:hlinkClick" [("r:id", "rId" <> tshow idNum)] ()
     Nothing   -> return $ mknode "p:cNvPr" cNvPrAttr ()
   let nvPicPr  = mknode "p:nvPicPr" []
                  [ cNvPr
@@ -718,13 +720,13 @@ makePicElements layout picProps mInfo alt = do
                  , mknode "p:nvPr" [] ()]
   let blipFill = mknode "p:blipFill" []
                  [ mknode "a:blip" [("r:embed", "rId" <>
-          show (mInfoLocalId mInfo))] ()
+                     tshow (mInfoLocalId mInfo))] ()
                  , mknode "a:stretch" [] $
                    mknode "a:fillRect" [] () ]
   let xfrm =    mknode "a:xfrm" []
-                [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] ()
-                , mknode "a:ext" [("cx",show dimX')
-                                 ,("cy",show dimY')] () ]
+                [ mknode "a:off" [("x", tshow xoff'), ("y", tshow yoff')] ()
+                , mknode "a:ext" [("cx", tshow dimX')
+                                 ,("cy", tshow dimY')] () ]
   let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
                  mknode "a:avLst" [] ()
   let ln =      mknode "a:ln" [("w","9525")]
@@ -763,7 +765,7 @@ paraElemToElements (Run rpr s) = do
             Just DoubleStrike -> [("strike", "dblStrike")]
             Nothing -> []) <>
         (case rBaseline rpr of
-            Just n -> [("baseline", show n)]
+            Just n -> [("baseline", tshow n)]
             Nothing -> []) <>
         (case rCap rpr of
             Just NoCapitals -> [("cap", "none")]
@@ -780,43 +782,44 @@ paraElemToElements (Run rpr s) = do
                    return $ case link of
                      InternalTarget _ ->
                        let linkAttrs =
-                             [ ("r:id", "rId" <> show idNum)
+                             [ ("r:id", "rId" <> tshow idNum)
                              , ("action", "ppaction://hlinksldjump")
                              ]
                        in [mknode "a:hlinkClick" linkAttrs ()]
                      -- external
                      ExternalTarget _ ->
                        let linkAttrs =
-                             [ ("r:id", "rId" <> show idNum)
+                             [ ("r:id", "rId" <> tshow idNum)
                              ]
                        in [mknode "a:hlinkClick" linkAttrs ()]
                  Nothing -> return []
   let colorContents = case rSolidFill rpr of
                         Just color ->
                           case fromColor color of
-                            '#':hx ->  [mknode "a:solidFill" []
-                                        [mknode "a:srgbClr" [("val", map toUpper hx)] ()]
-                                       ]
+                            '#':hx ->
+                              [mknode "a:solidFill" []
+                                [mknode "a:srgbClr"
+                                  [("val", T.toUpper $ T.pack hx)] ()]]
                             _ -> []
                         Nothing -> []
   codeFont <- monospaceFont
   let codeContents =
-        [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr]
+        [mknode "a:latin" [("typeface", codeFont)] () | rPropCode rpr]
   let propContents = linkProps <> colorContents <> codeContents
   return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents
-                                 , mknode "a:t" [] $ T.unpack s
+                                 , mknode "a:t" [] s
                                  ]]
 paraElemToElements (MathElem mathType texStr) = do
   isInSpkrNotes <- asks envInSpeakerNotes
   if isInSpkrNotes
     then paraElemToElements $ Run def $ unTeXString texStr
     else do res <- convertMath writeOMML mathType (unTeXString texStr)
-            case res of
+            case fromXLElement <$> res of
               Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r]
               Left (Str s) -> paraElemToElements (Run def s)
               Left _       -> throwError $ PandocShouldNeverHappenError "non-string math fallback"
 paraElemToElements (RawOOXMLParaElem str) = return
-  [Text (CData CDataRaw (T.unpack str) Nothing)]
+  [Text (CData CDataRaw str Nothing)]
 
 
 -- This is a bit of a kludge -- really requires adding an option to
@@ -824,9 +827,10 @@ paraElemToElements (RawOOXMLParaElem str) = return
 -- step at a time.
 addMathInfo :: Element -> Element
 addMathInfo element =
-  let mathspace = Attr { attrKey = QName "m" Nothing (Just "xmlns")
-                       , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
-                       }
+  let mathspace =
+        Attr { attrKey = QName "m" Nothing (Just "xmlns")
+             , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
+             }
   in add_attr mathspace element
 
 -- We look through the element to see if it contains an a14:m
@@ -849,13 +853,13 @@ surroundWithMathAlternate element =
 paragraphToElement :: PandocMonad m => Paragraph -> P m Element
 paragraphToElement par = do
   let
-    attrs = [("lvl", show $ pPropLevel $ paraProps par)] <>
+    attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <>
             (case pPropMarginLeft (paraProps par) of
-               Just px -> [("marL", show $ pixelsToEmu px)]
+               Just px -> [("marL", tshow $ pixelsToEmu px)]
                Nothing -> []
             ) <>
             (case pPropIndent (paraProps par) of
-               Just px -> [("indent", show $ pixelsToEmu px)]
+               Just px -> [("indent", tshow $ pixelsToEmu px)]
                Nothing -> []
             ) <>
             (case pPropAlign (paraProps par) of
@@ -867,7 +871,7 @@ paragraphToElement par = do
     props = [] <>
             (case pPropSpaceBefore $ paraProps par of
                Just px -> [mknode "a:spcBef" [] [
-                              mknode "a:spcPts" [("val", show $ 100 * px)] ()
+                              mknode "a:spcPts" [("val", tshow $ 100 * px)] ()
                               ]
                           ]
                Nothing -> []
@@ -910,7 +914,7 @@ shapeToElements layout (Pic picProps fp alt) = do
 shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$>
   graphicFrameToElements layout tbls cptn
 shapeToElements _ (RawOOXMLShape str) = return
-  [Text (CData CDataRaw (T.unpack str) Nothing)]
+  [Text (CData CDataRaw str Nothing)]
 shapeToElements layout shp = do
   element <- shapeToElement layout shp
   return [Elem element]
@@ -942,8 +946,10 @@ graphicFrameToElements layout tbls caption = do
             [mknode "p:ph" [("idx", "1")] ()]
           ]
         , mknode "p:xfrm" []
-          [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] ()
-          , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] ()
+          [ mknode "a:off" [("x", tshow $ 12700 * x),
+                            ("y", tshow $ 12700 * y)] ()
+          , mknode "a:ext" [("cx", tshow $ 12700 * cx),
+                            ("cy", tshow $ 12700 * cy)] ()
           ]
         ] <> elements
 
@@ -957,7 +963,7 @@ getDefaultTableStyle = do
   refArchive <- asks envRefArchive
   distArchive <- asks envDistArchive
   tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml"
-  return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst
+  return $ findAttr (QName "def" Nothing Nothing) tblStyleLst
 
 graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element
 graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
@@ -995,7 +1001,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
   let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells
 
   let mkgridcol w = mknode "a:gridCol"
-                       [("w", show ((12700 * w) :: Integer))] ()
+                       [("w", tshow ((12700 * w) :: Integer))] ()
   let hasHeader = not (all null hdrCells)
 
   mbDefTblStyle <- getDefaultTableStyle
@@ -1004,7 +1010,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
                  , ("bandRow", if tblPrBandRow tblPr then "1" else "0")
                  ] (case mbDefTblStyle of
                       Nothing -> []
-                      Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty])
+                      Just sty -> [mknode "a:tableStyleId" [] sty])
 
   return $ mknode "a:graphic" []
     [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")]
@@ -1037,7 +1043,7 @@ findPHType ns spElem phType
         -- if it's a named PHType, we want to check that the attribute
         -- value matches.
         Just phElem | (PHType tp) <- phType ->
-                        case findAttrText (QName "type" Nothing Nothing) phElem of
+                        case findAttr (QName "type" Nothing Nothing) phElem of
                           Just tp' -> tp == tp'
                           Nothing -> False
         -- if it's an ObjType, we want to check that there is NO
@@ -1204,7 +1210,7 @@ getSlideNumberFieldId notesMaster
   , Just txBody <- findChild (elemName ns "p" "txBody") sp
   , Just p <- findChild (elemName ns "a" "p") txBody
   , Just fld <- findChild (elemName ns "a" "fld") p
-  , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld =
+  , Just fldId <- findAttr (QName "id" Nothing Nothing) fld =
       return fldId
   | otherwise = throwError $
                 PandocSomeError
@@ -1283,11 +1289,11 @@ speakerNotesSlideNumber pgNum fieldId =
     [ mknode "a:bodyPr" [] ()
     , mknode "a:lstStyle" [] ()
     , mknode "a:p" []
-      [ mknode "a:fld" [ ("id", T.unpack fieldId)
+      [ mknode "a:fld" [ ("id", fieldId)
                        , ("type", "slidenum")
                        ]
         [ mknode "a:rPr" [("lang", "en-US")] ()
-        , mknode "a:t" [] (show pgNum)
+        , mknode "a:t" [] (tshow pgNum)
         ]
       , mknode "a:endParaRPr" [("lang", "en-US")] ()
       ]
@@ -1339,7 +1345,7 @@ getSlideIdNum sldId = do
     Just n -> return n
     Nothing -> throwError $
                PandocShouldNeverHappenError $
-               "Slide Id " <> T.pack (show sldId) <> " not found."
+               "Slide Id " <> tshow sldId <> " not found."
 
 slideNum :: PandocMonad m => Slide -> P m Int
 slideNum slide = getSlideIdNum $ slideId slide
@@ -1356,7 +1362,7 @@ slideToRelId :: PandocMonad m => Slide -> P m T.Text
 slideToRelId slide = do
   n <- slideNum slide
   offset <- asks envSlideIdOffset
-  return $ "rId" <> T.pack (show $ n + offset)
+  return $ "rId" <> tshow (n + offset)
 
 
 data Relationship = Relationship { relId :: Int
@@ -1368,13 +1374,11 @@ elementToRel :: Element -> Maybe Relationship
 elementToRel element
   | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing =
       do rId <- findAttr (QName "Id" Nothing Nothing) element
-         numStr <- stripPrefix "rId" rId
-         num <- case reads numStr :: [(Int, String)] of
-           (n, _) : _ -> Just n
-           []         -> Nothing
-         type' <- findAttrText (QName "Type" Nothing Nothing) element
+         numStr <- T.stripPrefix "rId" rId
+         num <- fromIntegral <$> readTextAsInteger numStr
+         type' <- findAttr (QName "Type" Nothing Nothing) element
          target <- findAttr (QName "Target" Nothing Nothing) element
-         return $ Relationship num type' target
+         return $ Relationship num type' (T.unpack target)
   | otherwise = Nothing
 
 slideToPresRel :: PandocMonad m => Slide -> P m Relationship
@@ -1463,10 +1467,9 @@ topLevelRelsEntry :: PandocMonad m => P m Entry
 topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels
 
 relToElement :: Relationship -> Element
-relToElement rel = mknode "Relationship" [ ("Id", "rId" <>
-    show (relId rel))
-                                         , ("Type", T.unpack $ relType rel)
-                                         , ("Target", relTarget rel) ] ()
+relToElement rel = mknode "Relationship" [ ("Id", "rId" <> tshow (relId rel))
+                                         , ("Type", relType rel)
+                                         , ("Target", T.pack $ relTarget rel) ] ()
 
 relsToElement :: [Relationship] -> Element
 relsToElement rels = mknode "Relationships"
@@ -1501,7 +1504,8 @@ slideToSpeakerNotesEntry slide = do
       Just element | Just notesIdNum <- mbNotesIdNum ->
                        Just <$>
                        elemToEntry
-                       ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml")
+                       ("ppt/notesSlides/notesSlide" <> show notesIdNum <>
+                        ".xml")
                        element
       _ -> return Nothing
 
@@ -1514,7 +1518,7 @@ slideToSpeakerNotesRelElement slide@Slide{} = do
     [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
     [ mknode "Relationship" [ ("Id", "rId2")
                             , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
-                            , ("Target", "../slides/slide" <> show idNum <> ".xml")
+                            , ("Target", "../slides/slide" <> tshow idNum <> ".xml")
                             ] ()
     , mknode "Relationship" [ ("Id", "rId1")
                             , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster")
@@ -1547,15 +1551,15 @@ linkRelElement :: PandocMonad m => (Int, LinkTarget) -> P m Element
 linkRelElement (rIdNum, InternalTarget targetId) = do
   targetIdNum <- getSlideIdNum targetId
   return $
-    mknode "Relationship" [ ("Id", "rId" <> show rIdNum)
+    mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum)
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
-                          , ("Target", "slide" <> show targetIdNum <> ".xml")
+                          , ("Target", "slide" <> tshow targetIdNum <> ".xml")
                           ] ()
 linkRelElement (rIdNum, ExternalTarget (url, _)) =
   return $
-    mknode "Relationship" [ ("Id", "rId" <> show rIdNum)
+    mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum)
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink")
-                          , ("Target", T.unpack url)
+                          , ("Target", url)
                           , ("TargetMode", "External")
                           ] ()
 
@@ -1567,10 +1571,10 @@ mediaRelElement mInfo =
   let ext = fromMaybe "" (mInfoExt mInfo)
   in
     mknode "Relationship" [ ("Id", "rId" <>
-      show (mInfoLocalId mInfo))
+      tshow (mInfoLocalId mInfo))
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image")
                           , ("Target", "../media/image" <>
-      show (mInfoGlobalId mInfo) <> T.unpack ext)
+      tshow (mInfoGlobalId mInfo) <> ext)
                           ] ()
 
 speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element)
@@ -1580,7 +1584,7 @@ speakerNotesSlideRelElement slide = do
   return $ case M.lookup idNum mp of
     Nothing -> Nothing
     Just n ->
-      let target = "../notesSlides/notesSlide" <> show n <> ".xml"
+      let target = "../notesSlides/notesSlide" <> tshow n <> ".xml"
       in Just $
          mknode "Relationship" [ ("Id", "rId2")
                                , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide")
@@ -1619,9 +1623,9 @@ slideToSlideRelElement slide = do
 slideToSldIdElement :: PandocMonad m => Slide -> P m Element
 slideToSldIdElement slide = do
   n <- slideNum slide
-  let id' = show $ n + 255
+  let id' = tshow $ n + 255
   rId <- slideToRelId slide
-  return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] ()
+  return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] ()
 
 presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element
 presentationToSldIdLst (Presentation _ slides) = do
@@ -1646,7 +1650,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do
       notesMasterElem =  mknode "p:notesMasterIdLst" []
                          [ mknode
                            "p:NotesMasterId"
-                           [("r:id", "rId" <> show notesMasterRId)]
+                           [("r:id", "rId" <> tshow notesMasterRId)]
                            ()
                          ]
 
@@ -1702,17 +1706,17 @@ docPropsElement docProps = do
     ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
     ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
     $
-      mknode "dc:title" [] (maybe "" T.unpack $ dcTitle docProps)
+      mknode "dc:title" [] (fromMaybe "" $ dcTitle docProps)
     :
-      mknode "dc:creator" [] (maybe "" T.unpack $ dcCreator docProps)
+      mknode "dc:creator" [] (fromMaybe "" $ dcCreator docProps)
     :
-      mknode "cp:keywords" [] (T.unpack keywords)
-    : ( [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps | isJust (dcSubject docProps)])
-    <> ( [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps | isJust (dcDescription docProps)])
-    <> ( [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps | isJust (cpCategory docProps)])
+      mknode "cp:keywords" [] keywords
+    : ( [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps | isJust (dcSubject docProps)])
+    <> ( [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps | isJust (dcDescription docProps)])
+    <> ( [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps | isJust (cpCategory docProps)])
     <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
-             , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
-             ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+              , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
+              ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime)
 
 docPropsToEntry :: PandocMonad m => DocProps -> P m Entry
 docPropsToEntry docProps = docPropsElement docProps >>=
@@ -1723,8 +1727,8 @@ docCustomPropsElement :: PandocMonad m => DocProps -> P m Element
 docCustomPropsElement docProps = do
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
-         ,("pid", show pid)
-         ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v)
+         ,("pid", tshow pid)
+         ,("name", k)] $ mknode "vt:lpwstr" [] v
   return $ mknode "Properties"
           [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties")
           ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes")
@@ -1743,7 +1747,7 @@ viewPropsElement = do
   distArchive <- asks envDistArchive
   viewPrElement <- parseXml refArchive distArchive "ppt/viewProps.xml"
   -- remove  "lastView" if it exists:
-  let notLastView :: Text.XML.Light.Attr -> Bool
+  let notLastView :: XML.Attr -> Bool
       notLastView attr =
           qName (attrKey attr) /= "lastView"
   return $
@@ -1755,15 +1759,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml"
 defaultContentTypeToElem :: DefaultContentType -> Element
 defaultContentTypeToElem dct =
   mknode "Default"
-  [("Extension", T.unpack $ defContentTypesExt dct),
-    ("ContentType", T.unpack $ defContentTypesType dct)]
+  [("Extension", defContentTypesExt dct),
+    ("ContentType", defContentTypesType dct)]
   ()
 
 overrideContentTypeToElem :: OverrideContentType -> Element
 overrideContentTypeToElem oct =
   mknode "Override"
-  [("PartName", overrideContentTypesPart oct),
-    ("ContentType", T.unpack $ overrideContentTypesType oct)]
+  [("PartName", T.pack $ overrideContentTypesPart oct),
+   ("ContentType", overrideContentTypesType oct)]
   ()
 
 contentTypesToElement :: ContentTypes -> Element
@@ -1821,7 +1825,8 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath]
 getSpeakerNotesFilePaths = do
   mp <- asks envSpeakerNotesIdMap
   let notesIdNums = M.elems mp
-  return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums
+  return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml")
+               notesIdNums
 
 presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes
 presentationToContentTypes p@(Presentation _ slides) = do
@@ -1885,11 +1890,11 @@ getContentType fp
   | otherwise = Nothing
 
 -- Kept as String for XML.Light
-autoNumAttrs :: ListAttributes -> [(String, String)]
+autoNumAttrs :: ListAttributes -> [(Text, Text)]
 autoNumAttrs (startNum, numStyle, numDelim) =
   numAttr <> typeAttr
   where
-    numAttr = [("startAt", show startNum) | startNum /= 1]
+    numAttr = [("startAt", tshow startNum) | startNum /= 1]
     typeAttr = [("type", typeString <> delimString)]
     typeString = case numStyle of
       Decimal -> "arabic"
diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs
new file mode 100644
index 000000000..38e4df218
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light.hs
@@ -0,0 +1,586 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XML.Light
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+xml-light, which we used in pandoc's the XML-based readers, has
+some limitations: in particular, it produces nodes with String
+instead of Text, and the parser falls over on processing instructions
+(see #7091).
+
+This module exports much of the API of xml-light, but using Text instead
+of String. In addition, the xml-light parsers are replaced by xml-conduit's
+well-tested parser.  (The xml-conduit types are mapped to types
+isomorphic to xml-light's, to avoid the need for massive code modifications
+elsewhere.)  Bridge functions to map xml-light types to this module's
+types are also provided (since libraries like texmath still use xml-light).
+
+Another advantage of the xml-conduit parser is that it gives us
+detailed information on xml parse errors.
+
+In the future we may want to move to using xml-conduit or another
+xml library in the code base, but this change gives us
+better performance and accuracy without much change in the
+code that used xml-light.
+-}
+module Text.Pandoc.XML.Light
+  ( -- * Basic types, duplicating those from xml-light but with Text
+    -- instead of String
+    Line
+  , Content(..)
+  , Element(..)
+  , Attr(..)
+  , CData(..)
+  , CDataKind(..)
+  , QName(..)
+  , Node(..)
+  , unode
+  , unqual
+  , add_attr
+  , add_attrs
+    -- * Conversion functions from xml-light types
+  , fromXLQName
+  , fromXLCData
+  , fromXLElement
+  , fromXLAttr
+  , fromXLContent
+    -- * Replacement for xml-light's Text.XML.Proc
+  , strContent
+  , onlyElems
+  , elChildren
+  , onlyText
+  , findChildren
+  , filterChildren
+  , filterChildrenName
+  , findChild
+  , filterChild
+  , filterChildName
+  , findElement
+  , filterElement
+  , filterElementName
+  , findElements
+  , filterElements
+  , filterElementsName
+  , findAttr
+  , lookupAttr
+  , lookupAttrBy
+  , findAttrBy
+    -- * Replacement for xml-light's Text.XML.Output
+  , ppTopElement
+  , ppElement
+  , ppContent
+  , ppcElement
+  , ppcContent
+  , showTopElement
+  , showElement
+  , showContent
+  , useShortEmptyTags
+  , defaultConfigPP
+  , ConfigPP(..)
+    -- * Replacement for xml-light's Text.XML.Input
+  , parseXMLElement
+  , parseXMLContents
+  ) where
+
+import qualified Control.Exception as E
+import qualified Text.XML as Conduit
+import Text.XML.Unresolved (InvalidEventStream(..))
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText)
+import qualified Data.Map as M
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import Data.Maybe (mapMaybe, listToMaybe)
+import Data.List(find)
+import qualified Text.XML.Light as XL
+
+-- Drop in replacement for parseXMLDoc in xml-light.
+parseXMLElement :: TL.Text -> Either T.Text Element
+parseXMLElement t =
+  elementToElement .  Conduit.documentRoot <$>
+    either (Left . T.pack . E.displayException) Right
+    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
+
+parseXMLContents :: TL.Text -> Either T.Text [Content]
+parseXMLContents t =
+  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
+    Left e ->
+      case E.fromException e of
+        Just (ContentAfterRoot _) ->
+          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
+        _ -> Left . T.pack . E.displayException $ e
+    Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x]
+
+elementToElement :: Conduit.Element -> Element
+elementToElement (Conduit.Element name attribMap nodes) =
+  Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
+ where
+  attrs = map (\(n,v) -> Attr (nameToQname n) v) $
+              M.toList attribMap
+  nameToQname (Conduit.Name localName mbns mbpref) =
+    case mbpref of
+      Nothing ->
+        case T.stripPrefix "xmlns:" localName of
+          Just rest -> QName rest mbns (Just "xmlns")
+          Nothing   -> QName localName mbns mbpref
+      _ -> QName localName mbns mbpref
+
+nodeToContent :: Conduit.Node -> Maybe Content
+nodeToContent (Conduit.NodeElement el) =
+  Just (Elem (elementToElement el))
+nodeToContent (Conduit.NodeContent t) =
+  Just (Text (CData CDataText t Nothing))
+nodeToContent _ = Nothing
+
+unqual :: Text -> QName
+unqual x = QName x Nothing Nothing
+
+-- | Add an attribute to an element.
+add_attr :: Attr -> Element -> Element
+add_attr a e = add_attrs [a] e
+
+-- | Add some attributes to an element.
+add_attrs :: [Attr] -> Element -> Element
+add_attrs as e = e { elAttribs = as ++ elAttribs e }
+
+--
+-- type definitions lightly modified from xml-light
+--
+
+-- | A line is an Integer
+type Line     = Integer
+
+-- | XML content
+data Content  = Elem Element
+              | Text CData
+              | CRef Text
+                deriving (Show, Typeable, Data)
+
+-- | XML elements
+data Element  = Element {
+                  elName      :: QName,
+                  elAttribs   :: [Attr],
+                  elContent   :: [Content],
+                  elLine      :: Maybe Line
+                } deriving (Show, Typeable, Data)
+
+-- | XML attributes
+data Attr     = Attr {
+                  attrKey :: QName,
+                  attrVal :: Text
+                } deriving (Eq, Ord, Show, Typeable, Data)
+
+-- | XML CData
+data CData    = CData {
+                  cdVerbatim  :: CDataKind,
+                  cdData      :: Text,
+                  cdLine      :: Maybe Line
+                } deriving (Show, Typeable, Data)
+
+data CDataKind
+ = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
+ | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
+ | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
+   deriving ( Eq, Show, Typeable, Data )
+
+-- | XML qualified names
+data QName    = QName {
+                  qName   :: Text,
+                  qURI    :: Maybe Text,
+                  qPrefix :: Maybe Text
+                } deriving (Show, Typeable, Data)
+
+
+instance Eq QName where
+  q1 == q2  = compare q1 q2 == EQ
+
+instance Ord QName where
+  compare q1 q2 =
+    case compare (qName q1) (qName q2) of
+      EQ  -> case (qURI q1, qURI q2) of
+               (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
+               (u1,u2)           -> compare u1 u2
+      x   -> x
+
+class Node t where
+ node :: QName -> t -> Element
+
+instance Node ([Attr],[Content]) where
+  node n (attrs,cont) = Element { elName     = n
+                                , elAttribs  = attrs
+                                , elContent  = cont
+                                , elLine     = Nothing
+                                }
+
+instance Node [Attr]             where node n as   = node n (as,[]::[Content])
+instance Node Attr               where node n a    = node n [a]
+instance Node ()                 where node n ()   = node n ([]::[Attr])
+
+instance Node [Content]          where node n cs     = node n ([]::[Attr],cs)
+instance Node Content            where node n c      = node n [c]
+instance Node ([Attr],Content)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Content)     where node n (a,c)  = node n ([a],[c])
+
+instance Node ([Attr],[Element]) where
+  node n (as,cs) = node n (as,map Elem cs)
+
+instance Node ([Attr],Element)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Element)     where node n (a,c)  = node n ([a],c)
+instance Node [Element]          where node n es     = node n ([]::[Attr],es)
+instance Node Element            where node n e      = node n [e]
+
+instance Node ([Attr],[CData])   where
+  node n (as,cs) = node n (as,map Text cs)
+
+instance Node ([Attr],CData)     where node n (as,c) = node n (as,[c])
+instance Node (Attr,CData)       where node n (a,c)  = node n ([a],c)
+instance Node [CData]            where node n es     = node n ([]::[Attr],es)
+instance Node CData              where node n e      = node n [e]
+
+instance Node ([Attr],Text)      where
+  node n (as,t) = node n (as, CData { cdVerbatim = CDataText
+                                    , cdData = t
+                                    , cdLine = Nothing })
+
+instance Node (Attr,Text )       where node n (a,t)  = node n ([a],t)
+instance Node Text               where node n t      = node n ([]::[Attr],t)
+
+-- | Create node with unqualified name
+unode :: Node t => Text -> t -> Element
+unode = node . unqual
+
+--
+-- conversion from xml-light
+--
+
+fromXLQName :: XL.QName -> QName
+fromXLQName qn = QName { qName = T.pack $ XL.qName qn
+                       , qURI = T.pack <$> XL.qURI qn
+                       , qPrefix = T.pack <$> XL.qPrefix qn }
+
+fromXLCData :: XL.CData -> CData
+fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of
+                                        XL.CDataText -> CDataText
+                                        XL.CDataVerbatim -> CDataVerbatim
+                                        XL.CDataRaw -> CDataRaw
+                       , cdData = T.pack $ XL.cdData cd
+                       , cdLine = XL.cdLine cd }
+
+fromXLElement :: XL.Element -> Element
+fromXLElement el = Element { elName = fromXLQName $ XL.elName el
+                           , elAttribs = map fromXLAttr $ XL.elAttribs el
+                           , elContent = map fromXLContent $ XL.elContent el
+                           , elLine = XL.elLine el }
+
+fromXLAttr :: XL.Attr -> Attr
+fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s)
+
+fromXLContent :: XL.Content -> Content
+fromXLContent (XL.Elem el) = Elem $ fromXLElement el
+fromXLContent (XL.Text cd) = Text $ fromXLCData cd
+fromXLContent (XL.CRef s)  = CRef (T.pack s)
+
+--
+-- copied from xml-light Text.XML.Proc
+--
+
+-- | Get the text value of an XML element.  This function
+-- ignores non-text elements, and concatenates all text elements.
+strContent         :: Element -> Text
+strContent          = mconcat . map cdData . onlyText . elContent
+
+-- | Select only the elements from a list of XML content.
+onlyElems          :: [Content] -> [Element]
+onlyElems xs        = [ x | Elem x <- xs ]
+
+-- | Select only the elements from a parent.
+elChildren         :: Element -> [Element]
+elChildren e        = [ x | Elem x <- elContent e ]
+
+-- | Select only the text from a list of XML content.
+onlyText           :: [Content] -> [CData]
+onlyText xs         = [ x | Text x <- xs ]
+
+-- | Find all immediate children with the given name.
+findChildren       :: QName -> Element -> [Element]
+findChildren q e    = filterChildren ((q ==) . elName) e
+
+-- | Filter all immediate children wrt a given predicate.
+filterChildren       :: (Element -> Bool) -> Element -> [Element]
+filterChildren p e    = filter p (onlyElems (elContent e))
+
+
+-- | Filter all immediate children wrt a given predicate over their names.
+filterChildrenName      :: (QName -> Bool) -> Element -> [Element]
+filterChildrenName p e   = filter (p.elName) (onlyElems (elContent e))
+
+
+-- | Find an immediate child with the given name.
+findChild          :: QName -> Element -> Maybe Element
+findChild q e       = listToMaybe (findChildren q e)
+
+-- | Find an immediate child with the given name.
+filterChild          :: (Element -> Bool) -> Element -> Maybe Element
+filterChild p e       = listToMaybe (filterChildren p e)
+
+-- | Find an immediate child with name matching a predicate.
+filterChildName      :: (QName -> Bool) -> Element -> Maybe Element
+filterChildName p e   = listToMaybe (filterChildrenName p e)
+
+-- | Find the left-most occurrence of an element matching given name.
+findElement        :: QName -> Element -> Maybe Element
+findElement q e     = listToMaybe (findElements q e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElement        :: (Element -> Bool) -> Element -> Maybe Element
+filterElement p e     = listToMaybe (filterElements p e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElementName     :: (QName -> Bool) -> Element -> Maybe Element
+filterElementName p e  = listToMaybe (filterElementsName p e)
+
+-- | Find all non-nested occurances of an element.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+findElements       :: QName -> Element -> [Element]
+findElements qn e = filterElementsName (qn==) e
+
+-- | Find all non-nested occurrences of an element wrt. given predicate.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElements       :: (Element -> Bool) -> Element -> [Element]
+filterElements p e
+ | p e        = [e]
+ | otherwise  = concatMap (filterElements p) $ onlyElems $ elContent e
+
+-- | Find all non-nested occurences of an element wrt a predicate over element names.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElementsName       :: (QName -> Bool) -> Element -> [Element]
+filterElementsName p e = filterElements (p.elName) e
+
+-- | Lookup the value of an attribute.
+findAttr          :: QName -> Element -> Maybe Text
+findAttr x e       = lookupAttr x (elAttribs e)
+
+-- | Lookup attribute name from list.
+lookupAttr        :: QName -> [Attr] -> Maybe Text
+lookupAttr x       = lookupAttrBy (x ==)
+
+-- | Lookup the first attribute whose name satisfies the given predicate.
+lookupAttrBy       :: (QName -> Bool) -> [Attr] -> Maybe Text
+lookupAttrBy p as   = attrVal `fmap` find (p . attrKey) as
+
+-- | Lookup the value of the first attribute whose name
+-- satisfies the given predicate.
+findAttrBy         :: (QName -> Bool) -> Element -> Maybe Text
+findAttrBy p e      = lookupAttrBy p (elAttribs e)
+
+
+--
+-- duplicates functinos from Text.XML.Output
+--
+
+-- | The XML 1.0 header
+xmlHeader :: Text
+xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+
+
+--------------------------------------------------------------------------------
+data ConfigPP = ConfigPP
+  { shortEmptyTag :: QName -> Bool
+  , prettify      :: Bool
+  }
+
+-- | Default pretty orinting configuration.
+--  * Always use abbreviate empty tags.
+defaultConfigPP :: ConfigPP
+defaultConfigPP = ConfigPP { shortEmptyTag = const True
+                           , prettify      = False
+                           }
+
+-- | The predicate specifies for which empty tags we should use XML's
+-- abbreviated notation <TAG />.  This is useful if we are working with
+-- some XML-ish standards (such as certain versions of HTML) where some
+-- empty tags should always be displayed in the <TAG></TAG> form.
+useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP
+useShortEmptyTags p c = c { shortEmptyTag = p }
+
+
+-- | Specify if we should use extra white-space to make document more readable.
+-- WARNING: This adds additional white-space to text elements,
+-- and so it may change the meaning of the document.
+useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP
+useExtraWhiteSpace p c  = c { prettify = p }
+
+-- | A configuration that tries to make things pretty
+-- (possibly at the cost of changing the semantics a bit
+-- through adding white space.)
+prettyConfigPP     :: ConfigPP
+prettyConfigPP      = useExtraWhiteSpace True defaultConfigPP
+
+
+--------------------------------------------------------------------------------
+
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppTopElement       :: Element -> Text
+ppTopElement        = ppcTopElement prettyConfigPP
+
+-- | Pretty printing elements
+ppElement          :: Element -> Text
+ppElement           = ppcElement prettyConfigPP
+
+-- | Pretty printing content
+ppContent          :: Content -> Text
+ppContent           = ppcContent prettyConfigPP
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppcTopElement      :: ConfigPP -> Element -> Text
+ppcTopElement c e   = T.unlines [xmlHeader,ppcElement c e]
+
+-- | Pretty printing elements
+ppcElement         :: ConfigPP -> Element -> Text
+ppcElement c        = TL.toStrict . toLazyText . ppElementS c mempty
+
+-- | Pretty printing content
+ppcContent         :: ConfigPP -> Content -> Text
+ppcContent c        = TL.toStrict . toLazyText . ppContentS c mempty
+
+ppcCData           :: ConfigPP -> CData -> Text
+ppcCData c         = TL.toStrict . toLazyText . ppCDataS c mempty
+
+type Indent = Builder
+
+-- | Pretty printing content using ShowT
+ppContentS         :: ConfigPP -> Indent -> Content -> Builder
+ppContentS c i x = case x of
+                     Elem e -> ppElementS c i e
+                     Text t -> ppCDataS c i t
+                     CRef r -> showCRefS r
+
+ppElementS         :: ConfigPP -> Indent -> Element -> Builder
+ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <>
+  (case elContent e of
+    [] | "?" `T.isPrefixOf` qName name -> fromText " ?>"
+       | shortEmptyTag c name  -> fromText " />"
+    [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name
+    cs -> singleton '>' <> nl <>
+          mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <>
+          i <> tagEnd name
+      where (nl,sp)  = if prettify c then ("\n","  ") else ("","")
+  )
+  where name = elName e
+
+ppCDataS           :: ConfigPP -> Indent -> CData -> Builder
+ppCDataS c i t     = i <> if cdVerbatim t /= CDataText || not (prettify c)
+                             then showCDataS t
+                             else foldr cons mempty (T.unpack (showCData t))
+  where cons         :: Char -> Builder -> Builder
+        cons '\n' ys  = singleton '\n' <> i <> ys
+        cons y ys     = singleton y <> ys
+
+
+
+--------------------------------------------------------------------------------
+
+-- | Adds the <?xml?> header.
+showTopElement     :: Element -> Text
+showTopElement c    = xmlHeader <> showElement c
+
+showContent        :: Content -> Text
+showContent         = ppcContent defaultConfigPP
+
+showElement        :: Element -> Text
+showElement         = ppcElement defaultConfigPP
+
+showCData          :: CData -> Text
+showCData           = ppcCData defaultConfigPP
+
+-- Note: crefs should not contain '&', ';', etc.
+showCRefS          :: Text -> Builder
+showCRefS r         = singleton '&' <> fromText r <> singleton ';'
+
+-- | Convert a text element to characters.
+showCDataS         :: CData -> Builder
+showCDataS cd =
+ case cdVerbatim cd of
+   CDataText     -> escStr (cdData cd)
+   CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <>
+                    fromText "]]>"
+   CDataRaw      -> fromText (cdData cd)
+
+--------------------------------------------------------------------------------
+escCData           :: Text -> Builder
+escCData t
+  | "]]>" `T.isPrefixOf` t =
+     fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t)
+escCData t
+  = case T.uncons t of
+      Nothing     -> mempty
+      Just (c,t') -> singleton c <> escCData t'
+
+escChar            :: Char -> Builder
+escChar c = case c of
+  '<'   -> fromText "&lt;"
+  '>'   -> fromText "&gt;"
+  '&'   -> fromText "&amp;"
+  '"'   -> fromText "&quot;"
+  -- we use &#39 instead of &apos; because IE apparently has difficulties
+  -- rendering &apos; in xhtml.
+  -- Reported by Rohan Drape <rohan.drape@gmail.com>.
+  '\''  -> fromText "&#39;"
+  _     -> singleton c
+
+  {- original xml-light version:
+  -- NOTE: We escape '\r' explicitly because otherwise they get lost
+  -- when parsed back in because of then end-of-line normalization rules.
+  _ | isPrint c || c == '\n' -> singleton c
+    | otherwise -> showText "&#" . showsT oc . singleton ';'
+      where oc = ord c
+  -}
+
+escStr             :: Text -> Builder
+escStr cs          = if T.any needsEscape cs
+                        then mconcat (map escChar (T.unpack cs))
+                        else fromText cs
+ where
+  needsEscape '<' = True
+  needsEscape '>' = True
+  needsEscape '&' = True
+  needsEscape '"' = True
+  needsEscape '\'' = True
+  needsEscape _ = False
+
+tagEnd             :: QName -> Builder
+tagEnd qn           = fromText "</" <> showQName qn <> singleton '>'
+
+tagStart           :: QName -> [Attr] -> Builder
+tagStart qn as      = singleton '<' <> showQName qn <> as_str
+ where as_str       = if null as
+                         then mempty
+                         else mconcat (map showAttr as)
+
+showAttr           :: Attr -> Builder
+showAttr (Attr qn v) = singleton ' ' <> showQName qn <>
+                       singleton '=' <>
+                       singleton '"' <> escStr v <> singleton '"'
+
+showQName          :: QName -> Builder
+showQName q         =
+  case qPrefix q of
+    Nothing -> fromText (qName q)
+    Just p  -> fromText p <> singleton ':' <> fromText (qName q)
diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs
deleted file mode 100644
index 8ad22a66a..000000000
--- a/src/Text/Pandoc/XMLParser.hs
+++ /dev/null
@@ -1,66 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.XMLParser
-   Copyright   : Copyright (C) 2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Bridge to allow using xml-conduit's parser with xml-light's types.
--}
-module Text.Pandoc.XMLParser
-  ( parseXMLElement
-  , parseXMLContents
-  , module Text.XML.Light.Types
-  ) where
-
-import qualified Control.Exception as E
-import qualified Text.XML as Conduit
-import Text.XML.Unresolved (InvalidEventStream(..))
-import qualified Text.XML.Light as Light
-import Text.XML.Light.Types
-import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Map as M
-import Data.Maybe (mapMaybe)
-
--- Drop in replacement for parseXMLDoc in xml-light.
-parseXMLElement :: TL.Text -> Either T.Text Light.Element
-parseXMLElement t =
-  elementToElement .  Conduit.documentRoot <$>
-    either (Left . T.pack . E.displayException) Right
-    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
-
-parseXMLContents :: TL.Text -> Either T.Text [Light.Content]
-parseXMLContents t =
-  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
-    Left e ->
-      case E.fromException e of
-        Just (ContentAfterRoot _) ->
-          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
-        _ -> Left . T.pack . E.displayException $ e
-    Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x]
-
-elementToElement :: Conduit.Element -> Light.Element
-elementToElement (Conduit.Element name attribMap nodes) =
-  Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
- where
-  attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $
-              M.toList attribMap
-  nameToQname (Conduit.Name localName mbns mbpref) =
-    case mbpref of
-      Nothing | "xmlns:" `T.isPrefixOf` localName ->
-           Light.QName (T.unpack $ T.drop 6 localName)  (T.unpack <$> mbns)
-                       (Just "xmlns")
-      _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns)
-                       (T.unpack <$> mbpref)
-
-nodeToContent :: Conduit.Node -> Maybe Light.Content
-nodeToContent (Conduit.NodeElement el) =
-  Just (Light.Elem (elementToElement el))
-nodeToContent (Conduit.NodeContent t) =
-  Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing))
-nodeToContent _ = Nothing
-
diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs
index 376f02c55..c1e47622d 100644
--- a/test/Tests/Writers/OOXML.hs
+++ b/test/Tests/Writers/OOXML.hs
@@ -43,7 +43,8 @@ compareXMLBool _ _ = False
 
 displayDiff :: Content -> Content -> String
 displayDiff elemA elemB =
-  showDiff (1,1) $ getDiff (lines $ ppContent elemA) (lines $ ppContent elemB)
+  showDiff (1,1)
+    (getDiff (lines $ showContent elemA) (lines $ showContent elemB))
 
 goldenArchive :: FilePath -> IO Archive
 goldenArchive fp = toArchive . BL.fromStrict <$> BS.readFile fp
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index d3b16d0f2..ed7d1165c 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 6293ef493..07ae75676 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index 4205a1516..e5f034378 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index adb3f23db..174942135 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 92c8137fe..b5c31a851 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index f53470617..c42ca1b05 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index d6af90a72..1cb4c1fd7 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index e18dbe853..7122456ea 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 820299043..616ba0f81 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index ae0f41d12..c30dcdee9 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 94cd35dfa..8a704b41e 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 879f2a25b..b1906c8c4 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 93f86478f..8adf1cf75 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 967d297f2..584117503 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index c5614e2fa..8859fe55c 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index 0f39a831f..b80f3b3ba 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 07046f223..35beed68a 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 3656618e6..2c29fd674 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 8798253d5..10a948886 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 0a24d1840..5b90e74a0 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index 52bb7a217..cc81b46d1 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 182c06c64..1394dc442 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 7b69a56a3..0d1688694 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index 3d3a35701..be1caef2d 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 5ae37b406..a1d2323c2 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index c29aa6716..2f3a831a7 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 664493246..af066107c 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index b6d15340e..9cc7a075f 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index f8e1092d2..f8b8dcfde 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index b4cda82f2..1c3baf0bf 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index ee222efa0..28686970d 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index c6f8d9c96..7051cefbd 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index ea8146690..9df631640 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx
index 58070eb3f..5e9c2c630 100644
Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ
diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx
index db9b7e371..15232c32d 100644
Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ
diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx
index c7b1ed7d5..aab0cc6f5 100644
Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ
diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx
index 6944d92bf..fe5b675f3 100644
Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ
diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx
index ae0d28429..de5e68151 100644
Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ
diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx
index 37c74c69a..89e6fbdf2 100644
Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ
diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx
index 324e443a1..6bcbd1b9c 100644
Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ
diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx
index c81b983e3..79d48560b 100644
Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ
diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx
index 30ce33db6..9d46036fe 100644
Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ
diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx
index d6c604968..54ec7f305 100644
Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ
diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx
index 000e17ecd..a028b346f 100644
Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ
diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx
index fdcd2e29b..1158c16fc 100644
Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ
diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx
index e73126376..670a825de 100644
Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ
diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx
index e3f968e9e..6d297ef11 100644
Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ
diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx
index eadb9372e..473b9498d 100644
Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ
diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx
index 8ca6bab2b..2cdf54474 100644
Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ
diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx
index ae188ee68..ffc2eb9f7 100644
Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ
diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx
index 60301fa50..676954cb8 100644
Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ
diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx
index 17124a50d..29164af15 100644
Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ
diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx
index 19ae7dd4e..1742b3296 100644
Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ
diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx
index b650b7585..c6df8e18e 100644
Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ
diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx
index 0ab029614..cf6e52eef 100644
Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ
diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx
index 2a6e35080..e06d9079d 100644
Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx
index a7bcf6a4b..449339778 100644
Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx
index 21b018c25..258098082 100644
Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ
diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx
index 4ec4772a4..2f0213919 100644
Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ
diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx
index 5983657b6..9dbfa41a0 100644
Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ
diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx
index dd54c7082..f288dde14 100644
Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ
diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx
index b3e5ed5b9..0ab1302da 100644
Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx
index 1078854bb..6343bffe4 100644
Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx
index 5116c6c4e..5d4465f64 100644
Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx
index 0c8e49bd9..d581681aa 100644
Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx
index 68695939d..c922df3a8 100644
Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx
index 7ed9b946d..13f564bf0 100644
Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx
index 79fc82345..f0b302738 100644
Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ
diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx
index 9f943c279..2d8d00242 100644
Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ
diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx
index ac72d8ced..4320128b3 100644
Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ
diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx
index 15c7b5469..0a3f6e56d 100644
Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ
diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx
index 926c5e699..e41219844 100644
Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ
diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx
index a37e72d2c..82b8aa13d 100644
Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ
diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx
index 7f86533fe..270a7eeac 100644
Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ
diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx
index 89e3db0ab..44985d701 100644
Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ
-- 
cgit v1.2.3


From 73add0578989e1da6e9cd1de68e2e4142f789188 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Feb 2021 09:54:39 -0800
Subject: Docx reader: use Map instead of list for Namespaces.

This gives a speedup of about 5-10%.

The reader is now approximately twice as fast as in the last
release.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 14 +++++++-------
 src/Text/Pandoc/Readers/Docx/Util.hs  | 26 +++++++++++++-------------
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index c76f3c171..f8ed248d7 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -396,9 +396,9 @@ archiveToNotes zf =
                >>= parseXMLFromEntry
       enElem = findEntryByPath "word/endnotes.xml" zf
                >>= parseXMLFromEntry
-      fn_namespaces = maybe [] elemToNameSpaces fnElem
-      en_namespaces = maybe [] elemToNameSpaces enElem
-      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+      fn_namespaces = maybe mempty elemToNameSpaces fnElem
+      en_namespaces = maybe mempty elemToNameSpaces enElem
+      ns = M.union fn_namespaces en_namespaces
       fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
       en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
   in
@@ -408,7 +408,7 @@ archiveToComments :: Archive -> Comments
 archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
                >>= parseXMLFromEntry
-      cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
+      cmts_namespaces = maybe mempty elemToNameSpaces cmtsElem
       cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
                cmtsElem
   in
@@ -518,7 +518,7 @@ levelElemToLevel _ _ = Nothing
 archiveToNumbering' :: Archive -> Maybe Numbering
 archiveToNumbering' zf =
   case findEntryByPath "word/numbering.xml" zf of
-    Nothing -> Just $ Numbering [] [] []
+    Nothing -> Just $ Numbering mempty [] []
     Just entry -> do
       numberingElem <- parseXMLFromEntry entry
       let namespaces = elemToNameSpaces numberingElem
@@ -530,7 +530,7 @@ archiveToNumbering' zf =
 
 archiveToNumbering :: Archive -> Numbering
 archiveToNumbering archive =
-  fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
+  fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive)
 
 elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
 elemToNotes ns notetype element
@@ -875,7 +875,7 @@ childElemToRun ns element
   = let (title, alt) = getTitleAndAlt ns element
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+                  >>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r"))
     in
      case drawing of
        Just s -> expandDrawingId s >>=
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index 21df03d9e..ac331cba6 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -22,42 +22,42 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , findAttrByName
                                       ) where
 
-import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
 import Text.Pandoc.XML.Light
+import qualified Data.Map as M
 
-type NameSpaces = [(Text, Text)]
+type NameSpaces = M.Map Text Text
 
 elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
-
-attrToNSPair :: Attr -> Maybe (Text, Text)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _                                     = Nothing
+elemToNameSpaces = foldr (\(Attr qn val) ->
+                             case qn of
+                               QName s _ (Just "xmlns") -> M.insert s val
+                               _ -> id) mempty . elAttribs
 
 elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
+  QName name (M.lookup prefix ns)
+             (if T.null prefix then Nothing else Just prefix)
 
 isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
-  let ns' = ns ++ elemToNameSpaces element
+  let ns' = ns <> elemToNameSpaces element
   in qName (elName element) == name &&
-     qURI (elName element) == lookup prefix ns'
+     qURI (elName element) == M.lookup prefix ns'
 
 findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
 findChildByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findChild (elemName ns' pref name) el
 
 findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
 findChildrenByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
 findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findAttr (elemName ns' pref name) el
 
-- 
cgit v1.2.3


From 743f7216de98556a316301ac72b8606bafc2deee Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Thu, 18 Feb 2021 21:53:19 +0100
Subject: Org reader: fix bug in org-ref citation parsing.

The org-ref syntax allows to list multiple citations separated by comma.
This fixes a bug that accepted commas as part of the citation id, so all
citation lists were parsed as one single citation.

Fixes: #7101
---
 src/Text/Pandoc/Readers/Org/Inlines.hs    |  2 +-
 test/Tests/Readers/Org/Inline/Citation.hs | 40 +++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 68c2ba5e0..519a6ce04 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -322,7 +322,7 @@ linkLikeOrgRefCite = try $ do
 -- from the `org-ref-cite-re` variable in `org-ref.el`.
 orgRefCiteKey :: PandocMonad m => OrgParser m Text
 orgRefCiteKey =
-  let citeKeySpecialChars = "-_:\\./," :: String
+  let citeKeySpecialChars = "-_:\\./" :: String
       isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
       isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c
       endOfCitation = try $ do
diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs
index 5538f1ec8..a11804983 100644
--- a/test/Tests/Readers/Org/Inline/Citation.hs
+++ b/test/Tests/Readers/Org/Inline/Citation.hs
@@ -116,6 +116,46 @@ tests =
                      }
       in (para $ cite [citation] "citep:pandoc")
 
+    , "multiple simple citations" =:
+      "citep:picard,riker" =?>
+      let picard = Citation
+                 { citationId = "picard"
+                 , citationPrefix = mempty
+                 , citationSuffix = mempty
+                 , citationMode = NormalCitation
+                 , citationNoteNum = 0
+                 , citationHash = 0
+                 }
+          riker  = Citation
+                 { citationId = "riker"
+                 , citationPrefix = mempty
+                 , citationSuffix = mempty
+                 , citationMode = NormalCitation
+                 , citationNoteNum = 0
+                 , citationHash = 0
+                 }
+      in (para $ cite [picard,riker] "citep:picard,riker")
+
+    , "multiple simple citations succeeded by comma" =:
+      "citep:picard,riker," =?>
+      let picard = Citation
+                 { citationId = "picard"
+                 , citationPrefix = mempty
+                 , citationSuffix = mempty
+                 , citationMode = NormalCitation
+                 , citationNoteNum = 0
+                 , citationHash = 0
+                 }
+          riker  = Citation
+                 { citationId = "riker"
+                 , citationPrefix = mempty
+                 , citationSuffix = mempty
+                 , citationMode = NormalCitation
+                 , citationNoteNum = 0
+                 , citationHash = 0
+                 }
+      in (para $ cite [picard,riker] "citep:picard,riker" <> str ",")
+
     , "extended citation" =:
       "[[citep:Dominik201408][See page 20::, for example]]" =?>
       let citation = Citation
-- 
cgit v1.2.3


From 98d26c234579a06446a5bef1992ed77bac48a4ac Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Feb 2021 21:24:31 -0800
Subject: DocBook, JATS, OPML readers: performance optimization.

With the new XML parser, we can avoid the expensive tree
normalization step we used to do.

This gives a significant speed boost in docbook and JATS
parsing (e.g. 9.7 to 6 ms).
---
 src/Text/Pandoc/Readers/DocBook.hs | 26 ++++----------------------
 src/Text/Pandoc/Readers/JATS.hs    | 24 +++---------------------
 src/Text/Pandoc/Readers/OPML.hs    | 22 +---------------------
 3 files changed, 8 insertions(+), 64 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index e201b54fe..d38b07864 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
 import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper, isLetter)
+import Data.Char (isSpace, isLetter)
 import Data.Default
 import Data.Either (rights)
 import Data.Foldable (asum)
@@ -540,8 +540,9 @@ instance Default DBState where
 
 readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readDocBook _ inp = do
-  tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $
-            parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp)
+  tree <- either (throwError . PandocXMLError "") return $
+            parseXMLContents
+              (TL.fromStrict . handleInstructions $ crFilter inp)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
@@ -571,25 +572,6 @@ getFigure e = do
   modify $ \st -> st{ dbFigureTitle = mempty, dbFigureId = mempty }
   return res
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 <> s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 <> convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r <> s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 <>
-                                    convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: Text -> Text
-convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e)
-
 -- convenience function to get an attribute value, defaulting to ""
 attrValue :: Text -> Element -> Text
 attrValue attr elt =
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 5353f2001..602f3b4f2 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -54,30 +54,11 @@ instance Default JATSState where
 
 readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readJATS _ inp = do
-  tree <- either (throwError . PandocXMLError "")
-                 (return . normalizeTree) $
+  tree <- either (throwError . PandocXMLError "") return $
             parseXMLContents (TL.fromStrict $ crFilter inp)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 <> s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 <> convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r <> s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: Text -> Text
-convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e)
-
 -- convenience function to get an attribute value, defaulting to ""
 attrValue :: Text -> Element -> Text
 attrValue attr =
@@ -454,7 +435,8 @@ elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> JATS m Inlines
 parseInline (Text (CData _ s _)) = return $ text s
-parseInline (CRef ref) = return . text . convertEntity $ ref
+parseInline (CRef ref) = return $ maybe (text $ T.toUpper ref) (text . T.pack)
+                                $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "italic" -> innerInlines emph
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 184d5a63f..5f2ddb876 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -14,12 +14,10 @@ Conversion of OPML to 'Pandoc' document.
 module Text.Pandoc.Readers.OPML ( readOPML ) where
 import Control.Monad.State.Strict
 import Data.Default
-import Data.Generics
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
-import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
@@ -53,7 +51,7 @@ readOPML opts inp  = do
   (bs, st') <- runStateT
                  (case parseXMLContents (TL.fromStrict (crFilter inp)) of
                      Left msg -> throwError $ PandocXMLError "" msg
-                     Right ns -> mapM parseBlock $ normalizeTree ns)
+                     Right ns -> mapM parseBlock ns)
                  def{ opmlOptions = opts }
   return $
     setTitle (opmlDocTitle st') $
@@ -61,24 +59,6 @@ readOPML opts inp  = do
     setDate (opmlDocDate st') $
     doc $ mconcat bs
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 <> s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 <> convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r <> s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: Text -> Text
-convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e))
-
 -- convenience function to get an attribute value, defaulting to ""
 attrValue :: Text -> Element -> Text
 attrValue attr elt =
-- 
cgit v1.2.3


From 13847267e969b634e9c16c15170e7f217d432e8a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 00:07:38 -0800
Subject: HTML reader: efficiency improvements.

Do a lookahead to find the right parser to use.

Benchmarks from 34ms to 23ms, with less allocation.
Also speeds up the epub reader.
---
 src/Text/Pandoc/Readers/HTML.hs | 210 ++++++++++++++++++++++++----------------
 1 file changed, 129 insertions(+), 81 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index cc60b5501..47856d2f7 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -159,29 +159,65 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
 
 block :: PandocMonad m => TagParser m Blocks
 block = do
-  res <- choice
-            [ eSection
-            , eSwitch B.para block
-            , mempty <$ eFootnote
-            , mempty <$ eTOC
-            , mempty <$ eTitlePage
-            , pPara
-            , pHeader
-            , pBlockQuote
-            , pCodeBlock
-            , pList
-            , pHrule
-            , pTable block
-            , pHtml
-            , pHead
-            , pBody
-            , pLineBlock
-            , pDiv
-            , pPlain
-            , pFigure
-            , pIframe
-            , pRawHtmlBlock
-            ]
+  exts <- getOption readerExtensions
+  tag <- lookAhead pAny
+  res <-
+   (case tag of
+    TagOpen name attr ->
+      let type' = fromMaybe "" $
+                     lookup "type" attr <|> lookup "epub:type" attr
+          epubExts = extensionEnabled Ext_epub_html_exts exts
+      in
+      case name of
+        _ | name `elem` sectioningContent
+          , epubExts
+          , "chapter" `T.isInfixOf` type'
+          -> eSection
+        _ | epubExts
+          , type' `elem` ["footnote", "rearnote"]
+          -> mempty <$ eFootnote
+        _ | epubExts
+          , type' == "toc"
+          -> mempty <$ eTOC
+        _ | "titlepage" `T.isInfixOf` type'
+          , name `elem` ("section" : groupingContent)
+          -> mempty <$ eTitlePage
+        "p" -> pPara
+        "h1" -> pHeader
+        "h2" -> pHeader
+        "h3" -> pHeader
+        "h4" -> pHeader
+        "h5" -> pHeader
+        "h6" -> pHeader
+        "blockquote" -> pBlockQuote
+        "pre" -> pCodeBlock
+        "ul" -> pBulletList
+        "ol" -> pOrderedList
+        "dl" -> pDefinitionList
+        "table" -> pTable block
+        "hr" -> pHrule
+        "html" -> pHtml
+        "head" -> pHead
+        "body" -> pBody
+        "div"
+          | extensionEnabled Ext_line_blocks exts
+          , Just "line-block" <- lookup "class" attr
+          -> pLineBlock
+          | otherwise
+          -> pDiv
+        "section" -> pDiv
+        "main" -> pDiv
+        "figure" -> pFigure
+        "iframe" -> pIframe
+        "style" -> pRawHtmlBlock
+        "textarea" -> pRawHtmlBlock
+        "switch"
+          | epubExts
+          -> eSwitch B.para block
+        _ -> mzero
+    _ -> mzero)
+   <|> pPlain
+   <|> pRawHtmlBlock
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
@@ -256,9 +292,6 @@ eTOC = try $ do
   guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc"
   void (pInTags tag block)
 
-pList :: PandocMonad m => TagParser m Blocks
-pList = pBulletList <|> pOrderedList <|> pDefinitionList
-
 pBulletList :: PandocMonad m => TagParser m Blocks
 pBulletList = try $ do
   pSatisfy (matchTagOpen "ul" [])
@@ -369,13 +402,15 @@ pLineBlock = try $ do
             B.toList ils
   return $ B.lineBlock lns
 
+isDivLike :: Text -> Bool
+isDivLike "div"     = True
+isDivLike "section" = True
+isDivLike "main"    = True
+isDivLike _         = False
+
 pDiv :: PandocMonad m => TagParser m Blocks
 pDiv = try $ do
   guardEnabled Ext_native_divs
-  let isDivLike "div"     = True
-      isDivLike "section" = True
-      isDivLike "main"    = True
-      isDivLike _         = False
   TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
   let (ident, classes, kvs) = toAttr attr'
   contents <- pInTags tag block
@@ -544,31 +579,47 @@ tagToText (TagOpen "br" _) = "\n"
 tagToText _                = ""
 
 inline :: PandocMonad m => TagParser m Inlines
-inline = choice
-           [ eNoteref
-           , eSwitch id inline
-           , pTagText
-           , pQ
-           , pEmph
-           , pStrong
-           , pSuperscript
-           , pSubscript
-           , pSpanLike
-           , pSmall
-           , pStrikeout
-           , pUnderline
-           , pLineBreak
-           , pLink
-           , pImage
-           , pSvg
-           , pBdo
-           , pCode
-           , pCodeWithClass [("samp","sample"),("var","variable")]
-           , pSpan
-           , pMath False
-           , pScriptMath
-           , pRawHtmlInline
-           ]
+inline = do
+  exts <- getOption readerExtensions
+  tag <- lookAhead pAny
+  case tag of
+    TagOpen name attr ->
+      case name of
+        "a" | extensionEnabled Ext_epub_html_exts exts
+          , Just "noteref" <- lookup "type" attr <|> lookup "epub:type" attr
+          , Just ('#',_) <- lookup "href" attr >>= T.uncons
+            -> eNoteref
+            | otherwise -> pLink
+        "switch" -> eSwitch id inline
+        "q" -> pQ
+        "em" -> pEmph
+        "i"  -> pEmph
+        "strong" -> pStrong
+        "b" -> pStrong
+        "sup" -> pSuperscript
+        "sub" -> pSubscript
+        "small" -> pSmall
+        "s" -> pStrikeout
+        "strike" -> pStrikeout
+        "del" -> pStrikeout
+        "u" -> pUnderline
+        "ins" -> pUnderline
+        "br" -> pLineBreak
+        "img" -> pImage
+        "svg" -> pSvg
+        "bdo" -> pBdo
+        "code" -> pCode
+        "samp" -> pCodeWithClass "samp" "sample"
+        "var" -> pCodeWithClass "var" "variable"
+        "span" -> pSpan
+        "math" -> pMath False
+        "script"
+          | Just x <- lookup "type" attr
+          , "math/tex" `T.isPrefixOf` x -> pScriptMath
+        _ | name `elem` htmlSpanLikeElements -> pSpanLike
+        _ -> pRawHtmlInline
+    TagText _ -> pTagText
+    _ -> pRawHtmlInline
 
 pSelfClosing :: PandocMonad m
              => (Text -> Bool) -> ([Attribute Text] -> Bool)
@@ -579,27 +630,25 @@ pSelfClosing f g = do
   return open
 
 pQ :: PandocMonad m => TagParser m Inlines
-pQ = choice $ map try [citedQuote, normalQuote]
-  where citedQuote = do
-          tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
-
-          url <- canonicalizeUrl $ fromAttrib "cite" tag
-          let uid = fromMaybe (fromAttrib "name" tag) $
-                       maybeFromAttrib "id" tag
-          let cls = T.words $ fromAttrib "class" tag
-
-          makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
-        normalQuote = do
-          pSatisfy $ tagOpenLit "q" (const True)
-          makeQuote id
-        makeQuote wrapper = do
-          ctx <- asks quoteContext
-          let (constructor, innerContext) = case ctx of
-                        InDoubleQuote -> (B.singleQuoted, InSingleQuote)
-                        _             -> (B.doubleQuoted, InDoubleQuote)
-
-          content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q"))
-          return $ extractSpaces (constructor . wrapper) content
+pQ = do
+  TagOpen _ attrs <- pSatisfy $ tagOpenLit "q" (const True)
+  case lookup "cite" attrs of
+    Just url -> do
+      let uid = fromMaybe mempty $
+                   lookup "name" attrs <> lookup "id" attrs
+      let cls = maybe [] T.words $ lookup "class" attrs
+      url' <- canonicalizeUrl url
+      makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
+    Nothing -> makeQuote id
+ where
+  makeQuote wrapper = do
+    ctx <- asks quoteContext
+    let (constructor, innerContext) = case ctx of
+                  InDoubleQuote -> (B.singleQuoted, InSingleQuote)
+                  _             -> (B.doubleQuoted, InDoubleQuote)
+    content <- withQuoteContext innerContext
+                  (mconcat <$> manyTill inline (pCloses "q"))
+    return $ extractSpaces (constructor . wrapper) content
 
 pEmph :: PandocMonad m => TagParser m Inlines
 pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
@@ -690,13 +739,12 @@ pSvg = do
                    UTF8.toText (encode $ UTF8.fromText rawText)
   return $ B.imageWith (ident,cls,[]) svgData mempty mempty
 
-pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
-pCodeWithClass elemToClass = try $ do
-  let tagTest = flip elem . fmap fst $ elemToClass
-  TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
+pCodeWithClass :: PandocMonad m => Text -> Text -> TagParser m Inlines
+pCodeWithClass name class' = try $ do
+  TagOpen open attr' <- pSatisfy $ tagOpen (== name) (const True)
   result <- manyTill pAny (pCloses open)
   let (ids,cs,kvs) = toAttr attr'
-      cs'          = maybe cs (:cs) . lookup open $ elemToClass
+      cs'          = class' : cs
   return . B.codeWith (ids,cs',kvs) .
     T.unwords . T.lines . innerText $ result
 
-- 
cgit v1.2.3


From 0f955b10b455e9b3d326262d03261f17538a6943 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 18:57:21 -0800
Subject: T.P.Readers.LaTeX.Parsing: improve braced'.

Remove the parameter, have it parse the opening brace,
and make it more efficient.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 313aa6c51..dab4d334e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -680,28 +680,25 @@ grouped parser = try $ do
   -- {{a,b}} should be parsed the same as {a,b}
   try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
 
-braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
-braced' getTok n =
-  handleEgroup <|> handleBgroup <|> handleOther
-  where handleEgroup = do
-          t <- symbol '}'
-          if n == 1
-             then return []
-             else (t:) <$> braced' getTok (n - 1)
-        handleBgroup = do
-          t <- symbol '{'
-          (t:) <$> braced' getTok (n + 1)
-        handleOther = do
-          t <- getTok
-          (t:) <$> braced' getTok n
+braced' :: PandocMonad m => LP m Tok -> LP m [Tok]
+braced' getTok = symbol '{' *> go (1 :: Int)
+ where
+  go n = do
+    t <- getTok
+    case t of
+      Tok _ Symbol "}"
+        | n > 1     -> (t:) <$> go (n - 1)
+        | otherwise -> return []
+      Tok _ Symbol "{" -> (t:) <$> go (n + 1)
+      _ -> (t:) <$> go n
 
 braced :: PandocMonad m => LP m [Tok]
-braced = symbol '{' *> braced' anyTok 1
+braced = braced' anyTok
 
 -- URLs require special handling, because they can contain %
 -- characters.  So we retonenize comments as we go...
 bracedUrl :: PandocMonad m => LP m [Tok]
-bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+bracedUrl = braced' (retokenizeComment >> anyTok)
 
 -- For handling URLs, which allow literal % characters...
 retokenizeComment :: PandocMonad m => LP m ()
-- 
cgit v1.2.3


From 31b8f60ea82d96b370cf4a765c46c18004ff6fa8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 22:03:29 -0800
Subject: LaTeX reader:  avoid macro resolution code if no macros defined.

---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 35 +++++++++++++++++---------------
 1 file changed, 19 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index dab4d334e..fc8542894 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -453,20 +453,24 @@ doMacros = do
       updateState $ \st -> st{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
-doMacros' n inp =
-  case inp of
-     Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
-      Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos name ts
-     Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
-      Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos ("end" <> name) ts
-     Tok _ (CtrlSeq "expandafter") _ : t : ts
-        -> combineTok t <$> doMacros' n ts
-     Tok spos (CtrlSeq name) _ : ts
-        -> handleMacros n spos name ts
-     _ -> return inp
-   <|> return inp
+doMacros' n inp = do
+  macros <- sMacros <$> getState
+  if M.null macros
+     then return inp
+     else
+       case inp of
+          Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+           Tok _ Word name : Tok _ Symbol "}" : ts
+             -> handleMacros macros n spos name ts
+          Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+           Tok _ Word name : Tok _ Symbol "}" : ts
+             -> handleMacros macros n spos ("end" <> name) ts
+          Tok _ (CtrlSeq "expandafter") _ : t : ts
+             -> combineTok t <$> doMacros' n ts
+          Tok spos (CtrlSeq name) _ : ts
+             -> handleMacros macros n spos name ts
+          _ -> return inp
+        <|> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -507,10 +511,9 @@ doMacros' n inp =
         Tok spos (CtrlSeq x) (txt <> " ") : acc
     addTok _ _ spos t acc = setpos spos t : acc
 
-    handleMacros n' spos name ts = do
+    handleMacros macros n' spos name ts = do
       when (n' > 20)  -- detect macro expansion loops
         $ throwError $ PandocMacroLoop name
-      macros <- sMacros <$> getState
       case M.lookup name macros of
            Nothing -> mzero
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
-- 
cgit v1.2.3


From cec541e54cd947c8032f9148db18104cd1a48783 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 22:14:31 -0800
Subject: LaTeX reader: Another small improvement to macro handling.

---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index fc8542894..953747d2f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -461,16 +461,15 @@ doMacros' n inp = do
        case inp of
           Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
            Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos name ts
+             -> handleMacros macros n spos name ts <|> return inp
           Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
            Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos ("end" <> name) ts
+             -> handleMacros macros n spos ("end" <> name) ts <|> return inp
           Tok _ (CtrlSeq "expandafter") _ : t : ts
              -> combineTok t <$> doMacros' n ts
           Tok spos (CtrlSeq name) _ : ts
-             -> handleMacros macros n spos name ts
+             -> handleMacros macros n spos name ts <|> return inp
           _ -> return inp
-        <|> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
-- 
cgit v1.2.3


From 321343b2cf8a4a75abe1b6713aa40e278ca57997 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 22:49:20 -0800
Subject: HTML reader: small efficiency improvements.

Also, remove exported class NamedTag(..) [API change].
This was just intended to smooth over the transition from String to Text
and is no longer needed.

The functions isInlineTag and isBlockTag are no longer
polymorphic.
---
 src/Text/Pandoc/Readers/HTML.hs | 43 +++++++++++++++++------------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 47856d2f7..50201fe77 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -19,7 +19,6 @@ module Text.Pandoc.Readers.HTML ( readHtml
                                 , htmlInBalanced
                                 , isInlineTag
                                 , isBlockTag
-                                , NamedTag(..)
                                 , isTextTag
                                 , isCommentTag
                                 ) where
@@ -579,9 +578,9 @@ tagToText (TagOpen "br" _) = "\n"
 tagToText _                = ""
 
 inline :: PandocMonad m => TagParser m Inlines
-inline = do
+inline = pTagText <|> do
+  tag <- lookAhead (pSatisfy isInlineTag)
   exts <- getOption readerExtensions
-  tag <- lookAhead pAny
   case tag of
     TagOpen name attr ->
       case name of
@@ -935,27 +934,21 @@ pSpace = many1 (satisfy isSpace) >>= \xs ->
                then return B.softbreak
                else return B.space
 
-class NamedTag a where
-  getTagName :: a -> Maybe Text
-
-instance NamedTag (Tag Text) where
-  getTagName (TagOpen t _) = Just t
-  getTagName (TagClose t)  = Just t
-  getTagName _             = Nothing
+getTagName :: Tag Text -> Maybe Text
+getTagName (TagOpen t _) = Just t
+getTagName (TagClose t)  = Just t
+getTagName _             = Nothing
 
-instance NamedTag (Tag String) where
-  getTagName (TagOpen t _) = Just (T.pack t)
-  getTagName (TagClose t)  = Just (T.pack t)
-  getTagName _             = Nothing
-
-isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
+isInlineTag :: Tag Text -> Bool
 isInlineTag t =
-  isCommentTag t || case getTagName t of
-                           Nothing  -> False
-                           Just x   -> x `Set.notMember` blockTags ||
-                                       T.take 1 x == "?" -- processing instr.
-
-isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
+  isCommentTag t ||
+  case getTagName t of
+    Nothing  -> False
+    Just "script" -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+    Just x   -> x `Set.notMember` blockTags ||
+                T.take 1 x == "?" -- processing instr.
+
+isBlockTag :: Tag Text -> Bool
 isBlockTag t = isBlockTagName || isTagComment t
                  where isBlockTagName =
                          case getTagName t of
@@ -966,10 +959,10 @@ isBlockTag t = isBlockTagName || isTagComment t
                                     || x `Set.member` eitherBlockOrInline
                               Nothing -> False
 
-isTextTag :: Tag a -> Bool
+isTextTag :: Tag Text -> Bool
 isTextTag = tagText (const True)
 
-isCommentTag :: Tag a -> Bool
+isCommentTag :: Tag Text -> Bool
 isCommentTag = tagComment (const True)
 
 --- parsers for use in markdown, textile readers
@@ -1018,7 +1011,7 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
         go n (t:ts') = (t :) <$> go n ts'
         go _ [] = mzero
 
-hasTagWarning :: [Tag a] -> Bool
+hasTagWarning :: [Tag Text] -> Bool
 hasTagWarning (TagWarning _:_) = True
 hasTagWarning _                = False
 
-- 
cgit v1.2.3


From c0c8865eaac7089b0304c9b21d981f82ea4c2ebd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 23:40:02 -0800
Subject: HTML reader: small performance tweak.

---
 src/Text/Pandoc/Readers/HTML.hs | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 50201fe77..b73c138ab 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -157,11 +157,10 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
           return mempty
 
 block :: PandocMonad m => TagParser m Blocks
-block = do
+block = ((do
+  tag <- lookAhead (pSatisfy isBlockTag)
   exts <- getOption readerExtensions
-  tag <- lookAhead pAny
-  res <-
-   (case tag of
+  case tag of
     TagOpen name attr ->
       let type' = fromMaybe "" $
                      lookup "type" attr <|> lookup "epub:type" attr
@@ -214,11 +213,8 @@ block = do
           | epubExts
           -> eSwitch B.para block
         _ -> mzero
-    _ -> mzero)
-   <|> pPlain
-   <|> pRawHtmlBlock
-  trace (T.take 60 $ tshow $ B.toList res)
-  return res
+    _ -> mzero) <|> pPlain <|> pRawHtmlBlock) >>= \res ->
+        res <$ trace (T.take 60 $ tshow $ B.toList res)
 
 namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
 namespaces = [(mathMLNamespace, pMath True)]
-- 
cgit v1.2.3


From f43cb5ddcf56ab9387b24ad55c2c30eceb606fad Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 10:26:48 -0800
Subject: LaTeX reader: further performance optimization.

Avoid unnecessary 'doMacros'.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 42 +++++++++++++++-----------------
 1 file changed, 19 insertions(+), 23 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 953747d2f..20311651b 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -446,30 +446,25 @@ satisfyTok f = do
 
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
-  expanded <- sExpanded <$> getState
-  verbatimMode <- sVerbatimMode <$> getState
-  unless (expanded || verbatimMode) $ do
-      getInput >>= doMacros' 1 >>= setInput
-      updateState $ \st -> st{ sExpanded = True }
+  st <- getState
+  unless (sExpanded st || sVerbatimMode st || M.null (sMacros st)) $ do
+    getInput >>= doMacros' 1 >>= setInput
+    updateState $ \s -> s{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
-doMacros' n inp = do
-  macros <- sMacros <$> getState
-  if M.null macros
-     then return inp
-     else
-       case inp of
-          Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
-           Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos name ts <|> return inp
-          Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
-           Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos ("end" <> name) ts <|> return inp
-          Tok _ (CtrlSeq "expandafter") _ : t : ts
-             -> combineTok t <$> doMacros' n ts
-          Tok spos (CtrlSeq name) _ : ts
-             -> handleMacros macros n spos name ts <|> return inp
-          _ -> return inp
+doMacros' n inp =
+  case inp of
+     Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+      Tok _ Word name : Tok _ Symbol "}" : ts
+        -> handleMacros n spos name ts <|> return inp
+     Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+      Tok _ Word name : Tok _ Symbol "}" : ts
+        -> handleMacros n spos ("end" <> name) ts <|> return inp
+     Tok _ (CtrlSeq "expandafter") _ : t : ts
+        -> combineTok t <$> doMacros' n ts
+     Tok spos (CtrlSeq name) _ : ts
+        -> handleMacros n spos name ts <|> return inp
+     _ -> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -510,9 +505,10 @@ doMacros' n inp = do
         Tok spos (CtrlSeq x) (txt <> " ") : acc
     addTok _ _ spos t acc = setpos spos t : acc
 
-    handleMacros macros n' spos name ts = do
+    handleMacros n' spos name ts = do
       when (n' > 20)  -- detect macro expansion loops
         $ throwError $ PandocMacroLoop name
+      macros <- sMacros <$> getState
       case M.lookup name macros of
            Nothing -> mzero
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
-- 
cgit v1.2.3


From db4f8823152578d199691f8084fddcc0f04f679b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 11:23:04 -0800
Subject: LaTeX reader: removed sExpanded in state.

This isn't actually needed and checking it doesn't change
anything.

Also remove an unnecessary `doMacros` before `satisfyTok`,
which does it anyway.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 20311651b..0e098d9d9 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -155,7 +155,6 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sLabels        :: M.Map Text [Inline]
                             , sHasChapters   :: Bool
                             , sToggles       :: M.Map Text Bool
-                            , sExpanded      :: Bool
                             , sFileContents  :: M.Map Text Text
                             , sEnableWithRaw :: Bool
                             , sRawTokens     :: IntMap.IntMap [Tok]
@@ -183,7 +182,6 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sLabels        = M.empty
                               , sHasChapters   = False
                               , sToggles       = M.empty
-                              , sExpanded      = False
                               , sFileContents  = M.empty
                               , sEnableWithRaw = True
                               , sRawTokens     = IntMap.empty
@@ -256,7 +254,6 @@ rawLaTeXParser toks retokenize parser valParser = do
        Right toks' -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
-                                        doMacros
                                         ts <- many (satisfyTok (const True))
                                         setInput ts
                                       rawparser)
@@ -432,8 +429,7 @@ satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sExpanded = False
-                           , sRawTokens =
+    updateState $ \st -> st{ sRawTokens =
                               if sEnableWithRaw st
                                  then IntMap.map (res:) $ sRawTokens st
                                  else sRawTokens st }
@@ -447,9 +443,8 @@ satisfyTok f = do
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
   st <- getState
-  unless (sExpanded st || sVerbatimMode st || M.null (sMacros st)) $ do
+  unless (sVerbatimMode st || M.null (sMacros st)) $ do
     getInput >>= doMacros' 1 >>= setInput
-    updateState $ \s -> s{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
 doMacros' n inp =
-- 
cgit v1.2.3


From 2b37ed9f212b711a6381dc89ff7d3431d7c5d916 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 11:29:38 -0800
Subject: LaTeX reader: further optimizations in satisfyTok.

Benchmarks show 2/3 of the run time and 2/3 of the allocation
of the Feb. 10 benchmarks.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 0e098d9d9..c2e10570d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -429,11 +429,11 @@ satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sRawTokens =
-                              if sEnableWithRaw st
-                                 then IntMap.map (res:) $ sRawTokens st
-                                 else sRawTokens st }
-    return res
+    updateState $ \st ->
+      if sEnableWithRaw st
+         then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st }
+         else st
+    return $! res
   where matcher t | f t       = Just t
                   | otherwise = Nothing
         updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
-- 
cgit v1.2.3


From fae6a204f1a072d5efe4dff35fd08fbad2071198 Mon Sep 17 00:00:00 2001
From: Salim B <salim@posteo.de>
Date: Sat, 27 Feb 2021 01:56:04 +0000
Subject: Fix/update URLs and use HTTP**S** where possible (#7122)

---
 CONTRIBUTING.md                     | 16 ++++++++--------
 COPYING.md                          |  2 +-
 COPYRIGHT                           |  6 +++---
 INSTALL.md                          | 29 ++++++++++++++---------------
 MANUAL.txt                          |  2 +-
 README.template                     | 10 +++++-----
 doc/lua-filters.md                  |  2 +-
 man/pandoc.1                        |  2 +-
 man/pandoc.1.after                  |  2 +-
 src/Text/Pandoc/Citeproc.hs         |  2 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs  |  8 ++++----
 src/Text/Pandoc/Readers/Textile.hs  |  2 +-
 src/Text/Pandoc/Writers/Markdown.hs |  2 +-
 test/txt2tags.t2t                   |  2 +-
 trypandoc/index.html                |  2 +-
 windows/Makefile                    |  2 +-
 windows/pandoc.wxs                  |  4 ++--
 17 files changed, 47 insertions(+), 48 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e87c96694..595fb94ff 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -372,7 +372,7 @@ The code
 --------
 
 Pandoc has a publicly accessible git repository on
-GitHub: <http://github.com/jgm/pandoc>.  To get a local copy of the source:
+GitHub: <https://github.com/jgm/pandoc>.  To get a local copy of the source:
 
     git clone https://github.com/jgm/pandoc.git
 
@@ -382,7 +382,7 @@ the pandoc library is in `src/`, the source for the tests is in
 
 The modules `Text.Pandoc.Definition`, `Text.Pandoc.Builder`, and
 `Text.Pandoc.Generic` are in a separate library `pandoc-types`.  The code can
-be found in <http://github.com/jgm/pandoc-types>.
+be found in <https://github.com/jgm/pandoc-types>.
 
 To build pandoc, you will need a working installation of the
 [Haskell platform].
@@ -445,14 +445,14 @@ you may want to consider submitting a pull request to the
 [closed issues]: https://github.com/jgm/pandoc/issues?q=is%3Aissue+is%3Aclosed
 [latest released version]: https://github.com/jgm/pandoc/releases/latest
 [Nightly builds]: https://github.com/jgm/pandoc/actions?query=workflow%3ANightly
-[pandoc-discuss]: http://groups.google.com/group/pandoc-discuss
+[pandoc-discuss]: https://groups.google.com/group/pandoc-discuss
 [issue tracker]: https://github.com/jgm/pandoc/issues
-[User's Guide]: http://pandoc.org/MANUAL.html
-[FAQs]:  http://pandoc.org/faqs.html
-[EditorConfig]: http://editorconfig.org/
-[Haskell platform]: http://www.haskell.org/platform/
+[User's Guide]: https://pandoc.org/MANUAL.html
+[FAQs]:  https://pandoc.org/faqs.html
+[EditorConfig]: https://editorconfig.org/
+[Haskell platform]: https://www.haskell.org/platform/
 [hlint]: https://hackage.haskell.org/package/hlint
-[hsb2hs]: http://hackage.haskell.org/package/hsb2hs
+[hsb2hs]: https://hackage.haskell.org/package/hsb2hs
 [pre-commit hook]: https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks
 [GitHub labels]: https://github.com/jgm/pandoc/labels
 [good first issue]:https://github.com/jgm/pandoc/labels/good%20first%20issue
diff --git a/COPYING.md b/COPYING.md
index af5153d8f..90ae12017 100644
--- a/COPYING.md
+++ b/COPYING.md
@@ -357,5 +357,5 @@ into proprietary programs. If your program is a subroutine library,
 you may consider it more useful to permit linking proprietary
 applications with the library. If this is what you want to do, use the
 [GNU Lesser General Public
-License](http://www.gnu.org/licenses/lgpl.html) instead of this
+License](https://www.gnu.org/licenses/lgpl.html) instead of this
 License.
\ No newline at end of file
diff --git a/COPYRIGHT b/COPYRIGHT
index a6e3a897c..9752c555d 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -22,11 +22,11 @@ The GNU General Public License is available in the file COPYING.md in
 the source distribution.  On Debian systems, the complete text of the
 GPL can be found in `/usr/share/common-licenses/GPL`.
 
-[GPL]: http://www.gnu.org/copyleft/gpl.html
+[GPL]: https://www.gnu.org/copyleft/gpl.html
 
 Pandoc's complete source code is available from the [Pandoc home page].
 
-[Pandoc home page]: http://pandoc.org
+[Pandoc home page]: https://pandoc.org
 
 Pandoc includes some code with different copyrights, or subject to different
 licenses.  The copyright and license statements for these sources are included
@@ -176,7 +176,7 @@ Released under the GNU General Public License version 2 or later.
 ----------------------------------------------------------------------
 The dzslides template contains JavaScript and CSS from Paul Rouget's
 dzslides template.
-http://github.com/paulrouget/dzslides
+https://github.com/paulrouget/dzslides
 
 Released under the Do What the Fuck You Want To Public License.
 
diff --git a/INSTALL.md b/INSTALL.md
index 721dc5d66..d23c349c1 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -45,7 +45,7 @@ by downloading [this script][uninstaller]
 and running it with `perl uninstall-pandoc.pl`.
 
 Alternatively, you can install pandoc using
-[Homebrew](http://brew.sh):
+[Homebrew](https://brew.sh):
 
      brew install pandoc
 
@@ -116,7 +116,7 @@ package repositories.  For example, on Debian/Ubuntu,
 you can install it with `apt-get install haskell-platform`.
 
 For PDF output, you'll need LaTeX.  We recommend installing
-[TeX Live](http://www.tug.org/texlive/) via your package
+[TeX Live](https://www.tug.org/texlive/) via your package
 manager.  (On Debian/Ubuntu, `apt-get install texlive`.)
 
 ## Chrome OS
@@ -238,7 +238,7 @@ The easiest way to build pandoc from source is to use [stack][stack]:
 
         pandoc --help
 
-    [Not sure where `$CABALDIR` is?](http://www.haskell.org/haskellwiki/Cabal-Install#The_cabal-install_configuration_file)
+    [Not sure where `$CABALDIR` is?](https://wiki.haskell.org/Cabal-Install#The_cabal-install_configuration_file)
 
 5.  By default `pandoc` uses the "i;unicode-casemap" method
     to sort bibliography entries (RFC 5051).  If you would like to
@@ -383,30 +383,29 @@ To run just the markdown benchmarks:
 
 
 [Arch]: https://www.archlinux.org/packages/community/x86_64/pandoc/
-[Cabal User's Guide]: http://www.haskell.org/cabal/release/latest/doc/users-guide/builders.html#setup-configure-paths
+[Cabal User's Guide]: https://cabal.readthedocs.io/
 [Debian]: https://packages.debian.org/pandoc
 [Fedora]: https://apps.fedoraproject.org/packages/pandoc
-[FreeBSD ports]: http://www.freshports.org/textproc/hs-pandoc/
-[GHC]:  http://www.haskell.org/ghc/
-[GPL]:  http://www.gnu.org/copyleft/gpl.html
-[Haskell platform]: http://hackage.haskell.org/platform/
-[MacPorts]: http://trac.macports.org/browser/trunk/dports/textproc/pandoc/Portfile
+[FreeBSD ports]: https://www.freshports.org/textproc/hs-pandoc/
+[GHC]:  https://www.haskell.org/ghc/
+[Haskell platform]: https://hackage.haskell.org/platform/
+[MacPorts]: https://trac.macports.org/browser/trunk/dports/textproc/pandoc/Portfile
 [MacTeX]: https://tug.org/mactex/
-[BasicTeX]: http://www.tug.org/mactex/morepackages.html
+[BasicTeX]: https://www.tug.org/mactex/morepackages.html
 [LaTeX]: https://www.latex-project.org
-[MiKTeX]: http://miktex.org/
+[MiKTeX]: https://miktex.org/
 [librsvg]: https://wiki.gnome.org/Projects/LibRsvg
 [Python]: https://www.python.org
-[NetBSD]: http://pkgsrc.se/wip/pandoc
+[NetBSD]: https://pkgsrc.se/wip/pandoc
 [NixOS]: https://nixos.org/nixos/packages.html
 [Slackware]: https://www.slackbuilds.org/result/?search=pandoc&sv=
 [Ubuntu]: https://packages.ubuntu.com/pandoc
 [download page]: https://github.com/jgm/pandoc/releases/latest
-[gentoo]: http://packages.gentoo.org/package/app-text/pandoc
+[gentoo]: https://packages.gentoo.org/package/app-text/pandoc
 [haskell repository]: https://wiki.archlinux.org/index.php/Haskell_Package_Guidelines#.5Bhaskell.5D
 [openSUSE]: https://software.opensuse.org/package/pandoc
-[source tarball]: http://hackage.haskell.org/package/pandoc
+[source tarball]: https://hackage.haskell.org/package/pandoc
 [stack]: https://docs.haskellstack.org/en/stable/install_and_upgrade.html
-[cabal-install]: http://hackage.haskell.org/package/cabal-install
+[cabal-install]: https://hackage.haskell.org/package/cabal-install
 [Void]: https://voidlinux.org/
 [uninstaller]: https://raw.githubusercontent.com/jgm/pandoc/master/macos/uninstall-pandoc.pl
diff --git a/MANUAL.txt b/MANUAL.txt
index ba031bcb2..a9465b20c 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -486,7 +486,7 @@ header when requesting a document from a URL:
 [Emacs Org mode]: https://orgmode.org
 [AsciiDoc]: https://www.methods.co.nz/asciidoc/
 [AsciiDoctor]: https://asciidoctor.org/
-[DZSlides]: http://paulrouget.com/dzslides/
+[DZSlides]: https://paulrouget.com/dzslides/
 [Word docx]: https://en.wikipedia.org/wiki/Office_Open_XML
 [PDF]: https://www.adobe.com/pdf/
 [reveal.js]: https://revealjs.com/
diff --git a/README.template b/README.template
index 53d14b584..4b469f22b 100644
--- a/README.template
+++ b/README.template
@@ -7,9 +7,9 @@ Pandoc
 ======
 
 [![github release](https://img.shields.io/github/release/jgm/pandoc.svg?label=current+release)](https://github.com/jgm/pandoc/releases)
-[![hackage release](https://img.shields.io/hackage/v/pandoc.svg?label=hackage)](http://hackage.haskell.org/package/pandoc)
-[![homebrew](https://img.shields.io/homebrew/v/pandoc.svg)](http://brewformulas.org/Pandoc)
-[![stackage LTS package](http://stackage.org/package/pandoc/badge/lts)](http://stackage.org/lts/package/pandoc)
+[![hackage release](https://img.shields.io/hackage/v/pandoc.svg?label=hackage)](https://hackage.haskell.org/package/pandoc)
+[![homebrew](https://img.shields.io/homebrew/v/pandoc.svg)](https://formulae.brew.sh/formula/pandoc)
+[![stackage LTS package](https://stackage.org/package/pandoc/badge/lts)](https://www.stackage.org/lts/package/pandoc-types)
 [![CI tests](https://github.com/jgm/pandoc/workflows/CI%20tests/badge.svg)](https://github.com/jgm/pandoc/actions)
 [![license](https://img.shields.io/badge/license-GPLv2+-lightgray.svg)](https://www.gnu.org/licenses/gpl.html)
 [![pandoc-discuss on google groups](https://img.shields.io/badge/pandoc-discuss-red.svg?style=social)](https://groups.google.com/forum/#!forum/pandoc-discuss)
@@ -85,5 +85,5 @@ License
 [GPL], version 2 or greater.  This software carries no warranty of
 any kind.  (See COPYRIGHT for full copyright and warranty notices.)
 
-[GPL]: http://www.gnu.org/copyleft/gpl.html "GNU General Public License"
-[Haskell]: http://haskell.org
+[GPL]: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html "GNU General Public License"
+[Haskell]: https://haskell.org
diff --git a/doc/lua-filters.md b/doc/lua-filters.md
index a99dc9008..df55dc44e 100644
--- a/doc/lua-filters.md
+++ b/doc/lua-filters.md
@@ -3478,7 +3478,7 @@ Contract a filename, based on a relative path. Note that the
 resulting path will usually not introduce `..` paths, as the
 presence of symlinks means `../b` may not reach `a/b` if it starts
 from `a/c`. For a worked example see [this blog
-post](http://neilmitchell.blogspot.co.uk/2015/10/filepaths-are-subtle-symlinks-are-hard.html).
+post](https://neilmitchell.blogspot.co.uk/2015/10/filepaths-are-subtle-symlinks-are-hard.html).
 
 Set `unsafe` to a truthy value to a allow `..` in paths.
 
diff --git a/man/pandoc.1 b/man/pandoc.1
index 491e24278..bcda05489 100644
--- a/man/pandoc.1
+++ b/man/pandoc.1
@@ -7495,4 +7495,4 @@ This software carries no warranty of any kind.
 of contributors, see the file AUTHORS.md in the pandoc source code.
 .PP
 The Pandoc source code and all documentation may be downloaded
-from <http://pandoc.org>.
+from <https://pandoc.org>.
diff --git a/man/pandoc.1.after b/man/pandoc.1.after
index e5eabb670..7cd7a93f0 100644
--- a/man/pandoc.1.after
+++ b/man/pandoc.1.after
@@ -1,3 +1,3 @@
 .PP
 The Pandoc source code and all documentation may be downloaded
-from <http://pandoc.org>.
+from <https://pandoc.org>.
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 6658c8c0c..38e992ba1 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -544,7 +544,7 @@ linkifyVariables ref =
   fixShortDOI x = let x' = extractText x
                   in  if "10/" `T.isPrefixOf` x'
                          then TextVal $ T.drop 3 x'
-                              -- see http://shortdoi.org
+                              -- see https://shortdoi.org
                          else TextVal x'
   tolink pref x = let x' = extractText x
                       x'' = if "://" `T.isInfixOf` x'
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 416fe439e..c0752dadc 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -577,10 +577,10 @@ itemToReference locale variant item = do
                  eprint <- getRawField "eprint"
                  let baseUrl =
                        case T.toLower etype of
-                         "arxiv"       -> "http://arxiv.org/abs/"
-                         "jstor"       -> "http://www.jstor.org/stable/"
-                         "pubmed"      -> "http://www.ncbi.nlm.nih.gov/pubmed/"
-                         "googlebooks" -> "http://books.google.com?id="
+                         "arxiv"       -> "https://arxiv.org/abs/"
+                         "jstor"       -> "https://www.jstor.org/stable/"
+                         "pubmed"      -> "https://www.ncbi.nlm.nih.gov/pubmed/"
+                         "googlebooks" -> "https://books.google.com?id="
                          _             -> ""
                  if T.null baseUrl
                     then mzero
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 4991c6308..860da2dc3 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -11,7 +11,7 @@
    Portability : portable
 
 Conversion from Textile to 'Pandoc' document, based on the spec
-available at http://redcloth.org/textile.
+available at https://www.promptworks.com/textile/.
 
 Implemented and parsed:
  - Paragraphs
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 898905603..d33246a63 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -14,7 +14,7 @@
 
 Conversion of 'Pandoc' documents to markdown-formatted plain text.
 
-Markdown:  <http://daringfireball.net/projects/markdown/>
+Markdown:  <https://daringfireball.net/projects/markdown/>
 -}
 module Text.Pandoc.Writers.Markdown (
   writeMarkdown,
diff --git a/test/txt2tags.t2t b/test/txt2tags.t2t
index f736cfa93..b636c1511 100644
--- a/test/txt2tags.t2t
+++ b/test/txt2tags.t2t
@@ -270,7 +270,7 @@ FTP.DOMAIN.COM
 [img.png]
 
 %%% Syntax: Image pointing to a link: [[img] link]
-[[img.png] http://txt2tags.org]
+[[img.png] https://txt2tags.org]
 
 %%% Align: Image position is preserved when inside paragraph
 [img.png] Image at the line beginning.
diff --git a/trypandoc/index.html b/trypandoc/index.html
index 792f522eb..5761153c9 100644
--- a/trypandoc/index.html
+++ b/trypandoc/index.html
@@ -14,7 +14,7 @@
   <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
   <script>
 "use strict";
-(function($) { // http://stackoverflow.com/questions/901115/how-can-i-get-query-string-values
+(function($) { // https://stackoverflow.com/questions/901115/how-can-i-get-query-string-values
 	$.QueryString = (function(a) {
 	    if (a == "") return {};
 	    var b = {};
diff --git a/windows/Makefile b/windows/Makefile
index 354c1b37c..34dda34c2 100644
--- a/windows/Makefile
+++ b/windows/Makefile
@@ -6,5 +6,5 @@ all: $(SIGNED)
 .PHONY: all
 
 pandoc-%.msi: pandoc-%-UNSIGNED.msi
-	osslsigncode sign -pkcs12 $$HOME/Private/SectigoCodeSigning.exp2023.p12 -in $< -i http://johnmacfarlane.net/ -t http://timestamp.comodoca.com/ -out $@ -askpass && rm $<
+	osslsigncode sign -pkcs12 $$HOME/Private/SectigoCodeSigning.exp2023.p12 -in $< -i https://johnmacfarlane.net/ -t http://timestamp.comodoca.com/ -out $@ -askpass && rm $<
 
diff --git a/windows/pandoc.wxs b/windows/pandoc.wxs
index d9f6b836e..a582c4de9 100644
--- a/windows/pandoc.wxs
+++ b/windows/pandoc.wxs
@@ -123,8 +123,8 @@
 
 
     <!-- Set properties for add/remove programs -->
-    <Property Id="ARPURLINFOABOUT" Value="http://pandoc.org" />
-    <Property Id="ARPHELPLINK" Value="http://pandoc.org" />
+    <Property Id="ARPURLINFOABOUT" Value="https://pandoc.org" />
+    <Property Id="ARPHELPLINK" Value="https://pandoc.org" />
     <Property Id="ARPNOREPAIR" Value="yes" Secure="yes" />    <!-- Remove repair -->
     <Property Id="ARPNOMODIFY" Value="yes" Secure="yes" />    <!-- Remove modify -->
 
-- 
cgit v1.2.3


From 925815bb33b462e1a4c19a8e2c617d403dec0ce7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 27 Feb 2021 17:02:44 -0800
Subject: Split off T.P.Readers.LaTeX.Accent.

To help reduce memory demands compiling the main LaTeX reader.
---
 pandoc.cabal                            |  1 +
 src/Text/Pandoc/Readers/LaTeX.hs        | 68 ++++------------------------
 src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 +++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 60 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Accent.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 83c5c0120..68edb2b64 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -630,6 +630,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Parsing,
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
+                   Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 7d8dfab0e..51c031f78 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -57,6 +57,7 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
 import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
 import Text.Pandoc.Readers.LaTeX.SIunitx
@@ -64,7 +65,6 @@ import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
 import qualified Text.Pandoc.Builder as B
-import qualified Data.Text.Normalize as Normalize
 import Safe
 
 -- for debugging:
@@ -247,9 +247,6 @@ doxspace =
         startsWithLetter _ = False
 
 
-lit :: Text -> LP m Inlines
-lit = pure . str
-
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
   Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -296,6 +293,9 @@ quoted' f starter ender = do
                               cs   -> cs)
      else lit startchs
 
+lit :: Text -> LP m Inlines
+lit = pure . str
+
 enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
 enquote starred mblang = do
   skipopts
@@ -631,7 +631,10 @@ inlineEnvironments = M.fromList [
   ]
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands = M.union inlineLanguageCommands $ M.fromList
+inlineCommands =
+  M.union inlineLanguageCommands $
+  M.union (accentCommands tok) $
+  M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
   , ("textsl", extractSpaces emph <$> tok)
@@ -703,48 +706,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("MakeTextLowercase", makeLowercase <$> tok)
   , ("lowercase", makeLowercase <$> tok)
   , ("/", pure mempty) -- italic correction
-  , ("aa", lit "å")
-  , ("AA", lit "Å")
-  , ("ss", lit "ß")
-  , ("o", lit "ø")
-  , ("O", lit "Ø")
-  , ("L", lit "Ł")
-  , ("l", lit "ł")
-  , ("ae", lit "æ")
-  , ("AE", lit "Æ")
-  , ("oe", lit "œ")
-  , ("OE", lit "Œ")
-  , ("pounds", lit "£")
-  , ("euro", lit "€")
-  , ("copyright", lit "©")
-  , ("textasciicircum", lit "^")
-  , ("textasciitilde", lit "~")
-  , ("H", accent '\779' Nothing) -- hungarumlaut
-  , ("`", accent '\768' (Just '`')) -- grave
-  , ("'", accent '\769' (Just '\'')) -- acute
-  , ("^", accent '\770' (Just '^')) -- circ
-  , ("~", accent '\771' (Just '~')) -- tilde
-  , ("\"", accent '\776' Nothing) -- umlaut
-  , (".", accent '\775' Nothing) -- dot
-  , ("=", accent '\772' Nothing) -- macron
-  , ("|", accent '\781' Nothing) -- vertical line above
-  , ("b", accent '\817' Nothing) -- macron below
-  , ("c", accent '\807' Nothing) -- cedilla
-  , ("G", accent '\783' Nothing) -- doublegrave
-  , ("h", accent '\777' Nothing) -- hookabove
-  , ("d", accent '\803' Nothing) -- dotbelow
-  , ("f", accent '\785' Nothing)  -- inverted breve
-  , ("r", accent '\778' Nothing)  -- ringabove
-  , ("t", accent '\865' Nothing)  -- double inverted breve
-  , ("U", accent '\782' Nothing)  -- double vertical line above
-  , ("v", accent '\780' Nothing) -- hacek
-  , ("u", accent '\774' Nothing) -- breve
-  , ("k", accent '\808' Nothing) -- ogonek
-  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
-  , ("i", lit "ı")  -- dotless i
-  , ("j", lit "ȷ")  -- dotless j
-  , ("newtie", accent '\785' Nothing) -- inverted breve
-  , ("textcircled", accent '\8413' Nothing) -- combining circle
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
                             optional opt
@@ -960,19 +921,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("hyphen", pure (str "-"))
   ]
 
-accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
-accent combiningAccent fallBack = try $ do
-  ils <- tok
-  case toList ils of
-       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-         -- try to normalize to the combined character:
-         Str (Normalize.normalize Normalize.NFC
-               (T.pack [x, combiningAccent]) <> xs) : ys
-       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       _                 -> return ils
-
-
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
   optional opt
diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
new file mode 100644
index 000000000..f8c53491c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
@@ -0,0 +1,78 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Accent
+  ( accentCommands )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Parsing
+import qualified Data.Text as T
+import qualified Data.Text.Normalize as Normalize
+
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+  let accent = accentWith tok
+      lit = pure . str
+  in M.fromList
+  [ ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("textasciicircum", lit "^")
+  , ("textasciitilde", lit "~")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
+  ]
+
+accentWith :: PandocMonad m
+           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+  ils <- tok
+  case toList ils of
+       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (Normalize.normalize Normalize.NFC
+               (T.pack [x, combiningAccent]) <> xs) : ys
+       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+       _                 -> return ils
+
-- 
cgit v1.2.3


From 08231f5cdd16e31d38d9d6bf59bc5ca12638b438 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 27 Feb 2021 21:40:56 -0800
Subject: Factor out T.P.Readers.LaTeX.Table.

---
 pandoc.cabal                             |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs         | 368 +-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  33 +++
 src/Text/Pandoc/Readers/LaTeX/Table.hs   | 373 +++++++++++++++++++++++++++++++
 4 files changed, 412 insertions(+), 363 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Table.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 68edb2b64..9149c4f8f 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -631,6 +631,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 51c031f78..831c5df05 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE BangPatterns          #-}
 {-# LANGUAGE CPP                   #-}
 {-# LANGUAGE FlexibleInstances     #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
@@ -32,7 +31,6 @@ import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isDigit, isLetter, toUpper, chr)
 import Data.Default
-import Data.Functor (($>))
 import Data.List (intercalate)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, maybeToList)
@@ -58,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
+import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
 import Text.Pandoc.Readers.LaTeX.SIunitx
@@ -551,12 +550,8 @@ inlineCommand' = try $ do
          <|> ignore rawcommand
   lookupListDefault raw names inlineCommands
 
-
 tok :: PandocMonad m => LP m Inlines
-tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
-  where singleChar' = do
-          Tok _ _ t <- singleChar
-          return $ str t
+tok = tokWith inline
 
 opt :: PandocMonad m => LP m Inlines
 opt = do
@@ -1118,12 +1113,6 @@ treatAsInline = Set.fromList
   , "pagebreak"
   ]
 
-label :: PandocMonad m => LP m ()
-label = do
-  controlSeq "label"
-  t <- braced
-  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
-
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -1421,13 +1410,6 @@ bracketedNum = do
        Just i -> return i
        _      -> return 0
 
-setCaption :: PandocMonad m => LP m ()
-setCaption = try $ do
-  skipopts
-  ils <- tok
-  optional $ try $ spaces *> label
-  updateState $ \st -> st{ sCaption = Just ils }
-
 looseItem :: PandocMonad m => LP m Blocks
 looseItem = do
   inListItem <- sInListItem <$> getState
@@ -1441,10 +1423,6 @@ epigraph = do
   p2 <- grouped block
   return $ divWith ("", ["epigraph"], []) (p1 <> p2)
 
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption   = Nothing
-                                      , sLastLabel = Nothing }
-
 section :: PandocMonad m => Attr -> Int -> LP m Blocks
 section (ident, classes, kvs) lvl = do
   skipopts
@@ -1585,7 +1563,7 @@ blockCommands = M.fromList
    , ("item", looseItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
-   , ("caption", mempty <$ setCaption)
+   , ("caption", mempty <$ setCaption inline)
    , ("bibliography", mempty <$ (skipopts *> braced >>=
          addMeta "bibliography" . splitBibs . untokenize))
    , ("addbibresource", mempty <$ (skipopts *> braced >>=
@@ -1623,7 +1601,8 @@ blockCommands = M.fromList
 
 
 environments :: PandocMonad m => M.Map Text (LP m Blocks)
-environments = M.fromList
+environments = M.union (tableEnvironments blocks inline) $
+   M.fromList
    [ ("document", env "document" blocks <* skipMany anyTok)
    , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
    , ("sloppypar", env "sloppypar" blocks)
@@ -1633,13 +1612,6 @@ environments = M.fromList
    , ("figure", env "figure" $ skipopts *> figure)
    , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
    , ("center", divWith ("", ["center"], []) <$> env "center" blocks)
-   , ("longtable",  env "longtable" $
-          resetCaption *> simpTable "longtable" False >>= addTableCaption)
-   , ("table",  env "table" $
-          skipopts *> resetCaption *> blocks >>= addTableCaption)
-   , ("tabular*", env "tabular*" $ simpTable "tabular*" True)
-   , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
-   , ("tabular", env "tabular"  $ simpTable "tabular" False)
    , ("quote", blockQuote <$> env "quote" blocks)
    , ("quotation", blockQuote <$> env "quotation" blocks)
    , ("verse", blockQuote <$> env "verse" blocks)
@@ -1805,9 +1777,6 @@ italicize (Para ils) = Para [Emph ils]
 italicize (Plain ils) = Plain [Emph ils]
 italicize x = x
 
-env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
-
 rawEnv :: PandocMonad m => Text -> LP m Blocks
 rawEnv name = do
   exts <- getOption readerExtensions
@@ -2045,333 +2014,6 @@ orderedList' = try $ do
   bs <- listenv "enumerate" (many item)
   return $ orderedListWith (start, style, delim) bs
 
--- tables
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
-  spaces
-  controlSeq "hline" <|>
-    -- booktabs rules:
-    controlSeq "toprule" <|>
-    controlSeq "bottomrule" <|>
-    controlSeq "midrule" <|>
-    controlSeq "endhead" <|>
-    controlSeq "endfirsthead"
-  spaces
-  optional opt
-  return ()
-
-lbreak :: PandocMonad m => LP m Tok
-lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
-         <* skipopts <* spaces
-
-amp :: PandocMonad m => LP m Tok
-amp = symbol '&'
-
--- Split a Word into individual Symbols (for parseAligns)
-splitWordTok :: PandocMonad m => LP m ()
-splitWordTok = do
-  inp <- getInput
-  case inp of
-       (Tok spos Word t : rest) ->
-         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
-       _ -> return ()
-
-parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
-parseAligns = try $ do
-  let maybeBar = skipMany
-        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
-  let cAlign = AlignCenter <$ symbol 'c'
-  let lAlign = AlignLeft <$ symbol 'l'
-  let rAlign = AlignRight <$ symbol 'r'
-  let parAlign = AlignLeft <$ symbol 'p'
-  -- aligns from tabularx
-  let xAlign = AlignLeft <$ symbol 'X'
-  let mAlign = AlignLeft <$ symbol 'm'
-  let bAlign = AlignLeft <$ symbol 'b'
-  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
-                                 <|> xAlign <|> mAlign <|> bAlign )
-  let alignPrefix = symbol '>' >> braced
-  let alignSuffix = symbol '<' >> braced
-  let colWidth = try $ do
-        symbol '{'
-        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
-        spaces
-        symbol '}'
-        return $ safeRead ds
-  let alignSpec = do
-        pref <- option [] alignPrefix
-        spaces
-        al <- alignChar
-        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
-                                                 pos <- getPosition
-                                                 report $ SkippedContent s pos
-                                                 return Nothing)
-        spaces
-        suff <- option [] alignSuffix
-        return (al, width, (pref, suff))
-  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
-        symbol '*'
-        spaces
-        ds <- trim . untokenize <$> braced
-        spaces
-        spec <- braced
-        case safeRead ds of
-             Just n  ->
-               getInput >>= setInput . (mconcat (replicate n spec) ++)
-             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
-  bgroup
-  spaces
-  maybeBar
-  aligns' <- many $ try $ spaces >> optional starAlign >>
-                            (alignSpec <* maybeBar)
-  spaces
-  egroup
-  spaces
-  return $ map toSpec aligns'
-  where
-    toColWidth (Just w) | w > 0 = ColWidth w
-    toColWidth _                = ColWidthDefault
-    toSpec (x, y, z) = (x, toColWidth y, z)
-
--- N.B. this parser returns a Row that may have erroneous empty cells
--- in it. See the note above fixTableHead for details.
-parseTableRow :: PandocMonad m
-              => Text   -- ^ table environment name
-              -> [([Tok], [Tok])] -- ^ pref/suffixes
-              -> LP m Row
-parseTableRow envname prefsufs = do
-  notFollowedBy (spaces *> end_ envname)
-  -- add prefixes and suffixes in token stream:
-  let celltoks (pref, suff) = do
-        prefpos <- getPosition
-        contents <- mconcat <$>
-            many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711
-                  <|>
-                   snd <$> withRaw (inlineEnvironment <|> dollarsMath)
-                  <|>
-                   (do notFollowedBy
-                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
-                       count 1 anyTok) )
-
-        suffpos <- getPosition
-        option [] (count 1 amp)
-        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
-  rawcells <- mapM celltoks prefsufs
-  cells <- mapM (parseFromToks parseTableCell) rawcells
-  spaces
-  return $ Row nullAttr cells
-
-parseTableCell :: PandocMonad m => LP m Cell
-parseTableCell = do
-  spaces
-  updateState $ \st -> st{ sInTableCell = True }
-  cell' <-   multicolumnCell
-         <|> multirowCell
-         <|> parseSimpleCell
-         <|> parseEmptyCell
-  updateState $ \st -> st{ sInTableCell = False }
-  spaces
-  return cell'
-  where
-    -- The parsing of empty cells is important in LaTeX, especially when dealing
-    -- with multirow/multicolumn. See #6603.
-    parseEmptyCell = spaces $> emptyCell
-
-cellAlignment :: PandocMonad m => LP m Alignment
-cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
-  where
-    alignment = do
-      c <- untoken <$> singleChar
-      return $ case c of
-        "l" -> AlignLeft
-        "r" -> AlignRight
-        "c" -> AlignCenter
-        "*" -> AlignDefault
-        _   -> AlignDefault
-
-plainify :: Blocks -> Blocks
-plainify bs = case toList bs of
-                [Para ils] -> plain (fromList ils)
-                _          -> bs
-
-multirowCell :: PandocMonad m => LP m Cell
-multirowCell = controlSeq "multirow" >> do
-  -- Full prototype for \multirow macro is:
-  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
-  -- However, everything except `nrows` and `text` make
-  -- sense in the context of the Pandoc AST
-  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
-  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
-  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
-  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
-  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
-
-multicolumnCell :: PandocMonad m => LP m Cell
-multicolumnCell = controlSeq "multicolumn" >> do
-  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  alignment <- symbol '{' *> cellAlignment <* symbol '}'
-
-  let singleCell = do
-        content <- plainify <$> blocks
-        return $ cell alignment (RowSpan 1) (ColSpan span') content
-
-  -- Two possible contents: either a \multirow cell, or content.
-  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
-  -- Note that a \multirow cell can be nested in a \multicolumn,
-  -- but not the other way around. See #6603
-  let nestedCell = do
-        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell
-        return $ cell
-                  alignment
-                  (RowSpan rs)
-                  (ColSpan span')
-                  (fromList bs)
-
-  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
-
--- Parse a simple cell, i.e. not multirow/multicol
-parseSimpleCell :: PandocMonad m => LP m Cell
-parseSimpleCell = simpleCell <$> (plainify <$> blocks)
-
--- LaTeX tables are stored with empty cells underneath multirow cells
--- denoting the grid spaces taken up by them. More specifically, if a
--- cell spans m rows, then it will overwrite all the cells in the
--- columns it spans for (m-1) rows underneath it, requiring padding
--- cells in these places. These padding cells need to be removed for
--- proper table reading. See #6603.
---
--- These fixTable functions do not otherwise fix up malformed
--- input tables: that is left to the table builder.
-fixTableHead :: TableHead -> TableHead
-fixTableHead (TableHead attr rows) = TableHead attr rows'
-  where
-    rows' = fixTableRows rows
-
-fixTableBody :: TableBody -> TableBody
-fixTableBody (TableBody attr rhc th tb)
-  = TableBody attr rhc th' tb'
-  where
-    th' = fixTableRows th
-    tb' = fixTableRows tb
-
-fixTableRows :: [Row] -> [Row]
-fixTableRows = fixTableRows' $ repeat Nothing
-  where
-    fixTableRows' oldHang (Row attr cells : rs)
-      = let (newHang, cells') = fixTableRow oldHang cells
-            rs'               = fixTableRows' newHang rs
-        in Row attr cells' : rs'
-    fixTableRows' _ [] = []
-
--- The overhang is represented as Just (relative cell dimensions) or
--- Nothing for an empty grid space.
-fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
-fixTableRow oldHang cells
-  -- If there's overhang, drop cells until their total width meets the
-  -- width of the occupied grid spaces (or we run out)
-  | (n, prefHang, restHang) <- splitHang oldHang
-  , n > 0
-  = let cells' = dropToWidth getCellW n cells
-        (restHang', cells'') = fixTableRow restHang cells'
-    in (prefHang restHang', cells'')
-  -- Otherwise record the overhang of a pending cell and fix the rest
-  -- of the row
-  | c@(Cell _ _ h w _):cells' <- cells
-  = let h' = max 1 h
-        w' = max 1 w
-        oldHang' = dropToWidth getHangW w' oldHang
-        (newHang, cells'') = fixTableRow oldHang' cells'
-    in (toHang w' h' <> newHang, c : cells'')
-  | otherwise
-  = (oldHang, [])
-  where
-    getCellW (Cell _ _ _ w _) = w
-    getHangW = maybe 1 fst
-    getCS (ColSpan n) = n
-
-    toHang c r
-      | r > 1     = [Just (c, r)]
-      | otherwise = replicate (getCS c) Nothing
-
-    -- Take the prefix of the overhang list representing filled grid
-    -- spaces. Also return the remainder and the length of this prefix.
-    splitHang = splitHang' 0 id
-
-    splitHang' !n l (Just (c, r):xs)
-      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
-    splitHang' n l xs = (n, l, xs)
-
-    -- Drop list items until the total width of the dropped items
-    -- exceeds the passed width.
-    dropToWidth _     n l | n < 1 = l
-    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
-    dropToWidth _     _ []        = []
-
-simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
-simpTable envname hasWidthParameter = try $ do
-  when hasWidthParameter $ () <$ (spaces >> tok)
-  skipopts
-  colspecs <- parseAligns
-  let (aligns, widths, prefsufs) = unzip3 colspecs
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  skipMany hline
-  spaces
-  header' <- option [] . try . fmap (:[]) $
-             parseTableRow envname prefsufs <* lbreak <* many1 hline
-  spaces
-  rows <- sepEndBy (parseTableRow envname prefsufs)
-                    (lbreak <* optional (skipMany hline))
-  spaces
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  lookAhead $ controlSeq "end" -- make sure we're at end
-  let th  = fixTableHead $ TableHead nullAttr header'
-  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
-  let tf  = TableFoot nullAttr []
-  return $ table emptyCaption (zip aligns widths) th tbs tf
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
-  where go (Table attr c spec th tb tf) = do
-          st <- getState
-          let mblabel = sLastLabel st
-          capt <- case (sCaption st, mblabel) of
-                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
-                   (Just ils, Just lab) -> do
-                     num <- getNextNumber sLastTableNum
-                     setState
-                       st{ sLastTableNum = num
-                         , sLabels = M.insert lab
-                                    [Str (renderDottedNum num)]
-                                    (sLabels st) }
-                     return $ caption Nothing (plain ils) -- add number??
-                   (Nothing, _)  -> return c
-          let attr' = case (attr, mblabel) of
-                        ((_,classes,kvs), Just ident) ->
-                           (ident,classes,kvs)
-                        _ -> attr
-          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
-        go x = return x
-
--- TODO: For now we add a Div to contain table attributes, since
--- most writers don't do anything yet with attributes on Table.
--- This can be removed when that changes.
-addAttrDiv :: Attr -> Block -> Block
-addAttrDiv ("",[],[]) b = b
-addAttrDiv attr b       = Div attr [b]
-
 block :: PandocMonad m => LP m Blocks
 block = do
   res <- (mempty <$ spaces1)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index c2e10570d..4a9fa03ad 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -54,6 +54,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , comment
   , anyTok
   , singleChar
+  , tokWith
   , specialChars
   , endline
   , blankline
@@ -80,6 +81,10 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawopt
   , overlaySpecification
   , getNextNumber
+  , label
+  , setCaption
+  , resetCaption
+  , env
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -914,3 +919,31 @@ getNextNumber getCurrentNum = do
                Just n  -> [n, 1]
                Nothing -> [1]
 
+label :: PandocMonad m => LP m ()
+label = do
+  controlSeq "label"
+  t <- braced
+  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
+
+setCaption :: PandocMonad m => LP m Inlines -> LP m ()
+setCaption inline = try $ do
+  skipopts
+  ils <- tokWith inline
+  optional $ try $ spaces *> label
+  updateState $ \st -> st{ sCaption = Just ils }
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption   = Nothing
+                                      , sLastLabel = Nothing }
+
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
+
+tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
+tokWith inlineParser = try $ spaces >>
+                                 grouped inlineParser
+                            <|> (lookAhead anyControlSeq >> inlineParser)
+                            <|> singleChar'
+  where singleChar' = do
+          Tok _ _ t <- singleChar
+          return $ str t
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
new file mode 100644
index 000000000..2ea9caf58
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -0,0 +1,373 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Table
+  ( tableEnvironments )
+where
+
+import Data.Functor (($>))
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (when, void)
+import Text.Pandoc.Shared (safeRead, trim)
+import Text.Pandoc.Logging (LogMessage(SkippedContent))
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+tableEnvironments :: PandocMonad m
+                  => LP m Blocks
+                  -> LP m Inlines
+                  -> M.Map Text (LP m Blocks)
+tableEnvironments blocks inline =
+  M.fromList
+  [ ("longtable",  env "longtable" $
+          resetCaption *>
+            simpTable blocks inline "longtable" False >>= addTableCaption)
+  , ("table",  env "table" $
+          skipopts *> resetCaption *> blocks >>= addTableCaption)
+  , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
+  , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
+  , ("tabular", env "tabular"  $ simpTable blocks inline "tabular" False)
+  ]
+
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+  spaces
+  controlSeq "hline" <|>
+    -- booktabs rules:
+    controlSeq "toprule" <|>
+    controlSeq "bottomrule" <|>
+    controlSeq "midrule" <|>
+    controlSeq "endhead" <|>
+    controlSeq "endfirsthead"
+  spaces
+  optional rawopt
+  return ()
+
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
+         <* skipopts <* spaces
+
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
+
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+  inp <- getInput
+  case inp of
+       (Tok spos Word t : rest) ->
+         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
+       _ -> return ()
+
+parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
+parseAligns = try $ do
+  let maybeBar = skipMany
+        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
+  let cAlign = AlignCenter <$ symbol 'c'
+  let lAlign = AlignLeft <$ symbol 'l'
+  let rAlign = AlignRight <$ symbol 'r'
+  let parAlign = AlignLeft <$ symbol 'p'
+  -- aligns from tabularx
+  let xAlign = AlignLeft <$ symbol 'X'
+  let mAlign = AlignLeft <$ symbol 'm'
+  let bAlign = AlignLeft <$ symbol 'b'
+  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
+                                 <|> xAlign <|> mAlign <|> bAlign )
+  let alignPrefix = symbol '>' >> braced
+  let alignSuffix = symbol '<' >> braced
+  let colWidth = try $ do
+        symbol '{'
+        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
+        spaces
+        symbol '}'
+        return $ safeRead ds
+  let alignSpec = do
+        pref <- option [] alignPrefix
+        spaces
+        al <- alignChar
+        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
+                                                 pos <- getPosition
+                                                 report $ SkippedContent s pos
+                                                 return Nothing)
+        spaces
+        suff <- option [] alignSuffix
+        return (al, width, (pref, suff))
+  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+        symbol '*'
+        spaces
+        ds <- trim . untokenize <$> braced
+        spaces
+        spec <- braced
+        case safeRead ds of
+             Just n  ->
+               getInput >>= setInput . (mconcat (replicate n spec) ++)
+             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
+  bgroup
+  spaces
+  maybeBar
+  aligns' <- many $ try $ spaces >> optional starAlign >>
+                            (alignSpec <* maybeBar)
+  spaces
+  egroup
+  spaces
+  return $ map toSpec aligns'
+  where
+    toColWidth (Just w) | w > 0 = ColWidth w
+    toColWidth _                = ColWidthDefault
+    toSpec (x, y, z) = (x, toColWidth y, z)
+
+-- N.B. this parser returns a Row that may have erroneous empty cells
+-- in it. See the note above fixTableHead for details.
+parseTableRow :: PandocMonad m
+              => LP m Blocks -- ^ block parser
+              -> LP m Inlines -- ^ inline parser
+              -> Text   -- ^ table environment name
+              -> [([Tok], [Tok])] -- ^ pref/suffixes
+              -> LP m Row
+parseTableRow blocks inline envname prefsufs = do
+  notFollowedBy (spaces *> end_ envname)
+  -- add prefixes and suffixes in token stream:
+  let celltoks (pref, suff) = do
+        prefpos <- getPosition
+        contents <- mconcat <$>
+            many ( snd <$> withRaw
+                     ((lookAhead (controlSeq "parbox") >>
+                       void blocks) -- #5711
+                      <|>
+                      (lookAhead (controlSeq "begin") >> void inline)
+                      <|>
+                      (lookAhead (symbol '$') >> void inline))
+                  <|>
+                   (do notFollowedBy
+                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
+                       count 1 anyTok) )
+
+        suffpos <- getPosition
+        option [] (count 1 amp)
+        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+  rawcells <- mapM celltoks prefsufs
+  cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
+  spaces
+  return $ Row nullAttr cells
+
+parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
+parseTableCell blocks = do
+  spaces
+  updateState $ \st -> st{ sInTableCell = True }
+  cell' <-   multicolumnCell blocks
+         <|> multirowCell blocks
+         <|> parseSimpleCell
+         <|> parseEmptyCell
+  updateState $ \st -> st{ sInTableCell = False }
+  spaces
+  return cell'
+  where
+    -- The parsing of empty cells is important in LaTeX, especially when dealing
+    -- with multirow/multicolumn. See #6603.
+    parseEmptyCell = spaces $> emptyCell
+    parseSimpleCell = simpleCell <$> (plainify <$> blocks)
+
+
+cellAlignment :: PandocMonad m => LP m Alignment
+cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
+  where
+    alignment = do
+      c <- untoken <$> singleChar
+      return $ case c of
+        "l" -> AlignLeft
+        "r" -> AlignRight
+        "c" -> AlignCenter
+        "*" -> AlignDefault
+        _   -> AlignDefault
+
+plainify :: Blocks -> Blocks
+plainify bs = case toList bs of
+                [Para ils] -> plain (fromList ils)
+                _          -> bs
+
+multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multirowCell blocks = controlSeq "multirow" >> do
+  -- Full prototype for \multirow macro is:
+  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
+  -- However, everything except `nrows` and `text` make
+  -- sense in the context of the Pandoc AST
+  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
+  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
+  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
+  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
+  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
+
+multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multicolumnCell blocks = controlSeq "multicolumn" >> do
+  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  alignment <- symbol '{' *> cellAlignment <* symbol '}'
+
+  let singleCell = do
+        content <- plainify <$> blocks
+        return $ cell alignment (RowSpan 1) (ColSpan span') content
+
+  -- Two possible contents: either a \multirow cell, or content.
+  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
+  -- Note that a \multirow cell can be nested in a \multicolumn,
+  -- but not the other way around. See #6603
+  let nestedCell = do
+        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
+        return $ cell
+                  alignment
+                  (RowSpan rs)
+                  (ColSpan span')
+                  (fromList bs)
+
+  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
+
+-- LaTeX tables are stored with empty cells underneath multirow cells
+-- denoting the grid spaces taken up by them. More specifically, if a
+-- cell spans m rows, then it will overwrite all the cells in the
+-- columns it spans for (m-1) rows underneath it, requiring padding
+-- cells in these places. These padding cells need to be removed for
+-- proper table reading. See #6603.
+--
+-- These fixTable functions do not otherwise fix up malformed
+-- input tables: that is left to the table builder.
+fixTableHead :: TableHead -> TableHead
+fixTableHead (TableHead attr rows) = TableHead attr rows'
+  where
+    rows' = fixTableRows rows
+
+fixTableBody :: TableBody -> TableBody
+fixTableBody (TableBody attr rhc th tb)
+  = TableBody attr rhc th' tb'
+  where
+    th' = fixTableRows th
+    tb' = fixTableRows tb
+
+fixTableRows :: [Row] -> [Row]
+fixTableRows = fixTableRows' $ repeat Nothing
+  where
+    fixTableRows' oldHang (Row attr cells : rs)
+      = let (newHang, cells') = fixTableRow oldHang cells
+            rs'               = fixTableRows' newHang rs
+        in Row attr cells' : rs'
+    fixTableRows' _ [] = []
+
+-- The overhang is represented as Just (relative cell dimensions) or
+-- Nothing for an empty grid space.
+fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
+fixTableRow oldHang cells
+  -- If there's overhang, drop cells until their total width meets the
+  -- width of the occupied grid spaces (or we run out)
+  | (n, prefHang, restHang) <- splitHang oldHang
+  , n > 0
+  = let cells' = dropToWidth getCellW n cells
+        (restHang', cells'') = fixTableRow restHang cells'
+    in (prefHang restHang', cells'')
+  -- Otherwise record the overhang of a pending cell and fix the rest
+  -- of the row
+  | c@(Cell _ _ h w _):cells' <- cells
+  = let h' = max 1 h
+        w' = max 1 w
+        oldHang' = dropToWidth getHangW w' oldHang
+        (newHang, cells'') = fixTableRow oldHang' cells'
+    in (toHang w' h' <> newHang, c : cells'')
+  | otherwise
+  = (oldHang, [])
+  where
+    getCellW (Cell _ _ _ w _) = w
+    getHangW = maybe 1 fst
+    getCS (ColSpan n) = n
+
+    toHang c r
+      | r > 1     = [Just (c, r)]
+      | otherwise = replicate (getCS c) Nothing
+
+    -- Take the prefix of the overhang list representing filled grid
+    -- spaces. Also return the remainder and the length of this prefix.
+    splitHang = splitHang' 0 id
+
+    splitHang' !n l (Just (c, r):xs)
+      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
+    splitHang' n l xs = (n, l, xs)
+
+    -- Drop list items until the total width of the dropped items
+    -- exceeds the passed width.
+    dropToWidth _     n l | n < 1 = l
+    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
+    dropToWidth _     _ []        = []
+
+simpTable :: PandocMonad m
+          => LP m Blocks
+          -> LP m Inlines
+          -> Text
+          -> Bool
+          -> LP m Blocks
+simpTable blocks inline envname hasWidthParameter = try $ do
+  when hasWidthParameter $ () <$ tokWith inline
+  skipopts
+  colspecs <- parseAligns
+  let (aligns, widths, prefsufs) = unzip3 colspecs
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  skipMany hline
+  spaces
+  header' <- option [] . try . fmap (:[]) $
+             parseTableRow blocks inline envname prefsufs <*
+               lbreak <* many1 hline
+  spaces
+  rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
+                    (lbreak <* optional (skipMany hline))
+  spaces
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  lookAhead $ controlSeq "end" -- make sure we're at end
+  let th  = fixTableHead $ TableHead nullAttr header'
+  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
+  let tf  = TableFoot nullAttr []
+  return $ table emptyCaption (zip aligns widths) th tbs tf
+
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+  where go (Table attr c spec th tb tf) = do
+          st <- getState
+          let mblabel = sLastLabel st
+          capt <- case (sCaption st, mblabel) of
+                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
+                   (Just ils, Just lab) -> do
+                     num <- getNextNumber sLastTableNum
+                     setState
+                       st{ sLastTableNum = num
+                         , sLabels = M.insert lab
+                                    [Str (renderDottedNum num)]
+                                    (sLabels st) }
+                     return $ caption Nothing (plain ils) -- add number??
+                   (Nothing, _)  -> return c
+          let attr' = case (attr, mblabel) of
+                        ((_,classes,kvs), Just ident) ->
+                           (ident,classes,kvs)
+                        _ -> attr
+          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+        go x = return x
+
+-- TODO: For now we add a Div to contain table attributes, since
+-- most writers don't do anything yet with attributes on Table.
+-- This can be removed when that changes.
+addAttrDiv :: Attr -> Block -> Block
+addAttrDiv ("",[],[]) b = b
+addAttrDiv attr b       = Div attr [b]
-- 
cgit v1.2.3


From 2faa57e8e96d9905676e30f62d34c06b074acf76 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 09:12:09 -0800
Subject: Factor out T.P.Readers.LaTeX.Citation.

---
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs          | 202 +++-------------------------
 src/Text/Pandoc/Readers/LaTeX/Citation.hs | 210 ++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs  |   5 +
 4 files changed, 232 insertions(+), 186 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Citation.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 9149c4f8f..567b650a1 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -631,6 +631,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 831c5df05..2d1b83486 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -56,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
+import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
@@ -169,7 +170,7 @@ rawLaTeXInline = do
   let toks = tokenize "source" inp
   raw <- snd <$>
           (   rawLaTeXParser toks True
-              (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+              (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
               inlines
           <|> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
               inlines
@@ -311,7 +312,7 @@ blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
 blockquote cvariant mblang = do
   citepar <- if cvariant
                 then (\xs -> para (cite xs mempty))
-                       <$> cites NormalCitation False
+                       <$> cites inline NormalCitation False
                 else option mempty $ para <$> bracketed inline
   let lang = mblang >>= babelLangToBCP47
   let langdiv = case lang of
@@ -425,116 +426,6 @@ pDollarsMath n = do
                 else mzero
        _ -> (tk :) <$> pDollarsMath n
 
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _      = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
-  let k = last ks
-  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
-  first  <- optionMaybe $ toList <$> opt
-  second <- optionMaybe $ toList <$> opt
-  keys <- try $ bgroup *> manyTill citationLabel egroup
-  let (pre, suf) = case (first  , second ) of
-        (Just s , Nothing) -> (mempty, s )
-        (Just s , Just t ) -> (s , t )
-        _                  -> (mempty, mempty)
-      conv k = Citation { citationId      = k
-                        , citationPrefix  = []
-                        , citationSuffix  = []
-                        , citationMode    = NormalCitation
-                        , citationHash    = 0
-                        , citationNoteNum = 0
-                        }
-  return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m Text
-citationLabel  = do
-  sp
-  untokenize <$>
-    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
-          <* sp
-          <* optional (symbol ',')
-          <* sp)
-  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
-
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
-  cits <- if multi
-             then do
-               multiprenote <- optionMaybe $ toList <$> paropt
-               multipostnote <- optionMaybe $ toList <$> paropt
-               let (pre, suf) = case (multiprenote, multipostnote) of
-                     (Just s , Nothing) -> (mempty, s)
-                     (Nothing , Just t) -> (mempty, t)
-                     (Just s , Just t ) -> (s, t)
-                     _                  -> (mempty, mempty)
-               tempCits <- many1 simpleCiteArgs
-               case tempCits of
-                 (k:ks) -> case ks of
-                             (_:_) -> return $ (addMprenote pre k : init ks) ++
-                                                 [addMpostnote suf (last ks)]
-                             _ -> return [addMprenote pre (addMpostnote suf k)]
-                 _ -> return [[]]
-             else count 1 simpleCiteArgs
-  let cs = concat cits
-  return $ case mode of
-        AuthorInText -> case cs of
-                             (c:rest) -> c {citationMode = mode} : rest
-                             []       -> []
-        _            -> map (\a -> a {citationMode = mode}) cs
-  where mprenote (k:ks) = (k:ks) ++ [Space]
-        mprenote _ = mempty
-        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
-        mpostnote _ = mempty
-        addMprenote mpn (k:ks) =
-          let mpnfinal = case citationPrefix k of
-                           (_:_) -> mprenote mpn
-                           _ -> mpn
-          in addPrefix mpnfinal (k:ks)
-        addMprenote _ _ = []
-        addMpostnote = addSuffix . mpostnote
-
-citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
-  (c,raw) <- withRaw $ cites mode multi
-  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
-
-handleCitationPart :: Inlines -> [Citation]
-handleCitationPart ils =
-  let isCite Cite{} = True
-      isCite _      = False
-      (pref, rest) = break isCite (toList ils)
-  in case rest of
-          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
-          _                -> []
-
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
-  (cs, raw) <-
-    withRaw $ concat <$> do
-      bgroup
-      items <- mconcat <$>
-                many1 (notFollowedBy (symbol ';') >> inline)
-                  `sepBy1` symbol ';'
-      egroup
-      return $ map handleCitationPart items
-  case cs of
-       []       -> mzero
-       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
-                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
-
-inNote :: Inlines -> Inlines
-inNote ils =
-  note $ para $ ils <> str "."
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -553,19 +444,6 @@ inlineCommand' = try $ do
 tok :: PandocMonad m => LP m Inlines
 tok = tokWith inline
 
-opt :: PandocMonad m => LP m Inlines
-opt = do
-  toks <- try (sp *> bracketedToks <* sp)
-  -- now parse the toks as inlines
-  st <- getState
-  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
-  case parsed of
-    Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
-
-paropt :: PandocMonad m => LP m Inlines
-paropt = parenWrapped inline
-
 inBrackets :: Inlines -> Inlines
 inBrackets x = str "[" <> x <> str "]"
 
@@ -629,6 +507,7 @@ inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands =
   M.union inlineLanguageCommands $
   M.union (accentCommands tok) $
+  M.union (citationCommands inline) $
   M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
@@ -703,7 +582,7 @@ inlineCommands =
   , ("/", pure mempty) -- italic correction
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
-                            optional opt
+                            optional rawopt
                             spaces))
   , (",", lit "\8198")
   , ("@", pure mempty)
@@ -761,61 +640,6 @@ inlineCommands =
   , ("proofname", doTerm Translations.Proof)
   , ("glossaryname", doTerm Translations.Glossary)
   , ("lstlistingname", doTerm Translations.Listing)
-  , ("cite", citation "cite" NormalCitation False)
-  , ("Cite", citation "Cite" NormalCitation False)
-  , ("citep", citation "citep" NormalCitation False)
-  , ("citep*", citation "citep*" NormalCitation False)
-  , ("citeal", citation "citeal" NormalCitation False)
-  , ("citealp", citation "citealp" NormalCitation False)
-  , ("citealp*", citation "citealp*" NormalCitation False)
-  , ("autocite", citation "autocite" NormalCitation False)
-  , ("smartcite", citation "smartcite" NormalCitation False)
-  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
-  , ("parencite", citation "parencite" NormalCitation False)
-  , ("supercite", citation "supercite" NormalCitation False)
-  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
-  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
-  , ("citeyear", citation "citeyear" SuppressAuthor False)
-  , ("autocite*", citation "autocite*" SuppressAuthor False)
-  , ("cite*", citation "cite*" SuppressAuthor False)
-  , ("parencite*", citation "parencite*" SuppressAuthor False)
-  , ("textcite", citation "textcite" AuthorInText False)
-  , ("citet", citation "citet" AuthorInText False)
-  , ("citet*", citation "citet*" AuthorInText False)
-  , ("citealt", citation "citealt" AuthorInText False)
-  , ("citealt*", citation "citealt*" AuthorInText False)
-  , ("textcites", citation "textcites" AuthorInText True)
-  , ("cites", citation "cites" NormalCitation True)
-  , ("autocites", citation "autocites" NormalCitation True)
-  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
-  , ("parencites", citation "parencites" NormalCitation True)
-  , ("supercites", citation "supercites" NormalCitation True)
-  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
-  , ("Autocite", citation "Autocite" NormalCitation False)
-  , ("Smartcite", citation "Smartcite" NormalCitation False)
-  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
-  , ("Parencite", citation "Parencite" NormalCitation False)
-  , ("Supercite", citation "Supercite" NormalCitation False)
-  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
-  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
-  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
-  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
-  , ("Cite*", citation "Cite*" SuppressAuthor False)
-  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
-  , ("Textcite", citation "Textcite" AuthorInText False)
-  , ("Textcites", citation "Textcites" AuthorInText True)
-  , ("Cites", citation "Cites" NormalCitation True)
-  , ("Autocites", citation "Autocites" NormalCitation True)
-  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
-  , ("Parencites", citation "Parencites" NormalCitation True)
-  , ("Supercites", citation "Supercites" NormalCitation True)
-  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
-  , ("citetext", complexNatbibCitation NormalCitation)
-  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
-                        complexNatbibCitation AuthorInText)
-                   <|> citation "citeauthor" AuthorInText False)
-  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
-                          addMeta "nocite"))
   , ("hyperlink", hyperlink)
   , ("hypertarget", hypertargetInline)
   -- glossaries package
@@ -918,7 +742,7 @@ inlineCommands =
 
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
-  optional opt
+  optional rawopt
   x <- tok
   y <- tok
   return $ extractSpaces (spanWith ("",["lettrine"],[])) x <> smallcaps y
@@ -1168,6 +992,16 @@ inline = (mempty <$ comment)
 inlines :: PandocMonad m => LP m Inlines
 inlines = mconcat <$> many inline
 
+opt :: PandocMonad m => LP m Inlines
+opt = do
+  toks <- try (sp *> bracketedToks <* sp)
+  -- now parse the toks as inlines
+  st <- getState
+  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
+  case parsed of
+    Right result -> return result
+    Left e       -> throwError $ PandocParsecError (untokenize toks) e
+
 -- block elements:
 
 preamble :: PandocMonad m => LP m Blocks
@@ -1261,10 +1095,6 @@ insertIncluded defaultExtension f' = do
   getInput >>= setInput . (tokenize f contents ++)
   updateState dropLatestIncludeFile
 
-addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
-addMeta field val = updateState $ \st ->
-   st{ sMeta = addMetaField field val $ sMeta st }
-
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
new file mode 100644
index 000000000..655823dab
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Citation
+  ( citationCommands
+  , cites
+  )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (mzero)
+import Control.Monad.Trans (lift)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParsecError))
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+citationCommands inline =
+  let citation = citationWith inline
+      tok = spaces *> grouped inline
+   in M.fromList
+  [ ("cite", citation "cite" NormalCitation False)
+  , ("Cite", citation "Cite" NormalCitation False)
+  , ("citep", citation "citep" NormalCitation False)
+  , ("citep*", citation "citep*" NormalCitation False)
+  , ("citeal", citation "citeal" NormalCitation False)
+  , ("citealp", citation "citealp" NormalCitation False)
+  , ("citealp*", citation "citealp*" NormalCitation False)
+  , ("autocite", citation "autocite" NormalCitation False)
+  , ("smartcite", citation "smartcite" NormalCitation False)
+  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+  , ("parencite", citation "parencite" NormalCitation False)
+  , ("supercite", citation "supercite" NormalCitation False)
+  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+  , ("citeyear", citation "citeyear" SuppressAuthor False)
+  , ("autocite*", citation "autocite*" SuppressAuthor False)
+  , ("cite*", citation "cite*" SuppressAuthor False)
+  , ("parencite*", citation "parencite*" SuppressAuthor False)
+  , ("textcite", citation "textcite" AuthorInText False)
+  , ("citet", citation "citet" AuthorInText False)
+  , ("citet*", citation "citet*" AuthorInText False)
+  , ("citealt", citation "citealt" AuthorInText False)
+  , ("citealt*", citation "citealt*" AuthorInText False)
+  , ("textcites", citation "textcites" AuthorInText True)
+  , ("cites", citation "cites" NormalCitation True)
+  , ("autocites", citation "autocites" NormalCitation True)
+  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+  , ("parencites", citation "parencites" NormalCitation True)
+  , ("supercites", citation "supercites" NormalCitation True)
+  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+  , ("Autocite", citation "Autocite" NormalCitation False)
+  , ("Smartcite", citation "Smartcite" NormalCitation False)
+  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
+  , ("Parencite", citation "Parencite" NormalCitation False)
+  , ("Supercite", citation "Supercite" NormalCitation False)
+  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+  , ("Cite*", citation "Cite*" SuppressAuthor False)
+  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+  , ("Textcite", citation "Textcite" AuthorInText False)
+  , ("Textcites", citation "Textcites" AuthorInText True)
+  , ("Cites", citation "Cites" NormalCitation True)
+  , ("Autocites", citation "Autocites" NormalCitation True)
+  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
+  , ("Parencites", citation "Parencites" NormalCitation True)
+  , ("Supercites", citation "Supercites" NormalCitation True)
+  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+  , ("citetext", complexNatbibCitation inline NormalCitation)
+  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
+                        complexNatbibCitation inline AuthorInText)
+                   <|> citation "citeauthor" AuthorInText False)
+  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+                          addMeta "nocite"))
+  ]
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _      = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+  let k = last ks
+  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation]
+simpleCiteArgs inline = try $ do
+  first  <- optionMaybe $ toList <$> opt
+  second <- optionMaybe $ toList <$> opt
+  keys <- try $ bgroup *> manyTill citationLabel egroup
+  let (pre, suf) = case (first  , second ) of
+        (Just s , Nothing) -> (mempty, s )
+        (Just s , Just t ) -> (s , t )
+        _                  -> (mempty, mempty)
+      conv k = Citation { citationId      = k
+                        , citationPrefix  = []
+                        , citationSuffix  = []
+                        , citationMode    = NormalCitation
+                        , citationHash    = 0
+                        , citationNoteNum = 0
+                        }
+  return $ addPrefix pre $ addSuffix suf $ map conv keys
+ where
+  opt :: PandocMonad m => LP m Inlines
+  opt = do
+    toks <- try (sp *> bracketedToks <* sp)
+    -- now parse the toks as inlines
+    st <- getState
+    parsed <- lift $
+      runParserT (mconcat <$> many inline) st "bracketed option" toks
+    case parsed of
+      Right result -> return result
+      Left e       -> throwError $ PandocParsecError (untokenize toks) e
+
+
+
+citationLabel :: PandocMonad m => LP m Text
+citationLabel  = do
+  sp
+  untokenize <$>
+    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+          <* sp
+          <* optional (symbol ',')
+          <* sp)
+  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
+
+cites :: PandocMonad m
+      => LP m Inlines -> CitationMode -> Bool -> LP m [Citation]
+cites inline mode multi = try $ do
+  let paropt = parenWrapped inline
+  cits <- if multi
+             then do
+               multiprenote <- optionMaybe $ toList <$> paropt
+               multipostnote <- optionMaybe $ toList <$> paropt
+               let (pre, suf) = case (multiprenote, multipostnote) of
+                     (Just s , Nothing) -> (mempty, s)
+                     (Nothing , Just t) -> (mempty, t)
+                     (Just s , Just t ) -> (s, t)
+                     _                  -> (mempty, mempty)
+               tempCits <- many1 $ simpleCiteArgs inline
+               case tempCits of
+                 (k:ks) -> case ks of
+                             (_:_) -> return $ (addMprenote pre k : init ks) ++
+                                                 [addMpostnote suf (last ks)]
+                             _ -> return [addMprenote pre (addMpostnote suf k)]
+                 _ -> return [[]]
+             else count 1 $ simpleCiteArgs inline
+  let cs = concat cits
+  return $ case mode of
+        AuthorInText -> case cs of
+                             (c:rest) -> c {citationMode = mode} : rest
+                             []       -> []
+        _            -> map (\a -> a {citationMode = mode}) cs
+  where mprenote (k:ks) = (k:ks) ++ [Space]
+        mprenote _ = mempty
+        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+        mpostnote _ = mempty
+        addMprenote mpn (k:ks) =
+          let mpnfinal = case citationPrefix k of
+                           (_:_) -> mprenote mpn
+                           _ -> mpn
+          in addPrefix mpnfinal (k:ks)
+        addMprenote _ _ = []
+        addMpostnote = addSuffix . mpostnote
+
+citationWith :: PandocMonad m
+             => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines
+citationWith inline name mode multi = do
+  (c,raw) <- withRaw $ cites inline mode multi
+  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+  let isCite Cite{} = True
+      isCite _      = False
+      (pref, rest) = break isCite (toList ils)
+  in case rest of
+          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+          _                -> []
+
+complexNatbibCitation :: PandocMonad m
+                      => LP m Inlines -> CitationMode -> LP m Inlines
+complexNatbibCitation inline mode = try $ do
+  (cs, raw) <-
+    withRaw $ concat <$> do
+      bgroup
+      items <- mconcat <$>
+                many1 (notFollowedBy (symbol ';') >> inline)
+                  `sepBy1` symbol ';'
+      egroup
+      return $ map handleCitationPart items
+  case cs of
+       []       -> mzero
+       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+  note $ para $ ils <> str "."
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 4a9fa03ad..a5a39d3c9 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -85,6 +85,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , setCaption
   , resetCaption
   , env
+  , addMeta
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -947,3 +948,7 @@ tokWith inlineParser = try $ spaces >>
   where singleChar' = do
           Tok _ _ t <- singleChar
           return $ str t
+
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+   st{ sMeta = addMetaField field val $ sMeta st }
-- 
cgit v1.2.3


From 5e571d963587866957a26d382aeab9935311fb9d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 09:39:01 -0800
Subject: LaTeX reader: remove two unnecessary parsers in inline.

These are handled anyway by regularSymbol.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2d1b83486..3935c92ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -975,9 +975,7 @@ inline = (mempty <$ comment)
      <|> doubleQuote
      <|> singleQuote
      <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
-     <|> (str "”" <$ symbol '”')
      <|> (str "’" <$ symbol '\'')
-     <|> (str "’" <$ symbol '’')
      <|> (str "\160" <$ symbol '~')
      <|> dollarsMath
      <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
-- 
cgit v1.2.3


From 564c39beef36bf008fa5d2c840560ef064152e7d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 09:49:34 -0800
Subject: Move setDefaultLanguage to T.P.Readers.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs      | 16 ++--------------
 src/Text/Pandoc/Readers/LaTeX/Lang.hs | 22 ++++++++++++++++++++--
 2 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3935c92ef..2155379db 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -44,7 +44,7 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
                                       readFileFromDirs, report, setResourcePath,
-                                      setTranslations, translateTerm)
+                                      translateTerm)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
 import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
@@ -59,7 +59,7 @@ import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
-                                       babelLangToBCP47)
+                                       babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
@@ -1856,15 +1856,3 @@ block = do
 blocks :: PandocMonad m => LP m Blocks
 blocks = mconcat <$> many block
 
-setDefaultLanguage :: PandocMonad m => LP m Blocks
-setDefaultLanguage = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  polylang <- untokenize <$> braced
-  case M.lookup polylang polyglossiaLangToBCP47 of
-       Nothing -> return mempty -- TODO mzero? warning?
-       Just langFunc -> do
-         let l = langFunc o
-         setTranslations l
-         updateState $ setMeta "lang" $ str (renderLang l)
-         return mempty
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 5f634818e..adbeaa6d4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -12,13 +12,31 @@ Functions for parsing polyglossia and babel language specifiers to
 BCP47 'Lang'.
 -}
 module Text.Pandoc.Readers.LaTeX.Lang
-  ( polyglossiaLangToBCP47
+  ( setDefaultLanguage
+  , polyglossiaLangToBCP47
   , babelLangToBCP47
   )
 where
 import qualified Data.Map as M
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang(..))
+import Text.Pandoc.BCP47 (Lang(..), renderLang)
+import Text.Pandoc.Class (PandocMonad(..), setTranslations)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, option)
+import Text.Pandoc.Builder (Blocks, setMeta, str)
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  polylang <- untokenize <$> braced
+  case M.lookup polylang polyglossiaLangToBCP47 of
+       Nothing -> return mempty -- TODO mzero? warning?
+       Just langFunc -> do
+         let l = langFunc o
+         setTranslations l
+         updateState $ setMeta "lang" $ str (renderLang l)
+         return mempty
 
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
-- 
cgit v1.2.3


From f6cf03857b59776f4f44ea831787231f7f93da96 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 12:52:41 -0800
Subject: LaTeX reader efficiency improvements.

In conjunction with other changes this makes the reader
almost twice as fast on our benchmark as it was on Feb. 10.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 73 +++++++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 31 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2155379db..4956b90cb 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -191,12 +191,6 @@ inlineCommand = do
 word :: PandocMonad m => LP m Inlines
 word = str . untoken <$> satisfyTok isWordTok
 
-regularSymbol :: PandocMonad m => LP m Inlines
-regularSymbol = str . untoken <$> satisfyTok isRegularSymbol
-  where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
-        isRegularSymbol _                = False
-        isSpecial c = c `Set.member` specialChars
-
 inlineGroup :: PandocMonad m => LP m Inlines
 inlineGroup = do
   ils <- grouped inline
@@ -961,31 +955,48 @@ lookupListDefault d = (fromMaybe d .) . lookupList
   where lookupList l m = msum $ map (`M.lookup` m) l
 
 inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
-     <|> (space  <$ whitespace)
-     <|> (softbreak <$ endline)
-     <|> word
-     <|> macroDef (rawInline "latex")
-     <|> inlineCommand'
-     <|> inlineEnvironment
-     <|> inlineGroup
-     <|> (symbol '-' *>
-           option (str "-") (symbol '-' *>
-             option (str "–") (str "—" <$ symbol '-')))
-     <|> doubleQuote
-     <|> singleQuote
-     <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
-     <|> (str "’" <$ symbol '\'')
-     <|> (str "\160" <$ symbol '~')
-     <|> dollarsMath
-     <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
-     <|> (str . T.singleton <$> primEscape)
-     <|> regularSymbol
-     <|> (do res <- symbolIn "#^'`\"[]&"
-             pos <- getPosition
-             let s = untoken res
-             report $ ParsingUnescaped s pos
-             return $ str s)
+inline = do
+  Tok pos toktype t <- lookAhead anyTok
+  let symbolAsString = str . untoken <$> anySymbol
+  let unescapedSymbolAsString =
+        do s <- untoken <$> anySymbol
+           report $ ParsingUnescaped s pos
+           return $ str s
+  case toktype of
+    Comment     -> mempty <$ comment
+    Spaces      -> space <$ whitespace
+    Newline     -> softbreak <$ endline
+    Word        -> word
+    Esc1        -> str . T.singleton <$> primEscape
+    Esc2        -> str . T.singleton <$> primEscape
+    Symbol      ->
+      case t of
+        "-"     -> symbol '-' *>
+                    option (str "-") (symbol '-' *>
+                      option (str "–") (str "—" <$ symbol '-'))
+        "'"     -> symbol '\'' *>
+                  option (str "’") (str  "”" <$ symbol '\'')
+        "~"     -> str "\160" <$ symbol '~'
+        "`"     -> doubleQuote <|> singleQuote <|> symbolAsString
+        "\""    -> doubleQuote <|> singleQuote <|> symbolAsString
+        "“"     -> doubleQuote <|> symbolAsString
+        "‘"     -> singleQuote <|> symbolAsString
+        "$"     -> dollarsMath <|> unescapedSymbolAsString
+        "|"     -> (guardEnabled Ext_literate_haskell *>
+                    symbol '|' *> doLHSverb) <|> symbolAsString
+        "{"     -> inlineGroup
+        "#"     -> unescapedSymbolAsString
+        "&"     -> unescapedSymbolAsString
+        "_"     -> unescapedSymbolAsString
+        "^"     -> unescapedSymbolAsString
+        "\\"    -> mzero
+        "}"     -> mzero
+        _       -> symbolAsString
+    CtrlSeq _   -> macroDef (rawInline "latex")
+                  <|> inlineCommand'
+                  <|> inlineEnvironment
+                  <|> inlineGroup
+    _           -> mzero
 
 inlines :: PandocMonad m => LP m Inlines
 inlines = mconcat <$> many inline
-- 
cgit v1.2.3


From cc543cf5b64cccc91ad6a1a455831b8c3f6d80b2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 14:34:04 -0800
Subject: LaTeX reader: another small efficiency improvement.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4956b90cb..4062e8a53 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1855,12 +1855,18 @@ orderedList' = try $ do
 
 block :: PandocMonad m => LP m Blocks
 block = do
-  res <- (mempty <$ spaces1)
-    <|> environment
-    <|> macroDef (rawBlock "latex")
-    <|> blockCommand
-    <|> paragraph
-    <|> grouped block
+  Tok _ toktype _ <- lookAhead anyTok
+  res <- (case toktype of
+            Newline           -> mempty <$ spaces1
+            Spaces            -> mempty <$ spaces1
+            Comment           -> mempty <$ spaces1
+            Word              -> paragraph
+            CtrlSeq "begin"   -> environment
+            CtrlSeq _         -> macroDef (rawBlock "latex")
+                               <|> blockCommand
+            _                 -> mzero)
+          <|> paragraph
+          <|> grouped block
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-- 
cgit v1.2.3


From 7229d068c9f63b6f5cda198815a1bd03473cfdcf Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 15:18:31 -0800
Subject: Markdown reader efficiency improvements.

Benchmarks show that these make the reader 13-17% faster,
depending on extensions.
---
 src/Text/Pandoc/Readers/Markdown.hs | 390 +++++++++++++++++++-----------------
 1 file changed, 208 insertions(+), 182 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e46553dd8..91691c675 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -247,51 +247,60 @@ titleBlock :: PandocMonad m => MarkdownParser m ()
 titleBlock = pandocTitleBlock <|> mmdTitleBlock
 
 pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
-pandocTitleBlock = try $ do
+pandocTitleBlock = do
   guardEnabled Ext_pandoc_title_block
   lookAhead (char '%')
-  title <- option mempty titleLine
-  author <- option (return []) authorsLine
-  date <- option mempty dateLine
-  optional blanklines
-  let meta' = do title' <- title
-                 author' <- author
-                 date' <- date
-                 return $
-                     (if null title' then id else B.setMeta "title" title')
-                   . (if null author' then id else B.setMeta "author" author')
-                   . (if null date' then id else B.setMeta "date" date')
-                   $ nullMeta
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+  try $ do
+    title <- option mempty titleLine
+    author <- option (return []) authorsLine
+    date <- option mempty dateLine
+    optional blanklines
+    let meta' = do title' <- title
+                   author' <- author
+                   date' <- date
+                   return $
+                       (if null title'
+                           then id
+                           else B.setMeta "title" title')
+                     . (if null author'
+                           then id
+                           else B.setMeta "author" author')
+                     . (if null date'
+                           then id
+                           else B.setMeta "date" date')
+                     $ nullMeta
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
 yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-yamlMetaBlock = try $ do
+yamlMetaBlock = do
   guardEnabled Ext_yaml_metadata_block
-  string "---"
-  blankline
-  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
-  rawYamlLines <- manyTill anyLine stopLine
-  -- by including --- and ..., we allow yaml blocks with just comments:
-  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
-  optional blanklines
-  newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
-              $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-  -- Since `<>` is left-biased, existing values are not touched:
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
-  return mempty
+  try $ do
+    string "---"
+    blankline
+    notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+    rawYamlLines <- manyTill anyLine stopLine
+    -- by including --- and ..., we allow yaml blocks with just comments:
+    let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+    optional blanklines
+    newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
+                $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+    -- Since `<>` is left-biased, existing values are not touched:
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
+    return mempty
 
 stopLine :: PandocMonad m => MarkdownParser m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
 
 mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
-mmdTitleBlock = try $ do
+mmdTitleBlock = do
   guardEnabled Ext_mmd_title_block
-  firstPair <- kvPair False
-  restPairs <- many (kvPair True)
-  let kvPairs = firstPair : restPairs
-  blanklines
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <>
-                             return (Meta $ M.fromList kvPairs) }
+  try $ do
+    firstPair <- kvPair False
+    restPairs <- many (kvPair True)
+    let kvPairs = firstPair : restPairs
+    blanklines
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <>
+                               return (Meta $ M.fromList kvPairs) }
 
 kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue)
 kvPair allowEmpty = try $ do
@@ -661,15 +670,15 @@ codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
 codeBlockFenced = try $ do
   indentchars <- nonindentSpaces
   let indentLevel = T.length indentchars
-  c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
+  c <- (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
      <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
   size <- blockDelimiter (== c) Nothing
   skipMany spaceChar
   rawattr <-
-     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+     (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
      (Right <$> option ("",[],[])
-         (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+         ((guardEnabled Ext_fenced_code_attributes >> try attributes)
           <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar)))
   blankline
   contents <- T.intercalate "\n" <$>
@@ -1157,11 +1166,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
 --
 
 lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-lineBlock = try $ do
+lineBlock = do
   guardEnabled Ext_line_blocks
-  lines' <- lineBlockLines >>=
-            mapM (parseFromString' (trimInlinesF <$> inlines))
-  return $ B.lineBlock <$> sequence lines'
+  try $ do
+    lines' <- lineBlockLines >>=
+              mapM (parseFromString' (trimInlinesF <$> inlines))
+    return $ B.lineBlock <$> sequence lines'
 
 --
 -- Tables
@@ -1263,11 +1273,12 @@ multilineRow indices = do
 -- Parses a table caption:  inlines beginning with 'Table:'
 -- and followed by blank lines.
 tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
-tableCaption = try $ do
+tableCaption = do
   guardEnabled Ext_table_captions
-  skipNonindentSpaces
-  (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
-  trimInlinesF <$> inlines1 <* blanklines
+  try $ do
+    skipNonindentSpaces
+    (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
+    trimInlinesF <$> inlines1 <* blanklines
 
 -- Parse a simple table with '---' header and one line per row.
 simpleTable :: PandocMonad m
@@ -1436,15 +1447,14 @@ table :: PandocMonad m => MarkdownParser m (F Blocks)
 table = try $ do
   frontCaption <- option Nothing (Just <$> tableCaption)
   (aligns, widths, heads, lns) <-
-         try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
-         try (guardEnabled Ext_multiline_tables >>
-                multilineTable False) <|>
-         try (guardEnabled Ext_simple_tables >>
-                (simpleTable True <|> simpleTable False)) <|>
-         try (guardEnabled Ext_multiline_tables >>
-                multilineTable True) <|>
-         try (guardEnabled Ext_grid_tables >>
-                (gridTable False <|> gridTable True)) <?> "table"
+         (guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|>
+         (guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|>
+         (guardEnabled Ext_simple_tables >>
+                try (simpleTable True <|> simpleTable False)) <|>
+         (guardEnabled Ext_multiline_tables >>
+                try (multilineTable True)) <|>
+         (guardEnabled Ext_grid_tables >>
+                try (gridTable False <|> gridTable True)) <?> "table"
   optional blanklines
   caption <- case frontCaption of
                   Nothing -> option (return mempty) tableCaption
@@ -1478,35 +1488,37 @@ inlines1 :: PandocMonad m => MarkdownParser m (F Inlines)
 inlines1 = mconcat <$> many1 inline
 
 inline :: PandocMonad m => MarkdownParser m (F Inlines)
-inline = choice [ whitespace
-                , bareURL
-                , str
-                , endline
-                , code
-                , strongOrEmph
-                , note
-                , cite
-                , bracketedSpan
-                , link
-                , image
-                , math
-                , strikeout
-                , subscript
-                , superscript
-                , inlineNote  -- after superscript because of ^[link](/foo)^
-                , autoLink
-                , spanHtml
-                , rawHtmlInline
-                , escapedNewline
-                , escapedChar
-                , rawLaTeXInline'
-                , exampleRef
-                , smart
-                , return . B.singleton <$> charRef
-                , emoji
-                , symbol
-                , ltSign
-                ] <?> "inline"
+inline = do
+  c <- lookAhead anyChar
+  ((case c of
+     ' '     -> whitespace
+     '\t'    -> whitespace
+     '\n'    -> endline
+     '`'     -> code
+     '_'     -> strongOrEmph
+     '*'     -> strongOrEmph
+     '^'     -> superscript <|> inlineNote -- in this order bc ^[link](/foo)^
+     '['     -> note <|> cite <|> bracketedSpan <|> link
+     '!'     -> image
+     '$'     -> math
+     '~'     -> strikeout <|> subscript
+     '<'     -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
+     '\\'    -> escapedNewline <|> escapedChar <|> rawLaTeXInline'
+     '@'     -> exampleRef
+     '"'     -> smart
+     '\''    -> smart
+     '\8216' -> smart
+     '\145'  -> smart
+     '\8220' -> smart
+     '\147'  -> smart
+     '-'     -> smart
+     '.'     -> smart
+     '&'     -> return . B.singleton <$> charRef
+     ':'     -> emoji
+     _       -> mzero)
+   <|> bareURL
+   <|> str
+   <|> symbol) <?> "inline"
 
 escapedChar' :: PandocMonad m => MarkdownParser m Char
 escapedChar' = try $ do
@@ -1517,11 +1529,12 @@ escapedChar' = try $ do
      <|> oneOf "\\`*_{}[]()>#+-.!~\""
 
 escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
-escapedNewline = try $ do
+escapedNewline = do
   guardEnabled Ext_escaped_line_breaks
-  char '\\'
-  lookAhead (char '\n') -- don't consume the newline (see #3730)
-  return $ return B.linebreak
+  try $ do
+    char '\\'
+    lookAhead (char '\n') -- don't consume the newline (see #3730)
+    return $ return B.linebreak
 
 escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
 escapedChar = do
@@ -1543,19 +1556,20 @@ ltSign = do
 -- whole document has been parsed.  But we need this parser
 -- here in case citations is disabled.
 exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
-exampleRef = try $ do
+exampleRef = do
   guardEnabled Ext_example_lists
-  char '@'
-  lab <- mconcat . map T.pack <$>
-                    many (many1 alphaNum <|>
-                          try (do c <- char '_' <|> char '-'
-                                  cs <- many1 alphaNum
-                                  return (c:cs)))
-  return $ do
-    st <- askF
-    return $ case M.lookup lab (stateExamples st) of
-                  Just n  -> B.str $ tshow n
-                  Nothing -> B.str $ "@" <> lab
+  try $ do
+    char '@'
+    lab <- mconcat . map T.pack <$>
+                      many (many1 alphaNum <|>
+                            try (do c <- char '_' <|> char '-'
+                                    cs <- many1 alphaNum
+                                    return (c:cs)))
+    return $ do
+      st <- askF
+      return $ case M.lookup lab (stateExamples st) of
+                    Just n  -> B.str $ tshow n
+                    Nothing -> B.str $ "@" <> lab
 
 symbol :: PandocMonad m => MarkdownParser m (F Inlines)
 symbol = do
@@ -1582,10 +1596,10 @@ code = try $ do
                   >> count (length starts) (char '`')
                   >> notFollowedBy (char '`'))
   rawattr <-
-     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+     (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
      (Right <$> option ("",[],[])
-         (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+         (guardEnabled Ext_inline_code_attributes >> try attributes))
   return $ return $
     case rawattr of
          Left syn   -> B.rawInline syn result
@@ -1678,20 +1692,22 @@ strikeout = fmap B.strikeout <$>
           strikeEnd   = try $ string "~~"
 
 superscript :: PandocMonad m => MarkdownParser m (F Inlines)
-superscript = fmap B.superscript <$> try (do
+superscript = do
   guardEnabled Ext_superscript
-  char '^'
-  mconcat <$> many1Till (do notFollowedBy spaceChar
-                            notFollowedBy newline
-                            inline) (char '^'))
+  fmap B.superscript <$> try (do
+    char '^'
+    mconcat <$> many1Till (do notFollowedBy spaceChar
+                              notFollowedBy newline
+                              inline) (char '^'))
 
 subscript :: PandocMonad m => MarkdownParser m (F Inlines)
-subscript = fmap B.subscript <$> try (do
+subscript = do
   guardEnabled Ext_subscript
-  char '~'
-  mconcat <$> many1Till (do notFollowedBy spaceChar
-                            notFollowedBy newline
-                            inline) (char '~'))
+  fmap B.subscript <$> try (do
+    char '~'
+    mconcat <$> many1Till (do notFollowedBy spaceChar
+                              notFollowedBy newline
+                              inline) (char '~'))
 
 whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
 whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
@@ -1792,15 +1808,16 @@ link = try $ do
   regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
 
 bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
-bracketedSpan = try $ do
+bracketedSpan = do
   guardEnabled Ext_bracketed_spans
-  (lab,_) <- reference
-  attr <- attributes
-  return $ if isSmallCaps attr
-              then B.smallcaps <$> lab
-              else if isUnderline attr
-                      then B.underline <$> lab
-                      else B.spanWith attr <$> lab
+  try $ do
+    (lab,_) <- reference
+    attr <- attributes
+    return $ if isSmallCaps attr
+                then B.smallcaps <$> lab
+                else if isUnderline attr
+                        then B.underline <$> lab
+                        else B.spanWith attr <$> lab
 
 -- | We treat a span as SmallCaps if class is "smallcaps" (and
 -- no other attributes are set or if style is "font-variant:small-caps"
@@ -1879,12 +1896,13 @@ dropBrackets = dropRB . dropLB
         dropLB xs                          = xs
 
 bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
-bareURL = try $ do
+bareURL = do
   guardEnabled Ext_autolink_bare_uris
   getState >>= guard . stateAllowLinks
-  (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
-  notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
-  return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
+  try $ do
+    (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
+    notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
+    return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
 
 autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
 autoLink = try $ do
@@ -1937,21 +1955,23 @@ note = try $ do
           return $ B.note $ walk adjustCite contents'
 
 inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
-inlineNote = try $ do
+inlineNote = do
   guardEnabled Ext_inline_notes
-  char '^'
-  updateState $ \st -> st{ stateInNote = True
-                         , stateNoteNumber = stateNoteNumber st + 1 }
-  contents <- inlinesInBalancedBrackets
-  updateState $ \st -> st{ stateInNote = False }
-  return $ B.note . B.para <$> contents
+  try $ do
+    char '^'
+    updateState $ \st -> st{ stateInNote = True
+                           , stateNoteNumber = stateNoteNumber st + 1 }
+    contents <- inlinesInBalancedBrackets
+    updateState $ \st -> st{ stateInNote = False }
+    return $ B.note . B.para <$> contents
 
 rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
-rawLaTeXInline' = try $ do
+rawLaTeXInline' = do
   guardEnabled Ext_raw_tex
   notFollowedBy' rawConTeXtEnvironment
-  s <- rawLaTeXInline
-  return $ return $ B.rawInline "tex" s -- "tex" because it might be context
+  try $ do
+    s <- rawLaTeXInline
+    return $ return $ B.rawInline "tex" s -- "tex" because it might be context
 
 rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
 rawConTeXtEnvironment = try $ do
@@ -1970,55 +1990,60 @@ inBrackets parser = do
   return $ "[" <> contents <> "]"
 
 spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
-spanHtml = try $ do
+spanHtml = do
   guardEnabled Ext_native_spans
-  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
-  contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
-  let ident = fromMaybe "" $ lookup "id" attrs
-  let classes = maybe [] T.words $ lookup "class" attrs
-  let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-  return $ if isSmallCaps (ident, classes, keyvals)
-              then B.smallcaps <$> contents
-              else if isUnderline (ident, classes, keyvals)
-                      then B.underline <$> contents
-                      else B.spanWith (ident, classes, keyvals) <$> contents
+  try $ do
+    (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
+    contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
+    let ident = fromMaybe "" $ lookup "id" attrs
+    let classes = maybe [] T.words $ lookup "class" attrs
+    let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+    return $ if isSmallCaps (ident, classes, keyvals)
+                then B.smallcaps <$> contents
+                else if isUnderline (ident, classes, keyvals)
+                        then B.underline <$> contents
+                        else B.spanWith (ident, classes, keyvals) <$> contents
 
 divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
-divHtml = try $ do
+divHtml = do
   guardEnabled Ext_native_divs
-  (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
-  -- we set stateInHtmlBlock so that closing tags that can be either block or
-  -- inline will not be parsed as inline tags
-  oldInHtmlBlock <- stateInHtmlBlock <$> getState
-  updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
-  bls <- option "" (blankline >> option "" blanklines)
-  contents <- mconcat <$>
-              many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
-  closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
-  if closed
-     then do
-       updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
-       let ident = fromMaybe "" $ lookup "id" attrs
-       let classes = maybe [] T.words $ lookup "class" attrs
-       let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-       return $ B.divWith (ident, classes, keyvals) <$> contents
-     else -- avoid backtracing
-       return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+  try $ do
+    (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
+    -- we set stateInHtmlBlock so that closing tags that can be either block
+    -- or inline will not be parsed as inline tags
+    oldInHtmlBlock <- stateInHtmlBlock <$> getState
+    updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
+    bls <- option "" (blankline >> option "" blanklines)
+    contents <- mconcat <$>
+                many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
+    closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
+    if closed
+       then do
+         updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+         let ident = fromMaybe "" $ lookup "id" attrs
+         let classes = maybe [] T.words $ lookup "class" attrs
+         let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+         return $ B.divWith (ident, classes, keyvals) <$> contents
+       else -- avoid backtracing
+         return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
 
 divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
-divFenced = try $ do
+divFenced = do
   guardEnabled Ext_fenced_divs
-  string ":::"
-  skipMany (char ':')
-  skipMany spaceChar
-  attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
-  skipMany spaceChar
-  skipMany (char ':')
-  blankline
-  updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
-  bs <- mconcat <$> manyTill block divFenceEnd
-  updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
-  return $ B.divWith attribs <$> bs
+  try $ do
+    string ":::"
+    skipMany (char ':')
+    skipMany spaceChar
+    attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
+    skipMany spaceChar
+    skipMany (char ':')
+    blankline
+    updateState $ \st ->
+      st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
+    bs <- mconcat <$> manyTill block divFenceEnd
+    updateState $ \st ->
+      st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
+    return $ B.divWith attribs <$> bs
 
 divFenceEnd :: PandocMonad m => MarkdownParser m ()
 divFenceEnd = try $ do
@@ -2050,14 +2075,15 @@ emojiChars :: [Char]
 emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
 
 emoji :: PandocMonad m => MarkdownParser m (F Inlines)
-emoji = try $ do
+emoji = do
   guardEnabled Ext_emoji
-  char ':'
-  emojikey <- many1Char (oneOf emojiChars)
-  char ':'
-  case emojiToInline emojikey of
-    Just i -> return (return $ B.singleton i)
-    Nothing -> mzero
+  try $ do
+    char ':'
+    emojikey <- many1Char (oneOf emojiChars)
+    char ':'
+    case emojiToInline emojikey of
+      Just i -> return (return $ B.singleton i)
+      Nothing -> mzero
 
 -- Citations
 
-- 
cgit v1.2.3


From 36456070c4cf8a6a122bc2ec05e86dc75fe49551 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 15:36:46 -0800
Subject: Fix bug in last commit.

---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 91691c675..b1b99dfe5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1504,7 +1504,7 @@ inline = do
      '~'     -> strikeout <|> subscript
      '<'     -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
      '\\'    -> escapedNewline <|> escapedChar <|> rawLaTeXInline'
-     '@'     -> exampleRef
+     '@'     -> cite <|> exampleRef
      '"'     -> smart
      '\''    -> smart
      '\8216' -> smart
-- 
cgit v1.2.3


From d2bb0c7c8d599e6cd2aaef787b207bbfa66d4b9e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 21:05:25 -0800
Subject: Factor out T.P.Readers.LaTeX.Math.

---
 pandoc.cabal                          |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs      | 201 ++-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Math.hs | 221 ++++++++++++++++++++++++++++++++++
 3 files changed, 230 insertions(+), 193 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Math.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 61b1de0dd..56a9491f2 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -632,6 +632,7 @@ library
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.LaTeX.Citation,
+                   Text.Pandoc.Readers.LaTeX.Math,
                    Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4062e8a53..772263578 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,7 +35,6 @@ import Data.List (intercalate)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, maybeToList)
 import qualified Data.Set as Set
-import qualified Data.Sequence as Seq
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
@@ -57,6 +56,11 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
+import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
+                                       inlineEnvironment,
+                                       mathDisplay, mathInline,
+                                       newtheorem, theoremstyle, proof,
+                                       theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
@@ -387,39 +391,6 @@ nlToSpace :: Char -> Char
 nlToSpace '\n' = ' '
 nlToSpace x    = x
 
-mathDisplay :: Text -> Inlines
-mathDisplay = displayMath . trimMath
-
-mathInline :: Text -> Inlines
-mathInline = math . trimMath
-
-dollarsMath :: PandocMonad m => LP m Inlines
-dollarsMath = do
-  symbol '$'
-  display <- option False (True <$ symbol '$')
-  (do contents <- try $ untokenize <$> pDollarsMath 0
-      if display
-         then mathDisplay contents <$ symbol '$'
-         else return $ mathInline contents)
-   <|> (guard display >> return (mathInline ""))
-
--- Int is number of embedded groupings
-pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
-pDollarsMath n = do
-  tk@(Tok _ toktype t) <- anyTok
-  case toktype of
-       Symbol | t == "$"
-              , n == 0 -> return []
-              | t == "\\" -> do
-                  tk' <- anyTok
-                  (tk :) . (tk' :) <$> pDollarsMath n
-              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
-              | t == "}" ->
-                if n > 0
-                then (tk :) <$> pDollarsMath (n-1)
-                else mzero
-       _ -> (tk :) <$> pDollarsMath n
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -452,51 +423,6 @@ unescapeURL = T.concat . go . T.splitOn "\\"
       , isEscapable c = t
       | otherwise = "\\" <> t
 
-mathEnvWith :: PandocMonad m
-            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
-mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
-   where inner x = case innerEnv of
-                        Nothing -> x
-                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
-                                   "\\end{" <> y <> "}"
-
-mathEnv :: PandocMonad m => Text -> LP m Text
-mathEnv name = do
-  skipopts
-  optional blankline
-  res <- manyTill anyTok (end_ name)
-  return $ stripTrailingNewlines $ untokenize res
-
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
-  controlSeq "begin"
-  name <- untokenize <$> braced
-  M.findWithDefault mzero name inlineEnvironments
-
-inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineEnvironments = M.fromList [
-    ("displaymath", mathEnvWith id Nothing "displaymath")
-  , ("math", math <$> mathEnv "math")
-  , ("equation", mathEnvWith id Nothing "equation")
-  , ("equation*", mathEnvWith id Nothing "equation*")
-  , ("gather", mathEnvWith id (Just "gathered") "gather")
-  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
-  , ("multline", mathEnvWith id (Just "gathered") "multline")
-  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
-  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
-  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
-  , ("align", mathEnvWith id (Just "aligned") "align")
-  , ("align*", mathEnvWith id (Just "aligned") "align*")
-  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
-  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
-  , ("dmath", mathEnvWith id Nothing "dmath")
-  , ("dmath*", mathEnvWith id Nothing "dmath*")
-  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
-  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
-  , ("darray", mathEnvWith id (Just "aligned") "darray")
-  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
-  ]
-
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands =
   M.union inlineLanguageCommands $
@@ -1354,7 +1280,7 @@ blockCommands = M.fromList
    , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
    , ("signature", mempty <$ (skipopts *> authors))
    , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-   , ("newtheorem", newtheorem)
+   , ("newtheorem", newtheorem inline)
    , ("theoremstyle", theoremstyle)
    -- KOMA-Script metadata commands
    , ("extratitle", mempty <$ (skipopts *> tok >>= addMeta "extratitle"))
@@ -1473,7 +1399,7 @@ environments = M.union (tableEnvironments blocks inline) $
    , ("lilypond", rawVerbEnv "lilypond")
    , ("ly", rawVerbEnv "ly")
    -- amsthm
-   , ("proof", proof)
+   , ("proof", proof blocks opt)
    -- etoolbox
    , ("ifstrequal", ifstrequal)
    , ("newtoggle", braced >>= newToggle)
@@ -1494,128 +1420,17 @@ filecontents = try $ do
     st{ sFileContents = M.insert fp txt (sFileContents st) }
   return mempty
 
-theoremstyle :: PandocMonad m => LP m Blocks
-theoremstyle = do
-  stylename <- untokenize <$> braced
-  let mbstyle = case stylename of
-                  "plain"      -> Just PlainStyle
-                  "definition" -> Just DefinitionStyle
-                  "remark"     -> Just RemarkStyle
-                  _            -> Nothing
-  case mbstyle of
-    Nothing  -> return ()
-    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
-  return mempty
-
-newtheorem :: PandocMonad m => LP m Blocks
-newtheorem = do
-  number <- option True (False <$ symbol '*' <* sp)
-  name <- untokenize <$> braced
-  sp
-  series <- option Nothing $ Just . untokenize <$> bracketedToks
-  sp
-  showName <- tok
-  sp
-  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
-  sty <- sLastTheoremStyle <$> getState
-  let spec = TheoremSpec { theoremName = showName
-                         , theoremStyle = sty
-                         , theoremSeries = series
-                         , theoremSyncTo = syncTo
-                         , theoremNumber = number
-                         , theoremLastNum = DottedNum [0] }
-  tmap <- sTheoremMap <$> getState
-  updateState $ \s -> s{ sTheoremMap =
-                            M.insert name spec tmap }
-  return mempty
-
-proof :: PandocMonad m => LP m Blocks
-proof = do
-  title <- option (B.text "Proof") opt
-  bs <- env "proof" blocks
-  return $
-    B.divWith ("", ["proof"], []) $
-      addQed $ addTitle (B.emph (title <> ".")) bs
-
-addTitle :: Inlines -> Blocks -> Blocks
-addTitle ils bs =
-  case B.toList bs of
-    (Para xs : rest)
-      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
-    _ -> B.para ils <> bs
-
-addQed :: Blocks -> Blocks
-addQed bs =
-  case Seq.viewr (B.unMany bs) of
-    s Seq.:> Para ils
-      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
-    _ -> bs <> B.para qedSign
- where
-  qedSign = B.str "\xa0\x25FB"
-
 environment :: PandocMonad m => LP m Blocks
 environment = try $ do
   controlSeq "begin"
   name <- untokenize <$> braced
   M.findWithDefault mzero name environments <|>
-    theoremEnvironment name <|>
+    theoremEnvironment blocks opt name <|>
     if M.member name (inlineEnvironments
                        :: M.Map Text (LP PandocPure Inlines))
        then mzero
        else try (rawEnv name) <|> rawVerbEnv name
 
-theoremEnvironment :: PandocMonad m => Text -> LP m Blocks
-theoremEnvironment name = do
-  tmap <- sTheoremMap <$> getState
-  case M.lookup name tmap of
-    Nothing -> mzero
-    Just tspec -> do
-       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
-       mblabel <- option Nothing $ Just . untokenize <$>
-                   try (spaces >> controlSeq "label" >> spaces >> braced)
-       bs <- env name blocks
-       number <-
-         if theoremNumber tspec
-            then do
-               let name' = fromMaybe name $ theoremSeries tspec
-               num <- getNextNumber
-                   (maybe (DottedNum [0]) theoremLastNum .
-                    M.lookup name' . sTheoremMap)
-               updateState $ \s ->
-                 s{ sTheoremMap =
-                       M.adjust
-                       (\spec -> spec{ theoremLastNum = num })
-                       name'
-                       (sTheoremMap s)
-                  }
-
-               case mblabel of
-                 Just ident ->
-                   updateState $ \s ->
-                     s{ sLabels = M.insert ident
-                         (B.toList $
-                           theoremName tspec <> "\160" <>
-                           str (renderDottedNum num)) (sLabels s) }
-                 Nothing -> return ()
-               return $ space <> B.text (renderDottedNum num)
-            else return mempty
-       let titleEmph = case theoremStyle tspec of
-                         PlainStyle      -> B.strong
-                         DefinitionStyle -> B.strong
-                         RemarkStyle     -> B.emph
-       let title = titleEmph (theoremName tspec <> number)
-                      <> optTitle <> "." <> space
-       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
-              $ case theoremStyle tspec of
-                  PlainStyle -> walk italicize bs
-                  _          -> bs
-
-italicize :: Block -> Block
-italicize x@(Para [Image{}]) = x -- see #6925
-italicize (Para ils) = Para [Emph ils]
-italicize (Plain ils) = Plain [Emph ils]
-italicize x = x
-
 rawEnv :: PandocMonad m => Text -> LP m Blocks
 rawEnv name = do
   exts <- getOption readerExtensions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
new file mode 100644
index 000000000..5b49a0376
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -0,0 +1,221 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Math
+  ( dollarsMath
+  , inlineEnvironments
+  , inlineEnvironment
+  , mathInline
+  , mathDisplay
+  , theoremstyle
+  , theoremEnvironment
+  , newtheorem
+  , proof
+  )
+where
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Builder as B
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (trimMath, stripTrailingNewlines)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import Control.Monad (guard, mzero)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+  symbol '$'
+  display <- option False (True <$ symbol '$')
+  (do contents <- try $ untokenize <$> pDollarsMath 0
+      if display
+         then mathDisplay contents <$ symbol '$'
+         else return $ mathInline contents)
+   <|> (guard display >> return (mathInline ""))
+
+-- Int is number of embedded groupings
+pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
+pDollarsMath n = do
+  tk@(Tok _ toktype t) <- anyTok
+  case toktype of
+       Symbol | t == "$"
+              , n == 0 -> return []
+              | t == "\\" -> do
+                  tk' <- anyTok
+                  (tk :) . (tk' :) <$> pDollarsMath n
+              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
+              | t == "}" ->
+                if n > 0
+                then (tk :) <$> pDollarsMath (n-1)
+                else mzero
+       _ -> (tk :) <$> pDollarsMath n
+
+mathDisplay :: Text -> Inlines
+mathDisplay = displayMath . trimMath
+
+mathInline :: Text -> Inlines
+mathInline = math . trimMath
+
+mathEnvWith :: PandocMonad m
+            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+   where inner x = case innerEnv of
+                        Nothing -> x
+                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
+                                   "\\end{" <> y <> "}"
+
+mathEnv :: PandocMonad m => Text -> LP m Text
+mathEnv name = do
+  skipopts
+  optional blankline
+  res <- manyTill anyTok (end_ name)
+  return $ stripTrailingNewlines $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+  controlSeq "begin"
+  name <- untokenize <$> braced
+  M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+    ("displaymath", mathEnvWith id Nothing "displaymath")
+  , ("math", math <$> mathEnv "math")
+  , ("equation", mathEnvWith id Nothing "equation")
+  , ("equation*", mathEnvWith id Nothing "equation*")
+  , ("gather", mathEnvWith id (Just "gathered") "gather")
+  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+  , ("multline", mathEnvWith id (Just "gathered") "multline")
+  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+  , ("align", mathEnvWith id (Just "aligned") "align")
+  , ("align*", mathEnvWith id (Just "aligned") "align*")
+  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+  , ("dmath", mathEnvWith id Nothing "dmath")
+  , ("dmath*", mathEnvWith id Nothing "dmath*")
+  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+  , ("darray", mathEnvWith id (Just "aligned") "darray")
+  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
+  ]
+
+theoremstyle :: PandocMonad m => LP m Blocks
+theoremstyle = do
+  stylename <- untokenize <$> braced
+  let mbstyle = case stylename of
+                  "plain"      -> Just PlainStyle
+                  "definition" -> Just DefinitionStyle
+                  "remark"     -> Just RemarkStyle
+                  _            -> Nothing
+  case mbstyle of
+    Nothing  -> return ()
+    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
+  return mempty
+
+newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks
+newtheorem inline = do
+  number <- option True (False <$ symbol '*' <* sp)
+  name <- untokenize <$> braced
+  sp
+  series <- option Nothing $ Just . untokenize <$> bracketedToks
+  sp
+  showName <- tokWith inline
+  sp
+  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
+  sty <- sLastTheoremStyle <$> getState
+  let spec = TheoremSpec { theoremName = showName
+                         , theoremStyle = sty
+                         , theoremSeries = series
+                         , theoremSyncTo = syncTo
+                         , theoremNumber = number
+                         , theoremLastNum = DottedNum [0] }
+  tmap <- sTheoremMap <$> getState
+  updateState $ \s -> s{ sTheoremMap =
+                            M.insert name spec tmap }
+  return mempty
+
+theoremEnvironment :: PandocMonad m
+                   => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
+theoremEnvironment blocks opt name = do
+  tmap <- sTheoremMap <$> getState
+  case M.lookup name tmap of
+    Nothing -> mzero
+    Just tspec -> do
+       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
+       mblabel <- option Nothing $ Just . untokenize <$>
+                   try (spaces >> controlSeq "label" >> spaces >> braced)
+       bs <- env name blocks
+       number <-
+         if theoremNumber tspec
+            then do
+               let name' = fromMaybe name $ theoremSeries tspec
+               num <- getNextNumber
+                   (maybe (DottedNum [0]) theoremLastNum .
+                    M.lookup name' . sTheoremMap)
+               updateState $ \s ->
+                 s{ sTheoremMap =
+                       M.adjust
+                       (\spec -> spec{ theoremLastNum = num })
+                       name'
+                       (sTheoremMap s)
+                  }
+
+               case mblabel of
+                 Just ident ->
+                   updateState $ \s ->
+                     s{ sLabels = M.insert ident
+                         (B.toList $
+                           theoremName tspec <> "\160" <>
+                           str (renderDottedNum num)) (sLabels s) }
+                 Nothing -> return ()
+               return $ space <> B.text (renderDottedNum num)
+            else return mempty
+       let titleEmph = case theoremStyle tspec of
+                         PlainStyle      -> B.strong
+                         DefinitionStyle -> B.strong
+                         RemarkStyle     -> B.emph
+       let title = titleEmph (theoremName tspec <> number)
+                      <> optTitle <> "." <> space
+       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+              $ case theoremStyle tspec of
+                  PlainStyle -> walk italicize bs
+                  _          -> bs
+
+
+
+proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
+proof blocks opt = do
+  title <- option (B.text "Proof") opt
+  bs <- env "proof" blocks
+  return $
+    B.divWith ("", ["proof"], []) $
+      addQed $ addTitle (B.emph (title <> ".")) bs
+
+addTitle :: Inlines -> Blocks -> Blocks
+addTitle ils bs =
+  case B.toList bs of
+    (Para xs : rest)
+      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
+    _ -> B.para ils <> bs
+
+addQed :: Blocks -> Blocks
+addQed bs =
+  case Seq.viewr (B.unMany bs) of
+    s Seq.:> Para ils
+      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
+    _ -> bs <> B.para qedSign
+ where
+  qedSign = B.str "\xa0\x25FB"
+
+italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
+italicize (Para ils) = Para [Emph ils]
+italicize (Plain ils) = Plain [Emph ils]
+italicize x = x
+
+
-- 
cgit v1.2.3


From 7e38b8e55a49f027b6aaa690d84225b0374eb057 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 22:51:26 -0800
Subject: T.P.Readers.LaTeX: Don't export tokenize, untokenize.

[API change]

These were only exported for testing, which seems the
wrong thing to do.  They don't belong in the public
API and are not really usable as they are, without access
to the Tok type which is not exported.

Removed the tokenize/untokenize roundtrip test.

We put a quickcheck property in the comments which
may be used when this code is touched (if it is).
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  2 --
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  9 +++++++++
 test/Tests/Readers/LaTeX.hs              | 17 +----------------
 3 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 772263578..e63fbc185 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -22,8 +22,6 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
                                    inlineCommand,
-                                   tokenize,
-                                   untokenize
                                  ) where
 
 import Control.Applicative (many, optional, (<|>))
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index a5a39d3c9..db58b333d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -292,6 +292,15 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
            Left e   -> Prelude.fail (show e)
            Right s' -> return s'
 
+{-
+When tokenize or untokenize change, test with this
+QuickCheck property:
+
+> tokUntokRoundtrip :: String -> Bool
+> tokUntokRoundtrip s =
+>   let t = T.pack s in untokenize (tokenize "random" t) == t
+-}
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index 8385b751e..9388fd040 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -15,10 +15,8 @@ module Tests.Readers.LaTeX (tests) where
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Pandoc.Readers.LaTeX (tokenize, untokenize)
 import Test.Tasty
 import Test.Tasty.HUnit
-import Test.Tasty.QuickCheck
 import Tests.Helpers
 import Text.Pandoc
 import Text.Pandoc.Arbitrary ()
@@ -47,21 +45,8 @@ simpleTable' aligns rows
   where
     toRow = Row nullAttr . map simpleCell
 
-tokUntokRt :: String -> Bool
-tokUntokRt s = untokenize (tokenize "random" t) == t
-  where t = T.pack s
-
 tests :: [TestTree]
-tests = [ testGroup "tokenization"
-          [ testCase "tokenizer round trip on test case" $ do
-                 orig <- UTF8.readFile "../test/latex-reader.latex"
-                 let new = untokenize $ tokenize "../test/latex-reader.latex"
-                             orig
-                 assertEqual "untokenize . tokenize is identity" orig new
-          , testProperty "untokenize . tokenize is identity" tokUntokRt
-          ]
-
-        , testGroup "basic"
+tests = [ testGroup "basic"
           [ "simple" =:
             "word" =?> para "word"
           , "space" =:
-- 
cgit v1.2.3


From 6a6291d9e3c1ce9c3453318d87239b6f9260d924 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 23:05:35 -0800
Subject: Change T.P.Readers.LaTeX.SIunitx to export a command map...

instead of individual commands.
---
 src/Text/Pandoc/Readers/LaTeX.hs         | 11 ++---------
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 21 ++++++++++++++-------
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index e63fbc185..304584072 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
-import Text.Pandoc.Readers.LaTeX.SIunitx
+import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
@@ -426,6 +426,7 @@ inlineCommands =
   M.union inlineLanguageCommands $
   M.union (accentCommands tok) $
   M.union (citationCommands inline) $
+  M.union (siunitxCommands tok) $
   M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
@@ -594,14 +595,6 @@ inlineCommands =
   , ("Acfp", doAcronymPlural "full")
   , ("Acsp", doAcronymPlural "abbrv")
   , ("Aclp", doAcronymPlural "long")
-  -- siuntix
-  , ("si", skipopts *> dosi tok)
-  , ("SI", doSI tok)
-  , ("SIrange", doSIrange True tok)
-  , ("numrange", doSIrange False tok)
-  , ("numlist", doSInumlist)
-  , ("num", doSInum)
-  , ("ang", doSIang)
   -- hyphenat
   , ("bshyp", lit "\\\173")
   , ("fshyp", lit "/\173")
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index db9c276e7..1952f4e1a 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,12 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Readers.LaTeX.SIunitx
-  ( dosi
-  , doSI
-  , doSIrange
-  , doSInum
-  , doSInumlist
-  , doSIang
-  )
+  ( siunitxCommands )
 where
 import Text.Pandoc.Builder
 import Text.Pandoc.Readers.LaTeX.Parsing
@@ -21,6 +15,19 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
 
+
+siunitxCommands :: PandocMonad m
+                 => LP m Inlines -> M.Map Text (LP m Inlines)
+siunitxCommands tok = M.fromList
+  [ ("si", skipopts *> dosi tok)
+  , ("SI", doSI tok)
+  , ("SIrange", doSIrange True tok)
+  , ("numrange", doSIrange False tok)
+  , ("numlist", doSInumlist)
+  , ("num", doSInum)
+  , ("ang", doSIang)
+  ]
+
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
 dosi tok = grouped (siUnit tok) <|> siUnit tok
 
-- 
cgit v1.2.3


From 3793ed8beb5da0a8afbe8d3b121c2a1cd5bece44 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 23:43:55 -0800
Subject: Removed unnecessary pragmas.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 304584072..9ad94e417 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,6 +1,4 @@
 {-# LANGUAGE CPP                   #-}
-{-# LANGUAGE FlexibleInstances     #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE PatternGuards         #-}
 {-# LANGUAGE ScopedTypeVariables   #-}
-- 
cgit v1.2.3


From 382f0e23d22b15aaa9fe2aeb6117ef0a102e379d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 1 Mar 2021 08:55:42 -0800
Subject: Factor out T.P.Readers.LaTeX.Macro.

---
 pandoc.cabal                           |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs       | 141 +-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Macro.hs | 153 +++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+), 139 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Macro.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 56a9491f2..c3317f24b 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -634,6 +634,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Math,
                    Text.Pandoc.Readers.LaTeX.Table,
+                   Text.Pandoc.Readers.LaTeX.Macro,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad94e417..fa77595b9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -47,8 +47,7 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
-import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
-                                        ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
@@ -58,6 +57,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        newtheorem, theoremstyle, proof,
                                        theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
+import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
@@ -1027,143 +1027,6 @@ authors = try $ do
   egroup
   addMeta "author" (map trimInlines auths)
 
-macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
-macroDef constructor = do
-    (_, s) <- withRaw (commandDef <|> environmentDef)
-    (constructor (untokenize s) <$
-      guardDisabled Ext_latex_macros)
-     <|> return mempty
-  where commandDef = do
-          (name, macro') <- newcommand <|> letmacro <|> defmacro
-          guardDisabled Ext_latex_macros <|>
-           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
-        environmentDef = do
-          mbenv <- newenvironment
-          case mbenv of
-            Nothing -> return ()
-            Just (name, macro1, macro2) ->
-              guardDisabled Ext_latex_macros <|>
-                do updateState $ \s -> s{ sMacros =
-                    M.insert name macro1 (sMacros s) }
-                   updateState $ \s -> s{ sMacros =
-                    M.insert ("end" <> name) macro2 (sMacros s) }
-        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
-        -- is equivalent to
-        -- @\newcommand{\envname}[n-args][default]{begin}@
-        -- @\newcommand{\endenvname}@
-
-letmacro :: PandocMonad m => LP m (Text, Macro)
-letmacro = do
-  controlSeq "let"
-  (name, contents) <- withVerbatimMode $ do
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    optional $ symbol '='
-    spaces
-    -- we first parse in verbatim mode, and then expand macros,
-    -- because we don't want \let\foo\bar to turn into
-    -- \let\foo hello if we have previously \def\bar{hello}
-    contents <- bracedOrToken
-    return (name, contents)
-  contents' <- doMacros' 0 contents
-  return (name, Macro ExpandWhenDefined [] Nothing contents')
-
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
-  -- we use withVerbatimMode, because macros are to be expanded
-  -- at point of use, not point of definition
-  withVerbatimMode $ do
-    controlSeq "def"
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    argspecs <- many (argspecArg <|> argspecPattern)
-    contents <- bracedOrToken
-    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
-
-argspecArg :: PandocMonad m => LP m ArgSpec
-argspecArg = do
-  Tok _ (Arg i) _ <- satisfyTok isArgTok
-  return $ ArgNum i
-
-argspecPattern :: PandocMonad m => LP m ArgSpec
-argspecPattern =
-  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
-                              (toktype' == Symbol || toktype' == Word) &&
-                              (txt /= "{" && txt /= "\\" && txt /= "}")))
-
-newcommand :: PandocMonad m => LP m (Text, Macro)
-newcommand = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
-                             controlSeq "renewcommand" <|>
-                             controlSeq "providecommand" <|>
-                             controlSeq "DeclareMathOperator" <|>
-                             controlSeq "DeclareRobustCommand"
-  withVerbatimMode $ do
-    Tok _ (CtrlSeq name) txt <- do
-      optional (symbol '*')
-      anyControlSeq <|>
-        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    let argspecs = map ArgNum [1..numargs]
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    spaces
-    contents' <- bracedOrToken
-    let contents =
-         case mtype of
-              "DeclareMathOperator" ->
-                 Tok pos (CtrlSeq "mathop") "\\mathop"
-                 : Tok pos Symbol "{"
-                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
-                 : Tok pos Symbol "{"
-                 : (contents' ++
-                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
-              _                     -> contents'
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-        Just macro
-          | mtype == "newcommand" -> do
-              report $ MacroAlreadyDefined txt pos
-              return (name, macro)
-          | mtype == "providecommand" -> return (name, macro)
-        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
-
-newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
-newenvironment = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
-                             controlSeq "renewenvironment" <|>
-                             controlSeq "provideenvironment"
-  withVerbatimMode $ do
-    optional $ symbol '*'
-    spaces
-    name <- untokenize <$> braced
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    let argspecs = map (\i -> ArgNum i) [1..numargs]
-    startcontents <- spaces >> bracedOrToken
-    endcontents <- spaces >> bracedOrToken
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-         Just _
-           | mtype == "newenvironment" -> do
-               report $ MacroAlreadyDefined name pos
-               return Nothing
-           | mtype == "provideenvironment" ->
-               return Nothing
-         _ -> return $ Just (name,
-                      Macro ExpandWhenUsed argspecs optarg startcontents,
-                      Macro ExpandWhenUsed [] Nothing endcontents)
-
-bracketedNum :: PandocMonad m => LP m Int
-bracketedNum = do
-  ds <- untokenize <$> bracketedToks
-  case safeRead ds of
-       Just i -> return i
-       _      -> return 0
-
 looseItem :: PandocMonad m => LP m Blocks
 looseItem = do
   inListItem <- sInListItem <$> getState
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
new file mode 100644
index 000000000..607f5438c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -0,0 +1,153 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Macro
+  ( macroDef
+  )
+where
+import Text.Pandoc.Extensions (Extension(..))
+import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
+macroDef constructor = do
+    (_, s) <- withRaw (commandDef <|> environmentDef)
+    (constructor (untokenize s) <$
+      guardDisabled Ext_latex_macros)
+     <|> return mempty
+  where commandDef = do
+          (name, macro') <- newcommand <|> letmacro <|> defmacro
+          guardDisabled Ext_latex_macros <|>
+           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
+        environmentDef = do
+          mbenv <- newenvironment
+          case mbenv of
+            Nothing -> return ()
+            Just (name, macro1, macro2) ->
+              guardDisabled Ext_latex_macros <|>
+                do updateState $ \s -> s{ sMacros =
+                    M.insert name macro1 (sMacros s) }
+                   updateState $ \s -> s{ sMacros =
+                    M.insert ("end" <> name) macro2 (sMacros s) }
+        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+        -- is equivalent to
+        -- @\newcommand{\envname}[n-args][default]{begin}@
+        -- @\newcommand{\endenvname}@
+
+letmacro :: PandocMonad m => LP m (Text, Macro)
+letmacro = do
+  controlSeq "let"
+  (name, contents) <- withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    optional $ symbol '='
+    spaces
+    -- we first parse in verbatim mode, and then expand macros,
+    -- because we don't want \let\foo\bar to turn into
+    -- \let\foo hello if we have previously \def\bar{hello}
+    contents <- bracedOrToken
+    return (name, contents)
+  contents' <- doMacros' 0 contents
+  return (name, Macro ExpandWhenDefined [] Nothing contents')
+
+defmacro :: PandocMonad m => LP m (Text, Macro)
+defmacro = try $
+  -- we use withVerbatimMode, because macros are to be expanded
+  -- at point of use, not point of definition
+  withVerbatimMode $ do
+    controlSeq "def"
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    argspecs <- many (argspecArg <|> argspecPattern)
+    contents <- bracedOrToken
+    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
+
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
+  Tok _ (Arg i) _ <- satisfyTok isArgTok
+  return $ ArgNum i
+
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+                              (toktype' == Symbol || toktype' == Word) &&
+                              (txt /= "{" && txt /= "\\" && txt /= "}")))
+
+newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand = do
+  pos <- getPosition
+  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+                             controlSeq "renewcommand" <|>
+                             controlSeq "providecommand" <|>
+                             controlSeq "DeclareMathOperator" <|>
+                             controlSeq "DeclareRobustCommand"
+  withVerbatimMode $ do
+    Tok _ (CtrlSeq name) txt <- do
+      optional (symbol '*')
+      anyControlSeq <|>
+        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+    spaces
+    numargs <- option 0 $ try bracketedNum
+    let argspecs = map ArgNum [1..numargs]
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks
+    spaces
+    contents' <- bracedOrToken
+    let contents =
+         case mtype of
+              "DeclareMathOperator" ->
+                 Tok pos (CtrlSeq "mathop") "\\mathop"
+                 : Tok pos Symbol "{"
+                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
+                 : Tok pos Symbol "{"
+                 : (contents' ++
+                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
+              _                     -> contents'
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+        Just macro
+          | mtype == "newcommand" -> do
+              report $ MacroAlreadyDefined txt pos
+              return (name, macro)
+          | mtype == "providecommand" -> return (name, macro)
+        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
+
+newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
+newenvironment = do
+  pos <- getPosition
+  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+                             controlSeq "renewenvironment" <|>
+                             controlSeq "provideenvironment"
+  withVerbatimMode $ do
+    optional $ symbol '*'
+    spaces
+    name <- untokenize <$> braced
+    spaces
+    numargs <- option 0 $ try bracketedNum
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks
+    let argspecs = map (\i -> ArgNum i) [1..numargs]
+    startcontents <- spaces >> bracedOrToken
+    endcontents <- spaces >> bracedOrToken
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+         Just _
+           | mtype == "newenvironment" -> do
+               report $ MacroAlreadyDefined name pos
+               return Nothing
+           | mtype == "provideenvironment" ->
+               return Nothing
+         _ -> return $ Just (name,
+                      Macro ExpandWhenUsed argspecs optarg startcontents,
+                      Macro ExpandWhenUsed [] Nothing endcontents)
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+  ds <- untokenize <$> bracketedToks
+  case safeRead ds of
+       Just i -> return i
+       _      -> return 0
-- 
cgit v1.2.3


From 7f1b933aaacf4a01fb23fa4989f190098e96e702 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 1 Mar 2021 09:02:10 -0800
Subject: Make T.P.Readers.LaTeX.Types an unexported module.

[API change]

This is really an implementation detail that shouldn't be
exposed in the public API.
---
 pandoc.cabal                     | 2 +-
 src/Text/Pandoc/Readers/LaTeX.hs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index c3317f24b..ad325ee24 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -518,7 +518,6 @@ library
                    Text.Pandoc.Readers,
                    Text.Pandoc.Readers.HTML,
                    Text.Pandoc.Readers.LaTeX,
-                   Text.Pandoc.Readers.LaTeX.Types,
                    Text.Pandoc.Readers.Markdown,
                    Text.Pandoc.Readers.CommonMark,
                    Text.Pandoc.Readers.Creole,
@@ -627,6 +626,7 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
+                   Text.Pandoc.Readers.LaTeX.Types,
                    Text.Pandoc.Readers.LaTeX.Parsing,
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index fa77595b9..fc85f0545 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -19,7 +19,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    applyMacros,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
-                                   inlineCommand,
+                                   inlineCommand
                                  ) where
 
 import Control.Applicative (many, optional, (<|>))
-- 
cgit v1.2.3


From bbcc1501a5fa6b40ded88f6738d35ce7a8079313 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 10:05:46 -0800
Subject: Split out T.P.Readers.LaTeX.Inline.

---
 pandoc.cabal                            |  11 +-
 src/Text/Pandoc/Readers/LaTeX.hs        | 474 ++++++++++----------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 275 ++++++++++++++++++
 3 files changed, 419 insertions(+), 341 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Inline.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index d457a0620..211327642 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,15 +626,16 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Types,
-                   Text.Pandoc.Readers.LaTeX.Parsing,
-                   Text.Pandoc.Readers.LaTeX.Lang,
-                   Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
+                   Text.Pandoc.Readers.LaTeX.Lang,
+                   Text.Pandoc.Readers.LaTeX.Macro,
                    Text.Pandoc.Readers.LaTeX.Math,
+                   Text.Pandoc.Readers.LaTeX.Parsing,
+                   Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Table,
-                   Text.Pandoc.Readers.LaTeX.Macro,
+                   Text.Pandoc.Readers.LaTeX.Types,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index fc85f0545..a27135fd2 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,13 +35,13 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
 import Text.Pandoc.BCP47 (Lang (..), renderLang)
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      readFileFromDirs, report, setResourcePath,
-                                      translateTerm)
+                                      readFileFromDirs, report,
+                                      setResourcePath)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
-import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
+import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
@@ -61,10 +61,12 @@ import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
+                                         nameCommands, charCommands,
+                                         verbCommands, rawInlineOr,
+                                         listingsLanguage)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
-import qualified Text.Pandoc.Builder as B
 import Safe
 
 -- for debugging:
@@ -317,76 +319,6 @@ blockquote cvariant mblang = do
   optional $ symbolIn (".:;?!" :: [Char])  -- currently ignored
   return $ blockQuote . langdiv $ (bs <> citepar)
 
-doAcronym :: PandocMonad m => Text -> LP m Inlines
-doAcronym form = do
-  acro <- braced
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "singular+" <> form)])
-    $ str $ untokenize acro]
-
-doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
-doAcronymPlural form = do
-  acro <- braced
-  plural <- lit "s"
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "plural+" <> form)]) $
-   mconcat [str $ untokenize acro, plural]]
-
-doverb :: PandocMonad m => LP m Inlines
-doverb = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  withVerbatimMode $
-    code . untokenize <$>
-      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
-
-verbTok :: PandocMonad m => Char -> LP m Tok
-verbTok stopchar = do
-  t@(Tok pos toktype txt) <- anyTok
-  case T.findIndex (== stopchar) txt of
-       Nothing -> return t
-       Just i  -> do
-         let (t1, t2) = T.splitAt i txt
-         inp <- getInput
-         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
-                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
-         return $ Tok pos toktype t1
-
-listingsLanguage :: [(Text, Text)] -> Maybe Text
-listingsLanguage opts =
-  case lookup "language" opts of
-    Nothing  -> Nothing
-    Just l   -> fromListingsLanguage l `mplus` Just l
-
-dolstinline :: PandocMonad m => LP m Inlines
-dolstinline = do
-  options <- option [] keyvals
-  let classes = maybeToList $ listingsLanguage options
-  doinlinecode classes
-
-domintinline :: PandocMonad m => LP m Inlines
-domintinline = do
-  skipopts
-  cls <- untokenize <$> braced
-  doinlinecode [cls]
-
-doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
-doinlinecode classes = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  let stopchar = if marker == '{' then '}' else marker
-  withVerbatimMode $
-    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
-      manyTill (verbTok stopchar) (symbol stopchar)
-
-nlToSpace :: Char -> Char
-nlToSpace '\n' = ' '
-nlToSpace x    = x
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -405,9 +337,6 @@ inlineCommand' = try $ do
 tok :: PandocMonad m => LP m Inlines
 tok = tokWith inline
 
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
-
 unescapeURL :: Text -> Text
 unescapeURL = T.concat . go . T.splitOn "\\"
   where
@@ -420,234 +349,136 @@ unescapeURL = T.concat . go . T.splitOn "\\"
       | otherwise = "\\" <> t
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands =
-  M.union inlineLanguageCommands $
-  M.union (accentCommands tok) $
-  M.union (citationCommands inline) $
-  M.union (siunitxCommands tok) $
-  M.fromList
-  [ ("emph", extractSpaces emph <$> tok)
-  , ("textit", extractSpaces emph <$> tok)
-  , ("textsl", extractSpaces emph <$> tok)
-  , ("textsc", extractSpaces smallcaps <$> tok)
-  , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
-  , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
-  , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
-  , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
-  , ("texttt", ttfamily)
-  , ("sout", extractSpaces strikeout <$> tok)
-  , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
-  , ("lq", return (str "‘"))
-  , ("rq", return (str "’"))
-  , ("textquoteleft", return (str "‘"))
-  , ("textquoteright", return (str "’"))
-  , ("textquotedblleft", return (str "“"))
-  , ("textquotedblright", return (str "”"))
-  , ("textsuperscript", extractSpaces superscript <$> tok)
-  , ("textsubscript", extractSpaces subscript <$> tok)
-  , ("textbackslash", lit "\\")
-  , ("backslash", lit "\\")
-  , ("slash", lit "/")
-  , ("textbf", extractSpaces strong <$> tok)
-  , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
-  , ("underline", underline <$> tok)
-  , ("ldots", lit "…")
-  , ("vdots", lit "\8942")
-  , ("dots", lit "…")
-  , ("mdots", lit "…")
-  , ("sim", lit "~")
-  , ("sep", lit ",")
-  , ("label", rawInlineOr "label" dolabel)
-  , ("ref", rawInlineOr "ref" $ doref "ref")
-  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
-  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
-  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
-  , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
-  , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
-  , ("lettrine", rawInlineOr "lettrine" lettrine)
-  , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
-  , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
-  , ("ensuremath", mathInline . untokenize <$> braced)
-  , ("texorpdfstring", const <$> tok <*> tok)
-  , ("P", lit "¶")
-  , ("S", lit "§")
-  , ("$", lit "$")
-  , ("%", lit "%")
-  , ("&", lit "&")
-  , ("#", lit "#")
-  , ("_", lit "_")
-  , ("{", lit "{")
-  , ("}", lit "}")
-  , ("qed", lit "\a0\x25FB")
-  -- old TeX commands
-  , ("em", extractSpaces emph <$> inlines)
-  , ("it", extractSpaces emph <$> inlines)
-  , ("sl", extractSpaces emph <$> inlines)
-  , ("bf", extractSpaces strong <$> inlines)
-  , ("tt", code . stringify . toList <$> inlines)
-  , ("rm", inlines)
-  , ("itshape", extractSpaces emph <$> inlines)
-  , ("slshape", extractSpaces emph <$> inlines)
-  , ("scshape", extractSpaces smallcaps <$> inlines)
-  , ("bfseries", extractSpaces strong <$> inlines)
-  , ("MakeUppercase", makeUppercase <$> tok)
-  , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
-  , ("uppercase", makeUppercase <$> tok)
-  , ("MakeLowercase", makeLowercase <$> tok)
-  , ("MakeTextLowercase", makeLowercase <$> tok)
-  , ("lowercase", makeLowercase <$> tok)
-  , ("/", pure mempty) -- italic correction
-  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
-                            guard $ not inTableCell
-                            optional rawopt
-                            spaces))
-  , (",", lit "\8198")
-  , ("@", pure mempty)
-  , (" ", lit "\160")
-  , ("ps", pure $ str "PS." <> space)
-  , ("TeX", lit "TeX")
-  , ("LaTeX", lit "LaTeX")
-  , ("bar", lit "|")
-  , ("textless", lit "<")
-  , ("textgreater", lit ">")
-  , ("thanks", skipopts >> note <$> grouped block)
-  , ("footnote", skipopts >> note <$> grouped block)
-  , ("passthrough", tok) -- \passthrough macro used by latex writer
-                         -- for listings
-  , ("verb", doverb)
-  , ("lstinline", dolstinline)
-  , ("mintinline", domintinline)
-  , ("Verb", doverb)
-  , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
-                  bracedUrl)
-  , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
-  , ("href", do url <- bracedUrl
-                sp
-                link (unescapeURL $ untokenize url) "" <$> tok)
-  , ("includegraphics", do options <- option [] keyvals
-                           src <- braced
-                           mkImage options . unescapeURL . removeDoubleQuotes $
-                               untokenize src)
-  , ("enquote*", enquote True Nothing)
-  , ("enquote", enquote False Nothing)
-  -- foreignquote is supposed to use native quote marks
-  , ("foreignquote*", braced >>= enquote True . Just . untokenize)
-  , ("foreignquote", braced >>= enquote False . Just . untokenize)
-  -- hypehnquote uses regular quotes
-  , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
-  , ("hyphenquote", braced >>= enquote False . Just . untokenize)
-  , ("figurename", doTerm Translations.Figure)
-  , ("prefacename", doTerm Translations.Preface)
-  , ("refname", doTerm Translations.References)
-  , ("bibname", doTerm Translations.Bibliography)
-  , ("chaptername", doTerm Translations.Chapter)
-  , ("partname", doTerm Translations.Part)
-  , ("contentsname", doTerm Translations.Contents)
-  , ("listfigurename", doTerm Translations.ListOfFigures)
-  , ("listtablename", doTerm Translations.ListOfTables)
-  , ("indexname", doTerm Translations.Index)
-  , ("abstractname", doTerm Translations.Abstract)
-  , ("tablename", doTerm Translations.Table)
-  , ("enclname", doTerm Translations.Encl)
-  , ("ccname", doTerm Translations.Cc)
-  , ("headtoname", doTerm Translations.To)
-  , ("pagename", doTerm Translations.Page)
-  , ("seename", doTerm Translations.See)
-  , ("seealsoname", doTerm Translations.SeeAlso)
-  , ("proofname", doTerm Translations.Proof)
-  , ("glossaryname", doTerm Translations.Glossary)
-  , ("lstlistingname", doTerm Translations.Listing)
-  , ("hyperlink", hyperlink)
-  , ("hypertarget", hypertargetInline)
-  -- glossaries package
-  , ("gls", doAcronym "short")
-  , ("Gls", doAcronym "short")
-  , ("glsdesc", doAcronym "long")
-  , ("Glsdesc", doAcronym "long")
-  , ("GLSdesc", doAcronym "long")
-  , ("acrlong", doAcronym "long")
-  , ("Acrlong", doAcronym "long")
-  , ("acrfull", doAcronym "full")
-  , ("Acrfull", doAcronym "full")
-  , ("acrshort", doAcronym "abbrv")
-  , ("Acrshort", doAcronym "abbrv")
-  , ("glspl", doAcronymPlural "short")
-  , ("Glspl", doAcronymPlural "short")
-  , ("glsdescplural", doAcronymPlural "long")
-  , ("Glsdescplural", doAcronymPlural "long")
-  , ("GLSdescplural", doAcronymPlural "long")
-  -- acronyms package
-  , ("ac", doAcronym "short")
-  , ("acf", doAcronym "full")
-  , ("acs", doAcronym "abbrv")
-  , ("acl", doAcronym "long")
-  , ("acp", doAcronymPlural "short")
-  , ("acfp", doAcronymPlural "full")
-  , ("acsp", doAcronymPlural "abbrv")
-  , ("aclp", doAcronymPlural "long")
-  , ("Ac", doAcronym "short")
-  , ("Acf", doAcronym "full")
-  , ("Acs", doAcronym "abbrv")
-  , ("Acl", doAcronym "long")
-  , ("Acp", doAcronymPlural "short")
-  , ("Acfp", doAcronymPlural "full")
-  , ("Acsp", doAcronymPlural "abbrv")
-  , ("Aclp", doAcronymPlural "long")
-  -- hyphenat
-  , ("bshyp", lit "\\\173")
-  , ("fshyp", lit "/\173")
-  , ("dothyp", lit ".\173")
-  , ("colonhyp", lit ":\173")
-  , ("hyp", lit "-")
-  , ("nohyphens", tok)
-  , ("textnhtt", ttfamily)
-  , ("nhttfamily", ttfamily)
-  -- LaTeX colors
-  , ("textcolor", coloredInline "color")
-  , ("colorbox", coloredInline "background-color")
-  -- fontawesome
-  , ("faCheck", lit "\10003")
-  , ("faClose", lit "\10007")
-  -- xspace
-  , ("xspace", doxspace)
-  -- etoolbox
-  , ("ifstrequal", ifstrequal)
-  , ("newtoggle", braced >>= newToggle)
-  , ("toggletrue", braced >>= setToggle True)
-  , ("togglefalse", braced >>= setToggle False)
-  , ("iftoggle", try $ ifToggle >> inline)
-  -- biblatex misc
-  , ("RN", romanNumeralUpper)
-  , ("Rn", romanNumeralLower)
-  -- babel
-  , ("foreignlanguage", foreignlanguage)
-  -- include
-  , ("input", rawInlineOr "input" $ include "input")
-  -- soul package
-  , ("ul", underline <$> tok)
-  -- ulem package
-  , ("uline", underline <$> tok)
-  -- plain tex stuff that should just be passed through as raw tex
-  , ("ifdim", ifdim)
-  -- bibtex
-  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
-  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
-  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
-  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
-  , ("mkbibparens",
-       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
-  , ("mkbibbrackets",
-       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
-  , ("autocap", spanWith nullAttr <$> tok)
-  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
-  , ("bibstring",
-       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
-         <$> braced)
-  , ("adddot", pure (str "."))
-  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
-  , ("addabbrvspace", pure space)
-  , ("hyphen", pure (str "-"))
-  ]
+inlineCommands = M.unions
+  [ inlineLanguageCommands
+  , accentCommands tok
+  , citationCommands inline
+  , siunitxCommands tok
+  , acronymCommands
+  , refCommands
+  , nameCommands
+  , verbCommands
+  , charCommands
+  , rest ]
+ where
+  rest = M.fromList
+    [ ("emph", extractSpaces emph <$> tok)
+    , ("textit", extractSpaces emph <$> tok)
+    , ("textsl", extractSpaces emph <$> tok)
+    , ("textsc", extractSpaces smallcaps <$> tok)
+    , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+    , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+    , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+    , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+    , ("texttt", ttfamily)
+    , ("sout", extractSpaces strikeout <$> tok)
+    , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+    , ("lq", return (str "‘"))
+    , ("rq", return (str "’"))
+    , ("textquoteleft", return (str "‘"))
+    , ("textquoteright", return (str "’"))
+    , ("textquotedblleft", return (str "“"))
+    , ("textquotedblright", return (str "”"))
+    , ("textsuperscript", extractSpaces superscript <$> tok)
+    , ("textsubscript", extractSpaces subscript <$> tok)
+    , ("textbf", extractSpaces strong <$> tok)
+    , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+    , ("underline", underline <$> tok)
+    , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
+    , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
+    , ("lettrine", rawInlineOr "lettrine" lettrine)
+    , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
+    , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
+    , ("ensuremath", mathInline . untokenize <$> braced)
+    , ("texorpdfstring", const <$> tok <*> tok)
+    -- old TeX commands
+    , ("em", extractSpaces emph <$> inlines)
+    , ("it", extractSpaces emph <$> inlines)
+    , ("sl", extractSpaces emph <$> inlines)
+    , ("bf", extractSpaces strong <$> inlines)
+    , ("tt", code . stringify . toList <$> inlines)
+    , ("rm", inlines)
+    , ("itshape", extractSpaces emph <$> inlines)
+    , ("slshape", extractSpaces emph <$> inlines)
+    , ("scshape", extractSpaces smallcaps <$> inlines)
+    , ("bfseries", extractSpaces strong <$> inlines)
+    , ("MakeUppercase", makeUppercase <$> tok)
+    , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
+    , ("uppercase", makeUppercase <$> tok)
+    , ("MakeLowercase", makeLowercase <$> tok)
+    , ("MakeTextLowercase", makeLowercase <$> tok)
+    , ("lowercase", makeLowercase <$> tok)
+    , ("thanks", skipopts >> note <$> grouped block)
+    , ("footnote", skipopts >> note <$> grouped block)
+    , ("passthrough", tok) -- \passthrough macro used by latex writer
+                           -- for listings
+    , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
+                    bracedUrl)
+    , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
+    , ("href", do url <- bracedUrl
+                  sp
+                  link (unescapeURL $ untokenize url) "" <$> tok)
+    , ("includegraphics", do options <- option [] keyvals
+                             src <- braced
+                             mkImage options . unescapeURL . removeDoubleQuotes $
+                                 untokenize src)
+    , ("enquote*", enquote True Nothing)
+    , ("enquote", enquote False Nothing)
+    -- foreignquote is supposed to use native quote marks
+    , ("foreignquote*", braced >>= enquote True . Just . untokenize)
+    , ("foreignquote", braced >>= enquote False . Just . untokenize)
+    -- hypehnquote uses regular quotes
+    , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
+    , ("hyphenquote", braced >>= enquote False . Just . untokenize)
+    , ("hyperlink", hyperlink)
+    , ("hypertarget", hypertargetInline)
+    -- hyphenat
+    , ("nohyphens", tok)
+    , ("textnhtt", ttfamily)
+    , ("nhttfamily", ttfamily)
+    -- LaTeX colors
+    , ("textcolor", coloredInline "color")
+    , ("colorbox", coloredInline "background-color")
+    -- xspace
+    , ("xspace", doxspace)
+    -- etoolbox
+    , ("ifstrequal", ifstrequal)
+    , ("newtoggle", braced >>= newToggle)
+    , ("toggletrue", braced >>= setToggle True)
+    , ("togglefalse", braced >>= setToggle False)
+    , ("iftoggle", try $ ifToggle >> inline)
+    -- biblatex misc
+    , ("RN", romanNumeralUpper)
+    , ("Rn", romanNumeralLower)
+    -- babel
+    , ("foreignlanguage", foreignlanguage)
+    -- include
+    , ("input", rawInlineOr "input" $ include "input")
+    -- soul package
+    , ("ul", underline <$> tok)
+    -- ulem package
+    , ("uline", underline <$> tok)
+    -- plain tex stuff that should just be passed through as raw tex
+    , ("ifdim", ifdim)
+    -- bibtex
+    , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+    , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+    , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+    , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+    , ("mkbibparens",
+         spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+    , ("mkbibbrackets",
+         spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+    , ("autocap", spanWith nullAttr <$> tok)
+    , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+    , ("bibstring",
+         (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+           <$> braced)
+    , ("adddot", pure (str "."))
+    , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+    , ("addabbrvspace", pure space)
+    , ("hyphen", pure (str "-"))
+    ]
 
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
@@ -766,9 +597,6 @@ ifToggle = do
                   report $ UndefinedToggle name' pos
   return ()
 
-doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
-doTerm term = str <$> translateTerm term
-
 ifstrequal :: (PandocMonad m, Monoid a) => LP m a
 ifstrequal = do
   str1 <- tok
@@ -789,13 +617,6 @@ coloredInline stylename = do
 ttfamily :: PandocMonad m => LP m Inlines
 ttfamily = code . stringify . toList <$> tok
 
-rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
-  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
-  if parseRaw
-     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
-     else fallback
-
 processHBox :: Inlines -> Inlines
 processHBox = walk convert
   where
@@ -846,25 +667,6 @@ treatAsInline = Set.fromList
   , "pagebreak"
   ]
 
-dolabel :: PandocMonad m => LP m Inlines
-dolabel = do
-  v <- braced
-  let refstr = untokenize v
-  updateState $ \st ->
-    st{ sLastLabel = Just refstr }
-  return $ spanWith (refstr,[],[("label", refstr)])
-    $ inBrackets $ str $ untokenize v
-
-doref :: PandocMonad m => Text -> LP m Inlines
-doref cls = do
-  v <- braced
-  let refstr = untokenize v
-  return $ linkWith ("",[],[ ("reference-type", cls)
-                           , ("reference", refstr)])
-                    ("#" <> refstr)
-                    ""
-                    (inBrackets $ str refstr)
-
 lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
 lookupListDefault d = (fromMaybe d .) . lookupList
   where lookupList l m = msum $ map (`M.lookup` m) l
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
new file mode 100644
index 000000000..66014a77f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -0,0 +1,275 @@
+{-# LANGUAGE OverloadedStrings     #-}
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Inline
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Inline
+  ( acronymCommands
+  , verbCommands
+  , charCommands
+  , nameCommands
+  , refCommands
+  , rawInlineOr
+  , listingsLanguage
+  )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
+import Control.Applicative (optional)
+import Control.Monad (guard, mzero, mplus)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
+import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
+                            manyTill, getInput, setInput, incSourceColumn,
+                            option)
+import Text.Pandoc.Highlighting (fromListingsLanguage,)
+import Data.Maybe (maybeToList)
+import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Text.Pandoc.Translations as Translations
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+  if parseRaw
+     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
+     else fallback
+
+dolabel :: PandocMonad m => LP m Inlines
+dolabel = do
+  v <- braced
+  let refstr = untokenize v
+  updateState $ \st ->
+    st{ sLastLabel = Just refstr }
+  return $ spanWith (refstr,[],[("label", refstr)])
+    $ inBrackets $ str $ untokenize v
+
+doref :: PandocMonad m => Text -> LP m Inlines
+doref cls = do
+  v <- braced
+  let refstr = untokenize v
+  return $ linkWith ("",[],[ ("reference-type", cls)
+                           , ("reference", refstr)])
+                    ("#" <> refstr)
+                    ""
+                    (inBrackets $ str refstr)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = str <$> translateTerm term
+
+lit :: Text -> LP m Inlines
+lit = pure . str
+
+doverb :: PandocMonad m => LP m Inlines
+doverb = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  withVerbatimMode $
+    code . untokenize <$>
+      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+  t@(Tok pos toktype txt) <- anyTok
+  case T.findIndex (== stopchar) txt of
+       Nothing -> return t
+       Just i  -> do
+         let (t1, t2) = T.splitAt i txt
+         inp <- getInput
+         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
+         return $ Tok pos toktype t1
+
+listingsLanguage :: [(Text, Text)] -> Maybe Text
+listingsLanguage opts =
+  case lookup "language" opts of
+    Nothing  -> Nothing
+    Just l   -> fromListingsLanguage l `mplus` Just l
+
+dolstinline :: PandocMonad m => LP m Inlines
+dolstinline = do
+  options <- option [] keyvals
+  let classes = maybeToList $ listingsLanguage options
+  doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+  skipopts
+  cls <- untokenize <$> braced
+  doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
+doinlinecode classes = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  let stopchar = if marker == '{' then '}' else marker
+  withVerbatimMode $
+    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
+      manyTill (verbTok stopchar) (symbol stopchar)
+
+nlToSpace :: Char -> Char
+nlToSpace '\n' = ' '
+nlToSpace x    = x
+
+
+
+verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+verbCommands = M.fromList
+  [ ("verb", doverb)
+  , ("lstinline", dolstinline)
+  , ("mintinline", domintinline)
+  , ("Verb", doverb)
+  ]
+
+
+
+charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+charCommands = M.fromList
+  [ ("ldots", lit "…")
+  , ("vdots", lit "\8942")
+  , ("dots", lit "…")
+  , ("mdots", lit "…")
+  , ("sim", lit "~")
+  , ("sep", lit ",")
+  , ("P", lit "¶")
+  , ("S", lit "§")
+  , ("$", lit "$")
+  , ("%", lit "%")
+  , ("&", lit "&")
+  , ("#", lit "#")
+  , ("_", lit "_")
+  , ("{", lit "{")
+  , ("}", lit "}")
+  , ("qed", lit "\a0\x25FB")
+  , ("/", pure mempty) -- italic correction
+  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+                            guard $ not inTableCell
+                            optional rawopt
+                            spaces))
+  , (",", lit "\8198")
+  , ("@", pure mempty)
+  , (" ", lit "\160")
+  , ("ps", pure $ str "PS." <> space)
+  , ("TeX", lit "TeX")
+  , ("LaTeX", lit "LaTeX")
+  , ("bar", lit "|")
+  , ("textless", lit "<")
+  , ("textgreater", lit ">")
+  , ("textbackslash", lit "\\")
+  , ("backslash", lit "\\")
+  , ("slash", lit "/")
+  -- fontawesome
+  , ("faCheck", lit "\10003")
+  , ("faClose", lit "\10007")
+  -- hyphenat
+  , ("bshyp", lit "\\\173")
+  , ("fshyp", lit "/\173")
+  , ("dothyp", lit ".\173")
+  , ("colonhyp", lit ":\173")
+  , ("hyp", lit "-")
+  ]
+
+nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+nameCommands = M.fromList
+  [ ("figurename", doTerm Translations.Figure)
+  , ("prefacename", doTerm Translations.Preface)
+  , ("refname", doTerm Translations.References)
+  , ("bibname", doTerm Translations.Bibliography)
+  , ("chaptername", doTerm Translations.Chapter)
+  , ("partname", doTerm Translations.Part)
+  , ("contentsname", doTerm Translations.Contents)
+  , ("listfigurename", doTerm Translations.ListOfFigures)
+  , ("listtablename", doTerm Translations.ListOfTables)
+  , ("indexname", doTerm Translations.Index)
+  , ("abstractname", doTerm Translations.Abstract)
+  , ("tablename", doTerm Translations.Table)
+  , ("enclname", doTerm Translations.Encl)
+  , ("ccname", doTerm Translations.Cc)
+  , ("headtoname", doTerm Translations.To)
+  , ("pagename", doTerm Translations.Page)
+  , ("seename", doTerm Translations.See)
+  , ("seealsoname", doTerm Translations.SeeAlso)
+  , ("proofname", doTerm Translations.Proof)
+  , ("glossaryname", doTerm Translations.Glossary)
+  , ("lstlistingname", doTerm Translations.Listing)
+  ]
+
+refCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+refCommands = M.fromList
+  [ ("label", rawInlineOr "label" dolabel)
+  , ("ref", rawInlineOr "ref" $ doref "ref")
+  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
+  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
+  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
+  ]
+
+acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+acronymCommands = M.fromList
+  -- glossaries package
+  [ ("gls", doAcronym "short")
+  , ("Gls", doAcronym "short")
+  , ("glsdesc", doAcronym "long")
+  , ("Glsdesc", doAcronym "long")
+  , ("GLSdesc", doAcronym "long")
+  , ("acrlong", doAcronym "long")
+  , ("Acrlong", doAcronym "long")
+  , ("acrfull", doAcronym "full")
+  , ("Acrfull", doAcronym "full")
+  , ("acrshort", doAcronym "abbrv")
+  , ("Acrshort", doAcronym "abbrv")
+  , ("glspl", doAcronymPlural "short")
+  , ("Glspl", doAcronymPlural "short")
+  , ("glsdescplural", doAcronymPlural "long")
+  , ("Glsdescplural", doAcronymPlural "long")
+  , ("GLSdescplural", doAcronymPlural "long")
+  -- acronyms package
+  , ("ac", doAcronym "short")
+  , ("acf", doAcronym "full")
+  , ("acs", doAcronym "abbrv")
+  , ("acl", doAcronym "long")
+  , ("acp", doAcronymPlural "short")
+  , ("acfp", doAcronymPlural "full")
+  , ("acsp", doAcronymPlural "abbrv")
+  , ("aclp", doAcronymPlural "long")
+  , ("Ac", doAcronym "short")
+  , ("Acf", doAcronym "full")
+  , ("Acs", doAcronym "abbrv")
+  , ("Acl", doAcronym "long")
+  , ("Acp", doAcronymPlural "short")
+  , ("Acfp", doAcronymPlural "full")
+  , ("Acsp", doAcronymPlural "abbrv")
+  , ("Aclp", doAcronymPlural "long")
+  ]
+
+doAcronym :: PandocMonad m => Text -> LP m Inlines
+doAcronym form = do
+  acro <- braced
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "singular+" <> form)])
+    $ str $ untokenize acro]
+
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
+doAcronymPlural form = do
+  acro <- braced
+  let plural = str "s"
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "plural+" <> form)]) $
+   mconcat [str $ untokenize acro, plural]]
+
+
-- 
cgit v1.2.3


From 044bc44fc621e421b74367765022f108494b4e2e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 11:08:02 -0800
Subject: Moved more into T.P.Readers.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs        | 84 +++------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 65 +++++++++++++++++++++++--
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 30 +++++++++++-
 3 files changed, 97 insertions(+), 82 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index a27135fd2..4ec038b94 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -34,7 +34,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (Lang (..), renderLang)
+import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
@@ -58,11 +58,12 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
-import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
+import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
+                                         biblatexInlineCommands,
                                          verbCommands, rawInlineOr,
                                          listingsLanguage)
 import Text.Pandoc.Shared
@@ -350,8 +351,7 @@ unescapeURL = T.concat . go . T.splitOn "\\"
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands = M.unions
-  [ inlineLanguageCommands
-  , accentCommands tok
+  [ accentCommands tok
   , citationCommands inline
   , siunitxCommands tok
   , acronymCommands
@@ -359,6 +359,8 @@ inlineCommands = M.unions
   , nameCommands
   , verbCommands
   , charCommands
+  , inlineLanguageCommands tok
+  , biblatexInlineCommands tok
   , rest ]
  where
   rest = M.fromList
@@ -373,12 +375,6 @@ inlineCommands = M.unions
     , ("texttt", ttfamily)
     , ("sout", extractSpaces strikeout <$> tok)
     , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
-    , ("lq", return (str "‘"))
-    , ("rq", return (str "’"))
-    , ("textquoteleft", return (str "‘"))
-    , ("textquoteright", return (str "’"))
-    , ("textquotedblleft", return (str "“"))
-    , ("textquotedblright", return (str "”"))
     , ("textsuperscript", extractSpaces superscript <$> tok)
     , ("textsubscript", extractSpaces subscript <$> tok)
     , ("textbf", extractSpaces strong <$> tok)
@@ -447,11 +443,6 @@ inlineCommands = M.unions
     , ("toggletrue", braced >>= setToggle True)
     , ("togglefalse", braced >>= setToggle False)
     , ("iftoggle", try $ ifToggle >> inline)
-    -- biblatex misc
-    , ("RN", romanNumeralUpper)
-    , ("Rn", romanNumeralLower)
-    -- babel
-    , ("foreignlanguage", foreignlanguage)
     -- include
     , ("input", rawInlineOr "input" $ include "input")
     -- soul package
@@ -460,24 +451,6 @@ inlineCommands = M.unions
     , ("uline", underline <$> tok)
     -- plain tex stuff that should just be passed through as raw tex
     , ("ifdim", ifdim)
-    -- bibtex
-    , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
-    , ("mkbibemph", spanWith nullAttr . emph <$> tok)
-    , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
-    , ("mkbibbold", spanWith nullAttr . strong <$> tok)
-    , ("mkbibparens",
-         spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
-    , ("mkbibbrackets",
-         spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
-    , ("autocap", spanWith nullAttr <$> tok)
-    , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
-    , ("bibstring",
-         (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
-           <$> braced)
-    , ("adddot", pure (str "."))
-    , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
-    , ("addabbrvspace", pure space)
-    , ("hyphen", pure (str "-"))
     ]
 
 lettrine :: PandocMonad m => LP m Inlines
@@ -502,26 +475,6 @@ alterStr :: (Text -> Text) -> Inline -> Inline
 alterStr f (Str xs) = Str (f xs)
 alterStr _ x = x
 
-foreignlanguage :: PandocMonad m => LP m Inlines
-foreignlanguage = do
-  babelLang <- untokenize <$> braced
-  case babelLangToBCP47 babelLang of
-       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
-       _ -> tok
-
-inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
-  where
-    mk (polyglossia, bcp47Func) =
-      ("text" <> polyglossia, inlineLanguage bcp47Func)
-
-inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines
-inlineLanguage bcp47Func = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  let lang = renderLang $ bcp47Func o
-  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
-
 hyperlink :: PandocMonad m => LP m Inlines
 hyperlink = try $ do
   src <- untokenize <$> braced
@@ -542,31 +495,6 @@ hypertargetInline = try $ do
   ils <- grouped inline
   return $ spanWith (ref, [], []) ils
 
-romanNumeralUpper :: (PandocMonad m) => LP m Inlines
-romanNumeralUpper =
-  str . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralLower :: (PandocMonad m) => LP m Inlines
-romanNumeralLower =
-  str . T.toLower . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralArg :: (PandocMonad m) => LP m Int
-romanNumeralArg = spaces *> (parser <|> inBraces)
-  where
-    inBraces = do
-      symbol '{'
-      spaces
-      res <- parser
-      spaces
-      symbol '}'
-      return res
-    parser = do
-      s <- untokenize <$> many1 (satisfyTok isWordTok)
-      let (digits, rest) = T.span isDigit s
-      unless (T.null rest) $
-        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
-      safeRead digits
-
 newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
 newToggle name = do
   updateState $ \st ->
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 66014a77f..37c29188e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -13,6 +13,7 @@ module Text.Pandoc.Readers.LaTeX.Inline
   , verbCommands
   , charCommands
   , nameCommands
+  , biblatexInlineCommands
   , refCommands
   , rawInlineOr
   , listingsLanguage
@@ -23,15 +24,17 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder
+import Text.Pandoc.Shared (toRomanNumeral, safeRead)
 import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
-import Control.Applicative (optional)
-import Control.Monad (guard, mzero, mplus)
+import Control.Applicative (optional, (<|>))
+import Control.Monad (guard, mzero, mplus, unless)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option)
+                            option, many1)
+import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -127,6 +130,31 @@ nlToSpace :: Char -> Char
 nlToSpace '\n' = ' '
 nlToSpace x    = x
 
+romanNumeralUpper :: (PandocMonad m) => LP m Inlines
+romanNumeralUpper =
+  str . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralLower :: (PandocMonad m) => LP m Inlines
+romanNumeralLower =
+  str . T.toLower . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralArg :: (PandocMonad m) => LP m Int
+romanNumeralArg = spaces *> (parser <|> inBraces)
+  where
+    inBraces = do
+      symbol '{'
+      spaces
+      res <- parser
+      spaces
+      symbol '}'
+      return res
+    parser = do
+      s <- untokenize <$> many1 (satisfyTok isWordTok)
+      let (digits, rest) = T.span isDigit s
+      unless (T.null rest) $
+        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
+      safeRead digits
+
 
 
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
@@ -157,6 +185,12 @@ charCommands = M.fromList
   , ("{", lit "{")
   , ("}", lit "}")
   , ("qed", lit "\a0\x25FB")
+  , ("lq", return (str "‘"))
+  , ("rq", return (str "’"))
+  , ("textquoteleft", return (str "‘"))
+  , ("textquoteright", return (str "’"))
+  , ("textquotedblleft", return (str "“"))
+  , ("textquotedblright", return (str "”"))
   , ("/", pure mempty) -- italic correction
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
@@ -185,6 +219,31 @@ charCommands = M.fromList
   , ("hyp", lit "-")
   ]
 
+biblatexInlineCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+biblatexInlineCommands tok = M.fromList
+  -- biblatex misc
+  [ ("RN", romanNumeralUpper)
+  , ("Rn", romanNumeralLower)
+  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+  , ("mkbibparens",
+       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+  , ("mkbibbrackets",
+       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+  , ("autocap", spanWith nullAttr <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+  , ("bibstring",
+       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+         <$> braced)
+  , ("adddot", pure (str "."))
+  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+  , ("addabbrvspace", pure space)
+  , ("hyphen", pure (str "-"))
+  ]
+
 nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 nameCommands = M.fromList
   [ ("figurename", doTerm Translations.Figure)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index adbeaa6d4..24acbdbe4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -15,15 +15,43 @@ module Text.Pandoc.Readers.LaTeX.Lang
   ( setDefaultLanguage
   , polyglossiaLangToBCP47
   , babelLangToBCP47
+  , inlineLanguageCommands
   )
 where
 import qualified Data.Map as M
+import Data.Text (Text)
 import qualified Data.Text as T
+import Text.Pandoc.Shared (extractSpaces)
 import Text.Pandoc.BCP47 (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option)
-import Text.Pandoc.Builder (Blocks, setMeta, str)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith)
+
+foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
+foreignlanguage tok = do
+  babelLang <- untokenize <$> braced
+  case babelLangToBCP47 babelLang of
+       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
+       _ -> tok
+
+inlineLanguageCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+inlineLanguageCommands tok =
+  M.fromList $
+    ("foreignlanguage", foreignlanguage tok) :
+    (mk <$> M.toList polyglossiaLangToBCP47)
+  where
+    mk (polyglossia, bcp47Func) =
+      ("text" <> polyglossia, inlineLanguage tok bcp47Func)
+
+inlineLanguage :: PandocMonad m
+               => LP m Inlines -> (Text -> Lang) -> LP m Inlines
+inlineLanguage tok bcp47Func = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  let lang = renderLang $ bcp47Func o
+  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
 
 setDefaultLanguage :: PandocMonad m => LP m Blocks
 setDefaultLanguage = do
-- 
cgit v1.2.3


From da5e9e5956aae3ac83edef7831939553360b8964 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 11:22:42 -0800
Subject: Move enquote commands to T.P.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs        | 22 ++-------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs |  2 --
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 34 +++++++++++++++++++++++++++++++--
 3 files changed, 34 insertions(+), 24 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4ec038b94..a4261bbeb 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -59,6 +59,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
+                                       enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
@@ -293,18 +294,6 @@ quoted' f starter ender = do
 lit :: Text -> LP m Inlines
 lit = pure . str
 
-enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
-enquote starred mblang = do
-  skipopts
-  let lang = mblang >>= babelLangToBCP47
-  let langspan = case lang of
-                      Nothing -> id
-                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
-  quoteContext <- sQuoteContext <$> getState
-  if starred || quoteContext == InDoubleQuote
-     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
-     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
-
 blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
 blockquote cvariant mblang = do
   citepar <- if cvariant
@@ -359,6 +348,7 @@ inlineCommands = M.unions
   , nameCommands
   , verbCommands
   , charCommands
+  , enquoteCommands tok
   , inlineLanguageCommands tok
   , biblatexInlineCommands tok
   , rest ]
@@ -418,14 +408,6 @@ inlineCommands = M.unions
                              src <- braced
                              mkImage options . unescapeURL . removeDoubleQuotes $
                                  untokenize src)
-    , ("enquote*", enquote True Nothing)
-    , ("enquote", enquote False Nothing)
-    -- foreignquote is supposed to use native quote marks
-    , ("foreignquote*", braced >>= enquote True . Just . untokenize)
-    , ("foreignquote", braced >>= enquote False . Just . untokenize)
-    -- hypehnquote uses regular quotes
-    , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
-    , ("hyphenquote", braced >>= enquote False . Just . untokenize)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 37c29188e..8bdff58f7 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -155,8 +155,6 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
       safeRead digits
 
-
-
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 verbCommands = M.fromList
   [ ("verb", doverb)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 24acbdbe4..08e217bdb 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Lang
   ( setDefaultLanguage
   , polyglossiaLangToBCP47
   , babelLangToBCP47
+  , enquoteCommands
   , inlineLanguageCommands
   )
 where
@@ -25,8 +26,37 @@ import Text.Pandoc.Shared (extractSpaces)
 import Text.Pandoc.BCP47 (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Parsing (updateState, option)
-import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith)
+import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
+                            withQuoteContext)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith,
+                            singleQuoted, doubleQuoted)
+
+enquote :: PandocMonad m
+        => LP m Inlines
+        -> Bool -> Maybe Text -> LP m Inlines
+enquote tok starred mblang = do
+  skipopts
+  let lang = mblang >>= babelLangToBCP47
+  let langspan = case lang of
+                      Nothing -> id
+                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
+  quoteContext <- sQuoteContext <$> getState
+  if starred || quoteContext == InDoubleQuote
+     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+enquoteCommands :: PandocMonad m
+                => LP m Inlines -> M.Map Text (LP m Inlines)
+enquoteCommands tok = M.fromList
+  [ ("enquote*", enquote tok True Nothing)
+  , ("enquote", enquote tok False Nothing)
+  -- foreignquote is supposed to use native quote marks
+  , ("foreignquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("foreignquote", braced >>= enquote tok False . Just . untokenize)
+  -- hypehnquote uses regular quotes
+  , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("hyphenquote", braced >>= enquote tok False . Just . untokenize)
+  ]
 
 foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
 foreignlanguage tok = do
-- 
cgit v1.2.3


From 33e4c8dd6c2bbc8109880f43b379d074ceb38391 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 18:21:32 -0800
Subject: Remove T.P.Readers.LaTeX.Accent.

Incorporate accentCommands into T.P.Readers.LaTeX.Inline.
---
 pandoc.cabal                            |  1 -
 src/Text/Pandoc/Readers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 ---------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 71 ++++++++++++++++++++++++++++--
 4 files changed, 69 insertions(+), 83 deletions(-)
 delete mode 100644 src/Text/Pandoc/Readers/LaTeX/Accent.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 211327642..090c28287 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,7 +626,6 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index a4261bbeb..552411db8 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -49,7 +49,6 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        inlineEnvironment,
@@ -64,6 +63,7 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
+                                         accentCommands,
                                          biblatexInlineCommands,
                                          verbCommands, rawInlineOr,
                                          listingsLanguage)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
deleted file mode 100644
index f8c53491c..000000000
--- a/src/Text/Pandoc/Readers/LaTeX/Accent.hs
+++ /dev/null
@@ -1,78 +0,0 @@
-{-# LANGUAGE ViewPatterns #-}
-{-# LANGUAGE OverloadedStrings #-}
-module Text.Pandoc.Readers.LaTeX.Accent
-  ( accentCommands )
-where
-
-import Text.Pandoc.Class
-import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Builder as B
-import qualified Data.Map as M
-import Data.Text (Text)
-import Data.Maybe (fromMaybe)
-import Text.Pandoc.Parsing
-import qualified Data.Text as T
-import qualified Data.Text.Normalize as Normalize
-
-accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
-accentCommands tok =
-  let accent = accentWith tok
-      lit = pure . str
-  in M.fromList
-  [ ("aa", lit "å")
-  , ("AA", lit "Å")
-  , ("ss", lit "ß")
-  , ("o", lit "ø")
-  , ("O", lit "Ø")
-  , ("L", lit "Ł")
-  , ("l", lit "ł")
-  , ("ae", lit "æ")
-  , ("AE", lit "Æ")
-  , ("oe", lit "œ")
-  , ("OE", lit "Œ")
-  , ("pounds", lit "£")
-  , ("euro", lit "€")
-  , ("copyright", lit "©")
-  , ("textasciicircum", lit "^")
-  , ("textasciitilde", lit "~")
-  , ("H", accent '\779' Nothing) -- hungarumlaut
-  , ("`", accent '\768' (Just '`')) -- grave
-  , ("'", accent '\769' (Just '\'')) -- acute
-  , ("^", accent '\770' (Just '^')) -- circ
-  , ("~", accent '\771' (Just '~')) -- tilde
-  , ("\"", accent '\776' Nothing) -- umlaut
-  , (".", accent '\775' Nothing) -- dot
-  , ("=", accent '\772' Nothing) -- macron
-  , ("|", accent '\781' Nothing) -- vertical line above
-  , ("b", accent '\817' Nothing) -- macron below
-  , ("c", accent '\807' Nothing) -- cedilla
-  , ("G", accent '\783' Nothing) -- doublegrave
-  , ("h", accent '\777' Nothing) -- hookabove
-  , ("d", accent '\803' Nothing) -- dotbelow
-  , ("f", accent '\785' Nothing)  -- inverted breve
-  , ("r", accent '\778' Nothing)  -- ringabove
-  , ("t", accent '\865' Nothing)  -- double inverted breve
-  , ("U", accent '\782' Nothing)  -- double vertical line above
-  , ("v", accent '\780' Nothing) -- hacek
-  , ("u", accent '\774' Nothing) -- breve
-  , ("k", accent '\808' Nothing) -- ogonek
-  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
-  , ("i", lit "ı")  -- dotless i
-  , ("j", lit "ȷ")  -- dotless j
-  , ("newtie", accent '\785' Nothing) -- inverted breve
-  , ("textcircled", accent '\8413' Nothing) -- combining circle
-  ]
-
-accentWith :: PandocMonad m
-           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
-accentWith tok combiningAccent fallBack = try $ do
-  ils <- tok
-  case toList ils of
-       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-         -- try to normalize to the combined character:
-         Str (Normalize.normalize Normalize.NFC
-               (T.pack [x, combiningAccent]) <> xs) : ys
-       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       _                 -> return ils
-
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 8bdff58f7..7b8bca4af 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings     #-}
+{-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Inline
    Copyright   : Copyright (C) 2006-2021 John MacFarlane
@@ -12,6 +13,7 @@ module Text.Pandoc.Readers.LaTeX.Inline
   ( acronymCommands
   , verbCommands
   , charCommands
+  , accentCommands
   , nameCommands
   , biblatexInlineCommands
   , refCommands
@@ -33,11 +35,12 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1)
+                            option, many1, try)
 import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
-import Data.Maybe (maybeToList)
+import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Data.Text.Normalize as Normalize
 import qualified Text.Pandoc.Translations as Translations
 
 rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
@@ -155,6 +158,22 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
       safeRead digits
 
+accentWith :: PandocMonad m
+           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+  ils <- tok
+  case toList ils of
+       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (Normalize.normalize Normalize.NFC
+               (T.pack [x, combiningAccent]) <> xs) : ys
+       [Space] -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       []      -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       _       -> return ils
+
+
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 verbCommands = M.fromList
   [ ("verb", doverb)
@@ -163,7 +182,53 @@ verbCommands = M.fromList
   , ("Verb", doverb)
   ]
 
-
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+  let accent = accentWith tok
+  in  M.fromList
+  [ ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("textasciicircum", lit "^")
+  , ("textasciitilde", lit "~")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
+  ]
 
 charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 charCommands = M.fromList
-- 
cgit v1.2.3


From b569b0226d4bd5e0699077089d54fb03d4394b7d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 18:47:17 -0800
Subject: Add T.P.Readers.LaTeX.Include.

---
 pandoc.cabal                             |  1 +
 src/Text/Pandoc/Readers/LaTeX.hs         | 57 ++++-----------------------
 src/Text/Pandoc/Readers/LaTeX/Include.hs | 66 ++++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/LaTeX/Inline.hs  | 15 +++++++-
 4 files changed, 87 insertions(+), 52 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Include.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 090c28287..11d34a19a 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,6 +626,7 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
+                   Text.Pandoc.Readers.LaTeX.Include,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 552411db8..dd6c2a1fa 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,9 +38,8 @@ import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      readFileFromDirs, report,
-                                      setResourcePath)
-import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
+                                      report, setResourcePath)
+import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
@@ -61,6 +60,8 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Include (insertIncluded,
+                                          readFileFromTexinputs)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
                                          accentCommands,
@@ -235,19 +236,10 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
 
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
-  Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+  fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
 
 doubleQuote :: PandocMonad m => LP m Inlines
 doubleQuote =
@@ -406,8 +398,8 @@ inlineCommands = M.unions
                   link (unescapeURL $ untokenize url) "" <$> tok)
     , ("includegraphics", do options <- option [] keyvals
                              src <- braced
-                             mkImage options . unescapeURL . removeDoubleQuotes $
-                                 untokenize src)
+                             mkImage options . unescapeURL .
+                               removeDoubleQuotes $ untokenize src)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
@@ -417,8 +409,6 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
-    -- xspace
-    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
@@ -698,39 +688,6 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
-readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
-readFileFromTexinputs fp = do
-  fileContentsMap <- sFileContents <$> getState
-  case M.lookup (T.pack fp) fileContentsMap of
-    Just t -> return (Just t)
-    Nothing -> do
-      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
-               <$> lookupEnv "TEXINPUTS"
-      readFileFromDirs dirs fp
-
-insertIncluded :: PandocMonad m
-               => FilePath
-               -> FilePath
-               -> LP m ()
-insertIncluded defaultExtension f' = do
-  let f = case takeExtension f' of
-                ".tex" -> f'
-                ".sty" -> f'
-                _      -> addExtension f' defaultExtension
-  pos <- getPosition
-  containers <- getIncludeFiles <$> getState
-  when (T.pack f `elem` containers) $
-    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
-  updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromTexinputs f
-  contents <- case mbcontents of
-                   Just s -> return s
-                   Nothing -> do
-                     report $ CouldNotLoadIncludeFile (T.pack f) pos
-                     return ""
-  getInput >>= setInput . (tokenize f contents ++)
-  updateState dropLatestIncludeFile
-
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Include.hs b/src/Text/Pandoc/Readers/LaTeX/Include.hs
new file mode 100644
index 000000000..618a89284
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Include.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE OverloadedStrings     #-}
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Include
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Include
+  ( readFileFromTexinputs
+  , insertIncluded
+  )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Shared (splitTextBy)
+import System.FilePath (takeExtension, addExtension)
+import Control.Monad (when)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParseError))
+import Text.Pandoc.Logging (LogMessage(CouldNotLoadIncludeFile))
+import Text.Pandoc.Class (PandocMonad (..), readFileFromDirs, report)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, getState, getInput, setInput,
+                            getPosition, addIncludeFile, getIncludeFiles,
+                            dropLatestIncludeFile)
+import Data.Maybe (fromMaybe)
+
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
+insertIncluded :: PandocMonad m
+               => FilePath
+               -> FilePath
+               -> LP m ()
+insertIncluded defaultExtension f' = do
+  let f = case takeExtension f' of
+                ".tex" -> f'
+                ".sty" -> f'
+                _      -> addExtension f' defaultExtension
+  pos <- getPosition
+  containers <- getIncludeFiles <$> getState
+  when (T.pack f `elem` containers) $
+    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
+  updateState $ addIncludeFile $ T.pack f
+  mbcontents <- readFileFromTexinputs f
+  contents <- case mbcontents of
+                   Just s -> return s
+                   Nothing -> do
+                     report $ CouldNotLoadIncludeFile (T.pack f) pos
+                     return ""
+  getInput >>= setInput . (tokenize f contents ++)
+  updateState dropLatestIncludeFile
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 7b8bca4af..3b37ee50e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -35,8 +35,8 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1, try)
-import Data.Char (isDigit)
+                            option, many1, try, lookAhead)
+import Data.Char (isDigit, isLetter)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -50,6 +50,15 @@ rawInlineOr name' fallback = do
      then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
      else fallback
 
+doxspace :: PandocMonad m => LP m Inlines
+doxspace =
+  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
+  where startsWithLetter (Tok _ Word t) =
+          case T.uncons t of
+               Just (c, _) | isLetter c -> True
+               _           -> False
+        startsWithLetter _ = False
+
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -280,6 +289,8 @@ charCommands = M.fromList
   , ("dothyp", lit ".\173")
   , ("colonhyp", lit ":\173")
   , ("hyp", lit "-")
+  -- xspace
+  , ("xspace", doxspace)
   ]
 
 biblatexInlineCommands :: PandocMonad m
-- 
cgit v1.2.3


From 92ea8a0cb63241dbc8f89e73a359ac5efca2ab87 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 19:07:16 -0800
Subject: Revert "Add T.P.Readers.LaTeX.Include."

This reverts commit b569b0226d4bd5e0699077089d54fb03d4394b7d.

Memory usage improvement in compilation wasn't very significant.
---
 pandoc.cabal                             |  1 -
 src/Text/Pandoc/Readers/LaTeX.hs         | 57 +++++++++++++++++++++++----
 src/Text/Pandoc/Readers/LaTeX/Include.hs | 66 --------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs  | 15 +-------
 4 files changed, 52 insertions(+), 87 deletions(-)
 delete mode 100644 src/Text/Pandoc/Readers/LaTeX/Include.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 11d34a19a..090c28287 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,7 +626,6 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Include,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index dd6c2a1fa..552411db8 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,8 +38,9 @@ import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      report, setResourcePath)
-import Text.Pandoc.Error (PandocError (PandocParsecError))
+                                      readFileFromDirs, report,
+                                      setResourcePath)
+import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
 import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
@@ -60,8 +61,6 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
-import Text.Pandoc.Readers.LaTeX.Include (insertIncluded,
-                                          readFileFromTexinputs)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
                                          accentCommands,
@@ -236,10 +235,19 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
+doxspace :: PandocMonad m => LP m Inlines
+doxspace =
+  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
+  where startsWithLetter (Tok _ Word t) =
+          case T.uncons t of
+               Just (c, _) | isLetter c -> True
+               _           -> False
+        startsWithLetter _ = False
+
 
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
-  fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+  Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
 
 doubleQuote :: PandocMonad m => LP m Inlines
 doubleQuote =
@@ -398,8 +406,8 @@ inlineCommands = M.unions
                   link (unescapeURL $ untokenize url) "" <$> tok)
     , ("includegraphics", do options <- option [] keyvals
                              src <- braced
-                             mkImage options . unescapeURL .
-                               removeDoubleQuotes $ untokenize src)
+                             mkImage options . unescapeURL . removeDoubleQuotes $
+                                 untokenize src)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
@@ -409,6 +417,8 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
+    -- xspace
+    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
@@ -688,6 +698,39 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
+insertIncluded :: PandocMonad m
+               => FilePath
+               -> FilePath
+               -> LP m ()
+insertIncluded defaultExtension f' = do
+  let f = case takeExtension f' of
+                ".tex" -> f'
+                ".sty" -> f'
+                _      -> addExtension f' defaultExtension
+  pos <- getPosition
+  containers <- getIncludeFiles <$> getState
+  when (T.pack f `elem` containers) $
+    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
+  updateState $ addIncludeFile $ T.pack f
+  mbcontents <- readFileFromTexinputs f
+  contents <- case mbcontents of
+                   Just s -> return s
+                   Nothing -> do
+                     report $ CouldNotLoadIncludeFile (T.pack f) pos
+                     return ""
+  getInput >>= setInput . (tokenize f contents ++)
+  updateState dropLatestIncludeFile
+
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Include.hs b/src/Text/Pandoc/Readers/LaTeX/Include.hs
deleted file mode 100644
index 618a89284..000000000
--- a/src/Text/Pandoc/Readers/LaTeX/Include.hs
+++ /dev/null
@@ -1,66 +0,0 @@
-{-# LANGUAGE OverloadedStrings     #-}
-{- |
-   Module      : Text.Pandoc.Readers.LaTeX.Include
-   Copyright   : Copyright (C) 2006-2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
--}
-module Text.Pandoc.Readers.LaTeX.Include
-  ( readFileFromTexinputs
-  , insertIncluded
-  )
-where
-
-import qualified Data.Map as M
-import Data.Text (Text)
-import qualified Data.Text as T
-import Text.Pandoc.Shared (splitTextBy)
-import System.FilePath (takeExtension, addExtension)
-import Control.Monad (when)
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Error (PandocError(PandocParseError))
-import Text.Pandoc.Logging (LogMessage(CouldNotLoadIncludeFile))
-import Text.Pandoc.Class (PandocMonad (..), readFileFromDirs, report)
-import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Parsing (updateState, getState, getInput, setInput,
-                            getPosition, addIncludeFile, getIncludeFiles,
-                            dropLatestIncludeFile)
-import Data.Maybe (fromMaybe)
-
-readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
-readFileFromTexinputs fp = do
-  fileContentsMap <- sFileContents <$> getState
-  case M.lookup (T.pack fp) fileContentsMap of
-    Just t -> return (Just t)
-    Nothing -> do
-      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
-               <$> lookupEnv "TEXINPUTS"
-      readFileFromDirs dirs fp
-
-insertIncluded :: PandocMonad m
-               => FilePath
-               -> FilePath
-               -> LP m ()
-insertIncluded defaultExtension f' = do
-  let f = case takeExtension f' of
-                ".tex" -> f'
-                ".sty" -> f'
-                _      -> addExtension f' defaultExtension
-  pos <- getPosition
-  containers <- getIncludeFiles <$> getState
-  when (T.pack f `elem` containers) $
-    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
-  updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromTexinputs f
-  contents <- case mbcontents of
-                   Just s -> return s
-                   Nothing -> do
-                     report $ CouldNotLoadIncludeFile (T.pack f) pos
-                     return ""
-  getInput >>= setInput . (tokenize f contents ++)
-  updateState dropLatestIncludeFile
-
-
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 3b37ee50e..7b8bca4af 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -35,8 +35,8 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1, try, lookAhead)
-import Data.Char (isDigit, isLetter)
+                            option, many1, try)
+import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -50,15 +50,6 @@ rawInlineOr name' fallback = do
      then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
      else fallback
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -289,8 +280,6 @@ charCommands = M.fromList
   , ("dothyp", lit ".\173")
   , ("colonhyp", lit ":\173")
   , ("hyp", lit "-")
-  -- xspace
-  , ("xspace", doxspace)
   ]
 
 biblatexInlineCommands :: PandocMonad m
-- 
cgit v1.2.3


From e461b7dd45f717f3317216c7d3207a1d24bf1c85 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 4 Mar 2021 16:01:40 -0800
Subject: Relax `--abbreviations` rules so that a period isn't required.

Partially addresses #7124.
---
 MANUAL.txt                          | 6 +++---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index 52b82a3d0..667a784e0 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -691,9 +691,9 @@ header when requesting a document from a URL:
     system default, use
     `pandoc --print-default-data-file=abbreviations`.  The only
     use pandoc makes of this list is in the Markdown reader.
-    Strings ending in a period that are found in this list will
-    be followed by a nonbreaking space, so that the period will
-    not produce sentence-ending space in formats like LaTeX.
+    Strings found in this list will be followed by a nonbreaking
+    space, and the period will not produce sentence-ending space
+    in formats like LaTeX.  The strings may not contain spaces.
 
 [`pandocfilters`]: https://github.com/jgm/pandocfilters
 [PHP]: https://github.com/vinai/pandocfilters-php
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index b1b99dfe5..34edbcc17 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1725,7 +1725,7 @@ str = do
   updateLastStrPos
   (do guardEnabled Ext_smart
       abbrevs <- getOption readerAbbreviations
-      if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
+      if result `Set.member` abbrevs
          then try (do ils <- whitespace
                       notFollowedBy (() <$ cite <|> () <$ note)
                       -- ?? lookAhead alphaNum
-- 
cgit v1.2.3


From 916ce4d51121e0529b938fda71f37e947882abe5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 4 Mar 2021 16:22:08 -0800
Subject: Revert "Relax `--abbreviations` rules so that a period isn't
 required."

This reverts commit e461b7dd45f717f3317216c7d3207a1d24bf1c85.

Ill-advised change.  This doesn't work because we parse
strings in chunks.
---
 MANUAL.txt                          | 6 +++---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index 667a784e0..52b82a3d0 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -691,9 +691,9 @@ header when requesting a document from a URL:
     system default, use
     `pandoc --print-default-data-file=abbreviations`.  The only
     use pandoc makes of this list is in the Markdown reader.
-    Strings found in this list will be followed by a nonbreaking
-    space, and the period will not produce sentence-ending space
-    in formats like LaTeX.  The strings may not contain spaces.
+    Strings ending in a period that are found in this list will
+    be followed by a nonbreaking space, so that the period will
+    not produce sentence-ending space in formats like LaTeX.
 
 [`pandocfilters`]: https://github.com/jgm/pandocfilters
 [PHP]: https://github.com/vinai/pandocfilters-php
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 34edbcc17..b1b99dfe5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1725,7 +1725,7 @@ str = do
   updateLastStrPos
   (do guardEnabled Ext_smart
       abbrevs <- getOption readerAbbreviations
-      if result `Set.member` abbrevs
+      if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
          then try (do ils <- whitespace
                       notFollowedBy (() <$ cite <|> () <$ note)
                       -- ?? lookAhead alphaNum
-- 
cgit v1.2.3


From 030209fc292bd51700189653fd38ae5b2a723ef1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 4 Mar 2021 16:25:13 -0800
Subject: Revert "Revert "Relax `--abbreviations` rules so that a period isn't
 required.

This reverts commit 916ce4d51121e0529b938fda71f37e947882abe5.

I was confused in thinking it wouldn't work.
---
 MANUAL.txt                          | 6 +++---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index 52b82a3d0..667a784e0 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -691,9 +691,9 @@ header when requesting a document from a URL:
     system default, use
     `pandoc --print-default-data-file=abbreviations`.  The only
     use pandoc makes of this list is in the Markdown reader.
-    Strings ending in a period that are found in this list will
-    be followed by a nonbreaking space, so that the period will
-    not produce sentence-ending space in formats like LaTeX.
+    Strings found in this list will be followed by a nonbreaking
+    space, and the period will not produce sentence-ending space
+    in formats like LaTeX.  The strings may not contain spaces.
 
 [`pandocfilters`]: https://github.com/jgm/pandocfilters
 [PHP]: https://github.com/vinai/pandocfilters-php
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index b1b99dfe5..34edbcc17 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1725,7 +1725,7 @@ str = do
   updateLastStrPos
   (do guardEnabled Ext_smart
       abbrevs <- getOption readerAbbreviations
-      if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
+      if result `Set.member` abbrevs
          then try (do ils <- whitespace
                       notFollowedBy (() <$ cite <|> () <$ note)
                       -- ?? lookAhead alphaNum
-- 
cgit v1.2.3


From c652dcc16b208954ab30a27b1c72a889b20ef2e9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 7 Mar 2021 13:21:45 -0800
Subject: LaTeX reader: support hyperref command.

Closes #7127.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 552411db8..ceac261d2 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -398,17 +398,20 @@ inlineCommands = M.unions
     , ("footnote", skipopts >> note <$> grouped block)
     , ("passthrough", tok) -- \passthrough macro used by latex writer
                            -- for listings
+    , ("includegraphics", do options <- option [] keyvals
+                             src <- braced
+                             mkImage options .
+                               unescapeURL .
+                               removeDoubleQuotes $ untokenize src)
+    -- hyperref
     , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
                     bracedUrl)
     , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
     , ("href", do url <- bracedUrl
                   sp
                   link (unescapeURL $ untokenize url) "" <$> tok)
-    , ("includegraphics", do options <- option [] keyvals
-                             src <- braced
-                             mkImage options . unescapeURL . removeDoubleQuotes $
-                                 untokenize src)
     , ("hyperlink", hyperlink)
+    , ("hyperref", hyperref)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
     , ("nohyphens", tok)
@@ -463,6 +466,12 @@ hyperlink = try $ do
   lab <- tok
   return $ link ("#" <> src) "" lab
 
+hyperref :: PandocMonad m => LP m Inlines
+hyperref = try $ do
+  url <- (("#" <>) . untokenize <$> try (sp *> bracketedToks <* sp))
+       <|> untokenize <$> (bracedUrl <* bracedUrl <* bracedUrl)
+  link url "" <$> tok
+
 hypertargetBlock :: PandocMonad m => LP m Blocks
 hypertargetBlock = try $ do
   ref <- untokenize <$> braced
-- 
cgit v1.2.3


From 5aa73bd0a2820a0c89b5990dbe53abfdd5ade32d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 7 Mar 2021 15:49:02 -0800
Subject: LaTeX reader: handle table cells containing `&` in `\verb`.

Closes #7129.
---
 src/Text/Pandoc/Readers/LaTeX/Table.hs |  7 ++++++-
 test/command/7129.md                   | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7129.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
index 2ea9caf58..7833da081 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -134,6 +134,11 @@ parseTableRow :: PandocMonad m
               -> LP m Row
 parseTableRow blocks inline envname prefsufs = do
   notFollowedBy (spaces *> end_ envname)
+  -- contexts that can contain & that is not colsep:
+  let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
+      canContainAmp (Tok _ (CtrlSeq "verb") _)  = True
+      canContainAmp (Tok _ (CtrlSeq "Verb") _)  = True
+      canContainAmp _       = False
   -- add prefixes and suffixes in token stream:
   let celltoks (pref, suff) = do
         prefpos <- getPosition
@@ -142,7 +147,7 @@ parseTableRow blocks inline envname prefsufs = do
                      ((lookAhead (controlSeq "parbox") >>
                        void blocks) -- #5711
                       <|>
-                      (lookAhead (controlSeq "begin") >> void inline)
+                      (lookAhead (satisfyTok canContainAmp) >> void inline)
                       <|>
                       (lookAhead (symbol '$') >> void inline))
                   <|>
diff --git a/test/command/7129.md b/test/command/7129.md
new file mode 100644
index 000000000..fef4ca2c3
--- /dev/null
+++ b/test/command/7129.md
@@ -0,0 +1,27 @@
+```
+% pandoc -f latex -t native
+\begin{tabular}{ll} \hline
+  FOO & BAR         \\ \hline
+  foo & \verb|b&r|  \\ \hline
+\end{tabular}
+^D
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignLeft,ColWidthDefault)
+ ,(AlignLeft,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "FOO"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "BAR"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "foo"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Code ("",[],[]) "b&r"]]]])]
+ (TableFoot ("",[],[])
+ [])]
+```
-- 
cgit v1.2.3


From 1c23e3a824bc4d850a908193443ad1915a3ebc61 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 9 Mar 2021 13:03:27 -0800
Subject: RST reader: fix logic for ending comments.

Previously comments sometimes got extended too far.  Closes #7134.
---
 src/Text/Pandoc/Readers/RST.hs |  3 ++-
 test/command/7134.md           | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7134.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 29f81b046..514e3b88d 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -613,8 +613,9 @@ comment = try $ do
   string ".."
   skipMany1 spaceChar <|> (() <$ lookAhead newline)
   -- notFollowedBy' directiveLabel -- comment comes after directive so unnec.
-  manyTill anyChar blanklines
+  _ <- anyLine
   optional indentedBlock
+  optional blanklines
   return mempty
 
 directiveLabel :: Monad m => RSTParser m Text
diff --git a/test/command/7134.md b/test/command/7134.md
new file mode 100644
index 000000000..c69ae4bd0
--- /dev/null
+++ b/test/command/7134.md
@@ -0,0 +1,16 @@
+```
+% pandoc -f rst -t native
+This is a paragraph.
+
+    This is a block quote.
+
+..
+
+    This should be a second block quote.
+^D
+[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph."]
+,BlockQuote
+ [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote."]]
+,BlockQuote
+ [Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "second",Space,Str "block",Space,Str "quote."]]]
+```
-- 
cgit v1.2.3


From 00e8d0678edac34af7a412642fbf7d85442b15aa Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 13 Mar 2021 14:28:30 +0100
Subject: Jira reader: mark divs created from panels with class "panel".

Closes: tarleb/jira-wiki-markup#2
---
 src/Text/Pandoc/Readers/Jira.hs | 4 ++--
 test/Tests/Readers/Jira.hs      | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 5a5d0ee1e..89aecbf56 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -71,10 +71,10 @@ toPandocCodeBlocks langMay params txt =
                   Nothing                   -> []
   in codeBlockWith ("", classes, map paramToPair params) txt
 
--- | Create a pandoc @'Div'@
+-- | Create a pandoc @'Div'@ from a panel.
 toPandocDiv :: [Jira.Parameter] -> [Jira.Block] -> Blocks
 toPandocDiv params =
-  divWith ("", [], map paramToPair params) . foldMap jiraToPandocBlocks
+  divWith ("", ["panel"], map paramToPair params) . foldMap jiraToPandocBlocks
 
 paramToPair :: Jira.Parameter -> (Text, Text)
 paramToPair (Jira.Parameter key value) = (key, value)
diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs
index 189b504f7..cb7dde4ea 100644
--- a/test/Tests/Readers/Jira.hs
+++ b/test/Tests/Readers/Jira.hs
@@ -96,6 +96,12 @@ tests =
       simpleTable [para "Name"] [[para "Test"]]
     ]
 
+  , testGroup "panel"
+    [ "simple panel" =:
+      "{panel}\nInterviewer: Jane Doe{panel}\n" =?>
+      divWith ("", ["panel"], []) (para "Interviewer: Jane Doe")
+    ]
+
   , testGroup "inlines"
     [ "emphasis" =:
       "*quid pro quo*" =?>
-- 
cgit v1.2.3


From 35b66a76718205c303f416bf0afc01c098e8a171 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Mar 2021 12:50:44 -0800
Subject: MediaWiki reader: Allow block-level content in notes (ref).

Closes #7145.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 10 +++++++++-
 test/command/7145.md                 | 12 ++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7145.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index cdb746c67..9f4d5e170 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -112,12 +112,14 @@ newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
 isBlockTag' :: Tag Text -> Bool
 isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
   t `notElem` eitherBlockOrInline
+isBlockTag' (TagClose "ref") = True -- needed so 'special' doesn't parse it
 isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
   t `notElem` eitherBlockOrInline
 isBlockTag' tag = isBlockTag tag
 
 isInlineTag' :: Tag Text -> Bool
 isInlineTag' (TagComment _) = True
+isInlineTag' (TagClose "ref") = False -- see below inlineTag
 isInlineTag' t              = not (isBlockTag' t)
 
 eitherBlockOrInline :: [Text]
@@ -554,11 +556,17 @@ variable = try $ do
   contents <- manyTillChar anyChar (try $ string "}}}")
   return $ "{{{" <> contents <> "}}}"
 
+singleParaToPlain :: Blocks -> Blocks
+singleParaToPlain bs =
+  case B.toList bs of
+    [Para ils] -> B.fromList [Plain ils]
+    _ -> bs
+
 inlineTag :: PandocMonad m => MWParser m Inlines
 inlineTag = do
   (tag, _) <- lookAhead $ htmlTag isInlineTag'
   case tag of
-       TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
+       TagOpen "ref" _ -> B.note . singleParaToPlain <$> blocksInTags "ref"
        TagOpen "nowiki" _ -> try $ do
           (_,raw) <- htmlTag (~== tag)
           if T.any (== '/') raw
diff --git a/test/command/7145.md b/test/command/7145.md
new file mode 100644
index 000000000..a04345890
--- /dev/null
+++ b/test/command/7145.md
@@ -0,0 +1,12 @@
+```
+% pandoc -f mediawiki -t native
+Maecenas at sapien tempor, pretium turpis ut, imperdiet augue.<ref>This is a multiline
+
+reference
+<i>with</i>
+empty
+
+linebreaks</ref>  Nulla ut massa eget ex venenatis lobortis id in eros.
+^D
+[Para [Str "Maecenas",Space,Str "at",Space,Str "sapien",Space,Str "tempor,",Space,Str "pretium",Space,Str "turpis",Space,Str "ut,",Space,Str "imperdiet",Space,Str "augue.",Note [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "multiline"],Para [Str "reference",SoftBreak,RawInline (Format "html") "<i>",Str "with",RawInline (Format "html") "</i>",SoftBreak,Str "empty"],Para [Str "linebreaks"]],Space,Str "Nulla",Space,Str "ut",Space,Str "massa",Space,Str "eget",Space,Str "ex",Space,Str "venenatis",Space,Str "lobortis",Space,Str "id",Space,Str "in",Space,Str "eros."]]
+```
-- 
cgit v1.2.3


From 24191a2a278c0dec30bacd66b78cbb8cc8d91324 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 15 Mar 2021 10:37:35 -0700
Subject: Use foldl' instead of foldl everywhere.

---
 src/Text/Pandoc/App/CommandLineOptions.hs           | 4 ++--
 src/Text/Pandoc/Citeproc/Locator.hs                 | 3 ++-
 src/Text/Pandoc/Class/PandocMonad.hs                | 3 ++-
 src/Text/Pandoc/Extensions.hs                       | 3 ++-
 src/Text/Pandoc/Lua/Filter.hs                       | 3 ++-
 src/Text/Pandoc/Readers/Docx/Combine.hs             | 4 ++--
 src/Text/Pandoc/Readers/HTML/TagCategories.hs       | 1 +
 src/Text/Pandoc/Readers/Markdown.hs                 | 6 +++---
 src/Text/Pandoc/Readers/Odt/Arrows/State.hs         | 4 ++--
 src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs | 4 ++--
 src/Text/Pandoc/Readers/Odt/StyleReader.hs          | 4 ++--
 src/Text/Pandoc/Readers/Textile.hs                  | 4 ++--
 src/Text/Pandoc/Shared.hs                           | 4 ++--
 src/Text/Pandoc/Writers/HTML.hs                     | 8 ++++----
 src/Text/Pandoc/Writers/RST.hs                      | 4 ++--
 src/Text/Pandoc/Writers/Texinfo.hs                  | 4 ++--
 16 files changed, 34 insertions(+), 29 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index b4483f756..a6df12715 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -31,7 +31,7 @@ import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder,
          defConfig, Indent(..), NumberFormat(..))
 import Data.Bifunctor (second)
 import Data.Char (toLower)
-import Data.List (intercalate, sort)
+import Data.List (intercalate, sort, foldl')
 #ifdef _WINDOWS
 #if MIN_VERSION_base(4,12,0)
 import Data.List (isPrefixOf)
@@ -93,7 +93,7 @@ parseOptionsFromArgs options' defaults prg rawArgs = do
         ("Try " ++ prg ++ " --help for more information.")
 
   -- thread option data structure through all supplied option actions
-  opts <- foldl (>>=) (return defaults) actions
+  opts <- foldl' (>>=) (return defaults) actions
   let mbArgs = case args of
                  [] -> Nothing
                  xs -> Just xs
diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
index dba762c02..44416ca12 100644
--- a/src/Text/Pandoc/Citeproc/Locator.hs
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -7,6 +7,7 @@ where
 import Citeproc.Types
 import Data.Text (Text)
 import qualified Data.Text as T
+import Data.List (foldl')
 import Text.Parsec
 import Text.Pandoc.Definition
 import Text.Pandoc.Parsing (romanNumeral)
@@ -139,7 +140,7 @@ pBalancedBraces braces p = try $ do
   where
       except = notFollowedBy pBraces >> p
       -- outer and inner
-      surround = foldl (\a (open, close) -> sur open close except <|> a)
+      surround = foldl' (\a (open, close) -> sur open close except <|> a)
                        except
                        braces
 
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 86c8de79e..293a822a0 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -59,6 +59,7 @@ import Control.Monad.Except (MonadError (catchError, throwError),
                              MonadTrans, lift, when)
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Data.Maybe (fromMaybe)
+import Data.List (foldl')
 import Data.Time (UTCTime)
 import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds,
                              posixSecondsToUTCTime)
@@ -612,7 +613,7 @@ checkExistence fn = do
 -- | Canonicalizes a file path by removing redundant @.@ and @..@.
 makeCanonical :: FilePath -> FilePath
 makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
- where  transformPathParts = reverse . foldl go []
+ where  transformPathParts = reverse . foldl' go []
         go as     "."  = as
         go (_:as) ".." = as
         go as     x    = x : as
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 3b96f9e04..266a09e3c 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -34,6 +34,7 @@ module Text.Pandoc.Extensions ( Extension(..)
 where
 import Data.Bits (clearBit, setBit, testBit, (.|.))
 import Data.Data (Data)
+import Data.List (foldl')
 import qualified Data.Text as T
 import Data.Typeable (Typeable)
 import GHC.Generics (Generic)
@@ -593,7 +594,7 @@ parseFormatSpec :: T.Text
 parseFormatSpec = parse formatSpec ""
   where formatSpec = do
           name <- formatName
-          (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$>
+          (extsToEnable, extsToDisable) <- foldl' (flip ($)) ([],[]) <$>
                                              many extMod
           return (T.pack name, reverse extsToEnable, reverse extsToDisable)
         formatName = many1 $ noneOf "-+"
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index bffe01a34..90967f295 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -22,6 +22,7 @@ import Control.Monad.Catch (finally, try)
 import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
                   showConstr, toConstr, tyconUQname)
 import Data.Foldable (foldrM)
+import Data.List (foldl')
 import Data.Map (Map)
 import Data.Maybe (fromMaybe)
 import Foreign.Lua (Lua, Peekable, Pushable, StackIndex)
@@ -204,7 +205,7 @@ walkMeta lf (Pandoc m bs) = do
 
 walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc
 walkPandoc (LuaFilter fnMap) =
-  case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
+  case foldl' mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
     Just fn -> \x -> runFilterFunction fn x *> singleElement x
     Nothing -> return
 
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index bcf26c4a3..7c6d01769 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -182,7 +182,7 @@ isAttrModifier _                  = False
 
 smushInlines :: [Inlines] -> Inlines
 smushInlines xs = combineInlines xs' mempty
-  where xs' = foldl combineInlines mempty xs
+  where xs' = foldl' combineInlines mempty xs
 
 smushBlocks :: [Blocks] -> Blocks
-smushBlocks xs = foldl combineBlocks mempty xs
+smushBlocks xs = foldl' combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index b7bd40fee..67aba1cb1 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -23,6 +23,7 @@ where
 
 import Data.Set (Set, fromList, unions)
 import Data.Text (Text)
+import Data.List (foldl')
 
 eitherBlockOrInline :: Set Text
 eitherBlockOrInline = fromList
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 34edbcc17..dc94fc2d6 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -21,7 +21,7 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
-import Data.List (transpose, elemIndex, sortOn)
+import Data.List (transpose, elemIndex, sortOn, foldl')
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
@@ -357,7 +357,7 @@ referenceKey = try $ do
   addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
                           >> many (try $ spnl >> keyValAttr)
   blanklines
-  let attr'  = extractIdClass $ foldl (\x f -> f x) attr addKvs
+  let attr'  = extractIdClass $ foldl' (\x f -> f x) attr addKvs
       target = (escapeURI $ trimr src, tit)
   st <- getState
   let oldkeys = stateKeys st
@@ -613,7 +613,7 @@ attributes = try $ do
   spnl
   attrs <- many (attribute <* spnl)
   char '}'
-  return $ foldl (\x f -> f x) nullAttr attrs
+  return $ foldl' (\x f -> f x) nullAttr attrs
 
 attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
 attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index 93c6b5e79..96515bf56 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where
 import Control.Arrow
 import qualified Control.Category as Cat
 import Control.Monad
-
+import Data.List (foldl')
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 import Text.Pandoc.Readers.Odt.Generic.Fallible
 
@@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
 iterateSL :: (Foldable f, MonadPlus m)
           => ArrowState s    x     y
           -> ArrowState s (f x) (m y)
-iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f
   where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x)
 
 
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 0d921e23b..341903046 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -64,12 +64,12 @@ import qualified Data.Map             as M
 import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
+import           Data.List (foldl')
 
 import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
-
 import           Text.Pandoc.Readers.Odt.Generic.Namespaces
 import           Text.Pandoc.Readers.Odt.Generic.Utils
 import           Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -293,7 +293,7 @@ readNSattributes         = fromState $ \state -> maybe (state, failEmpty     )
                          => XMLConverterState nsID extraState
                          -> Maybe (XMLConverterState nsID extraState)
     extractNSAttrs startState
-                         = foldl (\state d -> state >>= addNS d)
+                         = foldl' (\state d -> state >>= addNS d)
                                  (Just startState)
                                  nsAttribs
       where nsAttribs    = mapMaybe readNSattr (XML.elAttribs element)
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 5e10f896c..b722aa07d 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -44,7 +44,7 @@ import Control.Arrow
 
 import Data.Default
 import qualified Data.Foldable as F
-import Data.List (unfoldr)
+import Data.List (unfoldr, foldl')
 import qualified Data.Map as M
 import Data.Maybe
 import Data.Text (Text)
@@ -120,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" (
                               &&&
                               lookupDefaultingAttr NsStyle "font-pitch"
                             ))
-                    >>?^ ( M.fromList . foldl accumLegalPitches [] )
+                    >>?^ ( M.fromList . foldl' accumLegalPitches [] )
                   ) `ifFailedDo` returnV (Right M.empty)
   where accumLegalPitches ls (Nothing,_) = ls
         accumLegalPitches ls (Just n,p)  = (n,p):ls
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 860da2dc3..99238c7f0 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -38,7 +38,7 @@ module Text.Pandoc.Readers.Textile ( readTextile) where
 import Control.Monad (guard, liftM)
 import Control.Monad.Except (throwError)
 import Data.Char (digitToInt, isUpper)
-import Data.List (intersperse, transpose)
+import Data.List (intersperse, transpose, foldl')
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -627,7 +627,7 @@ code2 = do
 
 -- | Html / CSS attributes
 attributes :: PandocMonad m => ParserT Text ParserState m Attr
-attributes = foldl (flip ($)) ("",[],[]) <$>
+attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d11ad13f5..0ce9396b3 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -109,7 +109,7 @@ import qualified Data.Bifunctor as Bifunctor
 import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                   generalCategory, GeneralCategory(NonSpacingMark,
                   SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
-import Data.List (find, intercalate, intersperse, sortOn)
+import Data.List (find, intercalate, intersperse, sortOn, foldl')
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Monoid (Any (..))
@@ -840,7 +840,7 @@ mapLeft = Bifunctor.first
 -- > collapseFilePath "parent/foo/.." ==  "parent"
 -- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
 collapseFilePath :: FilePath -> FilePath
-collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
+collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories
   where
     go rs "." = rs
     go r@(p:rs) ".." = case p of
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 2f33cd467..332de1545 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -30,7 +30,7 @@ module Text.Pandoc.Writers.HTML (
   ) where
 import Control.Monad.State.Strict
 import Data.Char (ord)
-import Data.List (intercalate, intersperse, partition, delete, (\\))
+import Data.List (intercalate, intersperse, partition, delete, (\\), foldl')
 import Data.List.NonEmpty (NonEmpty((:|)))
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import qualified Data.Set as Set
@@ -544,7 +544,7 @@ tagWithAttributes opts html5 selfClosing tagname attr =
 
 addAttrs :: PandocMonad m
          => WriterOptions -> Attr -> Html -> StateT WriterState m Html
-addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr
+addAttrs opts attr h = foldl' (!) h <$> attrsToHtml opts attr
 
 toAttrs :: PandocMonad m
         => [(Text, Text)] -> StateT WriterState m [Attribute]
@@ -926,7 +926,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
                                    numstyle']
                    else [])
   l <- ordList opts contents
-  return $ foldl (!) l attribs
+  return $ foldl' (!) l attribs
 blockToHtml opts (DefinitionList lst) = do
   contents <- mapM (\(term, defs) ->
                   do term' <- liftM H.dt $ inlineListToHtml opts term
@@ -1407,7 +1407,7 @@ inlineToHtml opts inline = do
                               Just "audio" -> mediaTag H5.audio "Audio"
                               Just _       -> (H5.embed, [])
                               _            -> imageTag
-                        return $ foldl (!) tag $ attributes ++ specAttrs
+                        return $ foldl' (!) tag $ attributes ++ specAttrs
                         -- note:  null title included, as in Markdown.pl
     (Note contents) -> do
                         notes <- gets stNotes
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index d01e13db4..54d042332 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -16,7 +16,7 @@ reStructuredText:  <http://docutils.sourceforge.net/rst.html>
 module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
 import Control.Monad.State.Strict
 import Data.Char (isSpace)
-import Data.List (transpose, intersperse)
+import Data.List (transpose, intersperse, foldl')
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -509,7 +509,7 @@ flatten outer
   | null contents = [outer]
   | otherwise     = combineAll contents
   where contents = dropInlineParent outer
-        combineAll = foldl combine []
+        combineAll = foldl' combine []
 
         combine :: [Inline] -> Inline -> [Inline]
         combine f i =
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 53da70f84..9d695563f 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -15,7 +15,7 @@ module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
 import Data.Char (chr, ord)
-import Data.List (maximumBy, transpose)
+import Data.List (maximumBy, transpose, foldl')
 import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
@@ -271,7 +271,7 @@ tableAnyRowToTexinfo :: PandocMonad m
                      -> [[Block]]
                      -> TI m (Doc Text)
 tableAnyRowToTexinfo itemtype aligns cols =
-  (literal itemtype $$) . foldl (\row item -> row $$
+  (literal itemtype $$) . foldl' (\row item -> row $$
   (if isEmpty row then empty else text " @tab ") <> item) empty <$> zipWithM alignedBlock aligns cols
 
 alignedBlock :: PandocMonad m
-- 
cgit v1.2.3


From 805d12ac9cc91913cbb6275e8ef94f49985df5e1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 15 Mar 2021 14:21:52 -0700
Subject: Remove an unneeded import

---
 src/Text/Pandoc/Readers/HTML/TagCategories.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index 67aba1cb1..b7bd40fee 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -23,7 +23,6 @@ where
 
 import Data.Set (Set, fromList, unions)
 import Data.Text (Text)
-import Data.List (foldl')
 
 eitherBlockOrInline :: Set Text
 eitherBlockOrInline = fromList
-- 
cgit v1.2.3


From 7bf4be04b0c04ccfd02a2a907071b8747380904f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Mar 2021 09:10:44 -0700
Subject: Fix regression with `tex_math_backslash` in Markdown reader.

Added regression test.  Closes #7155.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index dc94fc2d6..c836a896b 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1503,7 +1503,7 @@ inline = do
      '$'     -> math
      '~'     -> strikeout <|> subscript
      '<'     -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
-     '\\'    -> escapedNewline <|> escapedChar <|> rawLaTeXInline'
+     '\\'    -> math <|> escapedNewline <|> escapedChar <|> rawLaTeXInline'
      '@'     -> cite <|> exampleRef
      '"'     -> smart
      '\''    -> smart
-- 
cgit v1.2.3


From fd76e605cde2639d6bb1b6891dd4b097a6eded01 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 21:02:05 -0700
Subject: T.P.Readers.Odt.StyleReader: rewrite foldr1 use as foldr.

This avoids a partial function.
---
 src/Text/Pandoc/Readers/Odt/StyleReader.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index b722aa07d..ca791ad1e 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -563,12 +563,13 @@ readListLevelStyle levelType =      readAttr NsText "level"
 
 --
 chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
-chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
-                                    | otherwise    = Just ( F.foldr1 select ls )
+chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls
   where
-   select ( ListLevelStyle       t1            p1          s1          f1          b1 )
-          ( ListLevelStyle       t2            p2          s2          f2          _ )
-        =   ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
+   select l Nothing = Just l
+   select ( ListLevelStyle t1 p1 s1 f1 b1 )
+          ( Just ( ListLevelStyle t2 p2 s2 f2 _ ))
+        =   Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2)
+                                  (selectLinf f1 f2) b1
    select' LltNumbered _           = LltNumbered
    select' _           LltNumbered = LltNumbered
    select' _           _           = LltBullet
-- 
cgit v1.2.3


From 67e173bda116ba9793dd625f782a12ab8e35839b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 21:10:22 -0700
Subject: Remove another foldr1 partial function use.

---
 src/Text/Pandoc/Readers/Txt2Tags.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 08083b177..0ce8e286f 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -261,7 +261,7 @@ table = try $ do
   rows <- many1 (many commentLine *> tableRow)
   let columns = transpose rows
   let ncolumns = length columns
-  let aligns = map (foldr1 findAlign . map fst) columns
+  let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns
   let rows' = map (map snd) rows
   let size = maximum (map length rows')
   let rowsPadded = map (pad size) rows'
@@ -278,10 +278,11 @@ pad :: (Monoid a) => Int -> [a] -> [a]
 pad n xs = xs ++ replicate (n - length xs) mempty
 
 
-findAlign :: Alignment -> Alignment -> Alignment
-findAlign x y
-  | x == y = x
-  | otherwise = AlignDefault
+findAlign :: (Alignment, a) -> Maybe Alignment -> Maybe Alignment
+findAlign (x,_) (Just y)
+  | x == y = Just x
+  | otherwise = Just AlignDefault
+findAlign (x,_) Nothing = Just x
 
 headerRow :: T2T [(Alignment, Blocks)]
 headerRow = genericRow (string "||")
-- 
cgit v1.2.3


From 1da62083156cf493771274164be67a2bead196dc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 21:30:59 -0700
Subject: Rewrite a foldl1 as a foldl'.

---
 src/Text/Pandoc/Readers/HTML.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b73c138ab..c3e68afd8 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -32,6 +32,7 @@ import Data.Char (isAlphaNum, isLetter)
 import Data.Default (Default (..), def)
 import Data.Foldable (for_)
 import Data.List.Split (splitWhen)
+import Data.List (foldl')
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Monoid (First (..))
@@ -360,7 +361,10 @@ pDefListItem = try $ do
   terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
   defs  <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
   skipMany nonItem
-  let term = foldl1 (\x y -> x <> B.linebreak <> y) $ map trimInlines terms
+  let term = foldl' (\x y -> if null x
+                                then trimInlines y
+                                else x <> B.linebreak <> trimInlines y)
+                    mempty terms
   return (term, map (fixPlains True) defs)
 
 fixPlains :: Bool -> Blocks -> Blocks
-- 
cgit v1.2.3


From f0e4b9cc3cde01d64e25d331c5b4f3d62d2129b5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 21:37:56 -0700
Subject: Require safe >= 0.3.18 and remove cpp.

---
 pandoc.cabal                     | 2 +-
 src/Text/Pandoc/Readers/LaTeX.hs | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 5936d113f..e46518e96 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -477,7 +477,7 @@ library
                  parsec                >= 3.1      && < 3.2,
                  process               >= 1.2.3    && < 1.7,
                  random                >= 1        && < 1.3,
-                 safe                  >= 0.3      && < 0.4,
+                 safe                  >= 0.3.18   && < 0.4,
                  scientific            >= 0.3      && < 0.4,
                  skylighting           >= 0.10.4.1 && < 0.11,
                  skylighting-core      >= 0.10.4.1 && < 0.11,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index ceac261d2..21a563bed 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE CPP                   #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE PatternGuards         #-}
 {-# LANGUAGE ScopedTypeVariables   #-}
@@ -97,11 +96,7 @@ parseLaTeX = do
   let doc' = doc bs
   let headerLevel (Header n _ _) = [n]
       headerLevel _              = []
-#if MIN_VERSION_safe(0,3,18)
   let bottomLevel = minimumBound 1 $ query headerLevel doc'
-#else
-  let bottomLevel = minimumDef 1 $ query headerLevel doc'
-#endif
   let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
       adjustHeaders _ x                   = x
   let (Pandoc _ bs') =
-- 
cgit v1.2.3


From 3428248debb14066e7fb1ef216927d3d62e3a43a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 23:01:12 -0700
Subject: Use minimumDef instead of minimum (partial function).

---
 src/Text/Pandoc/Readers/LaTeX.hs             | 2 +-
 src/Text/Pandoc/Readers/Org/Blocks.hs        | 4 ++--
 src/Text/Pandoc/Readers/Textile.hs           | 4 +++-
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 7 ++-----
 4 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 21a563bed..32f7fc5af 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -96,7 +96,7 @@ parseLaTeX = do
   let doc' = doc bs
   let headerLevel (Header n _ _) = [n]
       headerLevel _              = []
-  let bottomLevel = minimumBound 1 $ query headerLevel doc'
+  let bottomLevel = minimumDef 1 $ query headerLevel doc'
   let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
       adjustHeaders _ x                   = x
   let (Pandoc _ bs') =
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index d1aff701e..c6f27118b 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -39,6 +39,7 @@ import Data.Functor (($>))
 import Data.List (foldl', intersperse)
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Text (Text)
+import Safe (minimumDef)
 
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
@@ -542,8 +543,7 @@ include = try $ do
     in case (minlvl >>= safeRead :: Maybe Int) of
          Nothing -> blks
          Just lvl -> let levels = Walk.query headerLevel blks
-                         -- CAVE: partial function in else
-                         curMin = if null levels then 0 else minimum levels
+                         curMin = minimumDef 0 levels
                      in Walk.walk (shiftHeader (curMin - lvl)) blks
 
   headerLevel :: Block -> [Int]
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 99238c7f0..50ffd36ec 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,6 +53,7 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Safe (minimumDef)
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: PandocMonad m
@@ -376,7 +377,8 @@ table = try $ do
                                 (toprow, rest)
                              _ -> (mempty, rawrows)
   let nbOfCols = maximum $ map length (headers:rows)
-  let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
+  let aligns = map (minimumDef AlignDefault) $
+                transpose $ map (map (snd . fst)) (headers:rows)
   let toRow = Row nullAttr . map B.simpleCell
       toHeaderRow l = [toRow l | not (null l)]
   return $ B.table (B.simpleCaption $ B.plain caption)
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 5caeb0753..b26a7ff3e 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -50,6 +50,7 @@ import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
 import Text.Pandoc.Shared (tshow)
 import Skylighting (fromColor)
+import Safe (minimumDef)
 
 -- |The 'EMU' type is used to specify sizes in English Metric Units.
 type EMU = Integer
@@ -1427,11 +1428,7 @@ presentationToRels pres@(Presentation _ slides) = do
   -- all relWithoutSlide rels (unless they're 1)
   -- 3. If we have a notesmaster slide, we make space for that as well.
 
-  let minRelNotOne = case filter (1<) $ map relId relsWeKeep of
-        [] -> 0                 -- doesn't matter in this case, since
-                                -- there will be nothing to map the
-                                -- function over
-        l  -> minimum l
+  let minRelNotOne = minimumDef 0 $ filter (1 <) $ map relId relsWeKeep
 
       modifyRelNum :: Int -> Int
       modifyRelNum 1 = 1
-- 
cgit v1.2.3


From a31731b8e2825c5bc8d7fcc9a61ce92b9d28d040 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 10:11:08 -0700
Subject: Docx reader: Don't reimplement NonEmpty.

---
 src/Text/Pandoc/Readers/Docx.hs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 00de6a0cd..22dd54193 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -85,6 +85,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
+import Data.List.NonEmpty (nonEmpty)
 
 readDocx :: PandocMonad m
          => ReaderOptions
@@ -648,11 +649,6 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
   cells <- mapM rowToBlocksList rows
 
   let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
-      -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out
-      -- our own, see
-      -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155
-      nonEmpty [] = Nothing
-      nonEmpty l  = Just l
       rowLength :: Docx.Row -> Int
       rowLength (Docx.Row c) = length c
 
-- 
cgit v1.2.3


From 8d5116381b20442bb3fa58dac1ef7d44db618823 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 10:30:32 -0700
Subject: Use NonEmpty instead of minimumDef.

---
 src/Text/Pandoc/Readers/LaTeX.hs             | 4 ++--
 src/Text/Pandoc/Readers/Org/Blocks.hs        | 4 ++--
 src/Text/Pandoc/Readers/Textile.hs           | 4 ++--
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 5 +++--
 4 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 32f7fc5af..851756065 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -68,7 +68,7 @@ import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          listingsLanguage)
 import Text.Pandoc.Shared
 import Text.Pandoc.Walk
-import Safe
+import Data.List.NonEmpty (nonEmpty)
 
 -- for debugging:
 -- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -96,7 +96,7 @@ parseLaTeX = do
   let doc' = doc bs
   let headerLevel (Header n _ _) = [n]
       headerLevel _              = []
-  let bottomLevel = minimumDef 1 $ query headerLevel doc'
+  let bottomLevel = maybe 1 minimum $ nonEmpty $ query headerLevel doc'
   let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
       adjustHeaders _ x                   = x
   let (Pandoc _ bs') =
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index c6f27118b..883434cdc 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -39,7 +39,7 @@ import Data.Functor (($>))
 import Data.List (foldl', intersperse)
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Text (Text)
-import Safe (minimumDef)
+import Data.List.NonEmpty (nonEmpty)
 
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
@@ -543,7 +543,7 @@ include = try $ do
     in case (minlvl >>= safeRead :: Maybe Int) of
          Nothing -> blks
          Just lvl -> let levels = Walk.query headerLevel blks
-                         curMin = minimumDef 0 levels
+                         curMin = maybe 0 minimum $ nonEmpty levels
                      in Walk.walk (shiftHeader (curMin - lvl)) blks
 
   headerLevel :: Block -> [Int]
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 50ffd36ec..e26b902f1 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,7 +53,7 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared (crFilter, trim, tshow)
-import Safe (minimumDef)
+import Data.List.NonEmpty (nonEmpty)
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: PandocMonad m
@@ -377,7 +377,7 @@ table = try $ do
                                 (toprow, rest)
                              _ -> (mempty, rawrows)
   let nbOfCols = maximum $ map length (headers:rows)
-  let aligns = map (minimumDef AlignDefault) $
+  let aligns = map (maybe AlignDefault minimum . nonEmpty) $
                 transpose $ map (map (snd . fst)) (headers:rows)
   let toRow = Row nullAttr . map B.simpleCell
       toHeaderRow l = [toRow l | not (null l)]
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index b26a7ff3e..4dbf32c4e 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -50,7 +50,7 @@ import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
 import Text.Pandoc.Shared (tshow)
 import Skylighting (fromColor)
-import Safe (minimumDef)
+import Data.List.NonEmpty (nonEmpty)
 
 -- |The 'EMU' type is used to specify sizes in English Metric Units.
 type EMU = Integer
@@ -1428,7 +1428,8 @@ presentationToRels pres@(Presentation _ slides) = do
   -- all relWithoutSlide rels (unless they're 1)
   -- 3. If we have a notesmaster slide, we make space for that as well.
 
-  let minRelNotOne = minimumDef 0 $ filter (1 <) $ map relId relsWeKeep
+  let minRelNotOne = maybe 0 minimum $ nonEmpty
+                                     $ filter (1 <) $ map relId relsWeKeep
 
       modifyRelNum :: Int -> Int
       modifyRelNum 1 = 1
-- 
cgit v1.2.3


From 4002c35a9184ecc1c9a6553e9ee28e283cb1fd0a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 11:55:59 -0700
Subject: Protect partial uses of maximum with NonEmpty.

---
 src/Text/Pandoc/Readers/DocBook.hs                 |  6 ++--
 src/Text/Pandoc/Readers/HTML/Table.hs              |  3 +-
 src/Text/Pandoc/Readers/Haddock.hs                 |  3 +-
 src/Text/Pandoc/Readers/Markdown.hs                |  3 +-
 src/Text/Pandoc/Readers/Textile.hs                 |  4 +--
 src/Text/Pandoc/Readers/Txt2Tags.hs                | 22 +++++++-----
 src/Text/Pandoc/Writers/AsciiDoc.hs                |  3 +-
 src/Text/Pandoc/Writers/ConTeXt.hs                 |  6 ++--
 src/Text/Pandoc/Writers/DokuWiki.hs                |  4 ++-
 src/Text/Pandoc/Writers/Man.hs                     |  4 +--
 src/Text/Pandoc/Writers/Markdown.hs                | 37 ++++++++++---------
 src/Text/Pandoc/Writers/Markdown/Inline.hs         |  9 ++---
 src/Text/Pandoc/Writers/Ms.hs                      |  4 +--
 src/Text/Pandoc/Writers/Muse.hs                    |  9 +++--
 src/Text/Pandoc/Writers/Org.hs                     |  5 +--
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       | 42 +++++++++++-----------
 src/Text/Pandoc/Writers/Powerpoint/Presentation.hs |  5 ++-
 src/Text/Pandoc/Writers/RST.hs                     |  6 ++--
 src/Text/Pandoc/Writers/Shared.hs                  |  6 ++--
 src/Text/Pandoc/Writers/Texinfo.hs                 |  9 +++--
 src/Text/Pandoc/Writers/ZimWiki.hs                 |  4 ++-
 21 files changed, 108 insertions(+), 86 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index d38b07864..6f5bb0ad4 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -18,6 +18,7 @@ import Data.Either (rights)
 import Data.Foldable (asum)
 import Data.Generics
 import Data.List (intersperse,elemIndex)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe,mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -949,9 +950,8 @@ parseBlock (Elem e) =
                                                      (x >= '0' && x <= '9')
                                                       || x == '.') w
                             if n > 0 then Just n else Nothing
-                      let numrows = case bodyrows of
-                                         [] -> 0
-                                         xs -> maximum $ map length xs
+                      let numrows = maybe 0 maximum $ nonEmpty
+                                                    $ map length bodyrows
                       let aligns = case colspecs of
                                      [] -> replicate numrows AlignDefault
                                      cs -> map toAlignment cs
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 6179ea8e7..ad0b51253 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where
 
 import Control.Applicative ((<|>))
 import Data.Maybe (fromMaybe)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import Text.HTML.TagSoup
 import Text.Pandoc.Builder (Blocks)
@@ -216,7 +217,7 @@ normalize widths head' bodies foot = do
   let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot
   let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs
   let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells
-  let ncols = maximum (map rowLength rows)
+  let ncols = maybe 0 maximum $ nonEmpty $ map rowLength rows
   let tblType = tableType (map rowCells rows)
   -- fail on empty table
   if null rows
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 25d69f040..48454e353 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Readers.Haddock
 
 import Control.Monad.Except (throwError)
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text, unpack)
 import qualified Data.Text as T
@@ -92,7 +93,7 @@ docHToBlocks d' =
                   then ([], map toCells bodyRows)
                   else (toCells (head headerRows),
                         map toCells (tail headerRows ++ bodyRows))
-             colspecs = replicate (maximum (map length body))
+             colspecs = replicate (maybe 0 maximum (nonEmpty (map length body)))
                              (AlignDefault, ColWidthDefault)
          in  B.table B.emptyCaption
                      colspecs
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c836a896b..a86286b3a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -22,6 +22,7 @@ import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
 import Data.List (transpose, elemIndex, sortOn, foldl')
+import Data.List.NonEmpty (NonEmpty(..))
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
@@ -1364,7 +1365,7 @@ pipeTable = try $ do
   lines' <- many pipeTableRow
   let lines'' = map (take (length aligns) <$>) lines'
   let maxlength = maximum $
-       map (\x -> T.length . stringify $ runF x def) (heads' : lines'')
+       fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
   numColumns <- getOption readerColumns
   let widths = if maxlength > numColumns
                   then map (\len ->
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index e26b902f1..8d7900de4 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -39,6 +39,7 @@ import Control.Monad (guard, liftM)
 import Control.Monad.Except (throwError)
 import Data.Char (digitToInt, isUpper)
 import Data.List (intersperse, transpose, foldl')
+import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -53,7 +54,6 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared (crFilter, trim, tshow)
-import Data.List.NonEmpty (nonEmpty)
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: PandocMonad m
@@ -376,7 +376,7 @@ table = try $ do
                              (toprow:rest) | any (fst . fst) toprow ->
                                 (toprow, rest)
                              _ -> (mempty, rawrows)
-  let nbOfCols = maximum $ map length (headers:rows)
+  let nbOfCols = maximum $ fmap length (headers :| rows)
   let aligns = map (maybe AlignDefault minimum . nonEmpty) $
                 transpose $ map (map (snd . fst)) (headers:rows)
   let toRow = Row nullAttr . map B.simpleCell
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 0ce8e286f..f27a3fc2c 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
 {- |
    Module      : Text.Pandoc.Readers.Txt2Tags
    Copyright   : Copyright (C) 2014 Matthew Pickering
@@ -19,6 +20,7 @@ import Control.Monad.Except (catchError, throwError)
 import Control.Monad.Reader (Reader, asks, runReader)
 import Data.Default
 import Data.List (intercalate, transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -53,14 +55,16 @@ getT2TMeta = do
     inps <- P.getInputFiles
     outp <- fromMaybe "" <$> P.getOutputFile
     curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
-    let getModTime = fmap (formatTime defaultTimeLocale "%T") .
-                       P.getModificationTime
-    curMtime <- case inps of
-                  [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
-                  _ -> catchError
-                        (maximum <$> mapM getModTime inps)
-                        (const (return ""))
-    return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp
+    curMtime <- catchError
+                 ((nonEmpty <$> mapM P.getModificationTime inps) >>=
+                    \case
+                       Nothing ->
+                         formatTime defaultTimeLocale "%T" <$> P.getZonedTime
+                       Just ts -> return $
+                         formatTime defaultTimeLocale "%T" $ maximum ts)
+                (const (return ""))
+    return $ T2TMeta (T.pack curDate) (T.pack curMtime)
+                     (intercalate ", " inps) outp
 
 -- | Read Txt2Tags from an input string returning a Pandoc document
 readTxt2Tags :: PandocMonad m
@@ -263,7 +267,7 @@ table = try $ do
   let ncolumns = length columns
   let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns
   let rows' = map (map snd) rows
-  let size = maximum (map length rows')
+  let size = maybe 0 maximum $ nonEmpty $ map length rows'
   let rowsPadded = map (pad size) rows'
   let headerPadded = if null tableHeader then mempty else pad size tableHeader
   let toRow = Row nullAttr . map B.simpleCell
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index b4ef7c8b9..69e608ef9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -22,6 +22,7 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where
 import Control.Monad.State.Strict
 import Data.Char (isPunctuation, isSpace)
 import Data.List (intercalate, intersperse)
+import Data.List.NonEmpty (NonEmpty(..))
 import Data.Maybe (fromMaybe, isJust)
 import qualified Data.Set as Set
 import qualified Data.Text as T
@@ -274,7 +275,7 @@ blockToAsciiDoc opts block@(Table _ blkCapt specs thead tbody tfoot) = do
   let colwidth = if writerWrapText opts == WrapAuto
                     then writerColumns opts
                     else 100000
-  let maxwidth = maximum $ map offset (head':rows')
+  let maxwidth = maximum $ fmap offset (head' :| rows')
   let body = if maxwidth > colwidth then vsep rows' else vcat rows'
   let border = separator <> text "==="
   return $ 
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 4d44842e2..1c56388ed 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -16,6 +16,7 @@ module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
 import Control.Monad.State.Strict
 import Data.Char (ord, isDigit)
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -228,8 +229,9 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
                         Period       -> "stopper=."
                         OneParen     -> "stopper=)"
                         TwoParens    -> "left=(,stopper=)"
-    let width = maximum $ map T.length $ take (length contents)
-                          (orderedListMarkers (start, style', delim))
+    let width = maybe 0 maximum $ nonEmpty $ map T.length $
+                  take (length contents)
+                       (orderedListMarkers (start, style', delim))
     let width' = (toEnum width + 1) / 2
     let width'' = if width' > (1.5 :: Double)
                      then "width=" <> tshow width' <> "em"
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 7df47c912..602c70ebe 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -27,6 +27,7 @@ import Control.Monad.Reader (ReaderT, asks, local, runReaderT)
 import Control.Monad.State.Strict (StateT, evalStateT)
 import Data.Default (Default (..))
 import Data.List (transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -172,7 +173,8 @@ blockToDokuWiki opts (Table _ blkCapt specs thead tbody tfoot) = do
                  then return []
                  else zipWithM (tableItemToDokuWiki opts) aligns headers
   rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows
-  let widths = map (maximum . map T.length) $ transpose (headers':rows')
+  let widths = map (maybe 0 maximum . nonEmpty . map T.length)
+                   $ transpose (headers':rows')
   let padTo (width, al) s =
           case width - T.length s of
                x | x > 0 ->
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index edb70f53e..87b2d8d21 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -16,6 +16,7 @@ Conversion of 'Pandoc' documents to roff man page format.
 module Text.Pandoc.Writers.Man ( writeMan ) where
 import Control.Monad.State.Strict
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -175,8 +176,7 @@ blockToMan opts (BulletList items) = do
   return (vcat contents)
 blockToMan opts (OrderedList attribs items) = do
   let markers = take (length items) $ orderedListMarkers attribs
-  let indent = 1 +
-                     maximum (map T.length markers)
+  let indent = 1 + maybe 0 maximum (nonEmpty (map T.length markers))
   contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
               zip markers items
   return (vcat contents)
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 533bcc071..4d9f3d5b0 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -24,6 +24,7 @@ import Control.Monad.Reader
 import Control.Monad.State.Strict
 import Data.Default
 import Data.List (intersperse, sortOn, transpose)
+import Data.List.NonEmpty (nonEmpty, NonEmpty(..))
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, mapMaybe)
 import qualified Data.Set as Set
@@ -492,19 +493,20 @@ blockToMarkdown' opts (CodeBlock attribs str) = do
            | isEnabled Ext_fenced_code_blocks opts ->
           tildes <> attrs <> cr <> literal str <> cr <> tildes <> blankline
      _ -> nest (writerTabStop opts) (literal str) <> blankline
-   where endline c = literal $ case [T.length ln
-                                   | ln <- map trim (T.lines str)
-                                   , T.pack [c,c,c] `T.isPrefixOf` ln
-                                   , T.all (== c) ln] of
-                               [] -> T.replicate 3 $ T.singleton c
-                               xs -> T.replicate (maximum xs + 1) $ T.singleton c
-         backticks = endline '`'
-         tildes = endline '~'
-         attrs  = if isEnabled Ext_fenced_code_attributes opts
-                     then nowrap $ " " <> attrsToMarkdown attribs
-                     else case attribs of
-                                (_,cls:_,_) -> " " <> literal cls
-                                _             -> empty
+   where
+     endlineLen c = maybe 3 ((+1) . maximum) $ nonEmpty $
+                        [T.length ln
+                         | ln <- map trim (T.lines str)
+                         , T.pack [c,c,c] `T.isPrefixOf` ln
+                         , T.all (== c) ln]
+     endline c = literal $ T.replicate (endlineLen c) $ T.singleton c
+     backticks = endline '`'
+     tildes = endline '~'
+     attrs  = if isEnabled Ext_fenced_code_attributes opts
+                 then nowrap $ " " <> attrsToMarkdown attribs
+                 else case attribs of
+                            (_,cls:_,_) -> " " <> literal cls
+                            _             -> empty
 blockToMarkdown' opts (BlockQuote blocks) = do
   variant <- asks envVariant
   -- if we're writing literate haskell, put a space before the bird tracks
@@ -517,7 +519,7 @@ blockToMarkdown' opts (BlockQuote blocks) = do
   return $ prefixed leader contents <> blankline
 blockToMarkdown' opts t@(Table _ blkCapt specs thead tbody tfoot) = do
   let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
-  let numcols = maximum (length aligns : length widths :
+  let numcols = maximum (length aligns :| length widths :
                            map length (headers:rows))
   caption' <- inlineListToMarkdown opts caption
   let caption''
@@ -619,7 +621,8 @@ pipeTable headless aligns rawHeaders rawRows = do
       blockFor AlignCenter x y = cblock (x + 2) (sp <> y <> sp) <> lblock 0 empty
       blockFor AlignRight  x y = rblock (x + 2) (y <> sp) <> lblock 0 empty
       blockFor _           x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
-  let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows)
+  let widths = map (max 3 . maybe 3 maximum . nonEmpty . map offset) $
+                     transpose (rawHeaders : rawRows)
   let torow cs = nowrap $ literal "|" <>
                     hcat (intersperse (literal "|") $
                           zipWith3 blockFor aligns widths (map chomp cs))
@@ -653,11 +656,11 @@ pandocTable opts multiline headless aligns widths rawHeaders rawRows = do
   -- Number of characters per column necessary to output every cell
   -- without requiring a line break.
   -- The @+2@ is needed for specifying the alignment.
-  let numChars    = (+ 2) . maximum . map offset
+  let numChars    = (+ 2) . maybe 0 maximum . nonEmpty . map offset
   -- Number of characters per column necessary to output every cell
   -- without requiring a line break *inside a word*.
   -- The @+2@ is needed for specifying the alignment.
-  let minNumChars = (+ 2) . maximum . map minOffset
+  let minNumChars = (+ 2) . maybe 0 maximum . nonEmpty . map minOffset
   let columns = transpose (rawHeaders : rawRows)
   -- minimal column width without wrapping a single word
   let relWidth w col =
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index 19157701e..e35e1a0b9 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -17,6 +17,7 @@ import Control.Monad.Reader
 import Control.Monad.State.Strict
 import Data.Char (isAlphaNum, isDigit)
 import Data.List (find, intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
@@ -383,9 +384,7 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do
                    else "“" <> contents <> "”"
 inlineToMarkdown opts (Code attr str) = do
   let tickGroups = filter (T.any (== '`')) $ T.group str
-  let longest    = if null tickGroups
-                     then 0
-                     else maximum $ map T.length tickGroups
+  let longest    = maybe 0 maximum $ nonEmpty $ map T.length tickGroups
   let marker     = T.replicate (longest + 1) "`"
   let spacer     = if longest == 0 then "" else " "
   let attrs      = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
@@ -438,9 +437,7 @@ inlineToMarkdown opts (Math DisplayMath str) =
             (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
 inlineToMarkdown opts il@(RawInline f str) = do
   let tickGroups = filter (T.any (== '`')) $ T.group str
-  let numticks   = if null tickGroups
-                     then 1
-                     else 1 + maximum (map T.length tickGroups)
+  let numticks   = 1 + maybe 0 maximum (nonEmpty (map T.length tickGroups))
   variant <- asks envVariant
   let Format fmt = f
   let rawAttribInline = return $
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 48395c420..0ed7a8a64 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -23,6 +23,7 @@ module Text.Pandoc.Writers.Ms ( writeMs ) where
 import Control.Monad.State.Strict
 import Data.Char (isLower, isUpper, ord)
 import Data.List (intercalate, intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Map as Map
 import Data.Maybe (catMaybes)
 import Data.Text (Text)
@@ -274,8 +275,7 @@ blockToMs opts (BulletList items) = do
   return (vcat contents)
 blockToMs opts (OrderedList attribs items) = do
   let markers = take (length items) $ orderedListMarkers attribs
-  let indent = 2 +
-                     maximum (map T.length markers)
+  let indent = 2 + maybe 0 maximum (nonEmpty (map T.length markers))
   contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $
               zip markers items
   setFirstPara
diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs
index bf3265107..d5100f43f 100644
--- a/src/Text/Pandoc/Writers/Muse.hs
+++ b/src/Text/Pandoc/Writers/Muse.hs
@@ -31,6 +31,7 @@ import Control.Monad.State.Strict
 import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace)
 import Data.Default
 import Data.List (intersperse, transpose)
+import Data.List.NonEmpty (nonEmpty, NonEmpty(..))
 import qualified Data.Set as Set
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -158,7 +159,8 @@ simpleTable caption headers rows = do
   caption' <- inlineListToMuse caption
   headers' <- mapM blockListToMuse headers
   rows' <- mapM (mapM blockListToMuse) rows
-  let widthsInChars = maximum . map offset <$> transpose (headers' : rows')
+  let widthsInChars = maybe 0 maximum . nonEmpty . map offset <$>
+                       transpose (headers' : rows')
   let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks
         where sep' = lblock (T.length sep) $ literal sep
   let makeRow sep = hpipeBlocks sep . zipWith lblock widthsInChars
@@ -238,7 +240,7 @@ blockToMuse (DefinitionList items) = do
           label' <- local (\env -> env { envOneLine = True, envAfterSpace = True }) $ inlineListToMuse' label
           let ind = offset' label' -- using Text.DocLayout.offset results in round trip failures
           hang ind (nowrap label') . vcat <$> mapM descriptionToMuse defs
-          where offset' d = maximum (0: map T.length
+          where offset' d = maximum (0 :| map T.length
                                          (T.lines $ render Nothing d))
         descriptionToMuse :: PandocMonad m
                           => [Block]
@@ -269,7 +271,8 @@ blockToMuse (Table _ blkCapt specs thead tbody tfoot) =
     (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
     blocksToDoc opts blocks =
       local (\env -> env { envOptions = opts }) $ blockListToMuse blocks
-    numcols = maximum (length aligns : length widths : map length (headers:rows))
+    numcols = maximum
+              (length aligns :| length widths : map length (headers:rows))
     isSimple = onlySimpleTableCells (headers : rows) && all (== 0) widths
 blockToMuse (Div _ bs) = flatBlockListToMuse bs
 blockToMuse Null = return empty
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 29d58a161..bb645eaf9 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -19,6 +19,7 @@ module Text.Pandoc.Writers.Org (writeOrg) where
 import Control.Monad.State.Strict
 import Data.Char (isAlphaNum, isDigit)
 import Data.List (intersect, intersperse, partition, transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -163,7 +164,7 @@ blockToOrg (Table _ blkCapt specs thead tbody tfoot) =  do
                    else "#+caption: " <> caption''
   headers' <- mapM blockListToOrg headers
   rawRows <- mapM (mapM blockListToOrg) rows
-  let numChars = maximum . map offset
+  let numChars = maybe 0 maximum . nonEmpty . map offset
   -- FIXME: width is not being used.
   let widthsInChars =
        map numChars $ transpose (headers' : rawRows)
@@ -198,7 +199,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do
                     x         -> x
   let markers = take (length items) $ orderedListMarkers
                                       (start, Decimal, delim')
-  let maxMarkerLength = maximum $ map T.length markers
+  let maxMarkerLength = maybe 0 maximum . nonEmpty $ map T.length markers
   let markers' = map (\m -> let s = maxMarkerLength - T.length m
                             in  m <> T.replicate s " ") markers
   contents <- zipWithM orderedListItemToOrg markers' items
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 4dbf32c4e..0e515b3c2 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -475,15 +475,16 @@ registerLink link = do
   linkReg <- gets stLinkIds
   mediaReg <- gets stMediaIds
   hasSpeakerNotes <- curSlideHasSpeakerNotes
-  let maxLinkId = case M.lookup curSlideId linkReg of
-        Just mp -> case M.keys mp of
-          [] -> if hasSpeakerNotes then 2 else 1
-          ks -> maximum ks
-        Nothing -> if hasSpeakerNotes then 2 else 1
-      maxMediaId = case M.lookup curSlideId mediaReg of
-        Just [] -> if hasSpeakerNotes then 2 else 1
-        Just mInfos -> maximum $ map mInfoLocalId mInfos
-        Nothing -> if hasSpeakerNotes then 2 else 1
+  let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of
+        Just xs -> maximum xs
+        Nothing
+          | hasSpeakerNotes -> 2
+          | otherwise       -> 1
+      maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of
+        Just mInfos -> maximum $ fmap mInfoLocalId mInfos
+        Nothing
+          | hasSpeakerNotes -> 2
+          | otherwise       -> 1
       maxId = max maxLinkId maxMediaId
       slideLinks = case M.lookup curSlideId linkReg of
         Just mp -> M.insert (maxId + 1) link mp
@@ -498,20 +499,19 @@ registerMedia fp caption = do
   mediaReg <- gets stMediaIds
   globalIds <- gets stMediaGlobalIds
   hasSpeakerNotes <- curSlideHasSpeakerNotes
-  let maxLinkId = case M.lookup curSlideId linkReg of
-        Just mp -> case M.keys mp of
-          [] -> if hasSpeakerNotes then 2 else 1
-          ks -> maximum ks
-        Nothing -> if hasSpeakerNotes then 2 else 1
-      maxMediaId = case M.lookup curSlideId mediaReg of
-        Just [] -> if hasSpeakerNotes then 2 else 1
-        Just mInfos -> maximum $ map mInfoLocalId mInfos
-        Nothing -> if hasSpeakerNotes then 2 else 1
+  let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of
+          Just ks -> maximum ks
+          Nothing
+            | hasSpeakerNotes -> 2
+            | otherwise       -> 1
+      maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of
+          Just mInfos -> maximum $ fmap mInfoLocalId mInfos
+          Nothing
+            | hasSpeakerNotes -> 2
+            | otherwise       -> 1
       maxLocalId = max maxLinkId maxMediaId
 
-      maxGlobalId = case M.elems globalIds of
-        [] -> 0
-        ids -> maximum ids
+      maxGlobalId = maybe 0 maximum $ nonEmpty $ M.elems globalIds
 
   (imgBytes, mbMt) <- P.fetchItem $ T.pack fp
   let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ "." <> x))
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
index affec38aa..9246a93e9 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
@@ -45,6 +45,7 @@ module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation
 import Control.Monad.Reader
 import Control.Monad.State
 import Data.List (intercalate)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Default
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -363,9 +364,7 @@ inlineToParElems (Note blks) = do
     then return []
     else do
     notes <- gets stNoteIds
-    let maxNoteId = case M.keys notes of
-          [] -> 0
-          lst -> maximum lst
+    let maxNoteId = maybe 0 maximum $ nonEmpty $ M.keys notes
         curNoteId = maxNoteId + 1
     modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes }
     local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 54d042332..0b9fc8331 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
 import Control.Monad.State.Strict
 import Data.Char (isSpace)
 import Data.List (transpose, intersperse, foldl')
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -335,7 +336,7 @@ blockToRST (OrderedList (start, style', delim) items) = do
                    then replicate (length items) "#."
                    else take (length items) $ orderedListMarkers
                                               (start, style', delim)
-  let maxMarkerLength = maximum $ map T.length markers
+  let maxMarkerLength = maybe 0 maximum $ nonEmpty $ map T.length markers
   let markers' = map (\m -> let s = maxMarkerLength - T.length m
                             in  m <> T.replicate s " ") markers
   contents <- zipWithM orderedListItemToRST markers' items
@@ -761,8 +762,7 @@ simpleTable opts blocksToDoc headers rows = do
                    then return []
                    else fixEmpties <$> mapM (blocksToDoc opts) headers
   rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows
-  let numChars [] = 0
-      numChars xs = maximum . map offset $ xs
+  let numChars = maybe 0 maximum . nonEmpty . map offset
   let colWidths = map numChars $ transpose (headerDocs : rowDocs)
   let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths
   let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths)
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index fc3f8ff3a..91ecb310b 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -44,6 +44,7 @@ import Control.Monad (zipWithM)
 import Data.Aeson (ToJSON (..), encode)
 import Data.Char (chr, ord, isSpace)
 import Data.List (groupBy, intersperse, transpose, foldl')
+import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
 import Data.Text.Conversions (FromText(..))
 import qualified Data.Map as M
 import qualified Data.Text as T
@@ -224,7 +225,7 @@ gridTable :: (Monad m, HasChars a)
           -> m (Doc a)
 gridTable opts blocksToDoc headless aligns widths headers rows = do
   -- the number of columns will be used in case of even widths
-  let numcols = maximum (length aligns : length widths :
+  let numcols = maximum (length aligns :| length widths :
                            map length (headers:rows))
   let officialWidthsInChars widths' = map (
                         (\x -> if x < 1 then 1 else x) .
@@ -253,8 +254,7 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do
   let handleFullWidths widths' = do
         rawHeaders' <- mapM (blocksToDoc opts) headers
         rawRows' <- mapM (mapM (blocksToDoc opts)) rows
-        let numChars [] = 0
-            numChars xs = maximum . map offset $ xs
+        let numChars = maybe 0 maximum . nonEmpty . map offset
         let minWidthsInChars =
                 map numChars $ transpose (rawHeaders' : rawRows')
         let widthsInChars' = zipWith max
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 9d695563f..0146fdfd8 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -16,6 +16,7 @@ import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
 import Data.Char (chr, ord)
 import Data.List (maximumBy, transpose, foldl')
+import Data.List.NonEmpty (nonEmpty)
 import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
@@ -238,9 +239,13 @@ blockToTexinfo (Table _ blkCapt specs thead tbody tfoot) = do
   colDescriptors <-
     if all (== 0) widths
        then do -- use longest entry instead of column widths
-            cols <- mapM (mapM (liftM (T.unpack . render Nothing . hcat) . mapM blockToTexinfo)) $
+            cols <- mapM (mapM (fmap (T.unpack . render Nothing . hcat) .
+                           mapM blockToTexinfo)) $
                         transpose $ heads : rows
-            return $ concatMap ((\x -> "{"++x++"} ") .  maximumBy (comparing length)) cols
+            return $ concatMap
+                ((\x -> "{"++x++"} ") .
+                        maybe "" (maximumBy (comparing length)) . nonEmpty)
+                cols
        else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths
   let tableBody = text ("@multitable " ++ colDescriptors) $$
                   headers $$
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index 9e45f0417..fcf9e000d 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -20,6 +20,7 @@ import Control.Monad (zipWithM)
 import Control.Monad.State.Strict (StateT, evalStateT, gets, modify)
 import Data.Default (Default (..))
 import Data.List (transpose)
+import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Map as Map
 import Text.DocLayout (render, literal)
 import Data.Maybe (fromMaybe)
@@ -143,7 +144,8 @@ blockToZimWiki opts (Table _ blkCapt specs thead tbody tfoot) = do
                  then zipWithM (tableItemToZimWiki opts) aligns (head rows)
                  else mapM (inlineListToZimWiki opts . removeFormatting)headers  -- emphasis, links etc. are not allowed in table headers
   rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows
-  let widths = map (maximum . map T.length) $ transpose (headers':rows')
+  let widths = map (maybe 0 maximum . nonEmpty . map T.length) $
+                  transpose (headers':rows')
   let padTo (width, al) s =
           case width - T.length s of
                x | x > 0 ->
-- 
cgit v1.2.3


From 7678c48122bb9bb7041b75ddc33061b93cbcdbb2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 14:43:42 -0700
Subject: Hlint suggestion.

---
 src/Text/Pandoc/Readers/Txt2Tags.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index f27a3fc2c..d355a4b55 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -56,12 +56,13 @@ getT2TMeta = do
     outp <- fromMaybe "" <$> P.getOutputFile
     curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
     curMtime <- catchError
-                 ((nonEmpty <$> mapM P.getModificationTime inps) >>=
-                    \case
+                 (mapM P.getModificationTime inps >>=
+                   (\case
                        Nothing ->
                          formatTime defaultTimeLocale "%T" <$> P.getZonedTime
                        Just ts -> return $
                          formatTime defaultTimeLocale "%T" $ maximum ts)
+                    . nonEmpty)
                 (const (return ""))
     return $ T2TMeta (T.pack curDate) (T.pack curMtime)
                      (intercalate ", " inps) outp
-- 
cgit v1.2.3


From 4d041953f56b85d4db241cea11c764856ccbeebe Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 10:41:12 -0700
Subject: T.P.Readers.Metadata: made `yamlBsToMeta`, `yamlBsToRefs`
 polymorphic...

on the parser state, instead of requiring ParserState.
[API change]
---
 src/Text/Pandoc/Readers/Metadata.hs | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index 927291776..f4a27496f 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -31,10 +31,10 @@ import Text.Pandoc.Error
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Shared
 
-yamlBsToMeta :: PandocMonad m
-             => ParserT Text ParserState m (F MetaValue)
+yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
+             => ParserT Text st m (Future st MetaValue)
              -> BL.ByteString
-             -> ParserT Text ParserState m (F Meta)
+             -> ParserT Text st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -63,11 +63,11 @@ lookupYAML t (YAML.Mapping _ _ m) =
 lookupYAML _ _ = Nothing
 
 -- Returns filtered list of references.
-yamlBsToRefs :: PandocMonad m
-             => ParserT Text ParserState m (F MetaValue)
+yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
+             => ParserT Text st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
              -> BL.ByteString
-             -> ParserT Text ParserState m (F [MetaValue])
+             -> ParserT Text st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -104,10 +104,10 @@ nodeToKey (YAML.Scalar _ (YAML.SStr t))       = Just t
 nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
 nodeToKey _                                   = Nothing
 
-normalizeMetaValue :: PandocMonad m
-                   => ParserT Text ParserState m (F MetaValue)
+normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
+                   => ParserT Text st m (Future st MetaValue)
                    -> Text
-                   -> ParserT Text ParserState m (F MetaValue)
+                   -> ParserT Text st m (Future st MetaValue)
 normalizeMetaValue pMetaValue x =
    -- Note: a standard quoted or unquoted YAML value will
    -- not end in a newline, but a "block" set off with
@@ -129,10 +129,10 @@ checkBoolean t
   | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False
   | otherwise = Nothing
 
-yamlToMetaValue :: PandocMonad m
-                => ParserT Text ParserState m (F MetaValue)
+yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
+                => ParserT Text st m (Future st MetaValue)
                 -> YAML.Node YE.Pos
-                -> ParserT Text ParserState m (F MetaValue)
+                -> ParserT Text st m (Future st MetaValue)
 yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
   case x of
        YAML.SStr t       -> normalizeMetaValue pMetaValue t
@@ -152,10 +152,10 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
   fmap MetaMap <$> yamlMap pMetaValue o
 yamlToMetaValue _ _ = return $ return $ MetaString ""
 
-yamlMap :: PandocMonad m
-        => ParserT Text ParserState m (F MetaValue)
+yamlMap :: (PandocMonad m, HasLastStrPosition st)
+        => ParserT Text st m (Future st MetaValue)
         -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-        -> ParserT Text ParserState m (F (M.Map Text MetaValue))
+        -> ParserT Text st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
     kvs <- forM (M.toList o) $ \(key, v) -> do
              k <- maybe (throwError $ PandocParseError
-- 
cgit v1.2.3


From ce418667ae8a3e6e5bbf2523eef43edf4f803bcf Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 10:41:55 -0700
Subject: Text.Pandoc.Parsing: remove F type synonym.

Muse and Org were defining their own F anyway, with their
own state.  We therefore move this definition to the Markdown
reader.
---
 src/Text/Pandoc/App/Opt.hs             | 3 ++-
 src/Text/Pandoc/Parsing.hs             | 7 ++-----
 src/Text/Pandoc/Readers/Markdown.hs    | 2 ++
 src/Text/Pandoc/Readers/Muse.hs        | 2 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index e5aaec9c5..c72f63464 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -686,7 +686,8 @@ yamlToMeta (Mapping _ _ m) =
     either (fail . show) return $ runEverything (yamlMap pMetaString m)
   where
     pMetaString = pure . MetaString <$> P.manyChar P.anyChar
-    runEverything p = runPure (P.readWithM p def "")
+    runEverything p =
+      runPure (P.readWithM p (def :: P.ParserState) "")
       >>= fmap (Meta . flip P.runF def)
 yamlToMeta _ = return mempty
 
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 10a08d410..44e6af59e 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -112,7 +112,6 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              citeKey,
                              Parser,
                              ParserT,
-                             F,
                              Future(..),
                              runF,
                              askF,
@@ -229,8 +228,6 @@ type ParserT = ParsecT
 newtype Future s a = Future { runDelayed :: Reader s a }
   deriving (Monad, Applicative, Functor)
 
-type F = Future ParserState
-
 runF :: Future s a -> s -> a
 runF = runReader . runDelayed
 
@@ -1169,7 +1166,7 @@ data ParserState = ParserState
       stateInNote            :: Bool,          -- ^ True if parsing note contents
       stateNoteNumber        :: Int,           -- ^ Last note number for citations
       stateMeta              :: Meta,          -- ^ Document metadata
-      stateMeta'             :: F Meta,        -- ^ Document metadata
+      stateMeta'             :: Future ParserState Meta, -- ^ Document metadata
       stateCitations         :: M.Map Text Text, -- ^ RST-style citations
       stateHeaderTable       :: [HeaderType],  -- ^ Ordered list of header types used
       stateIdentifiers       :: Set.Set Text, -- ^ Header identifiers used
@@ -1348,7 +1345,7 @@ data QuoteContext
 
 type NoteTable = [(Text, Text)]
 
-type NoteTable' = M.Map Text (SourcePos, F Blocks)
+type NoteTable' = M.Map Text (SourcePos, Future ParserState Blocks)
 -- used in markdown reader
 
 newtype Key = Key Text deriving (Show, Read, Eq, Ord)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a86286b3a..7c557b5a7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -52,6 +52,8 @@ import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
 
 type MarkdownParser m = ParserT Text ParserState m
 
+type F = Future ParserState
+
 -- | Read markdown from an input string and return a Pandoc document.
 readMarkdown :: PandocMonad m
              => ReaderOptions -- ^ Reader options
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index b4eea9d3a..698bfd3d7 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -36,7 +36,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F)
+import Text.Pandoc.Parsing
 import Text.Pandoc.Shared (crFilter, trimr, tshow)
 
 -- | Read Muse from an input string and return a Pandoc document.
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index d33920d47..6ed24a602 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -106,8 +106,8 @@ module Text.Pandoc.Readers.Org.Parsing
 import Data.Text (Text)
 import Text.Pandoc.Readers.Org.ParserState
 
-import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
-                            parseFromString)
+import Text.Pandoc.Parsing hiding (anyLine, blanklines, newline,
+                                   parseFromString)
 import qualified Text.Pandoc.Parsing as P
 
 import Control.Monad (guard)
-- 
cgit v1.2.3


From bea86f394e6bc77d7441bb98f10a8b8ccfee04c9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 11:23:41 -0700
Subject: Markdown reader: export `yamlMetaBlock`.

[API change]

This will allow us to parse YAML metadata blocks in other
readers, potentially.
---
 src/Text/Pandoc/Readers/Markdown.hs | 40 +++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 7c557b5a7..8d68510c5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -15,6 +15,7 @@ Conversion of markdown-formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Markdown (
   readMarkdown,
+  yamlMetaBlock,
   yamlToMeta,
   yamlToRefs ) where
 
@@ -274,24 +275,29 @@ pandocTitleBlock = do
                      $ nullMeta
     updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
-yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-yamlMetaBlock = do
+yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
+              => ParserT Text st m (Future st Blocks)
+              -> ParserT Text st m (Future st Meta)
+yamlMetaBlock parser = try $ do
+  string "---"
+  blankline
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+  optional blanklines
+  yamlBsToMeta (fmap B.toMetaValue <$> parser)
+               $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+
+yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
+yamlMetaBlock' = do
   guardEnabled Ext_yaml_metadata_block
-  try $ do
-    string "---"
-    blankline
-    notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
-    rawYamlLines <- manyTill anyLine stopLine
-    -- by including --- and ..., we allow yaml blocks with just comments:
-    let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
-    optional blanklines
-    newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
-                $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-    -- Since `<>` is left-biased, existing values are not touched:
-    updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
-    return mempty
+  newMetaF <- yamlMetaBlock parseBlocks
+  -- Since `<>` is left-biased, existing values are not touched:
+  updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
+  return mempty
 
-stopLine :: PandocMonad m => MarkdownParser m ()
+stopLine :: PandocMonad m => ParserT Text st m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
 
 mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
@@ -456,7 +462,7 @@ block :: PandocMonad m => MarkdownParser m (F Blocks)
 block = do
   res <- choice [ mempty <$ blanklines
                , codeBlockFenced
-               , yamlMetaBlock
+               , yamlMetaBlock'
                -- note: bulletList needs to be before header because of
                -- the possibility of empty list items: -
                , bulletList
-- 
cgit v1.2.3


From 2274eb88a4dddf622d86bee94bb6f20db6e148b2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 11:37:09 -0700
Subject: Move yamlMetaBlock from Markdown reader to T.P.Readers.Metadata.

---
 src/Text/Pandoc/Readers/Markdown.hs | 24 ++----------------------
 src/Text/Pandoc/Readers/Metadata.hs | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8d68510c5..6c3947a81 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -15,7 +15,6 @@ Conversion of markdown-formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Markdown (
   readMarkdown,
-  yamlMetaBlock,
   yamlToMeta,
   yamlToRefs ) where
 
@@ -29,7 +28,6 @@ import Data.Maybe
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
 import qualified Data.ByteString.Lazy as BL
 import System.FilePath (addExtension, takeExtension)
 import Text.HTML.TagSoup hiding (Row)
@@ -47,9 +45,8 @@ import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
                                  isCommentTag, isInlineTag, isTextTag)
 import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
+import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
 
 type MarkdownParser m = ParserT Text ParserState m
 
@@ -275,31 +272,14 @@ pandocTitleBlock = do
                      $ nullMeta
     updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
-yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
-              => ParserT Text st m (Future st Blocks)
-              -> ParserT Text st m (Future st Meta)
-yamlMetaBlock parser = try $ do
-  string "---"
-  blankline
-  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
-  rawYamlLines <- manyTill anyLine stopLine
-  -- by including --- and ..., we allow yaml blocks with just comments:
-  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
-  optional blanklines
-  yamlBsToMeta (fmap B.toMetaValue <$> parser)
-               $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-
 yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
 yamlMetaBlock' = do
   guardEnabled Ext_yaml_metadata_block
-  newMetaF <- yamlMetaBlock parseBlocks
+  newMetaF <- yamlMetaBlock (fmap B.toMetaValue <$> parseBlocks)
   -- Since `<>` is left-biased, existing values are not touched:
   updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
   return mempty
 
-stopLine :: PandocMonad m => ParserT Text st m ()
-stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-
 mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
 mmdTitleBlock = do
   guardEnabled Ext_mmd_title_block
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index f4a27496f..cb141cba5 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -14,6 +14,7 @@ Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
 module Text.Pandoc.Readers.Metadata (
   yamlBsToMeta,
   yamlBsToRefs,
+  yamlMetaBlock,
   yamlMap ) where
 
 import Control.Monad
@@ -30,6 +31,8 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Shared
+import qualified Data.Text.Lazy as TL
+import qualified Text.Pandoc.UTF8 as UTF8
 
 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
              => ParserT Text st m (Future st MetaValue)
@@ -171,3 +174,20 @@ yamlMap pMetaValue o = do
       return $ do
         v' <- fv
         return (k, v')
+
+-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
+yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
+              => ParserT Text st m (Future st MetaValue)
+              -> ParserT Text st m (Future st Meta)
+yamlMetaBlock parser = try $ do
+  string "---"
+  blankline
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+  optional blanklines
+  yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+
+stopLine :: Monad m => ParserT Text st m ()
+stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-- 
cgit v1.2.3


From c389211e2f678d9327cd2c008d54a1e438f07a07 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 15:45:09 -0700
Subject: Support `yaml_metadata_block` extension form commonmark, gfm.

This is a bit more limited than with markdown, as documented
in the manual:

- The YAML block must be the first thing in the input.
- The leaf notes are parsed in isolation from the rest of
  the document.  So, for example, you can't use reference
  links if the references are defined later in the document.

Closes #6537.
---
 MANUAL.txt                            | 16 ++++++++++++++++
 src/Text/Pandoc/Extensions.hs         |  3 +++
 src/Text/Pandoc/Parsing.hs            |  2 +-
 src/Text/Pandoc/Readers/CommonMark.hs | 30 ++++++++++++++++++++++++++++++
 4 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index bc9e226e9..845d1dbba 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -4240,6 +4240,22 @@ will be interpreted as markdown. For example:
       \renewcommand{\section}[1]{\clearpage\oldsection{#1}}
       ```
 
+Note:  the `yaml_metadata_block` extension works with
+`commonmark` as well as `markdown` (and it is enabled by default
+in `gfm` and `commonmark_x`).  However, in these formats the
+following restrictions apply:
+
+- The YAML metadata block must occur at the beginning of the
+  document (and there can be only one).  If multiple files are
+  given as arguments to pandoc, only the first can be a YAML
+  metadata block.
+
+- The leaf nodes of the YAML structure are parsed in isolation from
+  each other and from the rest of the document.  So, for
+  example,  you can't use a reference link in these contexts
+  if the link definition is somewhere else in the document.
+
+
 ## Backslash escapes
 
 #### Extension: `all_symbols_escapable` ####
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 266a09e3c..6423d5f56 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -354,6 +354,7 @@ getDefaultExtensions "gfm"             = extensionsFromList
   , Ext_strikeout
   , Ext_task_lists
   , Ext_emoji
+  , Ext_yaml_metadata_block
   ]
 getDefaultExtensions "commonmark"      = extensionsFromList
                                           [Ext_raw_html]
@@ -379,6 +380,7 @@ getDefaultExtensions "commonmark_x"    = extensionsFromList
   , Ext_raw_attribute
   , Ext_implicit_header_references
   , Ext_attributes
+  , Ext_yaml_metadata_block
   ]
 getDefaultExtensions "org"             = extensionsFromList
                                           [Ext_citations,
@@ -511,6 +513,7 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_implicit_header_references
     , Ext_attributes
     , Ext_sourcepos
+    , Ext_yaml_metadata_block
     ]
   getAll "commonmark_x"    = getAll "commonmark"
   getAll "org"             = autoIdExtensions <>
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 44e6af59e..0c2078721 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -517,7 +517,7 @@ parseFromString :: (Stream s m Char, IsString s)
                 -> ParserT s st m r
 parseFromString parser str = do
   oldPos <- getPosition
-  setPosition $ initialPos "chunk"
+  setPosition $ initialPos " chunk"
   oldInput <- getInput
   setInput $ fromString $ T.unpack str
   result <- parser
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 150a837e4..244f77940 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -26,13 +26,43 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Options
 import Text.Pandoc.Error
+import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
 import Data.Typeable
+import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
+                            runF, defaultParserState, take1WhileP, option)
+import qualified Data.Text as T
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
 readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readCommonMark opts s
+  | isEnabled Ext_yaml_metadata_block opts
+  , "---" `T.isPrefixOf` s = do
+       let metaValueParser = do
+             inp <- option "" $ take1WhileP (const True)
+             case runIdentity
+                    (commonmarkWith (specFor opts) "metadata value" inp) of
+                    Left _ -> mzero
+                    Right (Cm bls :: Cm () Blocks)
+                             -> return $ return $ B.toMetaValue bls
+       res <- runParserT (do meta <- yamlMetaBlock metaValueParser
+                             pos <- getPosition
+                             return (meta, pos))
+                         defaultParserState "YAML metadata" s
+       case res of
+         Left _ -> readCommonMarkBody opts s
+         Right (meta, pos) -> do
+           let dropLines 0 = id
+               dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
+           let metaLines = sourceLine pos - 1
+           let body = T.replicate metaLines "\n" <> dropLines metaLines s
+           Pandoc _ bs <- readCommonMarkBody opts body
+           return $ Pandoc (runF meta defaultParserState) bs
+  | otherwise = readCommonMarkBody opts s
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMarkBody opts s
   | isEnabled Ext_sourcepos opts =
     case runIdentity (commonmarkWith (specFor opts) "" s) of
       Left err -> throwError $ PandocParsecError s err
-- 
cgit v1.2.3


From 56ce1fc12651e9cf60b882a18d72a410967d6540 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 24 Mar 2021 11:57:49 -0700
Subject: Fix DocBook reader mathml regression...

...caused by the switch in XML libraries.
Also fixed a similar issue in JATS.
Closes #7173.
---
 src/Text/Pandoc/Readers/DocBook.hs |   3 +-
 src/Text/Pandoc/Readers/JATS.hs    |   8 ++-
 test/command/7173.md               | 137 +++++++++++++++++++++++++++++++++++++
 test/docbook-reader.docbook        |   4 +-
 4 files changed, 147 insertions(+), 5 deletions(-)
 create mode 100644 test/command/7173.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6f5bb0ad4..4f525cfb1 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1239,7 +1239,8 @@ equation e constructor =
   where
     mathMLEquations :: [Text]
     mathMLEquations = map writeTeX $ rights $ readMath
-      (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
+      (\x -> qName (elName x) == "math" &&
+             qURI (elName x) == Just "http://www.w3.org/1998/Math/MathML")
       (readMathML . showElement)
 
     latexEquations :: [Text]
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 602f3b4f2..c068f3774 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -491,7 +491,8 @@ parseInline (Elem e) =
 
         "disp-formula" -> formula displayMath
         "inline-formula" -> formula math
-        "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
+        "math" | qURI (elName e) == Just "http://www.w3.org/1998/Math/MathML"
+                   -> return . math $ mathML e
         "tex-math" -> return . math $ textContent e
 
         "email" -> return $ link ("mailto:" <> textContent e) ""
@@ -514,8 +515,9 @@ parseInline (Elem e) =
                             filterChildren isMathML whereToLook
             return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs
 
-         isMathML x = qName   (elName x) == "math" &&
-                      qPrefix (elName x) == Just "mml"
+         isMathML x = qName (elName x) == "math" &&
+                      qURI  (elName x) ==
+                                      Just "http://www.w3.org/1998/Math/MathML"
          removePrefix elname = elname { qPrefix = Nothing }
          codeWithLang = do
            let classes' = case attrValue "language" e of
diff --git a/test/command/7173.md b/test/command/7173.md
new file mode 100644
index 000000000..2599dc19b
--- /dev/null
+++ b/test/command/7173.md
@@ -0,0 +1,137 @@
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <m:math xmlns:m="http://www.w3.org/1998/Math/MathML">
+            <m:mrow>
+                <m:mrow><m:mn>1</m:mn><m:mo>+</m:mo><m:mn>2</m:mn>
+                </m:mrow>
+            </m:mrow>
+        </m:math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML">
+            <mml:mrow>
+                <mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mn>2</mml:mn>
+                </mml:mrow>
+            </mml:mrow>
+        </mml:math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <math xmlns="http://www.w3.org/1998/Math/MathML">
+            <mrow>
+                <mrow><mn>1</mn><mo>+</mo><mn>2</mn>
+                </mrow>
+            </mrow>
+        </math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    xmlns:m="http://www.w3.org/1998/Math/MathML"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <m:math>
+            <m:mrow>
+                <m:mrow><m:mn>1</m:mn><m:mo>+</m:mo><m:mn>2</m:mn>
+                </m:mrow>
+            </m:mrow>
+        </m:math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    xmlns:mml="http://www.w3.org/1998/Math/MathML"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <mml:math>
+            <mml:mrow>
+                <mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mn>2</mml:mn>
+                </mml:mrow>
+            </mml:mrow>
+        </mml:math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook
index 5717d78d0..51e62942b 100644
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@@ -6,7 +6,9 @@
 <!ENTITY let "LET" >
 <!ENTITY case "CASE" >
 ]>
-<article>
+<article xmlns="http://docbook.org/ns/docbook"
+         xmlns:xlink="http://www.w3.org/1999/xlink"
+         xmlns:mml="http://www.w3.org/1998/Math/MathML">
   <articleinfo>
     <title>Pandoc Test Suite</title>
     <authorgroup>
-- 
cgit v1.2.3


From 40da6c402beb64e47838254c04228098129165df Mon Sep 17 00:00:00 2001
From: niszet <niszet0016@gmail.com>
Date: Thu, 1 Apr 2021 08:44:34 +0900
Subject: Treat tabs as spaces in ODT Reader. (#7185)

---
 src/Text/Pandoc/Readers/Odt/ContentReader.hs |   8 +++++++-
 test/Tests/Readers/Odt.hs                    |   1 +
 test/odt/native/tab.native                   |   1 +
 test/odt/odt/tab.odt                         | Bin 0 -> 10170 bytes
 4 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 test/odt/native/tab.native
 create mode 100644 test/odt/odt/tab.odt

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index df90880fa..c4220b0db 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -577,7 +577,10 @@ read_spaces       = matchingElement NsText "s" (
 read_line_break  :: InlineMatcher
 read_line_break   = matchingElement NsText "line-break"
                     $ returnV linebreak
-
+--
+read_tab         :: InlineMatcher
+read_tab          = matchingElement NsText "tab"
+                    $ returnV space
 --
 read_span        :: InlineMatcher
 read_span         = matchingElement NsText "span"
@@ -585,6 +588,7 @@ read_span         = matchingElement NsText "span"
                     $ matchChildContent [ read_span
                                         , read_spaces
                                         , read_line_break
+                                        , read_tab
                                         , read_link
                                         , read_note
                                         , read_citation
@@ -604,6 +608,7 @@ read_paragraph    = matchingElement NsText "p"
                     $ matchChildContent [ read_span
                                         , read_spaces
                                         , read_line_break
+                                        , read_tab
                                         , read_link
                                         , read_note
                                         , read_citation
@@ -630,6 +635,7 @@ read_header       = matchingElement NsText "h"
   children <- ( matchChildContent [ read_span
                                   , read_spaces
                                   , read_line_break
+                                  , read_tab
                                   , read_link
                                   , read_note
                                   , read_citation
diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs
index 9b5ec6b9e..edff4fe2c 100644
--- a/test/Tests/Readers/Odt.hs
+++ b/test/Tests/Readers/Odt.hs
@@ -176,6 +176,7 @@ namesOfTestsComparingToNative   = [ "blockquote"
                                   , "referenceToText"
                                   , "simpleTable"
                                   , "simpleTableWithCaption"
+                                  , "tab"
 --                                , "table"
                                   , "textMixedStyles"
                                   , "tableWithContents"
diff --git a/test/odt/native/tab.native b/test/odt/native/tab.native
new file mode 100644
index 000000000..948e81cd5
--- /dev/null
+++ b/test/odt/native/tab.native
@@ -0,0 +1 @@
+[Para [Str "Three",Space,Str "tabs",Space,Str "between",Space,Str "A",Space,Str "and",Space,Str "B",Space,Str "will",Space,Str "be",Space,Str "converted",Space,Str "to",Space,Str "one",Space,Str "Space:",Space,Str "A",Space,Str "B."]]
\ No newline at end of file
diff --git a/test/odt/odt/tab.odt b/test/odt/odt/tab.odt
new file mode 100644
index 000000000..a2c80f45c
Binary files /dev/null and b/test/odt/odt/tab.odt differ
-- 
cgit v1.2.3


From 935d10769df682fa08d947a873ca7665136e0b79 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 2 Apr 2021 17:04:55 -0700
Subject: Fix "phrase" in DocBook: take classes from "role" not "class".

Closes #7195.  Revises #6438.
---
 src/Text/Pandoc/Readers/DocBook.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 4f525cfb1..ac3caa2c0 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1075,7 +1075,7 @@ parseInline (Elem e) =
            return $ spanWith (attrValue "id" e, [], []) mempty
         "phrase" -> do
           let ident = attrValue "id" e
-          let classes = T.words $ attrValue "class" e
+          let classes = T.words $ attrValue "role" e
           if ident /= "" || classes /= []
             then innerInlines (spanWith (ident,classes,[]))
             else innerInlines id
-- 
cgit v1.2.3


From aecbf8156eb7c36c4b41de27797e262c23728db5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 Apr 2021 21:28:48 -0700
Subject: Remove Text.Pandoc.BCP47 module.

[API change]

Use Lang from UnicodeCollation.Lang instead.
This is a richer implementation of BCP 47.
---
 pandoc.cabal                            |   1 -
 src/Text/Pandoc/App.hs                  |   6 +-
 src/Text/Pandoc/BCP47.hs                |  99 -------------
 src/Text/Pandoc/Citeproc.hs             |  10 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs      |  20 +--
 src/Text/Pandoc/Citeproc/Data.hs        |  12 +-
 src/Text/Pandoc/Class/CommonState.hs    |   2 +-
 src/Text/Pandoc/Class/PandocMonad.hs    |   4 +-
 src/Text/Pandoc/Readers/BibTeX.hs       |   9 +-
 src/Text/Pandoc/Readers/LaTeX.hs        |   2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 241 ++++++++++++++++----------------
 src/Text/Pandoc/Writers/BibTeX.hs       |   2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs      |  48 +++----
 src/Text/Pandoc/Writers/CslJson.hs      |   7 +-
 src/Text/Pandoc/Writers/Docx.hs         |   2 +-
 src/Text/Pandoc/Writers/LaTeX.hs        |   2 +-
 src/Text/Pandoc/Writers/LaTeX/Lang.hs   |   8 +-
 src/Text/Pandoc/Writers/ODT.hs          |   7 +-
 src/Text/Pandoc/Writers/OpenDocument.hs |   8 +-
 src/Text/Pandoc/Writers/Shared.hs       |   2 +-
 20 files changed, 198 insertions(+), 294 deletions(-)
 delete mode 100644 src/Text/Pandoc/BCP47.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index b6cbb0d7a..8816767e9 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -601,7 +601,6 @@ library
                    Text.Pandoc.Asciify,
                    Text.Pandoc.Emoji,
                    Text.Pandoc.ImageSize,
-                   Text.Pandoc.BCP47,
                    Text.Pandoc.Class,
                    Text.Pandoc.Citeproc
   other-modules:   Text.Pandoc.App.CommandLineOptions,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 6b45e5418..67d3cce7d 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
 import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
                                            options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import UnicodeCollation.Lang (Lang (..), parseLang)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.SelfContained (makeSelfContained)
@@ -200,8 +200,8 @@ convertWithOpts opts = do
                     Just f  -> readFileStrict f
 
     case lookupMetaString "lang" (optMetadata opts) of
-           ""      -> setTranslations $ Lang "en" "" "US" []
-           l       -> case parseBCP47 l of
+           ""      -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
+           l       -> case parseLang l of
                            Left _   -> report $ InvalidLang l
                            Right l' -> setTranslations l'
 
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
deleted file mode 100644
index 1ecf0bf73..000000000
--- a/src/Text/Pandoc/BCP47.hs
+++ /dev/null
@@ -1,99 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.BCP47
-   Copyright   : Copyright (C) 2017-2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for parsing and rendering BCP47 language identifiers.
--}
-module Text.Pandoc.BCP47 (
-                       getLang
-                     , parseBCP47
-                     , Lang(..)
-                     , renderLang
-                     )
-where
-import Control.Monad (guard)
-import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.DocTemplates (FromContext(..))
-import qualified Data.Text as T
-import qualified Text.Parsec as P
-
--- | Represents BCP 47 language/country code.
-data Lang = Lang{ langLanguage :: T.Text
-                , langScript   :: T.Text
-                , langRegion   :: T.Text
-                , langVariants :: [T.Text] }
-                deriving (Eq, Ord, Show)
-
--- | Render a Lang as BCP 47.
-renderLang :: Lang -> T.Text
-renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
-                    ([langScript lang, langRegion lang] ++ langVariants lang))
-
--- | Parse a BCP 47 string as a Lang.  Currently we parse
--- extensions and private-use fields as "variants," even
--- though officially they aren't.
-parseBCP47 :: T.Text -> Either T.Text Lang
-parseBCP47 lang =
-  case P.parse bcp47 "lang" lang of
-       Right r -> Right r
-       Left e  -> Left $ T.pack $ show e
-  where bcp47 = do
-          language <- pLanguage
-          script <- P.option "" pScript
-          region <- P.option "" pRegion
-          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
-          P.eof
-          return Lang{   langLanguage = language
-                       , langScript = script
-                       , langRegion = region
-                       , langVariants = variants }
-        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
-        pLanguage = do
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toLower $ T.pack cs
-        pScript = P.try $ do
-          P.char '-'
-          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
-          xs <- P.count 3
-                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
-          return $ T.toLower $ T.pack (x:xs)
-        pRegion = P.try $ do
-          P.char '-'
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toUpper $ T.pack cs
-        pVariant = P.try $ do
-          P.char '-'
-          ds <- P.option "" (P.count 1 P.digit)
-          cs <- P.many1 asciiLetter
-          let var = ds ++ cs
-              lv = length var
-          guard $ if null ds
-                     then lv >= 5 && lv <= 8
-                     else lv == 4
-          return $ T.toLower $ T.pack var
-        pExtension = P.try $ do
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          let lcs = length cs
-          guard $ lcs >= 2 && lcs <= 8
-          return $ T.toLower $ T.pack cs
-        pPrivateUse = P.try $ do
-          P.char '-'
-          P.char 'x'
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          guard $ not (null cs) && length cs <= 8
-          let var = "x-" ++ cs
-          return $ T.toLower $ T.pack var
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index af302f782..c9f1806e4 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -18,7 +18,6 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
 import Text.Pandoc.Readers.Markdown (yamlToRefs)
-import qualified Text.Pandoc.BCP47 as BCP47
 import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Definition as Pandoc
@@ -630,13 +629,8 @@ removeFinalPeriod ils =
 
 bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang)
 bcp47LangToIETF bcplang =
-  case BCP47.parseBCP47 bcplang of
+  case parseLang bcplang of
     Left _ -> do
       report $ InvalidLang bcplang
       return Nothing
-    Right lang ->
-      return $ Just
-             $ Lang (BCP47.langLanguage lang)
-                    (if T.null (BCP47.langRegion lang)
-                        then Nothing
-                        else Just (BCP47.langRegion lang))
+    Right lang -> return $ Just lang
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index c0752dadc..510e56f9c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -205,10 +205,13 @@ writeBibtexString opts variant mblang ref =
                    [ (", " <>) <$> nameGiven name,
                      nameDroppingParticle name ]
 
-  mblang' = (parseLang <$> getVariableAsText "language") <|> mblang
+  mblang' = case getVariableAsText "language" of
+              Just l  -> either (const Nothing) Just $ parseLang l
+              Nothing -> mblang
 
   titlecase = case mblang' of
-                Just (Lang "en" _) -> titlecase'
+                Just lang | langLanguage lang == "en"
+                                   -> titlecase'
                 Nothing            -> titlecase'
                 _                  ->
                   case variant of
@@ -331,7 +334,7 @@ writeBibtexString opts variant mblang ref =
   renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField
 
 defaultLang :: Lang
-defaultLang = Lang "en" (Just "US")
+defaultLang = Lang "en" Nothing (Just "US") [] [] []
 
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
@@ -351,9 +354,7 @@ itemToReference locale variant item = do
   bib item $ do
     let lang = fromMaybe defaultLang $ localeLanguage locale
     modify $ \st -> st{ localeLang = lang,
-                        untitlecase = case lang of
-                                           (Lang "en" _) -> True
-                                           _             -> False }
+                        untitlecase = langLanguage lang == "en" }
 
     id' <- asks identifier
     otherIds <- (Just <$> getRawField "ids")
@@ -711,7 +712,7 @@ itemToReference locale variant item = do
 
 
 bib :: Item -> Bib a -> BibParser a
-bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
+bib entry m = fst <$> evalRWST m entry (BibState True defaultLang)
 
 resolveCrossRefs :: Variant -> [Item] -> [Item]
 resolveCrossRefs variant entries =
@@ -1456,8 +1457,9 @@ resolveKey lang ils = Walk.walk go ils
         go x       = x
 
 resolveKey' :: Lang -> Text -> Text
-resolveKey' lang@(Lang l _) k =
-  case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
+resolveKey' lang k =
+  case Map.lookup (langLanguage lang) biblatexStringMap >>=
+        Map.lookup (T.toLower k) of
     Nothing     -> k
     Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
 
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index 40430b0f5..388b9ba62 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -21,12 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text))
 biblatexStringMap = foldr go mempty biblatexLocalizations
  where
   go (fp, bs) =
-    let Lang lang _ _ _ _ _ = parseLang
-                                (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
-        ls = T.lines $ TE.decodeUtf8 bs
-     in if length ls > 4
-           then M.insert lang (toStringMap $ map (T.splitOn "|") ls)
-           else id
+    let ls = T.lines $ TE.decodeUtf8 bs
+     in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of
+          Right lang | length ls > 4
+            -> M.insert (langLanguage lang)
+                        (toStringMap $ map (T.splitOn "|") ls)
+          _ -> id
   toStringMap = foldr go' mempty
   go' [term, x, y] = M.insert term (x, y)
   go' _ = id
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 7e1735c2b..0fd094d99 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -19,7 +19,7 @@ where
 
 import Data.Default (Default (def))
 import Data.Text (Text)
-import Text.Pandoc.BCP47 (Lang)
+import UnicodeCollation.Lang (Lang)
 import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
 import Text.Pandoc.Translations (Translations)
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 293a822a0..76f1fa32b 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
 import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
                         isRelative, splitDirectories)
 import System.Random (StdGen)
-import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import UnicodeCollation.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -285,7 +285,7 @@ readFileFromDirs (d:ds) f = catchError
 toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
 toLang Nothing = return Nothing
 toLang (Just s) =
-  case parseBCP47 s of
+  case parseLang s of
        Left _ -> do
          report $ InvalidLang s
          return Nothing
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 956b9f1f7..b82a81350 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -48,11 +48,14 @@ readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
 readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
 readBibTeX' variant _opts t = do
-  lang <- maybe (Lang "en" (Just "US")) parseLang
-             <$> lookupEnv "LANG"
+  mblangEnv <- lookupEnv "LANG"
+  let defaultLang = Lang "en" Nothing (Just "US") [] [] []
+  let lang = case mblangEnv of
+              Nothing  -> defaultLang
+              Just l   -> either (const defaultLang) id $ parseLang l
   locale <- case getLocale lang of
                Left e  ->
-                 case getLocale (Lang "en" (Just "US")) of
+                 case getLocale (Lang "en" Nothing (Just "US") [] [] []) of
                    Right l -> return l
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 851756065..83caf742a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,7 +33,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (renderLang)
+import UnicodeCollation.Lang (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 08e217bdb..b92e6ab57 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -23,7 +23,7 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Shared (extractSpaces)
-import Text.Pandoc.BCP47 (Lang(..), renderLang)
+import UnicodeCollation.Lang (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
@@ -99,133 +99,136 @@ setDefaultLanguage = do
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
   [ ("arabic", \o -> case T.filter (/=' ') o of
-       "locale=algeria"    -> Lang "ar" "" "DZ" []
-       "locale=mashriq"    -> Lang "ar" "" "SY" []
-       "locale=libya"      -> Lang "ar" "" "LY" []
-       "locale=morocco"    -> Lang "ar" "" "MA" []
-       "locale=mauritania" -> Lang "ar" "" "MR" []
-       "locale=tunisia"    -> Lang "ar" "" "TN" []
-       _                   -> Lang "ar" "" "" [])
+       "locale=algeria"    -> Lang "ar" Nothing (Just "DZ") [] [] []
+       "locale=mashriq"    -> Lang "ar" Nothing (Just "SY") [] [] []
+       "locale=libya"      -> Lang "ar" Nothing (Just "LY") [] [] []
+       "locale=morocco"    -> Lang "ar" Nothing (Just "MA") [] [] []
+       "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+       "locale=tunisia"    -> Lang "ar" Nothing (Just "TN") [] [] []
+       _                   -> Lang "ar" Nothing (Just "")   [] [] [])
   , ("german", \o -> case T.filter (/=' ') o of
-       "spelling=old" -> Lang "de" "" "DE" ["1901"]
+       "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
        "variant=austrian,spelling=old"
-                       -> Lang "de" "" "AT" ["1901"]
-       "variant=austrian" -> Lang "de" "" "AT" []
+                       -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
        "variant=swiss,spelling=old"
-                       -> Lang "de" "" "CH" ["1901"]
-       "variant=swiss" -> Lang "de" "" "CH" []
-       _ -> Lang "de" "" "" [])
-  , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+                       -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+       _ -> Lang "de" Nothing Nothing [] [] [])
+  , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
   , ("greek", \o -> case T.filter (/=' ') o of
-       "variant=poly"    -> Lang "el" "" "polyton" []
-       "variant=ancient" -> Lang "grc" "" "" []
-       _                 -> Lang "el" "" "" [])
+       "variant=poly"    -> Lang "el" Nothing (Just "polyton") [] [] []
+       "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+       _                 -> Lang "el" Nothing Nothing [] [] [])
   , ("english", \o -> case T.filter (/=' ') o of
-       "variant=australian" -> Lang "en" "" "AU" []
-       "variant=canadian"   -> Lang "en" "" "CA" []
-       "variant=british"    -> Lang "en" "" "GB" []
-       "variant=newzealand" -> Lang "en" "" "NZ" []
-       "variant=american"   -> Lang "en" "" "US" []
-       _                    -> Lang "en" "" "" [])
-  , ("usorbian", \_ -> Lang "hsb" "" "" [])
+       "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+       "variant=canadian"   -> Lang "en" Nothing (Just "CA") [] [] []
+       "variant=british"    -> Lang "en" Nothing (Just "GB") [] [] []
+       "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+       "variant=american"   -> Lang "en" Nothing (Just "US") [] [] []
+       _                    -> Lang "en" Nothing (Just "")   [] [] [])
+  , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
   , ("latin", \o -> case T.filter (/=' ') o of
-       "variant=classic" -> Lang "la" "" "" ["x-classic"]
-       _                 -> Lang "la" "" "" [])
-  , ("slovenian", \_ -> Lang "sl" "" "" [])
-  , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
-  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
-  , ("afrikaans", \_ -> Lang "af" "" "" [])
-  , ("amharic", \_ -> Lang "am" "" "" [])
-  , ("assamese", \_ -> Lang "as" "" "" [])
-  , ("asturian", \_ -> Lang "ast" "" "" [])
-  , ("bulgarian", \_ -> Lang "bg" "" "" [])
-  , ("bengali", \_ -> Lang "bn" "" "" [])
-  , ("tibetan", \_ -> Lang "bo" "" "" [])
-  , ("breton", \_ -> Lang "br" "" "" [])
-  , ("catalan", \_ -> Lang "ca" "" "" [])
-  , ("welsh", \_ -> Lang "cy" "" "" [])
-  , ("czech", \_ -> Lang "cs" "" "" [])
-  , ("coptic", \_ -> Lang "cop" "" "" [])
-  , ("danish", \_ -> Lang "da" "" "" [])
-  , ("divehi", \_ -> Lang "dv" "" "" [])
-  , ("esperanto", \_ -> Lang "eo" "" "" [])
-  , ("spanish", \_ -> Lang "es" "" "" [])
-  , ("estonian", \_ -> Lang "et" "" "" [])
-  , ("basque", \_ -> Lang "eu" "" "" [])
-  , ("farsi", \_ -> Lang "fa" "" "" [])
-  , ("finnish", \_ -> Lang "fi" "" "" [])
-  , ("french", \_ -> Lang "fr" "" "" [])
-  , ("friulan", \_ -> Lang "fur" "" "" [])
-  , ("irish", \_ -> Lang "ga" "" "" [])
-  , ("scottish", \_ -> Lang "gd" "" "" [])
-  , ("ethiopic", \_ -> Lang "gez" "" "" [])
-  , ("galician", \_ -> Lang "gl" "" "" [])
-  , ("hebrew", \_ -> Lang "he" "" "" [])
-  , ("hindi", \_ -> Lang "hi" "" "" [])
-  , ("croatian", \_ -> Lang "hr" "" "" [])
-  , ("magyar", \_ -> Lang "hu" "" "" [])
-  , ("armenian", \_ -> Lang "hy" "" "" [])
-  , ("interlingua", \_ -> Lang "ia" "" "" [])
-  , ("indonesian", \_ -> Lang "id" "" "" [])
-  , ("icelandic", \_ -> Lang "is" "" "" [])
-  , ("italian", \_ -> Lang "it" "" "" [])
-  , ("japanese", \_ -> Lang "jp" "" "" [])
-  , ("khmer", \_ -> Lang "km" "" "" [])
-  , ("kurmanji", \_ -> Lang "kmr" "" "" [])
-  , ("kannada", \_ -> Lang "kn" "" "" [])
-  , ("korean", \_ -> Lang "ko" "" "" [])
-  , ("lao", \_ -> Lang "lo" "" "" [])
-  , ("lithuanian", \_ -> Lang "lt" "" "" [])
-  , ("latvian", \_ -> Lang "lv" "" "" [])
-  , ("malayalam", \_ -> Lang "ml" "" "" [])
-  , ("mongolian", \_ -> Lang "mn" "" "" [])
-  , ("marathi", \_ -> Lang "mr" "" "" [])
-  , ("dutch", \_ -> Lang "nl" "" "" [])
-  , ("nynorsk", \_ -> Lang "nn" "" "" [])
-  , ("norsk", \_ -> Lang "no" "" "" [])
-  , ("nko", \_ -> Lang "nqo" "" "" [])
-  , ("occitan", \_ -> Lang "oc" "" "" [])
-  , ("panjabi", \_ -> Lang "pa" "" "" [])
-  , ("polish", \_ -> Lang "pl" "" "" [])
-  , ("piedmontese", \_ -> Lang "pms" "" "" [])
-  , ("portuguese", \_ -> Lang "pt" "" "" [])
-  , ("romansh", \_ -> Lang "rm" "" "" [])
-  , ("romanian", \_ -> Lang "ro" "" "" [])
-  , ("russian", \_ -> Lang "ru" "" "" [])
-  , ("sanskrit", \_ -> Lang "sa" "" "" [])
-  , ("samin", \_ -> Lang "se" "" "" [])
-  , ("slovak", \_ -> Lang "sk" "" "" [])
-  , ("albanian", \_ -> Lang "sq" "" "" [])
-  , ("serbian", \_ -> Lang "sr" "" "" [])
-  , ("swedish", \_ -> Lang "sv" "" "" [])
-  , ("syriac", \_ -> Lang "syr" "" "" [])
-  , ("tamil", \_ -> Lang "ta" "" "" [])
-  , ("telugu", \_ -> Lang "te" "" "" [])
-  , ("thai", \_ -> Lang "th" "" "" [])
-  , ("turkmen", \_ -> Lang "tk" "" "" [])
-  , ("turkish", \_ -> Lang "tr" "" "" [])
-  , ("ukrainian", \_ -> Lang "uk" "" "" [])
-  , ("urdu", \_ -> Lang "ur" "" "" [])
-  , ("vietnamese", \_ -> Lang "vi" "" "" [])
+       "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+       _                 -> Lang "la" Nothing Nothing [] [] [])
+  , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+  , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+  , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+  , ("afrikaans", \_ -> simpleLang "af")
+  , ("amharic", \_ -> simpleLang "am")
+  , ("assamese", \_ -> simpleLang "as")
+  , ("asturian", \_ -> simpleLang "ast")
+  , ("bulgarian", \_ -> simpleLang "bg")
+  , ("bengali", \_ -> simpleLang "bn")
+  , ("tibetan", \_ -> simpleLang "bo")
+  , ("breton", \_ -> simpleLang "br")
+  , ("catalan", \_ -> simpleLang "ca")
+  , ("welsh", \_ -> simpleLang "cy")
+  , ("czech", \_ -> simpleLang "cs")
+  , ("coptic", \_ -> simpleLang "cop")
+  , ("danish", \_ -> simpleLang "da")
+  , ("divehi", \_ -> simpleLang "dv")
+  , ("esperanto", \_ -> simpleLang "eo")
+  , ("spanish", \_ -> simpleLang "es")
+  , ("estonian", \_ -> simpleLang "et")
+  , ("basque", \_ -> simpleLang "eu")
+  , ("farsi", \_ -> simpleLang "fa")
+  , ("finnish", \_ -> simpleLang "fi")
+  , ("french", \_ -> simpleLang "fr")
+  , ("friulan", \_ -> simpleLang "fur")
+  , ("irish", \_ -> simpleLang "ga")
+  , ("scottish", \_ -> simpleLang "gd")
+  , ("ethiopic", \_ -> simpleLang "gez")
+  , ("galician", \_ -> simpleLang "gl")
+  , ("hebrew", \_ -> simpleLang "he")
+  , ("hindi", \_ -> simpleLang "hi")
+  , ("croatian", \_ -> simpleLang "hr")
+  , ("magyar", \_ -> simpleLang "hu")
+  , ("armenian", \_ -> simpleLang "hy")
+  , ("interlingua", \_ -> simpleLang "ia")
+  , ("indonesian", \_ -> simpleLang "id")
+  , ("icelandic", \_ -> simpleLang "is")
+  , ("italian", \_ -> simpleLang "it")
+  , ("japanese", \_ -> simpleLang "jp")
+  , ("khmer", \_ -> simpleLang "km")
+  , ("kurmanji", \_ -> simpleLang "kmr")
+  , ("kannada", \_ -> simpleLang "kn")
+  , ("korean", \_ -> simpleLang "ko")
+  , ("lao", \_ -> simpleLang "lo")
+  , ("lithuanian", \_ -> simpleLang "lt")
+  , ("latvian", \_ -> simpleLang "lv")
+  , ("malayalam", \_ -> simpleLang "ml")
+  , ("mongolian", \_ -> simpleLang "mn")
+  , ("marathi", \_ -> simpleLang "mr")
+  , ("dutch", \_ -> simpleLang "nl")
+  , ("nynorsk", \_ -> simpleLang "nn")
+  , ("norsk", \_ -> simpleLang "no")
+  , ("nko", \_ -> simpleLang "nqo")
+  , ("occitan", \_ -> simpleLang "oc")
+  , ("panjabi", \_ -> simpleLang "pa")
+  , ("polish", \_ -> simpleLang "pl")
+  , ("piedmontese", \_ -> simpleLang "pms")
+  , ("portuguese", \_ -> simpleLang "pt")
+  , ("romansh", \_ -> simpleLang "rm")
+  , ("romanian", \_ -> simpleLang "ro")
+  , ("russian", \_ -> simpleLang "ru")
+  , ("sanskrit", \_ -> simpleLang "sa")
+  , ("samin", \_ -> simpleLang "se")
+  , ("slovak", \_ -> simpleLang "sk")
+  , ("albanian", \_ -> simpleLang "sq")
+  , ("serbian", \_ -> simpleLang "sr")
+  , ("swedish", \_ -> simpleLang "sv")
+  , ("syriac", \_ -> simpleLang "syr")
+  , ("tamil", \_ -> simpleLang "ta")
+  , ("telugu", \_ -> simpleLang "te")
+  , ("thai", \_ -> simpleLang "th")
+  , ("turkmen", \_ -> simpleLang "tk")
+  , ("turkish", \_ -> simpleLang "tr")
+  , ("ukrainian", \_ -> simpleLang "uk")
+  , ("urdu", \_ -> simpleLang "ur")
+  , ("vietnamese", \_ -> simpleLang "vi")
   ]
 
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
 babelLangToBCP47 :: T.Text -> Maybe Lang
 babelLangToBCP47 s =
   case s of
-       "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
-       "naustrian" -> Just $ Lang "de" "" "AT" []
-       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
-       "nswissgerman" -> Just $ Lang "de" "" "CH" []
-       "german" -> Just $ Lang "de" "" "DE" ["1901"]
-       "ngerman" -> Just $ Lang "de" "" "DE" []
-       "lowersorbian" -> Just $ Lang "dsb" "" "" []
-       "uppersorbian" -> Just $ Lang "hsb" "" "" []
-       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
-       "slovene" -> Just $ Lang "sl" "" "" []
-       "australian" -> Just $ Lang "en" "" "AU" []
-       "canadian" -> Just $ Lang "en" "" "CA" []
-       "british" -> Just $ Lang "en" "" "GB" []
-       "newzealand" -> Just $ Lang "en" "" "NZ" []
-       "american" -> Just $ Lang "en" "" "US" []
-       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+       "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+       "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+       "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+       "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+       "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+       "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+       "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+       "slovene" -> Just $ simpleLang "sl"
+       "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+       "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+       "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+       "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+       "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+       "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
        _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs
index b9ae0c13a..95de6b71f 100644
--- a/src/Text/Pandoc/Writers/BibTeX.hs
+++ b/src/Text/Pandoc/Writers/BibTeX.hs
@@ -43,7 +43,7 @@ writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text
 writeBibTeX' variant opts (Pandoc meta _) = do
   let mblang = case lookupMetaString "lang" meta of
                  "" -> Nothing
-                 t  -> Just $ parseLang t
+                 t  -> either (const Nothing) Just $ parseLang t
   let refs = case lookupMeta "references" meta of
                Just (MetaList xs) -> mapMaybe metaValueToReference xs
                _ -> []
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 3c9975be8..f352c84bc 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import Text.Pandoc.BCP47
+import UnicodeCollation.Lang (Lang(..))
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -555,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs
 -- https://tools.ietf.org/html/bcp47#section-2.1
 -- http://wiki.contextgarden.net/Language_Codes
 fromBCP47' :: Maybe Lang -> Maybe Text
-fromBCP47' (Just (Lang "ar" _ "SY" _)     ) = Just "ar-sy"
-fromBCP47' (Just (Lang "ar" _ "IQ" _)     ) = Just "ar-iq"
-fromBCP47' (Just (Lang "ar" _ "JO" _)     ) = Just "ar-jo"
-fromBCP47' (Just (Lang "ar" _ "LB" _)     ) = Just "ar-lb"
-fromBCP47' (Just (Lang "ar" _ "DZ" _)     ) = Just "ar-dz"
-fromBCP47' (Just (Lang "ar" _ "MA" _)     ) = Just "ar-ma"
-fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo"
-fromBCP47' (Just (Lang "de" _ "DE" _)     ) = Just "de-de"
-fromBCP47' (Just (Lang "de" _ "AT" _)     ) = Just "de-at"
-fromBCP47' (Just (Lang "de" _ "CH" _)     ) = Just "de-ch"
-fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr"
-fromBCP47' (Just (Lang "en" _ "US" _)     ) = Just "en-us"
-fromBCP47' (Just (Lang "en" _ "GB" _)     ) = Just "en-gb"
-fromBCP47' (Just (Lang "grc"_  _ _)       ) = Just "agr"
-fromBCP47' (Just (Lang "el" _ _ _)        ) = Just "gr"
-fromBCP47' (Just (Lang "eu" _ _ _)        ) = Just "ba"
-fromBCP47' (Just (Lang "he" _ _ _)        ) = Just "il"
-fromBCP47' (Just (Lang "jp" _ _ _)        ) = Just "ja"
-fromBCP47' (Just (Lang "uk" _ _ _)        ) = Just "ua"
-fromBCP47' (Just (Lang "vi" _ _ _)        ) = Just "vn"
-fromBCP47' (Just (Lang "zh" _ _ _)        ) = Just "cn"
-fromBCP47' (Just (Lang l _ _ _)           ) = Just l
-fromBCP47' Nothing                          = Nothing
+fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy"
+fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq"
+fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo"
+fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb"
+fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz"
+fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma"
+fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _))    = Just "deo"
+fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de"
+fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at"
+fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch"
+fromBCP47' (Just (Lang "el" _ _ ["poly"] _ _))    = Just "agr"
+fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us"
+fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb"
+fromBCP47' (Just (Lang "grc"_ _ _ _ _))           = Just "agr"
+fromBCP47' (Just (Lang "el" _ _ _ _ _))           = Just "gr"
+fromBCP47' (Just (Lang "eu" _ _ _ _ _))           = Just "ba"
+fromBCP47' (Just (Lang "he" _ _ _ _ _))           = Just "il"
+fromBCP47' (Just (Lang "jp" _ _ _ _ _))           = Just "ja"
+fromBCP47' (Just (Lang "uk" _ _ _ _ _))           = Just "ua"
+fromBCP47' (Just (Lang "vi" _ _ _ _ _))           = Just "vn"
+fromBCP47' (Just (Lang "zh" _ _ _ _ _))           = Just "cn"
+fromBCP47' (Just (Lang l _ _ _ _ _))              = Just l
+fromBCP47' Nothing                                = Nothing
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index a10def95e..395335667 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -34,15 +34,16 @@ import Control.Monad.Identity
 import Citeproc.Locale (getLocale)
 import Citeproc.CslJson
 import Text.Pandoc.Options (WriterOptions)
-import Data.Maybe (mapMaybe)
+import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Aeson.Encode.Pretty         (Config (..), Indent (Spaces),
                                          NumberFormat (Generic),
                                          defConfig, encodePretty')
 
 writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeCslJson _opts (Pandoc meta _) = do
-  let lang = maybe (Lang "en" (Just "US")) parseLang
-               (lookupMeta "lang" meta >>= metaValueToText)
+  let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] [])
+               (lookupMeta "lang" meta >>= metaValueToText >>=
+                  either (const Nothing) Just . parseLang)
   locale <- case getLocale lang of
                Left e  -> throwError $ PandocCiteprocError e
                Right l -> return l
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 20bcd0324..7781df8e7 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import Text.Pandoc.BCP47 (getLang, renderLang)
+import UnicodeCollation.Lang (renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 1c970e6ad..e99bad738 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -29,7 +29,7 @@ import qualified Data.Text as T
 import Network.URI (unEscapeString)
 import Text.DocTemplates (FromContext(lookupContext), renderTemplate,
                           Val(..), Context(..))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import UnicodeCollation.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 871b2692a..437b84120 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -46,7 +46,7 @@ toPolyglossia (Lang "de" _ (Just "AT") vars _ _)
 toPolyglossia (Lang "de" _ (Just "AT") _ _ _)  = ("german", "variant=austrian")
 toPolyglossia (Lang "de" _ (Just "CH") vars _ _)
   | "1901" `elem` vars        = ("german", "variant=swiss, spelling=old")
-toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss")
 toPolyglossia (Lang "de" _ _ _ _ _)           = ("german", "")
 toPolyglossia (Lang "dsb" _ _ _ _ _)          = ("lsorbian", "")
 toPolyglossia (Lang "el" _ _ vars _ _)
@@ -61,9 +61,9 @@ toPolyglossia (Lang "grc" _ _ _ _ _)          = ("greek",   "variant=ancient")
 toPolyglossia (Lang "hsb" _ _ _ _ _)          = ("usorbian", "")
 toPolyglossia (Lang "la" _ _ vars _ _)
   | "x-classic" `elem` vars                   = ("latin", "variant=classic")
-toPolyglossia (Lang "pt" _ "BR" _ _ _)        = ("portuguese", "variant=brazilian")
+toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian")
 toPolyglossia (Lang "sl" _ _ _ _ _)           = ("slovenian", "")
-toPolyglossia x                           = (commonFromBcp47 x, "")
+toPolyglossia x                               = (commonFromBcp47 x, "")
 
 -- Takes a list of the constituents of a BCP47 language code and
 -- converts it to a Babel language string.
@@ -81,7 +81,7 @@ toBabel (Lang "de" _ _ vars _ _)
   | "1901" `elem` vars                  = "german"
   | otherwise                           = "ngerman"
 toBabel (Lang "dsb" _ _ _ _ _)          = "lowersorbian"
-toBabel (Lang "el" _ _ vars)
+toBabel (Lang "el" _ _ vars _ _)
   | "polyton" `elem` vars               = "polutonikogreek"
 toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian"
 toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian"
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 101b236aa..6fd4cdeb4 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -16,6 +16,7 @@ import Codec.Archive.Zip
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
+import Data.Maybe (fromMaybe)
 import Data.Generics (everywhere', mkT)
 import Data.List (isPrefixOf)
 import qualified Data.Map as Map
@@ -23,7 +24,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import UnicodeCollation.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
@@ -35,7 +36,7 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
 import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
-                                   fixDisplayMath)
+                                   fixDisplayMath, getLang)
 import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
@@ -194,7 +195,7 @@ addLang lang = everywhere' (mkT updateLangAttr)
     where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
                            = Attr n (langLanguage lang)
           updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
-                           = Attr n (langRegion lang)
+                           = Attr n (fromMaybe "" $ langRegion lang)
           updateLangAttr x = x
 
 -- | transform both Image and Math elements
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index cf42f2228..6c265090c 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -25,7 +25,7 @@ import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import UnicodeCollation.Lang (Lang (..), parseLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm,
                                       setTranslations, toLang)
 import Text.Pandoc.Definition
@@ -236,7 +236,7 @@ handleSpaces s = case T.uncons s of
 -- | Convert Pandoc document to string in OpenDocument format.
 writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeOpenDocument opts (Pandoc meta blocks) = do
-  let defLang = Lang "en" "US" "" []
+  let defLang = Lang "en" (Just "US") Nothing [] [] []
   lang <- case lookupMetaString "lang" meta of
             "" -> pure defLang
             s  -> fromMaybe defLang <$> toLang (Just s)
@@ -893,7 +893,7 @@ textStyleAttr m s
                     Map.insert "style:font-name-complex" "Courier New" $ m
     | Language lang <- s
                   = Map.insert "fo:language" (langLanguage lang) .
-                    Map.insert "fo:country" (langRegion lang) $ m
+                    maybe id (Map.insert "fo:country") (langRegion lang) $ m
     | otherwise   = m
 
 withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
@@ -901,7 +901,7 @@ withLangFromAttr (_,_,kvs) action =
   case lookup "lang" kvs of
        Nothing -> action
        Just l  ->
-         case parseBCP47 l of
+         case parseLang l of
               Right lang -> withTextStyle (Language lang) action
               Left _ -> do
                 report $ InvalidLang l
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index fcb47bd5a..a09d18571 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -149,7 +149,7 @@ defField field val (Context m) =
     f _newval oldval = oldval
 
 -- | Get the contents of the `lang` metadata field or variable.
-getLang :: WriterOptions -> Meta -> Maybe Text
+getLang :: WriterOptions -> Meta -> Maybe T.Text
 getLang opts meta =
   case lookupContext "lang" (writerVariables opts) of
         Just s -> Just s
-- 
cgit v1.2.3


From a478a5c4c8753fd0bf272cd540ca197ae146a196 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 17 Apr 2021 11:47:54 -0700
Subject: Update to released unicode-collation, latest citeproc dev version.

Update citeproc test.
---
 cabal.project                           | 11 +----------
 src/Text/Pandoc/App.hs                  |  2 +-
 src/Text/Pandoc/Citeproc/Data.hs        |  2 +-
 src/Text/Pandoc/Class/CommonState.hs    |  2 +-
 src/Text/Pandoc/Class/PandocMonad.hs    |  2 +-
 src/Text/Pandoc/Readers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   |  2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs      |  2 +-
 src/Text/Pandoc/Writers/Docbook.hs      |  2 +-
 src/Text/Pandoc/Writers/Docx.hs         |  2 +-
 src/Text/Pandoc/Writers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Lang.hs   |  2 +-
 src/Text/Pandoc/Writers/ODT.hs          |  2 +-
 src/Text/Pandoc/Writers/OpenDocument.hs |  2 +-
 stack.yaml                              |  5 ++---
 test/command/pandoc-citeproc-320a.md    |  8 ++++----
 16 files changed, 20 insertions(+), 30 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/cabal.project b/cabal.project
index fa17a20a6..77bc8ef88 100644
--- a/cabal.project
+++ b/cabal.project
@@ -5,14 +5,5 @@ flags: +embed_data_files
 source-repository-package
   type: git
   location: https://github.com/jgm/citeproc
-  tag: b42857be658b8f2649e989e061978e304986f853
+  tag: f9439e07e9271c7c2674a51efcad2fb8c663b2c8
 
-source-repository-package
-  type: git
-  location: https://github.com/jgm/unicode-collation
-  tag: 9d229a5c6bcbaf53d7022575234eb223cfa90d55
-
--- source-repository-package
---  type: git
---  location: https://github.com/jgm/citeproc
---  tag: d44e24696ab444090d0e63e321c3a573f68b2e74
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 67d3cce7d..4e8c9f2ab 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
 import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
                                            options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
-import UnicodeCollation.Lang (Lang (..), parseLang)
+import Text.Collate.Lang (Lang (..), parseLang)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.SelfContained (makeSelfContained)
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index 388b9ba62..848a83a1e 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE
 import qualified Data.Text as T
 import Data.Text (Text)
 import Text.Pandoc.Citeproc.Util (toIETF)
-import UnicodeCollation.Lang (Lang(..), parseLang)
+import Text.Collate.Lang (Lang(..), parseLang)
 
 biblatexLocalizations :: [(FilePath, ByteString)]
 biblatexLocalizations = $(embedDir "citeproc/biblatex-localization")
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 0fd094d99..796a4afd5 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -19,7 +19,7 @@ where
 
 import Data.Default (Default (def))
 import Data.Text (Text)
-import UnicodeCollation.Lang (Lang)
+import Text.Collate.Lang (Lang)
 import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
 import Text.Pandoc.Translations (Translations)
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 76f1fa32b..7559cd7cd 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
 import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
                         isRelative, splitDirectories)
 import System.Random (StdGen)
-import UnicodeCollation.Lang (Lang(..), parseLang, renderLang)
+import Text.Collate.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 83caf742a..203dab83c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,7 +33,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import UnicodeCollation.Lang (renderLang)
+import Text.Collate.Lang (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index b92e6ab57..6a8327904 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -23,7 +23,7 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Shared (extractSpaces)
-import UnicodeCollation.Lang (Lang(..), renderLang)
+import Text.Collate.Lang (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index f352c84bc..f14b1d894 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import UnicodeCollation.Lang (Lang(..))
+import Text.Collate.Lang (Lang(..))
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 1f10c9d04..02b141250 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -493,4 +493,4 @@ isSectionAttr DocBook4 ("os",_) = True
 isSectionAttr DocBook4 ("revision",_) = True
 isSectionAttr DocBook4 ("security",_) = True
 isSectionAttr DocBook4 ("vendor",_) = True
-isSectionAttr _ (_,_) = False
\ No newline at end of file
+isSectionAttr _ (_,_) = False
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 7781df8e7..749ad9a21 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import UnicodeCollation.Lang (renderLang)
+import Text.Collate.Lang (renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e99bad738..8b1f3df1d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -29,7 +29,7 @@ import qualified Data.Text as T
 import Network.URI (unEscapeString)
 import Text.DocTemplates (FromContext(lookupContext), renderTemplate,
                           Val(..), Context(..))
-import UnicodeCollation.Lang (Lang (..), renderLang)
+import Text.Collate.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 437b84120..0ba68b74e 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang
     toBabel
   ) where
 import Data.Text (Text)
-import UnicodeCollation.Lang (Lang(..))
+import Text.Collate.Lang (Lang(..))
 
 
 -- In environments \Arabic instead of \arabic is used
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 6fd4cdeb4..e4eb4fd25 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -24,7 +24,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import UnicodeCollation.Lang (Lang (..), renderLang)
+import Text.Collate.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 6c265090c..34a3a4aa5 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -25,7 +25,7 @@ import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import UnicodeCollation.Lang (Lang (..), parseLang)
+import Text.Collate.Lang (Lang (..), parseLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm,
                                       setTranslations, toLang)
 import Text.Pandoc.Definition
diff --git a/stack.yaml b/stack.yaml
index 159bf74b9..7bc33fa43 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -16,11 +16,10 @@ extra-deps:
 - texmath-0.12.2
 - random-1.2.0
 - xml-conduit-1.9.1.1
+- unicode-collation-0.1
 # - citeproc-0.3.0.9
 - git: https://github.com/jgm/citeproc
-  commit: b42857be658b8f2649e989e061978e304986f853
-- git: https://github.com/jgm/unicode-collation
-  commit: 9d229a5c6bcbaf53d7022575234eb223cfa90d55
+  commit: f9439e07e9271c7c2674a51efcad2fb8c663b2c8
 ghc-options:
    "$locals": -fhide-source-paths -Wno-missing-home-modules
 resolver: lts-17.5
diff --git a/test/command/pandoc-citeproc-320a.md b/test/command/pandoc-citeproc-320a.md
index 1c3b47de0..79dacfa10 100644
--- a/test/command/pandoc-citeproc-320a.md
+++ b/test/command/pandoc-citeproc-320a.md
@@ -56,6 +56,10 @@ n.d.; al-'Udhrī, n.d.; Uch, n.d.; Uebel, n.d.; Zzz, n.d.).
 Uch, Ann. n.d.
 :::
 
+::: {#ref-item4 .csl-entry}
+'Udhrī, Jamīl al-. n.d.
+:::
+
 ::: {#ref-item1 .csl-entry}
 ʾUdhrī, Jamīl al-. n.d.
 :::
@@ -68,10 +72,6 @@ Uch, Ann. n.d.
 \'Udhrī, Jamīl al-. n.d.
 :::
 
-::: {#ref-item4 .csl-entry}
-'Udhrī, Jamīl al-. n.d.
-:::
-
 ::: {#ref-item5 .csl-entry}
 'Udhrī, Jamīl al-. n.d.
 :::
-- 
cgit v1.2.3


From 73d394ca2adff31a384404ae25665b36c7d0bba0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 18 Apr 2021 22:01:12 -0700
Subject: Use MetaInlines not MetaBlocks for multimarkdown metadata fields.

This gives better results in converting to e.g. pandoc markdown.

Ref: <https://groups.google.com/d/msgid/pandoc-discuss/9728d1f4-040e-4392-aa04-148f648a8dfdn%40googlegroups.com>
---
 src/Text/Pandoc/Readers/Markdown.hs |  2 +-
 test/command/mmd-metadata.md        | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 test/command/mmd-metadata.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 6c3947a81..4b20e3a8b 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -298,7 +298,7 @@ kvPair allowEmpty = try $ do
           (try $ newline >> lookAhead (blankline <|> nonspaceChar))
   guard $ allowEmpty || not (T.null val)
   let key' = T.concat $ T.words $ T.toLower key
-  let val' = MetaBlocks $ B.toList $ B.plain $ B.text val
+  let val' = MetaInlines $ B.toList $ B.text val
   return (key',val')
 
 parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
diff --git a/test/command/mmd-metadata.md b/test/command/mmd-metadata.md
new file mode 100644
index 000000000..3cda34873
--- /dev/null
+++ b/test/command/mmd-metadata.md
@@ -0,0 +1,20 @@
+```
+% pandoc -f markdown_mmd -t markdown -s
+Title: Blah blah blah
+Author: Doo de Doo
+Base Header Level: 1
+Bibliography: Pubs.bib
+Lang: en-GB
+
+body
+^D
+---
+author: Doo de Doo
+baseheaderlevel: 1
+bibliography: Pubs.bib
+lang: en-GB
+title: Blah blah blah
+---
+
+body
+```
-- 
cgit v1.2.3


From c56d080a253c1364ca55ae97b8d43ed638acf723 Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sun, 25 Apr 2021 21:19:07 +0200
Subject: Writers: Recognize custom syntax definitions (#7241)

Languages defined using `--syntax-definition` were not recognized by `languagesByExtension`.
This patch corrects that, allowing the writers to see all custom definitions.

The LaTeX still uses the default syntax map, but that's okay in that context, since
`--syntax-definition` won't create new listings styles.
---
 src/Text/Pandoc/Highlighting.hs    | 10 +++++-----
 src/Text/Pandoc/Readers/LaTeX.hs   |  3 ++-
 src/Text/Pandoc/Writers/Docbook.hs |  7 ++++---
 src/Text/Pandoc/Writers/JATS.hs    | 24 +++++++++++++-----------
 src/Text/Pandoc/Writers/TEI.hs     |  7 ++++---
 5 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 0bb6ed319..62a261e50 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -52,12 +52,12 @@ highlightingStyles =
    ("breezedark", breezeDark),
    ("haddock", haddock)]
 
-languages :: [T.Text]
-languages = [T.toLower (sName s) | s <- M.elems defaultSyntaxMap]
+languages :: SyntaxMap -> [T.Text]
+languages syntaxmap = [T.toLower (sName s) | s <- M.elems syntaxmap]
 
-languagesByExtension :: T.Text -> [T.Text]
-languagesByExtension ext =
-  [T.toLower (sName s) | s <- syntaxesByExtension defaultSyntaxMap (T.unpack ext)]
+languagesByExtension :: SyntaxMap -> T.Text -> [T.Text]
+languagesByExtension syntaxmap ext =
+  [T.toLower (sName s) | s <- syntaxesByExtension syntaxmap (T.unpack ext)]
 
 highlight :: SyntaxMap
           -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 203dab83c..19c257a48 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -32,6 +32,7 @@ import Data.Maybe (fromMaybe, maybeToList)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
+import Skylighting (defaultSyntaxMap)
 import System.FilePath (addExtension, replaceExtension, takeExtension)
 import Text.Collate.Lang (renderLang)
 import Text.Pandoc.Builder as B
@@ -1170,7 +1171,7 @@ inputListing = do
   let (ident,classes,kvs) = parseListingsOptions options
   let classes' =
         (case listingsLanguage options of
-           Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>)
+           Nothing -> (take 1 (languagesByExtension defaultSyntaxMap (T.pack $ takeExtension $ T.unpack f)) <>)
            Just _  -> id) classes
   let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
   let lastline = fromMaybe (length codeLines) $
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 02b141250..25bd308bf 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -253,17 +253,18 @@ blockToDocbook opts (LineBlock lns) =
   blockToDocbook opts $ linesToPara lns
 blockToDocbook opts (BlockQuote blocks) =
   inTagsIndented "blockquote" <$> blocksToDocbook opts blocks
-blockToDocbook _ (CodeBlock (_,classes,_) str) = return $
+blockToDocbook opts (CodeBlock (_,classes,_) str) = return $
   literal ("<programlisting" <> lang <> ">") <> cr <>
      flush (literal (escapeStringForXML str) <> cr <> literal "</programlisting>")
     where lang  = if null langs
                      then ""
                      else " language=\"" <> escapeStringForXML (head langs) <>
                           "\""
-          isLang l    = T.toLower l `elem` map T.toLower languages
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 blockToDocbook opts (BulletList lst) = do
   let attribs = [("spacing", "compact") | isTightList lst]
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index b58ff8aef..0bcfa0df4 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -224,19 +224,21 @@ imageMimeType src kvs =
                   (T.drop 1 . T.dropWhile (/='/') <$> mbMT)
   in (maintype, subtype)
 
-languageFor :: [Text] -> Text
-languageFor classes =
+languageFor :: WriterOptions -> [Text] -> Text
+languageFor opts classes =
   case langs of
      (l:_) -> escapeStringForXML l
      []    -> ""
-    where isLang l    = T.toLower l `elem` map T.toLower languages
+    where
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 
-codeAttr :: Attr -> (Text, [(Text, Text)])
-codeAttr (ident,classes,kvs) = (lang, attr)
+codeAttr :: WriterOptions -> Attr -> (Text, [(Text, Text)])
+codeAttr opts (ident,classes,kvs) = (lang, attr)
     where
        attr = [("id", escapeNCName ident) | not (T.null ident)] ++
               [("language",lang) | not (T.null lang)] ++
@@ -244,7 +246,7 @@ codeAttr (ident,classes,kvs) = (lang, attr)
                 "code-version", "executable",
                 "language-version", "orientation",
                     "platforms", "position", "specific-use"]]
-       lang  = languageFor classes
+       lang  = languageFor opts classes
 
 -- | Convert a Pandoc block element to JATS.
 blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text)
@@ -330,9 +332,9 @@ blockToJATS opts (BlockQuote blocks) = do
                     HorizontalRule -> True
                     _              -> False
   inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks
-blockToJATS _ (CodeBlock a str) = return $
+blockToJATS opts (CodeBlock a str) = return $
   inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str)))
-    where (lang, attr) = codeAttr a
+    where (lang, attr) = codeAttr opts a
           tag          = if T.null lang then "preformat" else "code"
 blockToJATS _ (BulletList []) = return empty
 blockToJATS opts (BulletList lst) =
@@ -412,9 +414,9 @@ inlineToJATS opts (Quoted SingleQuote lst) = do
 inlineToJATS opts (Quoted DoubleQuote lst) = do
   contents <- inlinesToJATS opts lst
   return $ char '“' <> contents <> char '”'
-inlineToJATS _ (Code a str) =
+inlineToJATS opts (Code a str) =
   return $ inTags False tag attr $ literal (escapeStringForXML str)
-    where (lang, attr) = codeAttr a
+    where (lang, attr) = codeAttr opts a
           tag          = if T.null lang then "monospace" else "code"
 inlineToJATS _ il@(RawInline f x)
   | f == "jats" = return $ literal x
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index b926c48a1..18015259d 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -146,16 +146,17 @@ blockToTEI opts (LineBlock lns) =
   blockToTEI opts $ linesToPara lns
 blockToTEI opts (BlockQuote blocks) =
   inTagsIndented "quote" <$> blocksToTEI opts blocks
-blockToTEI _ (CodeBlock (_,classes,_) str) =
+blockToTEI opts (CodeBlock (_,classes,_) str) =
   return $ literal ("<ab type='codeblock " <> lang <> "'>") <> cr <>
      flush (literal (escapeStringForXML str) <> cr <> text "</ab>")
     where lang  = if null langs
                      then ""
                      else escapeStringForXML (head langs)
-          isLang l    = T.toLower l `elem` map T.toLower languages
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 blockToTEI opts (BulletList lst) = do
   let attribs = [("type", "unordered")]
-- 
cgit v1.2.3


From 3a98f7a0c767c3b13c7b358ca75d2bddab7cd4d2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 25 Apr 2021 12:21:00 -0700
Subject: Minor code reformatting.

Also taking this opportunity to note, for the record, that
the commit for #7241 should be marked [API change].
It changes the type of `languagesByExtension` in Highlighting,
adding a parameter for a `SyntaxMap`.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 19c257a48..9ad168293 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1171,7 +1171,8 @@ inputListing = do
   let (ident,classes,kvs) = parseListingsOptions options
   let classes' =
         (case listingsLanguage options of
-           Nothing -> (take 1 (languagesByExtension defaultSyntaxMap (T.pack $ takeExtension $ T.unpack f)) <>)
+           Nothing -> (take 1 (languagesByExtension defaultSyntaxMap
+                                (T.pack $ takeExtension $ T.unpack f)) <>)
            Just _  -> id) classes
   let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
   let lastline = fromMaybe (length codeLines) $
-- 
cgit v1.2.3


From 80e2e88287f43d88ea92a77779b25e161c81f67b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 28 Apr 2021 23:30:16 -0700
Subject: Smarter smart quotes.

Treat a leading " with no closing " as a left curly quote.
This supports the practice, in fiction, of continuing
paragraphs quoting the same speaker without an end quote.
It also helps with quotes that break over lines in line
blocks.

Closes #7216.
---
 src/Text/Pandoc/Parsing.hs           | 45 +++++++++++++++++++++++-------------
 src/Text/Pandoc/Readers/Markdown.hs  | 18 ++++++++-------
 src/Text/Pandoc/Readers/RST.hs       | 19 +--------------
 src/Text/Pandoc/Readers/TWiki.hs     | 22 +-----------------
 test/Tests/Readers/Markdown.hs       |  2 +-
 test/command/7216.md                 | 19 +++++++++++++++
 test/command/pandoc-citeproc-320a.md | 12 +++++-----
 7 files changed, 67 insertions(+), 70 deletions(-)
 create mode 100644 test/command/7216.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 847fd2e05..2f6189104 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -105,8 +105,9 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              singleQuoteEnd,
                              doubleQuoteStart,
                              doubleQuoteEnd,
-                             ellipses,
                              apostrophe,
+                             doubleCloseQuote,
+                             ellipses,
                              dash,
                              nested,
                              citeKey,
@@ -1398,10 +1399,7 @@ smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext
                  -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   guardEnabled Ext_smart
-  choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-
-apostrophe :: Stream s m Char => ParserT s st m Inlines
-apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
+  choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
 
 quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
        => ParserT s st m Inlines
@@ -1411,16 +1409,22 @@ quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
-singleQuoted inlineParser = try $ B.singleQuoted . mconcat
-  <$  singleQuoteStart
-  <*> withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)
-
-doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+singleQuoted inlineParser = do
+  singleQuoteStart
+  (B.singleQuoted . mconcat <$>
+    try
+     (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
+   <|> pure "\8217"
+
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
-doubleQuoted inlineParser = try $ B.doubleQuoted . mconcat
-  <$  doubleQuoteStart
-  <*> withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)
+doubleQuoted inlineParser = do
+  doubleQuoteStart
+  (B.doubleQuoted . mconcat <$>
+    try
+     (withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)))
+   <|> pure (B.str "\8220")
 
 failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
                      => QuoteContext
@@ -1443,7 +1447,7 @@ singleQuoteStart = do
   guard =<< notAfterString
   try $ do
     charOrRef "'\8216\145"
-    notFollowedBy (oneOf [' ', '\t', '\n'])
+    notFollowedBy (satisfy isSpaceChar)
 
 singleQuoteEnd :: Stream s m Char
                => ParserT s st m ()
@@ -1451,17 +1455,26 @@ singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
   notFollowedBy alphaNum
 
-doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+doubleQuoteStart :: (HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char)
                  => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
+  guard =<< notAfterString
   try $ do charOrRef "\"\8220\147"
-           notFollowedBy (oneOf [' ', '\t', '\n'])
+           notFollowedBy (satisfy isSpaceChar)
 
 doubleQuoteEnd :: Stream s m Char
                => ParserT s st m ()
 doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
+apostrophe :: Stream s m Char => ParserT s st m Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
+
+doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
+doubleCloseQuote = B.str "\8221" <$ char '"'
+
 ellipses :: Stream s m Char
          => ParserT s st m Inlines
 ellipses = try (string "..." >> return (B.str "\8230"))
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4b20e3a8b..ba8ed147e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -2199,25 +2199,27 @@ citation = try $ do
 smart :: PandocMonad m => MarkdownParser m (F Inlines)
 smart = do
   guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice (map (return <$>) [apostrophe, dash, ellipses])
+  doubleQuoted <|> singleQuoted <|> (return <$> doubleCloseQuote) <|>
+    (return <$> apostrophe) <|> (return <$> dash) <|> (return <$> ellipses)
 
 singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-singleQuoted = try $ do
+singleQuoted = do
   singleQuoteStart
-  withQuoteContext InSingleQuote $
+  (try (withQuoteContext InSingleQuote $
     fmap B.singleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline singleQuoteEnd
+      many1Till inline singleQuoteEnd))
+    <|> (return (return (B.str "\8217")))
 
 -- doubleQuoted will handle regular double-quoted sections, as well
 -- as dialogues with an open double-quote without a close double-quote
 -- in the same paragraph.
 doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-doubleQuoted = try $ do
+doubleQuoted = do
   doubleQuoteStart
-  withQuoteContext InDoubleQuote $
+  (try (withQuoteContext InDoubleQuote $
     fmap B.doubleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline doubleQuoteEnd
+      many1Till inline doubleQuoteEnd))
+    <|> (return (return (B.str "\8220")))
 
 toRow :: [Blocks] -> Row
 toRow = Row nullAttr . map B.simpleCell
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 514e3b88d..ac4c0b6cb 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1658,21 +1658,4 @@ note = try $ do
       return $ B.note contents
 
 smart :: PandocMonad m => RSTParser m Inlines
-smart = do
-  guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice [apostrophe, dash, ellipses]
-
-singleQuoted :: PandocMonad m => RSTParser m Inlines
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote $
-    B.singleQuoted . trimInlines . mconcat <$>
-      many1Till inline singleQuoteEnd
-
-doubleQuoted :: PandocMonad m => RSTParser m Inlines
-doubleQuoted = try $ do
-  doubleQuoteStart
-  withQuoteContext InDoubleQuote $
-    B.doubleQuoted . trimInlines . mconcat <$>
-      many1Till inline doubleQuoteEnd
+smart = smartPunctuation inline
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 484a6c923..c4d7bcc93 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -469,27 +469,7 @@ symbol :: PandocMonad m => TWParser m B.Inlines
 symbol = B.str <$> countChar 1 nonspaceChar
 
 smart :: PandocMonad m => TWParser m B.Inlines
-smart = do
-  guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice [ apostrophe
-           , dash
-           , ellipses
-           ]
-
-singleQuoted :: PandocMonad m => TWParser m B.Inlines
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote
-    (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd)
-
-doubleQuoted :: PandocMonad m => TWParser m B.Inlines
-doubleQuoted = try $ do
-  doubleQuoteStart
-  contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
-  withQuoteContext InDoubleQuote (doubleQuoteEnd >>
-   return (B.doubleQuoted $ B.trimInlines contents))
-   <|> return (B.str "\8220" B.<> contents)
+smart = smartPunctuation inline
 
 link :: PandocMonad m => TWParser m B.Inlines
 link = try $ do
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 0930deae6..6e38da21a 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -358,7 +358,7 @@ tests = [ testGroup "inline code"
               para (text "The value of the " <> math "x" <> text "\8217s and the systems\8217 condition.")
           , test markdownSmart "unclosed double quote"
             ("**this should \"be bold**"
-            =?> para (strong "this should \"be bold"))
+            =?> para (strong "this should \8220be bold"))
           ]
         , testGroup "footnotes"
           [ "indent followed by newline and flush-left text" =:
diff --git a/test/command/7216.md b/test/command/7216.md
new file mode 100644
index 000000000..cab3b9689
--- /dev/null
+++ b/test/command/7216.md
@@ -0,0 +1,19 @@
+```
+pandoc -t latex
+"This is some text in quotes. Another paragraph by the same speaker follows. The first paragraph should have no close quote.
+
+"The second paragraph should have open and close quotes."
+
+| "Open quote on this line,
+| Close quote on the next line."
+| "Quotes on the same line."
+^D
+``This is some text in quotes. Another paragraph by the same speaker
+follows. The first paragraph should have no close quote.
+
+``The second paragraph should have open and close quotes.''
+
+``Open quote on this line,\\
+Close quote on the next line.''\\
+``Quotes on the same line.''
+```
diff --git a/test/command/pandoc-citeproc-320a.md b/test/command/pandoc-citeproc-320a.md
index 79dacfa10..e894a2250 100644
--- a/test/command/pandoc-citeproc-320a.md
+++ b/test/command/pandoc-citeproc-320a.md
@@ -49,17 +49,13 @@ references:
 Foo [@item1; @item2; @item3; @item4; @item5; @item6; @item7; @item8].
 ^D
 Foo (al-ʾUdhrī, n.d.; al-ʿUdhrī, n.d.; al-\'Udhrī, n.d.; al-'Udhrī,
-n.d.; al-'Udhrī, n.d.; Uch, n.d.; Uebel, n.d.; Zzz, n.d.).
+n.d.a, n.d.b; Uch, n.d.; Uebel, n.d.; Zzz, n.d.).
 
 ::: {#refs .references .csl-bib-body .hanging-indent}
 ::: {#ref-item6 .csl-entry}
 Uch, Ann. n.d.
 :::
 
-::: {#ref-item4 .csl-entry}
-'Udhrī, Jamīl al-. n.d.
-:::
-
 ::: {#ref-item1 .csl-entry}
 ʾUdhrī, Jamīl al-. n.d.
 :::
@@ -72,8 +68,12 @@ Uch, Ann. n.d.
 \'Udhrī, Jamīl al-. n.d.
 :::
 
+::: {#ref-item4 .csl-entry}
+'Udhrī, Jamīl al-. n.d.a.
+:::
+
 ::: {#ref-item5 .csl-entry}
-'Udhrī, Jamīl al-. n.d.
+---------. n.d.b.
 :::
 
 ::: {#ref-item7 .csl-entry}
-- 
cgit v1.2.3


From b6a65445e10d74dfe384763c37438338bd395372 Mon Sep 17 00:00:00 2001
From: mbrackeantidot <65160241+mbrackeantidot@users.noreply.github.com>
Date: Thu, 29 Apr 2021 18:11:44 +0200
Subject: Docx reader: add handling of vml image objects (jgm#4735) (#7257)

They represent images, the same way as other images in vml format.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs |  11 +++++++++--
 test/Tests/Readers/Docx.hs            |   4 ++++
 test/docx/image_vml_as_object.docx    | Bin 0 -> 74199 bytes
 test/docx/image_vml_as_object.native  |   2 ++
 4 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 test/docx/image_vml_as_object.docx
 create mode 100644 test/docx/image_vml_as_object.native

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index f8ed248d7..7325ff300 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -709,7 +709,8 @@ elemToParPart ns element
      case drawing of
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
        Nothing -> throwError WrongElem
--- The below is an attempt to deal with images in deprecated vml format.
+-- The two cases below are an attempt to deal with images in deprecated vml format.
+-- Todo: check out title and attr for deprecated format.
 elemToParPart ns element
   | isElem ns "w" "r" element
   , Just _ <- findChildByName ns "w" "pict" element =
@@ -717,9 +718,15 @@ elemToParPart ns element
                   >>= findAttrByName ns "r" "id"
     in
      case drawing of
-       -- Todo: check out title and attr for deprecated format.
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
        Nothing -> throwError WrongElem
+elemToParPart ns element
+  | isElem ns "w" "r" element
+  , Just objectElem <- findChildByName ns "w" "object" element
+  , Just shapeElem <- findChildByName ns "v" "shape" objectElem
+  , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
+  , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
+  = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
 -- Chart
 elemToParPart ns element
   | isElem ns "w" "r" element
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
index 263e04173..2cce70cc5 100644
--- a/test/Tests/Readers/Docx.hs
+++ b/test/Tests/Readers/Docx.hs
@@ -155,6 +155,10 @@ tests = [ testGroup "document"
             "VML image"
             "docx/image_vml.docx"
             "docx/image_vml.native"
+          , testCompare
+            "VML image as object"
+            "docx/image_vml_as_object.docx"
+            "docx/image_vml_as_object.native"
           , testCompare
             "inline image in links"
             "docx/inline_images.docx"
diff --git a/test/docx/image_vml_as_object.docx b/test/docx/image_vml_as_object.docx
new file mode 100644
index 000000000..7e1e4d2ca
Binary files /dev/null and b/test/docx/image_vml_as_object.docx differ
diff --git a/test/docx/image_vml_as_object.native b/test/docx/image_vml_as_object.native
new file mode 100644
index 000000000..6e689486a
--- /dev/null
+++ b/test/docx/image_vml_as_object.native
@@ -0,0 +1,2 @@
+[Para [Str "Test",Space,Str "with",Space,Str "object",Space,Str "as",Space,Str "image:"]
+,Para [Image ("",[],[]) [] ("media/image1.emf","")]]
-- 
cgit v1.2.3


From 6e45607f9948f45b2e94f54b4825b667ca0d5441 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 1 May 2021 13:17:45 -0700
Subject: Change reader types, allowing better tracking of source positions.

Previously, when multiple file arguments were provided, pandoc
simply concatenated them and passed the contents to the readers,
which took a Text argument.

As a result, the readers had no way of knowing which file
was the source of any particular bit of text.  This meant that
we couldn't report accurate source positions on errors or
include accurate source positions as attributes in the AST.
More seriously, it meant that we couldn't resolve resource
paths relative to the files containing them
(see e.g. #5501, #6632, #6384, #3752).

Add Text.Pandoc.Sources (exported module), with a `Sources` type
and a `ToSources` class.  A `Sources` wraps a list of `(SourcePos,
Text)` pairs. [API change] A parsec `Stream` instance is provided for
`Sources`.  The module also exports versions of parsec's `satisfy` and
other Char parsers that track source positions accurately from a
`Sources` stream (or any instance of the new `UpdateSourcePos` class).

Text.Pandoc.Parsing now exports these modified Char parsers instead of
the ones parsec provides.  Modified parsers to use a `Sources` as stream
[API change].

The readers that previously took a `Text` argument have been
modified to take any instance of `ToSources`. So, they may still
be used with a `Text`, but they can also be used with a `Sources`
object.

In Text.Pandoc.Error, modified the constructor PandocParsecError
to take a `Sources` rather than a `Text` as first argument,
so parse error locations can be accurately reported.

T.P.Error: showPos, do not print "-" as source name.
---
 .hlint.yaml                               |   1 +
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/App.hs                    |   8 +-
 src/Text/Pandoc/App/Opt.hs                |   2 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs        |  14 +-
 src/Text/Pandoc/Error.hs                  |  39 ++-
 src/Text/Pandoc/Logging.hs                |   8 +-
 src/Text/Pandoc/Parsing.hs                | 474 ++++++++++++++++++------------
 src/Text/Pandoc/Readers.hs                |  97 +++---
 src/Text/Pandoc/Readers/BibTeX.hs         |  13 +-
 src/Text/Pandoc/Readers/CSV.hs            |  14 +-
 src/Text/Pandoc/Readers/CommonMark.hs     |  70 +++--
 src/Text/Pandoc/Readers/Creole.hs         |  11 +-
 src/Text/Pandoc/Readers/CslJson.hs        |   9 +-
 src/Text/Pandoc/Readers/DocBook.hs        |  11 +-
 src/Text/Pandoc/Readers/DokuWiki.hs       |  17 +-
 src/Text/Pandoc/Readers/FB2.hs            |   9 +-
 src/Text/Pandoc/Readers/HTML.hs           |  33 ++-
 src/Text/Pandoc/Readers/Haddock.hs        |  12 +-
 src/Text/Pandoc/Readers/Ipynb.hs          |   8 +-
 src/Text/Pandoc/Readers/JATS.hs           |  11 +-
 src/Text/Pandoc/Readers/Jira.hs           |  16 +-
 src/Text/Pandoc/Readers/LaTeX.hs          |  23 +-
 src/Text/Pandoc/Readers/LaTeX/Citation.hs |   2 +-
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs  |  12 +-
 src/Text/Pandoc/Readers/LaTeX/Types.hs    |  15 +-
 src/Text/Pandoc/Readers/Man.hs            |  29 +-
 src/Text/Pandoc/Readers/Markdown.hs       |  49 +--
 src/Text/Pandoc/Readers/MediaWiki.hs      |  13 +-
 src/Text/Pandoc/Readers/Metadata.hs       |  26 +-
 src/Text/Pandoc/Readers/Muse.hs           |  17 +-
 src/Text/Pandoc/Readers/Native.hs         |  12 +-
 src/Text/Pandoc/Readers/OPML.hs           |  13 +-
 src/Text/Pandoc/Readers/Org.hs            |  11 +-
 src/Text/Pandoc/Readers/Org/Inlines.hs    |   3 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs    |   2 +-
 src/Text/Pandoc/Readers/RST.hs            |  76 ++---
 src/Text/Pandoc/Readers/Roff.hs           |  34 +--
 src/Text/Pandoc/Readers/TWiki.hs          |  12 +-
 src/Text/Pandoc/Readers/Textile.hs        | 172 +++++------
 src/Text/Pandoc/Readers/TikiWiki.hs       |  12 +-
 src/Text/Pandoc/Readers/Txt2Tags.hs       |  12 +-
 src/Text/Pandoc/Readers/Vimwiki.hs        |  19 +-
 src/Text/Pandoc/Shared.hs                 |   1 +
 src/Text/Pandoc/Sources.hs                | 195 ++++++++++++
 test/Tests/Readers/Markdown.hs            |   4 +-
 46 files changed, 1025 insertions(+), 617 deletions(-)
 create mode 100644 src/Text/Pandoc/Sources.hs

(limited to 'src/Text/Pandoc/Readers')

diff --git a/.hlint.yaml b/.hlint.yaml
index 350794803..ad0f7ddb9 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -9,6 +9,7 @@
 # Ignore some builtin hints
 #
 - ignore: {name: "Avoid lambda"}
+- ignore: {name: "Use bimap"}
 - ignore: {name: "Eta reduce"}
 - ignore: {name: "Evaluate"}
 - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained
diff --git a/pandoc.cabal b/pandoc.cabal
index 8ea3aa681..de7951c54 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -513,6 +513,7 @@ library
                    Text.Pandoc.Options,
                    Text.Pandoc.Extensions,
                    Text.Pandoc.Shared,
+                   Text.Pandoc.Sources,
                    Text.Pandoc.MediaBag,
                    Text.Pandoc.Error,
                    Text.Pandoc.Filter,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 96e4b5f47..98b072ffb 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -160,9 +160,11 @@ convertWithOpts opts = do
                                     else optTabStop opts)
 
 
-    let readSources :: [FilePath] -> PandocIO Text
-        readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
-                              mapM readSource srcs
+    let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
+        readSources srcs =
+          mapM (\fp -> do
+                   t <- readSource fp
+                   return (if fp == "-" then "" else fp, convertTabs t)) srcs
 
 
     outputSettings <- optToOutputSettings opts
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index c72f63464..d54d932b7 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -687,7 +687,7 @@ yamlToMeta (Mapping _ _ m) =
   where
     pMetaString = pure . MetaString <$> P.manyChar P.anyChar
     runEverything p =
-      runPure (P.readWithM p (def :: P.ParserState) "")
+      runPure (P.readWithM p (def :: P.ParserState) ("" :: Text))
       >>= fmap (Meta . flip P.runF def)
 yamlToMeta _ = return mempty
 
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 510e56f9c..f6833000c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -59,10 +59,11 @@ data Variant = Bibtex | Biblatex
   deriving (Show, Eq, Ord)
 
 -- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
-readBibtexString :: Variant           -- ^ bibtex or biblatex
+readBibtexString :: ToSources a
+                 => Variant           -- ^ bibtex or biblatex
                  -> Locale            -- ^ Locale
                  -> (Text -> Bool)    -- ^ Filter on citation ids
-                 -> Text              -- ^ bibtex/biblatex text
+                 -> a                 -- ^ bibtex/biblatex text
                  -> Either ParseError [Reference Inlines]
 readBibtexString variant locale idpred contents = do
   case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
@@ -70,7 +71,7 @@ readBibtexString variant locale idpred contents = do
                       filter (\item -> idpred (identifier item) &&
                                         entryType item /= "xdata"))
            (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
-           "" contents of
+           "" (toSources contents) of
           Left err -> Left err
           Right xs -> return xs
 
@@ -339,7 +340,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] []
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
 
-type BibParser = Parser Text (Lang, StringMap)
+type BibParser = Parser Sources (Lang, StringMap)
 
 data Item = Item{ identifier :: Text
                 , sourcePos  :: SourcePos
@@ -804,7 +805,7 @@ bibEntries = do
                        (bibComment <|> bibPreamble <|> bibString))
 
 bibSkip :: BibParser ()
-bibSkip = () <$ take1WhileP (/='@')
+bibSkip = skipMany1 (satisfy (/='@'))
 
 bibComment :: BibParser ()
 bibComment = do
@@ -829,6 +830,9 @@ bibString = do
   updateState (\(l,m) -> (l, Map.insert k v m))
   return ()
 
+take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text
+take1WhileP f = T.pack <$> many1 (satisfy f)
+
 inBraces :: BibParser Text
 inBraces = do
   char '{'
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 8102f04cc..81eb41f85 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -23,26 +23,27 @@ import Control.Exception (Exception, displayException)
 import Data.Typeable (Typeable)
 import Data.Word (Word8)
 import Data.Text (Text)
+import Data.List (sortOn)
 import qualified Data.Text as T
+import Data.Ord (Down(..))
 import GHC.Generics (Generic)
 import Network.HTTP.Client (HttpException)
 import System.Exit (ExitCode (..), exitWith)
 import System.IO (stderr)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (Sources(..))
 import Text.Printf (printf)
 import Text.Parsec.Error
 import Text.Parsec.Pos hiding (Line)
 import Text.Pandoc.Shared (tshow)
 import Citeproc (CiteprocError, prettyCiteprocError)
 
-type Input = Text
-
 data PandocError = PandocIOError Text IOError
                  | PandocHttpError Text HttpException
                  | PandocShouldNeverHappenError Text
                  | PandocSomeError Text
                  | PandocParseError Text
-                 | PandocParsecError Input ParseError
+                 | PandocParsecError Sources ParseError
                  | PandocMakePDFError Text
                  | PandocOptionError Text
                  | PandocSyntaxMapError Text
@@ -81,22 +82,28 @@ renderError e =
       "Please report this to pandoc's developers: " <> s
     PandocSomeError s -> s
     PandocParseError s -> s
-    PandocParsecError input err' ->
+    PandocParsecError (Sources inputs) err' ->
         let errPos = errorPos err'
             errLine = sourceLine errPos
             errColumn = sourceColumn errPos
-            ls = T.lines input <> [""]
-            errorInFile = if length ls > errLine - 1
-                            then T.concat ["\n", ls !! (errLine - 1)
-                                          ,"\n", T.replicate (errColumn - 1) " "
-                                          ,"^"]
-                        else ""
-        in  "\nError at " <> tshow  err' <>
-                     -- if error comes from a chunk or included file,
-                     -- then we won't get the right text this way:
-                     if sourceName errPos == "source"
-                        then errorInFile
-                        else ""
+            errFile = sourceName errPos
+            errorInFile =
+              case sortOn (Down . sourceLine . fst)
+                      [ (pos,t)
+                        | (pos,t) <- inputs
+                        , sourceName pos == errFile
+                        , sourceLine pos <= errLine
+                      ] of
+                []  -> ""
+                ((pos,txt):_) ->
+                  let ls = T.lines txt <> [""]
+                      ln = errLine - sourceLine pos
+                   in if length ls > ln - 1
+                         then T.concat ["\n", ls !! (ln - 1)
+                                       ,"\n", T.replicate (errColumn - 1) " "
+                                       ,"^"]
+                         else ""
+        in  "\nError at " <> tshow  err' <> errorInFile
     PandocMakePDFError s -> s
     PandocOptionError s -> s
     PandocSyntaxMapError s -> s
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index efd2188f1..8c7292b69 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -241,9 +241,11 @@ instance ToJSON LogMessage where
 showPos :: SourcePos -> Text
 showPos pos = Text.pack $ sn ++ "line " ++
      show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
-  where sn = if sourceName pos == "source" || sourceName pos == ""
-                then ""
-                else sourceName pos ++ " "
+  where
+    sn' = sourceName pos
+    sn = if sn' == "source" || sn' == "" || sn' == "-"
+            then ""
+            else sn' ++ " "
 
 encodeLogMessages :: [LogMessage] -> BL.ByteString
 encodeLogMessages ms =
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 37ab0adaa..11c4c7a62 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,7 +5,6 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE IncoherentInstances        #-}
 {-# LANGUAGE MultiParamTypeClasses      #-}
-{-# LANGUAGE ViewPatterns               #-}
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
@@ -19,8 +18,7 @@
 A utility library with parsers used in pandoc readers.
 -}
 
-module Text.Pandoc.Parsing ( take1WhileP,
-                             takeP,
+module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              countChar,
                              textStr,
                              anyLine,
@@ -134,22 +132,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              getInput,
                              setInput,
                              unexpected,
-                             char,
-                             letter,
-                             digit,
-                             alphaNum,
                              skipMany,
                              skipMany1,
-                             spaces,
-                             space,
-                             anyChar,
-                             satisfy,
-                             newline,
-                             string,
                              count,
                              eof,
-                             noneOf,
-                             oneOf,
                              lookAhead,
                              notFollowedBy,
                              many,
@@ -174,6 +160,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              SourcePos,
                              getPosition,
                              setPosition,
+                             sourceName,
+                             setSourceName,
                              sourceColumn,
                              sourceLine,
                              setSourceColumn,
@@ -189,16 +177,25 @@ module Text.Pandoc.Parsing ( take1WhileP,
 where
 
 import Control.Monad.Identity
+    ( guard,
+      join,
+      unless,
+      when,
+      void,
+      liftM2,
+      liftM,
+      Identity(..),
+      MonadPlus(mzero) )
 import Control.Monad.Reader
+    ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
 import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
                   isPunctuation, isSpace, ord, toLower, toUpper)
-import Data.Default
+import Data.Default ( Default(..) )
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
 import qualified Data.Set as Set
-import Data.String
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -207,22 +204,108 @@ import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
 import Text.Pandoc.Definition
+    ( Target,
+      nullMeta,
+      nullAttr,
+      Meta,
+      ColWidth(ColWidthDefault, ColWidth),
+      TableFoot(TableFoot),
+      TableBody(TableBody),
+      Attr,
+      TableHead(TableHead),
+      Row(..),
+      Alignment(..),
+      Inline(Str),
+      ListNumberDelim(..),
+      ListAttributes,
+      ListNumberStyle(..) )
 import Text.Pandoc.Logging
+    ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
 import Text.Pandoc.Options
+    ( extensionEnabled,
+      Extension(Ext_old_dashes, Ext_tex_math_dollars,
+                Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
+                Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
+      ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
 import Text.Pandoc.Readers.LaTeX.Types (Macro)
 import Text.Pandoc.Shared
+    ( uniqueIdent,
+      tshow,
+      mapLeft,
+      compactify,
+      trim,
+      trimr,
+      splitTextByIndices,
+      safeRead,
+      trimMath,
+      schemes,
+      escapeURI )
+import Text.Pandoc.Sources
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Pandoc.XML (fromEntities)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (initialPos, newPos, updatePosString)
-
-import Control.Monad.Except
+import Text.Parsec
+    ( between,
+      setSourceName,
+      Parsec,
+      Column,
+      Line,
+      incSourceLine,
+      incSourceColumn,
+      setSourceLine,
+      setSourceColumn,
+      sourceLine,
+      sourceColumn,
+      sourceName,
+      setSourceName,
+      setPosition,
+      getPosition,
+      updateState,
+      setState,
+      getState,
+      optionMaybe,
+      optional,
+      option,
+      endBy1,
+      endBy,
+      sepEndBy1,
+      sepEndBy,
+      sepBy1,
+      sepBy,
+      try,
+      choice,
+      (<?>),
+      (<|>),
+      manyTill,
+      many1,
+      many,
+      notFollowedBy,
+      lookAhead,
+      eof,
+      count,
+      skipMany1,
+      skipMany,
+      unexpected,
+      setInput,
+      getInput,
+      anyToken,
+      tokenPrim,
+      parse,
+      runParserT,
+      runParser,
+      ParseError,
+      ParsecT,
+      SourcePos,
+      Stream(..) )
+import Text.Parsec.Pos (initialPos, newPos)
+import Control.Monad.Except ( MonadError(throwError) )
 import Text.Pandoc.Error
+    ( PandocError(PandocParseError, PandocParsecError) )
 
 type Parser t s = Parsec t s
 
 type ParserT = ParsecT
 
+
 -- | Reader monad wrapping the parser state. This is used to possibly delay
 -- evaluation until all relevant information has been parsed and made available
 -- in the parser state.
@@ -251,70 +334,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
   mappend = (<>)
 
 -- | Like @count@, but packs its result
-countChar :: (Stream s m Char, Monad m)
+countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
           => Int
           -> ParsecT s st m Char
           -> ParsecT s st m Text
 countChar n = fmap T.pack . count n
 
 -- | Like @string@, but uses @Text@.
-textStr :: Stream s m Char => Text -> ParsecT s u m Text
+textStr :: (Stream s m Char, UpdateSourcePos s Char)
+        => Text -> ParsecT s u m Text
 textStr t = string (T.unpack t) $> t
 
--- | Parse characters while a predicate is true.
-take1WhileP :: Monad m
-           => (Char -> Bool)
-           -> ParserT Text st m Text
-take1WhileP f = do
-  -- needed to persuade parsec that this won't match an empty string:
-  c <- satisfy f
-  inp <- getInput
-  pos <- getPosition
-  let (t, rest) = T.span f inp
-  setInput rest
-  setPosition $
-    if f '\t' || f '\n'
-       then updatePosString pos $ T.unpack t
-       else incSourceColumn pos (T.length t)
-  return $ T.singleton c <> t
-
--- Parse n characters of input (or the rest of the input if
--- there aren't n characters).
-takeP :: Monad m => Int -> ParserT Text st m Text
-takeP n = do
-  guard (n > 0)
-  -- faster than 'count n anyChar'
-  inp <- getInput
-  pos <- getPosition
-  let (xs, rest) = T.splitAt n inp
-  -- needed to persuade parsec that this won't match an empty string:
-  anyChar
-  setInput rest
-  setPosition $ updatePosString pos $ T.unpack xs
-  return xs
-
--- | Parse any line of text
-anyLine :: Monad m => ParserT Text st m Text
+
+-- | Parse any line of text, returning the contents without the
+-- final newline.
+anyLine :: Monad m => ParserT Sources st m Text
 anyLine = do
   -- This is much faster than:
   -- manyTill anyChar newline
   inp <- getInput
-  pos <- getPosition
-  case T.break (=='\n') inp of
-       (this, T.uncons -> Just ('\n', rest)) -> do
-         -- needed to persuade parsec that this won't match an empty string:
-         anyChar
-         setInput rest
-         setPosition $ incSourceLine (setSourceColumn pos 1) 1
-         return this
-       _ -> mzero
+  case inp of
+    Sources [] -> mzero
+    Sources ((fp,t):inps) ->
+      -- we assume that lines don't span different input files
+      case T.break (=='\n') t of
+           (this, rest)
+             | T.null rest
+             , not (null inps) ->
+                -- line may span different input files, so do it
+                 -- character by character
+                 T.pack <$> manyTill anyChar newline
+             | otherwise -> do --  either end of inputs or newline in rest
+                 setInput $ Sources ((fp, rest):inps)
+                 char '\n' -- needed so parsec knows we won't match empty string
+                           -- and so source pos is updated
+                 return this
 
 -- | Parse any line, include the final newline in the output
-anyLineNewline :: Monad m => ParserT Text st m Text
+anyLineNewline :: Monad m => ParserT Sources st m Text
 anyLineNewline = (<> "\n") <$> anyLine
 
 -- | Parse indent by specified number of spaces (or equiv. tabs)
-indentWith :: Stream s m Char
+indentWith :: (Stream s m Char, UpdateSourcePos s Char)
            => HasReaderOptions st
            => Int -> ParserT s st m Text
 indentWith num = do
@@ -399,11 +460,13 @@ notFollowedBy' p  = try $ join $  do  a <- try p
                                   return (return ())
 -- (This version due to Andrew Pimlott on the Haskell mailing list.)
 
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
+oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
+              => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
 oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
 
 -- TODO: This should be re-implemented in a Text-aware way
-oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
+               => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
 oneOfStrings'' _ []   = Prelude.fail "no strings"
 oneOfStrings'' matches strs = try $ do
   c <- anyChar
@@ -418,14 +481,16 @@ oneOfStrings'' matches strs = try $ do
 -- | Parses one of a list of strings.  If the list contains
 -- two strings one of which is a prefix of the other, the longer
 -- string will be matched if possible.
-oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
+             => [Text] -> ParserT s st m Text
 oneOfStrings = oneOfStrings' (==)
 
 -- | Parses one of a list of strings (tried in order), case insensitive.
 
 -- TODO: This will not be accurate with general Unicode (neither
 -- Text.toLower nor Text.toCaseFold can be implemented with a map)
-oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
+               => [Text] -> ParserT s st m Text
 oneOfStringsCI = oneOfStrings' ciMatch
   where ciMatch x y = toLower' x == toLower' y
         -- this optimizes toLower by checking common ASCII case
@@ -436,11 +501,13 @@ oneOfStringsCI = oneOfStrings' ciMatch
                    | otherwise = toLower c
 
 -- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
+spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 
 -- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
+nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Char
 nonspaceChar = satisfy (not . isSpaceChar)
 
 isSpaceChar :: Char -> Bool
@@ -451,21 +518,24 @@ isSpaceChar '\r' = True
 isSpaceChar _    = False
 
 -- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
+skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m ()
 skipSpaces = skipMany spaceChar
 
 -- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
+blankline :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 blankline = try $ skipSpaces >> newline
 
 -- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m Text
+blanklines :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m Text
 blanklines = T.pack <$> many1 blankline
 
 -- | Gobble n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleSpaces :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m ()
+             => Int -> ParserT Sources st m ()
 gobbleSpaces 0 = return ()
 gobbleSpaces n
   | n < 0     = error "gobbleSpaces called with negative number"
@@ -473,18 +543,26 @@ gobbleSpaces n
       char ' ' <|> eatOneSpaceOfTab
       gobbleSpaces (n - 1)
 
-eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
 eatOneSpaceOfTab = do
-  char '\t'
+  lookAhead (char '\t')
+  pos <- getPosition
   tabstop <- getOption readerTabStop
+  -- replace the tab on the input stream with spaces
+  let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
   inp <- getInput
-  setInput $ T.replicate (tabstop - 1) " " <> inp
-  return ' '
+  setInput $
+    case inp of
+      Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
+      Sources ((fp,t):rest) ->
+        -- drop the tab and add spaces
+        Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
+  char ' '
 
 -- | Gobble up to n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
-                   => Int -> ParserT Text st m Int
+                   => Int -> ParserT Sources st m Int
 gobbleAtMostSpaces 0 = return 0
 gobbleAtMostSpaces n
   | n < 0     = error "gobbleAtMostSpaces called with negative number"
@@ -493,7 +571,8 @@ gobbleAtMostSpaces n
       (+ 1) <$> gobbleAtMostSpaces (n - 1)
 
 -- | Parses material enclosed between start and end parsers.
-enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser
+enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m t   -- ^ start parser
          -> ParserT s st m end  -- ^ end parser
          -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
          -> ParserT s st m [a]
@@ -501,39 +580,41 @@ enclosed start end parser = try $
   start >> notFollowedBy space >> many1Till parser end
 
 -- | Parse string, case insensitive.
-stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
+stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
+              => Text -> ParserT s st m Text
 stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
 
-stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
+stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
+               => String -> ParserT s st m String
 stringAnyCase' [] = string ""
 stringAnyCase' (x:xs) = do
   firstChar <- char (toUpper x) <|> char (toLower x)
   rest <- stringAnyCase' xs
   return (firstChar:rest)
 
+-- TODO rewrite by just adding to Sources stream?
 -- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: (Stream s m Char, IsString s)
-                => ParserT s st m r
+parseFromString :: Monad m
+                => ParserT Sources st m r
                 -> Text
-                -> ParserT s st m r
+                -> ParserT Sources st m r
 parseFromString parser str = do
   oldPos <- getPosition
-  setPosition $ initialPos " chunk"
+  setPosition $ initialPos "chunk"
   oldInput <- getInput
-  setInput $ fromString $ T.unpack str
+  setInput $ toSources str
   result <- parser
   spaces
-  eof
   setInput oldInput
   setPosition oldPos
   return result
 
 -- | Like 'parseFromString' but specialized for 'ParserState'.
 -- This resets 'stateLastStrPos', which is almost always what we want.
-parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
-                 => ParserT s u m a
+parseFromString' :: (Monad m, HasLastStrPosition u)
+                 => ParserT Sources u m a
                  -> Text
-                 -> ParserT s u m a
+                 -> ParserT Sources u m a
 parseFromString' parser str = do
   oldLastStrPos <- getLastStrPos <$> getState
   updateState $ setLastStrPos Nothing
@@ -542,7 +623,7 @@ parseFromString' parser str = do
   return res
 
 -- | Parse raw line block up to and including blank lines.
-lineClump :: Monad m => ParserT Text st m Text
+lineClump :: Monad m => ParserT Sources st m Text
 lineClump = blanklines
           <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
 
@@ -551,7 +632,7 @@ lineClump = blanklines
 -- pairs of open and close, which must be different. For example,
 -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
 -- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
                 -> ParserT s st m Text
 charsInBalanced open close parser = try $ do
   char open
@@ -570,7 +651,7 @@ charsInBalanced open close parser = try $ do
 -- Auxiliary functions for romanNumeral:
 
 -- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Stream s m Char => Bool                  -- ^ Uppercase if true
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool                  -- ^ Uppercase if true
              -> ParserT s st m Int
 romanNumeral upperCase = do
     let rchar uc = char $ if upperCase then uc else toLower uc
@@ -606,7 +687,7 @@ romanNumeral upperCase = do
 
 -- | Parses an email address; returns original and corresponding
 -- escaped mailto: URI.
-emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
+emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
  where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
                            in  (full, escapeURI $ "mailto:" <> full)
@@ -630,11 +711,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        isEmailPunct c    = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
 
 
-uriScheme :: Stream s m Char => ParserT s st m Text
+uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
 uriScheme = oneOfStringsCI (Set.toList schemes)
 
 -- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream s m Char => ParserT s st m (Text, Text)
+uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 uri = try $ do
   scheme <- uriScheme
   char ':'
@@ -677,7 +758,7 @@ uri = try $ do
     uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
                                    return (T.pack $ [l] ++ chunk ++ [r])
 
-mathInlineWith :: Stream s m Char  => Text -> Text -> ParserT s st m Text
+mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char)  => Text -> Text -> ParserT s st m Text
 mathInlineWith op cl = try $ do
   textStr op
   when (op == "$") $ notFollowedBy space
@@ -698,10 +779,10 @@ mathInlineWith op cl = try $ do
   notFollowedBy digit  -- to prevent capture of $5
   return $ trimMath $ T.concat words'
  where
-  inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
+  inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
   inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
 
-  inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
+  inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
   inBalancedBraces' 0 "" = do
     c <- anyChar
     if c == '{'
@@ -718,13 +799,13 @@ mathInlineWith op cl = try $ do
          '{' -> inBalancedBraces' (numOpen + 1) (c:xs)
          _   -> inBalancedBraces' numOpen (c:xs)
 
-mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
+mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
 mathDisplayWith op cl = try $ fmap T.pack $ do
   textStr op
   many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
             (try $ textStr cl)
 
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
+mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m Text
 mathDisplay =
       (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
@@ -733,7 +814,7 @@ mathDisplay =
   <|> (guardEnabled Ext_tex_math_double_backslash >>
        mathDisplayWith "\\\\[" "\\\\]")
 
-mathInline :: (HasReaderOptions st , Stream s m Char)
+mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m Text
 mathInline =
       (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
@@ -746,7 +827,7 @@ mathInline =
 -- displacement (the difference between the source column at the end
 -- and the source column at the beginning). Vertical displacement
 -- (source row) is ignored.
-withHorizDisplacement :: Stream s m Char
+withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
                       => ParserT s st m a  -- ^ Parser to apply
                       -> ParserT s st m (a, Int) -- ^ (result, displacement)
 withHorizDisplacement parser = do
@@ -758,30 +839,37 @@ withHorizDisplacement parser = do
 -- | Applies a parser and returns the raw string that was parsed,
 -- along with the value produced by the parser.
 withRaw :: Monad m
-        => ParsecT Text st m a
-        -> ParsecT Text st m (a, Text)
+        => ParsecT Sources st m a
+        -> ParsecT Sources st m (a, Text)
 withRaw parser = do
-  pos1 <- getPosition
-  inp <- getInput
+  inps1 <- getInput
   result <- parser
-  pos2 <- getPosition
-  let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
-  let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
-  let inplines = take ((l2 - l1) + 1) $ T.lines inp
-  let raw = case inplines of
-                []  -> ""
-                [l] -> T.take (c2 - c1) l
-                ls  -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
-  return (result, raw)
+  inps2 <- getInput
+  -- 'raw' is the difference between inps1 and inps2
+  return (result, sourcesDifference inps1 inps2)
+
+sourcesDifference :: Sources -> Sources -> Text
+sourcesDifference (Sources is1) (Sources is2) = go is1 is2
+ where
+   go inps1 inps2 =
+    case (inps1, inps2) of
+      ([], _) -> mempty
+      (_, []) -> mconcat $ map snd inps1
+      ((p1,t1):rest1, (p2, t2):rest2)
+        | p1 == p2
+        , t1 == t2  -> go rest1 rest2
+        | p1 == p2
+        , t1 /= t2  -> fromMaybe mempty $ T.stripSuffix t2 t1
+        | otherwise -> t1 <> go rest1 inps2
 
 -- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
+escaped :: (Stream s m Char, UpdateSourcePos s Char)
         => ParserT s st m Char  -- ^ Parser for character to escape
         -> ParserT s st m Char
 escaped parser = try $ char '\\' >> parser
 
 -- | Parse character entity.
-characterReference :: Stream s m Char => ParserT s st m Char
+characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 characterReference = try $ do
   char '&'
   ent <- many1Till nonspaceChar (char ';')
@@ -794,19 +882,19 @@ characterReference = try $ do
        _            -> Prelude.fail "entity not found"
 
 -- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperRoman = do
   num <- romanNumeral True
   return (UpperRoman, num)
 
 -- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerRoman = do
   num <- romanNumeral False
   return (LowerRoman, num)
 
 -- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 decimal = do
   num <- many1 digit
   return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
@@ -815,7 +903,7 @@ decimal = do
 -- returns (DefaultStyle, [next example number]).  The next
 -- example number is incremented in parser state, and the label
 -- (if present) is added to the label table.
-exampleNum :: Stream s m Char
+exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s ParserState m (ListNumberStyle, Int)
 exampleNum = do
   char '@'
@@ -834,37 +922,37 @@ exampleNum = do
   return (Example, num)
 
 -- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 defaultNum = do
   char '#'
   return (DefaultStyle, 1)
 
 -- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerAlpha = do
   ch <- satisfy isAsciiLower
   return (LowerAlpha, ord ch - ord 'a' + 1)
 
 -- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperAlpha = do
   ch <- satisfy isAsciiUpper
   return (UpperAlpha, ord ch - ord 'A' + 1)
 
 -- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
            (char 'I' >> return (UpperRoman, 1))
 
 -- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
+anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
 anyOrderedListMarker = choice
   [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
                            numParser <- [decimal, exampleNum, defaultNum, romanOne,
                            lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
 
 -- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
+inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m (ListNumberStyle, Int)
          -> ParserT s st m ListAttributes
 inPeriod num = try $ do
@@ -876,7 +964,7 @@ inPeriod num = try $ do
   return (start, style, delim)
 
 -- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
+inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m (ListNumberStyle, Int)
            -> ParserT s st m ListAttributes
 inOneParen num = try $ do
@@ -885,7 +973,7 @@ inOneParen num = try $ do
   return (start, style, OneParen)
 
 -- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
+inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m (ListNumberStyle, Int)
             -> ParserT s st m ListAttributes
 inTwoParens num = try $ do
@@ -896,7 +984,7 @@ inTwoParens num = try $ do
 
 -- | Parses an ordered list marker with a given style and delimiter,
 -- returns number.
-orderedListMarker :: Stream s m Char
+orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
                   => ListNumberStyle
                   -> ListNumberDelim
                   -> ParserT s ParserState m Int
@@ -919,10 +1007,10 @@ orderedListMarker style delim = do
   return start
 
 -- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
+charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
 charRef = Str . T.singleton <$> characterReference
 
-lineBlockLine :: Monad m => ParserT Text st m Text
+lineBlockLine :: Monad m => ParserT Sources st m Text
 lineBlockLine = try $ do
   char '|'
   char ' '
@@ -932,11 +1020,11 @@ lineBlockLine = try $ do
   continuations <- many (try $ char ' ' >> anyLine)
   return $ white <> T.unwords (line : continuations)
 
-blankLineBlockLine :: Stream s m Char => ParserT s st m Char
+blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 blankLineBlockLine = try (char '|' >> blankline)
 
 -- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Monad m => ParserT Text st m [Text]
+lineBlockLines :: Monad m => ParserT Sources st m [Text]
 lineBlockLines = try $ do
   lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
   skipMany blankline
@@ -944,7 +1032,8 @@ lineBlockLines = try $ do
 
 -- | Parse a table using 'headerParser', 'rowParser',
 -- 'lineParser', and 'footerParser'.
-tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith :: (Stream s m Char, UpdateSourcePos s Char,
+              HasReaderOptions st, Monad mf)
           => ParserT s st m (mf [Blocks], [Alignment], [Int])
           -> ([Int] -> ParserT s st m (mf [Blocks]))
           -> ParserT s st m sep
@@ -964,7 +1053,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
 
 type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
 
-tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
+               HasReaderOptions st, Monad mf)
            => ParserT s st m (mf [Blocks], [Alignment], [Int])
            -> ([Int] -> ParserT s st m (mf [Blocks]))
            -> ParserT s st m sep
@@ -1013,20 +1103,19 @@ widthsFromIndices numColumns' indices =
 -- (which may be grid), then the rows,
 -- which may be grid, separated by blank lines, and
 -- ending with a footer (dashed line followed by blank line).
-gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                  Monad mf, IsString s)
-              => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
+              => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
               -> Bool                        -- ^ Headerless table
-              -> ParserT s st m (mf Blocks)
+              -> ParserT Sources st m (mf Blocks)
 gridTableWith blocks headless =
   tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
             (gridTableSep '-') gridTableFooter
 
-gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                   Monad mf, IsString s)
-               => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
+                   Monad mf)
+               => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
                -> Bool                        -- ^ Headerless table
-               -> ParserT s st m (TableComponents mf)
+               -> ParserT Sources st m (TableComponents mf)
 gridTableWith' blocks headless =
   tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
              (gridTableSep '-') gridTableFooter
@@ -1035,7 +1124,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
 gridTableSplitLine indices line = map removeFinalBar $ tail $
   splitTextByIndices (init indices) $ trimr line
 
-gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
+gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
 gridPart ch = do
   leftColon <- option False (True <$ char ':')
   dashes <- many1 (char ch)
@@ -1050,7 +1139,7 @@ gridPart ch = do
                        (False, False) -> AlignDefault
   return ((lengthDashes, lengthDashes + 1), alignment)
 
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
+gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
 gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
 
 removeFinalBar :: Text -> Text
@@ -1059,14 +1148,14 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
     go c = T.any (== c) " \t"
 
 -- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
+gridTableSep :: Monad m => Char -> ParserT Sources st m Char
 gridTableSep ch = try $ gridDashedLines ch >> return '\n'
 
 -- | Parse header for a grid table.
-gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
+gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
                 => Bool -- ^ Headerless table
-                -> ParserT s st m (mf Blocks)
-                -> ParserT s st m (mf [Blocks], [Alignment], [Int])
+                -> ParserT Sources st m (mf Blocks)
+                -> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
 gridTableHeader True _ = do
   optional blanklines
   dashes <- gridDashedLines '-'
@@ -1089,17 +1178,17 @@ gridTableHeader False blocks = try $ do
   heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
   return (heads, aligns, indices)
 
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
+gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
 gridTableRawLine indices = do
   char '|'
   line <- many1Till anyChar newline
   return (gridTableSplitLine indices $ T.pack line)
 
 -- | Parse row of grid table.
-gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
-             => ParserT s st m (mf Blocks)
+gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
+             => ParserT Sources st m (mf Blocks)
              -> [Int]
-             -> ParserT s st m (mf [Blocks])
+             -> ParserT Sources st m (mf [Blocks])
 gridTableRow blocks indices = do
   colLines <- many1 (gridTableRawLine indices)
   let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
@@ -1120,34 +1209,38 @@ removeOneLeadingSpace xs =
            Just (c, _) -> c == ' '
 
 -- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s st m ()
+gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 gridTableFooter = optional blanklines
 
 ---
 
 -- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: Monad m
-          => ParserT Text st m a -- ^ parser
-          -> st                  -- ^ initial state
-          -> Text                -- ^ input
+readWithM :: (Monad m, ToSources t)
+          => ParserT Sources st m a  -- ^ parser
+          -> st                      -- ^ initial state
+          -> t                       -- ^ input
           -> m (Either PandocError a)
 readWithM parser state input =
-    mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
+    mapLeft (PandocParsecError sources)
+      <$> runParserT parser state (initialSourceName sources) sources
+ where
+   sources = toSources input
 
 -- | Parse a string with a given parser and state
-readWith :: Parser Text st a
+readWith :: ToSources t
+         => Parser Sources st a
          -> st
-         -> Text
+         -> t
          -> Either PandocError a
 readWith p t inp = runIdentity $ readWithM p t inp
 
 -- | Parse a string with @parser@ (for testing).
 testStringWith :: Show a
-               => ParserT Text ParserState Identity a
+               => ParserT Sources ParserState Identity a
                -> Text
                -> IO ()
 testStringWith parser str = UTF8.putStrLn $ tshow $
-                            readWith parser defaultParserState str
+                            readWith parser defaultParserState (toSources str)
 
 -- | Parsing options.
 data ParserState = ParserState
@@ -1394,19 +1487,23 @@ registerHeader (ident,classes,kvs) header' = do
           updateState $ updateIdentifierList $ Set.insert ident
         return (ident,classes,kvs)
 
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m Inlines
                  -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   guardEnabled Ext_smart
   choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
 
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+quoted :: (HasLastStrPosition st, HasQuoteContext st m,
+           Stream s m Char, UpdateSourcePos s Char)
        => ParserT s st m Inlines
        -> ParserT s st m Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
 singleQuoted inlineParser = do
@@ -1416,7 +1513,8 @@ singleQuoted inlineParser = do
      (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
    <|> pure "\8217"
 
-doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
 doubleQuoted inlineParser = do
@@ -1433,13 +1531,14 @@ failIfInQuoteContext context = do
   context' <- getQuoteContext
   when (context' == context) $ Prelude.fail "already inside quotes"
 
-charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
+charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
 charOrRef cs =
   oneOf cs <|> try (do c <- characterReference
                        guard (c `elem` cs)
                        return c)
 
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
@@ -1449,7 +1548,7 @@ singleQuoteStart = do
     charOrRef "'\8216\145"
     void $ lookAhead (satisfy (not . isSpaceChar))
 
-singleQuoteEnd :: Stream s m Char
+singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
@@ -1457,7 +1556,7 @@ singleQuoteEnd = try $ do
 
 doubleQuoteStart :: (HasLastStrPosition st,
                      HasQuoteContext st m,
-                     Stream s m Char)
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
@@ -1465,21 +1564,21 @@ doubleQuoteStart = do
   try $ do charOrRef "\"\8220\147"
            void $ lookAhead (satisfy (not . isSpaceChar))
 
-doubleQuoteEnd :: Stream s m Char
+doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
-apostrophe :: Stream s m Char => ParserT s st m Inlines
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
 apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
 
-doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
 doubleCloseQuote = B.str "\8221" <$ char '"'
 
-ellipses :: Stream s m Char
+ellipses :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m Inlines
 ellipses = try (string "..." >> return (B.str "\8230"))
 
-dash :: (HasReaderOptions st, Stream s m Char)
+dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
      => ParserT s st m Inlines
 dash = try $ do
   oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
@@ -1506,7 +1605,7 @@ nested p = do
   updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
   return res
 
-citeKey :: (Stream s m Char, HasLastStrPosition st)
+citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
         => ParserT s st m (Bool, Text)
 citeKey = try $ do
   guard =<< notAfterString
@@ -1575,10 +1674,11 @@ insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
 insertIncludedFile blocks totoks dirs f =
   runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
 
+-- TODO: replace this with something using addToSources.
 -- | Parse content of include file as future blocks. Circular includes result in
 -- an @PandocParseError@.
 insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT Text st m (Future st Blocks)
+                    => ParserT Sources st m (Future st Blocks)
                     -> [FilePath] -> FilePath
-                    -> ParserT Text st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p id
+                    -> ParserT Sources st m (Future st Blocks)
+insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 7ae9db34f..5106f8058 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE MonoLocalBinds      #-}
+{-# LANGUAGE RankNTypes          #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
@@ -72,6 +73,7 @@ import Text.Pandoc.Error
 import Text.Pandoc.Extensions
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.CommonMark
+import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.Creole
 import Text.Pandoc.Readers.DocBook
 import Text.Pandoc.Readers.Docx
@@ -84,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.JATS (readJATS)
 import Text.Pandoc.Readers.Jira (readJira)
 import Text.Pandoc.Readers.LaTeX
-import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.MediaWiki
 import Text.Pandoc.Readers.Muse
 import Text.Pandoc.Readers.Native
@@ -102,50 +103,52 @@ import Text.Pandoc.Readers.CSV
 import Text.Pandoc.Readers.CslJson
 import Text.Pandoc.Readers.BibTeX
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
+data Reader m = TextReader (forall a . ToSources a =>
+                                ReaderOptions -> a -> m Pandoc)
               | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
 
 -- | Association list of formats and readers.
 readers :: PandocMonad m => [(Text, Reader m)]
-readers = [ ("native"       , TextReader readNative)
-           ,("json"         , TextReader readJSON)
-           ,("markdown"     , TextReader readMarkdown)
-           ,("markdown_strict" , TextReader readMarkdown)
-           ,("markdown_phpextra" , TextReader readMarkdown)
-           ,("markdown_github" , TextReader readMarkdown)
-           ,("markdown_mmd",  TextReader readMarkdown)
-           ,("commonmark"   , TextReader readCommonMark)
-           ,("commonmark_x" , TextReader readCommonMark)
-           ,("creole"       , TextReader readCreole)
-           ,("dokuwiki"     , TextReader readDokuWiki)
-           ,("gfm"          , TextReader readCommonMark)
-           ,("rst"          , TextReader readRST)
-           ,("mediawiki"    , TextReader readMediaWiki)
-           ,("vimwiki"      , TextReader readVimwiki)
-           ,("docbook"      , TextReader readDocBook)
-           ,("opml"         , TextReader readOPML)
-           ,("org"          , TextReader readOrg)
-           ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
-           ,("html"         , TextReader readHtml)
-           ,("jats"         , TextReader readJATS)
-           ,("jira"         , TextReader readJira)
-           ,("latex"        , TextReader readLaTeX)
-           ,("haddock"      , TextReader readHaddock)
-           ,("twiki"        , TextReader readTWiki)
-           ,("tikiwiki"     , TextReader readTikiWiki)
-           ,("docx"         , ByteStringReader readDocx)
-           ,("odt"          , ByteStringReader readOdt)
-           ,("t2t"          , TextReader readTxt2Tags)
-           ,("epub"         , ByteStringReader readEPUB)
-           ,("muse"         , TextReader readMuse)
-           ,("man"          , TextReader readMan)
-           ,("fb2"          , TextReader readFB2)
-           ,("ipynb"        , TextReader readIpynb)
-           ,("csv"          , TextReader readCSV)
-           ,("csljson"      , TextReader readCslJson)
-           ,("bibtex"       , TextReader readBibTeX)
-           ,("biblatex"     , TextReader readBibLaTeX)
+readers = [("native"       , TextReader readNative)
+          ,("json"         , TextReader readJSON)
+          ,("markdown"     , TextReader readMarkdown)
+          ,("markdown_strict" , TextReader readMarkdown)
+          ,("markdown_phpextra" , TextReader readMarkdown)
+          ,("markdown_github" , TextReader readMarkdown)
+          ,("markdown_mmd",  TextReader readMarkdown)
+          ,("commonmark"   , TextReader readCommonMark)
+          ,("commonmark_x" , TextReader readCommonMark)
+          ,("creole"       , TextReader readCreole)
+          ,("dokuwiki"     , TextReader readDokuWiki)
+          ,("gfm"          , TextReader readCommonMark)
+          ,("rst"          , TextReader readRST)
+          ,("mediawiki"    , TextReader readMediaWiki)
+          ,("vimwiki"      , TextReader readVimwiki)
+          ,("docbook"      , TextReader readDocBook)
+          ,("opml"         , TextReader readOPML)
+          ,("org"          , TextReader readOrg)
+          ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
+          ,("html"         , TextReader readHtml)
+          ,("jats"         , TextReader readJATS)
+          ,("jira"         , TextReader readJira)
+          ,("latex"        , TextReader readLaTeX)
+          ,("haddock"      , TextReader readHaddock)
+          ,("twiki"        , TextReader readTWiki)
+          ,("tikiwiki"     , TextReader readTikiWiki)
+          ,("docx"         , ByteStringReader readDocx)
+          ,("odt"          , ByteStringReader readOdt)
+          ,("t2t"          , TextReader readTxt2Tags)
+          ,("epub"         , ByteStringReader readEPUB)
+          ,("muse"         , TextReader readMuse)
+          ,("man"          , TextReader readMan)
+          ,("fb2"          , TextReader readFB2)
+          ,("ipynb"        , TextReader readIpynb)
+          ,("csv"          , TextReader readCSV)
+          ,("csljson"      , TextReader readCslJson)
+          ,("bibtex"       , TextReader readBibTeX)
+          ,("biblatex"     , TextReader readBibLaTeX)
            ]
 
 -- | Retrieve reader, extensions based on formatSpec (format+extensions).
@@ -173,9 +176,13 @@ getReader s =
                      return (r, exts)
 
 -- | Read pandoc document from JSON format.
-readJSON :: PandocMonad m
-         => ReaderOptions -> Text -> m Pandoc
-readJSON _ t =
-  case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
+readJSON :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
+readJSON _ s =
+  case eitherDecode' . BL.fromStrict . UTF8.fromText
+                     . sourcesToText . toSources $ s of
        Right doc -> return doc
-       Left e    -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e)
+       Left e    -> throwError $ PandocParseError ("JSON parse error: "
+                                                   <> T.pack e)
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index b82a81350..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -23,30 +23,33 @@ where
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
 import Citeproc (Lang(..), parseLang)
 import Citeproc.Locale (getLocale)
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad, lookupEnv)
 import Text.Pandoc.Citeproc.BibTeX as BibTeX
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
 import Control.Monad.Except (throwError)
 
 -- | Read BibTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+           => ReaderOptions -> a -> m Pandoc
 readBibTeX = readBibTeX' BibTeX.Bibtex
 
 -- | Read BibLaTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+             => ReaderOptions -> a -> m Pandoc
 readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+            => Variant -> ReaderOptions -> a -> m Pandoc
 readBibTeX' variant _opts t = do
   mblangEnv <- lookupEnv "LANG"
   let defaultLang = Lang "en" Nothing (Just "US") [] [] []
@@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
   case BibTeX.readBibtexString variant locale (const True) t of
-    Left e -> throwError $ PandocParsecError t e
+    Left e -> throwError $ PandocParsecError (toSources t) e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
                          . setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 2958d6180..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -13,23 +13,23 @@
 Conversion from CSV to a 'Pandoc' table.
 -}
 module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.Options (ReaderOptions)
 import Control.Monad.Except (throwError)
 
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
-readCSV _opts s =
-  case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+  let txt = sourcesToText $ toSources s
+  case parseCSV defaultCSVOptions txt of
     Right (r:rs) -> return $ B.doc $ B.table capt
                                              (zip aligns widths)
                                              (TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
              aligns = replicate numcols AlignDefault
              widths = replicate numcols ColWidthDefault
     Right []     -> return $ B.doc mempty
-    Left e       -> throwError $ PandocParsecError s e
+    Left e       -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 244f77940..b099a9b50 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
 import Data.Typeable
-import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
-                            runF, defaultParserState, take1WhileP, option)
+import Text.Pandoc.Parsing (runParserT, getPosition,
+                            runF, defaultParserState, option, many1, anyChar,
+                            Sources(..), ToSources(..), ParserT, Future,
+                            sourceName)
 import qualified Data.Text as T
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMark :: (PandocMonad m, ToSources a)
+               => ReaderOptions -> a -> m Pandoc
 readCommonMark opts s
-  | isEnabled Ext_yaml_metadata_block opts
-  , "---" `T.isPrefixOf` s = do
-       let metaValueParser = do
-             inp <- option "" $ take1WhileP (const True)
-             case runIdentity
-                    (commonmarkWith (specFor opts) "metadata value" inp) of
-                    Left _ -> mzero
-                    Right (Cm bls :: Cm () Blocks)
-                             -> return $ return $ B.toMetaValue bls
-       res <- runParserT (do meta <- yamlMetaBlock metaValueParser
-                             pos <- getPosition
-                             return (meta, pos))
-                         defaultParserState "YAML metadata" s
-       case res of
-         Left _ -> readCommonMarkBody opts s
-         Right (meta, pos) -> do
-           let dropLines 0 = id
-               dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
-           let metaLines = sourceLine pos - 1
-           let body = T.replicate metaLines "\n" <> dropLines metaLines s
-           Pandoc _ bs <- readCommonMarkBody opts body
-           return $ Pandoc (runF meta defaultParserState) bs
-  | otherwise = readCommonMarkBody opts s
+  | isEnabled Ext_yaml_metadata_block opts = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+                          pos <- getPosition
+                          return (meta, pos))
+                      defaultParserState "YAML metadata" (toSources s)
+    case res of
+      Left _ -> readCommonMarkBody opts sources toks
+      Right (meta, pos) -> do
+        -- strip off metadata section and parse body
+        let body = dropWhile (\t -> tokPos t < pos) toks
+        Pandoc _ bs <- readCommonMarkBody opts sources body
+        return $ Pandoc (runF meta defaultParserState) bs
+  | otherwise = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    readCommonMarkBody opts sources toks
 
-readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMarkBody opts s
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+                => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+  inp <- option "" $ T.pack <$> many1 anyChar
+  let toks = concatMap sourceToToks (unSources (toSources inp))
+  case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+     Left _ -> mzero
+     Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
   | isEnabled Ext_sourcepos opts =
-    case runIdentity (commonmarkWith (specFor opts) "" s) of
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
       Left err -> throwError $ PandocParsecError s err
       Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
   | otherwise =
-    case runIdentity (commonmarkWith (specFor opts) "" s) of
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
       Left err -> throwError $ PandocParsecError s err
       Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
 
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
 
 -- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readCreole opts s = do
-  res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseCreole def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
 
 --
 -- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 30bb19483..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
 import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read CSL JSON from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
-  case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+            => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+  case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
     Left e -> throwError $ PandocParseError $ T.pack e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ac3caa2c0..3db459cfd 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -30,7 +30,8 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.Pandoc.XML.Light
 
@@ -539,11 +540,15 @@ instance Default DBState where
                , dbContent = [] }
 
 
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readDocBook _ inp = do
+  let sources = toSources inp
   tree <- either (throwError . PandocXMLError "") return $
             parseXMLContents
-              (TL.fromStrict . handleInstructions $ crFilter inp)
+              (TL.fromStrict . handleInstructions . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index dedc1f03f..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
 
 -- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readDokuWiki opts s = do
-  let input = crFilter s
-  res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+  let sources = toSources s
+  res <- runParserT parseDokuWiki def {stateOptions = opts }
+           (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
 
 -- * Utility functions
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 nested :: PandocMonad m => DWParser m a -> DWParser m a
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 66e390bd7..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.XML.Light
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type FB2 m = StateT FB2State m
 
@@ -63,9 +63,12 @@ instance HasMeta FB2State where
   setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
   deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
 
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
 readFB2 _ inp =
-  case parseXMLElement $ TL.fromStrict $ crFilter inp of
+  case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
     Left msg -> throwError $ PandocXMLError "" msg
     Right el ->  do
       (bs, st) <- runStateT (parseRootElement el) def
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c3e68afd8..f5c8a2277 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -62,21 +62,21 @@ import Text.Pandoc.Options (
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Shared (
-    addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
+    addMetaField, blocksToInlines', escapeURI, extractSpaces,
     htmlSpanLikeElements, renderTags', safeRead, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
          => ReaderOptions -- ^ Reader options
-         -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+         -> a             -- ^ Input to parse
          -> m Pandoc
 readHtml opts inp = do
   let tags = stripPrefixes $ canonicalizeTags $
              parseTagsOptions parseOptions{ optTagPosition = True }
-             (crFilter inp)
+             (sourcesToText $ toSources inp)
       parseDoc = do
         blocks <- fixPlains False . mconcat <$> manyTill block eof
         meta <- stateMeta . parserState <$> getState
@@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
 
 pTagText :: PandocMonad m => TagParser m Inlines
 pTagText = try $ do
+  pos <- getPosition
   (TagText str) <- pSatisfy isTagText
   st <- getState
   qu <- ask
   parsed <- lift $ lift $
-            flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+            flip runReaderT qu $ runParserT (many pTagContents) st "text"
+               (Sources [(pos, str)])
   case parsed of
        Left _        -> throwError $ PandocParseError $
                         "Could not parse `" <> str <> "'"
        Right result  -> return $ mconcat result
 
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
 
 pTagContents :: PandocMonad m => InlinesParser m Inlines
 pTagContents =
@@ -970,13 +972,14 @@ isCommentTag = tagComment (const True)
 -- | Matches a stretch of HTML in balanced tags.
 htmlInBalanced :: Monad m
                => (Tag Text -> Bool)
-               -> ParserT Text st m Text
+               -> ParserT Sources st m Text
 htmlInBalanced f = try $ do
   lookAhead (char '<')
-  inp <- getInput
-  let ts = canonicalizeTags $
-        parseTagsOptions parseOptions{ optTagWarning = True,
-                                       optTagPosition = True } inp
+  sources <- getInput
+  let ts = canonicalizeTags
+        $ parseTagsOptions parseOptions{ optTagWarning = True,
+                                         optTagPosition = True }
+        $ sourcesToText sources
   case ts of
     (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
        guard $ f t
@@ -1018,15 +1021,17 @@ hasTagWarning _                = False
 -- | Matches a tag meeting a certain condition.
 htmlTag :: (HasReaderOptions st, Monad m)
         => (Tag Text -> Bool)
-        -> ParserT Text st m (Tag Text, Text)
+        -> ParserT Sources st m (Tag Text, Text)
 htmlTag f = try $ do
   lookAhead (char '<')
   startpos <- getPosition
-  inp <- getInput
+  sources <- getInput
+  let inp = sourcesToText sources
   let ts = canonicalizeTags $ parseTagsOptions
                                parseOptions{ optTagWarning = False
                                            , optTagPosition = True }
-                               (inp <> " ") -- add space to ensure that
+                               (inp <> " ")
+                               -- add space to ensure that
                                -- we get a TagPosition after the tag
   (next, ln, col) <- case ts of
                       (TagPosition{} : next : TagPosition ln col : _)
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 48454e353..35eaac0a9 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -19,7 +19,7 @@ import Control.Monad.Except (throwError)
 import Data.List (intersperse)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
 import qualified Data.Text as T
 import Documentation.Haddock.Parser
 import Documentation.Haddock.Types as H
@@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
 
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
             => ReaderOptions
-            -> Text
+            -> a
             -> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+                           (unpack . sourcesToText . toSources $ s) of
   Right result -> return result
   Left e       -> throwError e
 
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index 70296bb6b..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
-  let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+          => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+  let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
   case eitherDecode src of
     Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
     Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c068f3774..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type JATS m = StateT JATSState m
 
@@ -52,10 +53,14 @@ instance Default JATSState where
                  , jatsContent = [] }
 
 
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readJATS _ inp = do
+  let sources = toSources inp
   tree <- either (throwError . PandocXMLError "") return $
-            parseXMLContents (TL.fromStrict $ crFilter inp)
+            parseXMLContents (TL.fromStrict . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 89aecbf56..a3b415f09 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
 import Text.Pandoc.Error (PandocError (PandocParseError))
 import Text.Pandoc.Options (ReaderOptions)
 import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import qualified Text.Jira.Markup as Jira
 
 -- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
-readJira _opts s = case parse s of
-  Right d -> return $ jiraToPandoc d
-  Left e  -> throwError . PandocParseError $
-             "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+  let sources = toSources inp
+  case parse (sourcesToText sources) of
+    Right d -> return $ jiraToPandoc d
+    Left e  -> throwError . PandocParseError $
+               "Jira parse error" `append` pack (show e)
 
 jiraToPandoc :: Jira.Doc -> Pandoc
 jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad168293..f90d562ae 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty)
 -- import Debug.Trace (traceShowId)
 
 -- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
           => ReaderOptions -- ^ Reader options
-          -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+          -> a             -- ^ Input to parse
           -> m Pandoc
 readLaTeX opts ltx = do
+  let sources = toSources ltx
   parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
-               (tokenize "source" (crFilter ltx))
+               (tokenizeSources sources)
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError ltx e
+    Left e       -> throwError $ PandocParsecError sources e
 
 parseLaTeX :: PandocMonad m => LP m Pandoc
 parseLaTeX = do
@@ -132,11 +133,11 @@ resolveRefs _ x = x
 
 
 rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-              => ParserT Text s m Text
+              => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -163,11 +164,11 @@ beginOrEndCommand = try $ do
                     (txt <> untokenize rawargs)
 
 rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-               => ParserT Text s m Text
+               => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   raw <- snd <$>
           (   rawLaTeXParser toks True
               (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -178,11 +179,11 @@ rawLaTeXInline = do
   finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
   return $ raw <> T.pack finalbraces
 
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
@@ -641,7 +642,7 @@ opt = do
   parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
+    Left e       -> throwError $ PandocParsecError (toSources toks) e
 
 -- block elements:
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
index 655823dab..af97125c6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do
       runParserT (mconcat <$> many inline) st "bracketed option" toks
     case parsed of
       Right result -> return result
-      Left e       -> throwError $ PandocParsecError (untokenize toks) e
+      Left e       -> throwError $ PandocParsecError (toSources toks) e
 
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index db58b333d..35ce3509d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawLaTeXParser
   , applyMacros
   , tokenize
+  , tokenizeSources
   , untokenize
   , untoken
   , totoks
@@ -248,7 +249,7 @@ withVerbatimMode parser = do
 
 rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
                => [Tok] -> Bool -> LP m a -> LP m a
-               -> ParserT Text s m (a, Text)
+               -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
@@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                _ <- takeP (T.length (untokenize toks'))
+                void $ count (T.length (untokenize toks')) anyChar
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do
                 return (val, result')
 
 applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-            => Text -> ParserT Text s m Text
+            => Text -> ParserT Sources s m Text
 applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
    do let retokenize = untokenize <$> many (satisfyTok (const True))
       pstate <- getState
@@ -301,6 +302,11 @@ QuickCheck property:
 >   let t = T.pack s in untokenize (tokenize "random" t) == t
 -}
 
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+   tokenizeSource (pos, t) = totoks pos t
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index f8c214318..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleInstances #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
    Copyright   : Copyright (C) 2017-2021 John MacFarlane
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        )
 where
 import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
 
 data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
                Esc1    | Esc2   | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
 data Tok = Tok SourcePos TokType Text
      deriving (Eq, Ord, Show)
 
+instance ToSources [Tok] where
+  toSources = Sources
+    . map (\ts -> case ts of
+                    Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+                    _ -> error "toSources [Tok] encountered empty group")
+    . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
 import Control.Monad.Trans (lift)
 import Control.Monad.Except (throwError)
 import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
 import qualified Data.Text as T
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
 import Text.Pandoc.Readers.Roff  -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
 import qualified Text.Parsec as Parsec
 import Text.Parsec.Pos (updatePosString)
 import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
 
 
 -- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
-  tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
+readMan opts s = do
+  let Sources inps = toSources s
+  tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
   let state = def {readerOptions = opts} :: ManState
+  let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+      fixError e = e
   eitherdoc <- readWithMTokens parseMan state
      (Foldable.toList . unRoffTokens $ tokenz)
-  either throwError return eitherdoc
+  either (throwError . fixError) return eitherdoc
+
 
 readWithMTokens :: PandocMonad m
         => ParserT [RoffToken] ManState m a  -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
         -> [RoffToken]                       -- ^ input
         -> m (Either PandocError a)
 readWithMTokens parser state input =
-  let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+  let leftF = PandocParsecError mempty
   in mapLeft leftF `liftM` runParserT parser state "source" input
 
+
 parseMan :: PandocMonad m => ManParser m Pandoc
 parseMan = do
   bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
 
 parseTable :: PandocMonad m => ManParser m Blocks
 parseTable = do
-  modifyState $ \st -> st { tableCellsPlain = True }
+  updateState $ \st -> st { tableCellsPlain = True }
   let isTbl Tbl{} = True
       isTbl _     = False
   Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
         case res' of
           Left _  -> Prelude.fail "Could not parse table cell"
           Right x -> do
-            modifyState $ \s -> s{ tableCellsPlain = False }
+            updateState $ \s -> s{ tableCellsPlain = False }
             return x
       Right x -> return x
 
@@ -222,7 +229,7 @@ parseTitle = do
                       setMeta "section" (linePartsToInlines y)
          [x]       -> setMeta "title" (linePartsToInlines x)
          []        -> id
-  modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+  updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
   return mempty
 
 linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ba8ed147e..69dd51bc4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared
 import Text.Pandoc.XML (fromEntities)
 import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
 
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
 
 type F = Future ParserState
 
 -- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
              => ReaderOptions -- ^ Reader options
-             -> Text      -- ^ String to parse (assuming @'\n'@ line endings)
+             -> a             -- ^ Input
              -> m Pandoc
 readMarkdown opts s = do
   parsed <- readWithM parseMarkdown def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+                (ensureFinalNewlines 3 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do
         meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
         setPosition oldPos
         return $ runF meta defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do
         refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
         setPosition oldPos
         return $ runF refs defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -146,14 +147,14 @@ inList = do
   ctx <- stateParserContext <$> getState
   guard (ctx == ListItemState)
 
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
 spnl = try $ do
   skipSpaces
   optional newline
   skipSpaces
   notFollowedBy (char '\n')
 
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
 spnl' = try $ do
   xs <- many spaceChar
   ys <- option "" $ try $ (:) <$> newline
@@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _)
 -- hrule block
 --
 
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
 hrule = try $ do
   skipSpaces
   start <- satisfy isHruleChar
@@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline
 blockDelimiter :: PandocMonad m
                => (Char -> Bool)
                -> Maybe Int
-               -> ParserT Text ParserState m Int
+               -> ParserT Sources ParserState m Int
 blockDelimiter f len = try $ do
   skipNonindentSpaces
   c <- lookAhead (satisfy f)
@@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do
   blanklines
   return $ T.intercalate "\n" lns'
 
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
 birdTrackLine c = try $ do
   char c
   -- allow html tags on left margin:
@@ -1025,7 +1026,7 @@ para = try $ do
   option (B.plain <$> result)
     $ try $ do
             newline
-            (blanklines >> return mempty)
+            (mempty <$ blanklines)
               <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
               <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
               <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1170,7 +1171,7 @@ lineBlock = do
 -- and the length including trailing space.
 dashedLine :: PandocMonad m
            => Char
-           -> ParserT Text st m (Int, Int)
+           -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many spaceChar
@@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m
              -> MarkdownParser m [Text]
 rawTableLine indices = do
   notFollowedBy' (blanklines' <|> tableFooter)
-  line <- take1WhileP (/='\n') <* newline
+  line <- anyLine
   return $ map trim $ tail $
            splitTextByIndices (init indices) line
 
@@ -1390,7 +1391,7 @@ pipeTableCell =
       return $ B.plain <$> result)
     <|> return mempty
 
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
 pipeTableHeaderPart = try $ do
   skipMany spaceChar
   left <- optionMaybe (char ':')
@@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do
       (Just _,Just _)   -> AlignCenter, len)
 
 -- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
 scanForPipe = do
-  inp <- getInput
-  case T.break (\c -> c == '\n' || c == '|') inp of
+  Sources inps <- getInput
+  let ln = case inps of
+             [] -> ""
+             ((_,t):(_,t'):_) | T.null t -> t'
+             ((_,t):_) -> t
+  case T.break (\c -> c == '\n' || c == '|') ln of
        (_, T.uncons -> Just ('|', _)) -> return ()
        _                              -> mzero
 
@@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
   where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
         regsp = skipMany spaceChar >> return B.space
 
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
 nonEndline = satisfy (/='\n')
 
 str :: PandocMonad m => MarkdownParser m (F Inlines)
 str = do
   result <- mconcat <$> many1
-             ( take1WhileP isAlphaNum
+             ( T.pack <$> (many1 alphaNum)
               <|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
   updateLastStrPos
   (do guardEnabled Ext_smart
@@ -1962,7 +1967,7 @@ rawLaTeXInline' = do
     s <- rawLaTeXInline
     return $ return $ B.rawInline "tex" s -- "tex" because it might be context
 
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
 rawConTeXtEnvironment = try $ do
   string "\\start"
   completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do
                        (try $ string "\\stop" >> textStr completion)
   return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
 
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
 inBrackets parser = do
   char '['
   contents <- manyChar parser
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 9f4d5e170..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
                            trim, splitTextBy, tshow)
 import Text.Pandoc.Walk (walk)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
-              => ReaderOptions -- ^ Reader options
-              -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+              => ReaderOptions
+              -> a
               -> m Pandoc
 readMediaWiki opts s = do
+  let sources = toSources s
   parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
                                             , mwMaxNestingLevel = 4
                                             , mwNextLinkNumber  = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
                                             , mwLogMessages = []
                                             , mwInTT = False
                                             }
-            (crFilter s <> "\n")
+            sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwInTT            :: Bool
                        }
 
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
 
 instance HasReaderOptions MWState where
   extractReaderOptions = mwOptions
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cb141cba5..bbcfe62ea 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL
 import qualified Text.Pandoc.UTF8 as UTF8
 
 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
-             => ParserT Text st m (Future st MetaValue)
+             => ParserT Sources st m (Future st MetaValue)
              -> BL.ByteString
-             -> ParserT Text st m (Future st Meta)
+             -> ParserT Sources st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing
 
 -- Returns filtered list of references.
 yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
-             => ParserT Text st m (Future st MetaValue)
+             => ParserT Sources st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
              -> BL.ByteString
-             -> ParserT Text st m (Future st [MetaValue])
+             -> ParserT Sources st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
 nodeToKey _                                   = Nothing
 
 normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
-                   => ParserT Text st m (Future st MetaValue)
+                   => ParserT Sources st m (Future st MetaValue)
                    -> Text
-                   -> ParserT Text st m (Future st MetaValue)
+                   -> ParserT Sources st m (Future st MetaValue)
 normalizeMetaValue pMetaValue x =
    -- Note: a standard quoted or unquoted YAML value will
    -- not end in a newline, but a "block" set off with
@@ -133,9 +133,9 @@ checkBoolean t
   | otherwise = Nothing
 
 yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
-                => ParserT Text st m (Future st MetaValue)
+                => ParserT Sources st m (Future st MetaValue)
                 -> YAML.Node YE.Pos
-                -> ParserT Text st m (Future st MetaValue)
+                -> ParserT Sources st m (Future st MetaValue)
 yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
   case x of
        YAML.SStr t       -> normalizeMetaValue pMetaValue t
@@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
 yamlToMetaValue _ _ = return $ return $ MetaString ""
 
 yamlMap :: (PandocMonad m, HasLastStrPosition st)
-        => ParserT Text st m (Future st MetaValue)
+        => ParserT Sources st m (Future st MetaValue)
         -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-        -> ParserT Text st m (Future st (M.Map Text MetaValue))
+        -> ParserT Sources st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
     kvs <- forM (M.toList o) $ \(key, v) -> do
              k <- maybe (throwError $ PandocParseError
@@ -177,8 +177,8 @@ yamlMap pMetaValue o = do
 
 -- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
 yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
-              => ParserT Text st m (Future st MetaValue)
-              -> ParserT Text st m (Future st Meta)
+              => ParserT Sources st m (Future st MetaValue)
+              -> ParserT Sources st m (Future st Meta)
 yamlMetaBlock parser = try $ do
   string "---"
   blankline
@@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do
   optional blanklines
   yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
 
-stopLine :: Monad m => ParserT Text st m ()
+stopLine :: Monad m => ParserT Sources st m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 698bfd3d7..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Shared (trimr, tshow)
 
 -- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
 readMuse opts s = do
-  let input = crFilter s
-  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+  let sources = toSources s
+  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+              (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
 type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
                 , museInPara = False
                 }
 
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
 
 instance HasReaderOptions MuseState where
   extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
 -- * Parsers
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 9c8bc0374..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
 import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read native formatted text and return a Pandoc document.
 -- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
 --
 -- > Pandoc nullMeta [Plain [Str "hi"]]
 --
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
            => ReaderOptions
-           -> Text       -- ^ String to parse (assuming @'\n'@ line endings)
+           -> a
            -> m Pandoc
 readNative _ s =
-  case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
-    Right doc -> return doc
-    Left _    -> throwError $ PandocParseError "couldn't read native"
+  let t = sourcesToText . toSources $ s
+  in  case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+        Right doc -> return doc
+        Left _    -> throwError $ PandocParseError "couldn't read native"
 
 readBlocks :: Text -> Either PandocError [Block]
 readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5f2ddb876..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -24,7 +24,8 @@ import Text.Pandoc.Options
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.XML.Light
 import Control.Monad.Except (throwError)
 
@@ -46,10 +47,14 @@ instance Default OPMLState where
                  , opmlOptions = def
                  }
 
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readOPML opts inp  = do
-  (bs, st') <- runStateT
-                 (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+  let sources = toSources inp
+  (bs, st') <-
+    runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
                      Left msg -> throwError $ PandocXMLError "" msg
                      Right ns -> mapM parseBlock ns)
                  def{ opmlOptions = opts }
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index afeb27a87..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
 import Control.Monad.Except (throwError)
 import Control.Monad.Reader (runReaderT)
 
-import Data.Text (Text)
-
 -- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readOrg opts s = do
   parsed <- flip runReaderT def $
             readWithM parseOrg (optionsToParserState opts)
-            (crFilter s <> "\n\n")
+            (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left  e      -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 519a6ce04..054f2611a 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 
 import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -802,7 +803,7 @@ inlineLaTeX = try $ do
    parseAsInlineLaTeX :: PandocMonad m
                       => Text -> TeXExport -> OrgParser m (Maybe Inlines)
    parseAsInlineLaTeX cs = \case
-     TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+     TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
      TeXIgnore -> return (Just mempty)
      TeXVerbatim -> return (Just $ B.str cs)
 
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 6ed24a602..c7ea02815 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -114,7 +114,7 @@ import Control.Monad (guard)
 import Control.Monad.Reader (ReaderT)
 
 -- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
 
 --
 -- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index ac4c0b6cb..a3fcf028c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -38,25 +38,24 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
 import Data.Time.Format
 
 -- TODO:
 -- [ ] .. parsed-literal
 
 -- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readRST opts s = do
   parsed <- readWithM parseRST def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+               (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
 
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
 
 --
 -- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- T.concat <$>
-                  manyTill (referenceKey <|> anchorDef <|>
-                            noteBlock <|> citationBlock <|>
-                            (snd <$> withRaw comment) <|>
-                            headerBlock <|> lineClump) eof
+  let chunk = referenceKey
+              <|> anchorDef
+              <|> noteBlock
+              <|> citationBlock
+              <|> (snd <$> withRaw comment)
+              <|> headerBlock
+              <|> lineClump
+  docMinusKeys <- Sources <$>
+                  manyTill (do pos <- getPosition
+                               t <- chunk
+                               return (pos, t)) eof
+  -- UGLY: we collapse source position information.
+  -- TODO: fix the parser to use the F monad instead of two passes
   setInput docMinusKeys
   setPosition startPos
   st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
 -- hrule block
 --
 
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
 hrule = try $ do
   chr <- oneOf underlineChars
   count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
 
 -- read a line indented by a given string
 indentedLine :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m Text
+             => Int -> ParserT Sources st m Text
 indentedLine indents = try $ do
   lookAhead spaceChar
   gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
 -- one or more indented lines, possibly separated by blank lines.
 -- any amount of indentation will work.
 indentedBlock :: (HasReaderOptions st, Monad m)
-              => ParserT Text st m Text
+              => ParserT Sources st m Text
 indentedBlock = try $ do
   indents <- length <$> lookAhead (many1 spaceChar)
   lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
   optional blanklines
   return $ T.unlines lns
 
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
 quotedBlock = try $ do
     quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
     lns <- many1 $ lookAhead (char quote) >> anyLine
     optional blanklines
     return $ T.unlines lns
 
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
 codeBlockStart = string "::" >> blankline >> blankline
 
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
 codeBlock = try $ codeBlockStart >> codeBlockBody
 
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
 codeBlockBody = do
   lang <- stateRstHighlight <$> getState
   try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
   return $ B.codeBlockWith ("", ["haskell","literate"], [])
          $ T.intercalate "\n" lns
 
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
 latexCodeBlock = try $ do
   try (latexBlockLine "\\begin{code}")
   many1Till anyLine (try $ latexBlockLine "\\end{code}")
  where
   latexBlockLine s = skipMany spaceChar >> string s >> blankline
 
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
 birdCodeBlock = filterSpace <$> many1 birdTrackLine
   where filterSpace lns =
             -- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
                then map (T.drop 1) lns
                else lns
 
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
 birdTrackLine = char '>' >> anyLine
 
 --
@@ -456,7 +463,6 @@ includeDirective top fields body = do
   let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
   let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
   oldPos <- getPosition
-  oldInput <- getInput
   containers <- stateContainers <$> getState
   when (f `elem` containers) $
     throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
@@ -494,15 +500,11 @@ includeDirective top fields body = do
        Nothing   -> case lookup "literal" fields of
                          Just _  -> return $ B.rawBlock "rst" contents'
                          Nothing -> do
-                           setPosition $ newPos (T.unpack f) 1 1
-                           setInput $ contents' <> "\n"
-                           bs <- optional blanklines >>
-                                  (mconcat <$> many block)
-                           setInput oldInput
-                           setPosition oldPos
+                           addToSources (initialPos (T.unpack f))
+                              (contents' <> "\n")
                            updateState $ \s -> s{ stateContainers =
                                          tail $ stateContainers s }
-                           return bs
+                           return mempty
 
 
 --
@@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
 definitionList = B.definitionList <$> many1 definitionListItem
 
 -- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
 bulletListStart = try $ do
   notFollowedBy' hrule  -- because hrules start out just like lists
   marker <- oneOf bulletListMarkers
@@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do
 -- plus isolated (no two adjacent) internal hyphens, underscores,
 -- periods, colons and plus signs; no whitespace or other characters
 -- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
 simpleReferenceName = do
   x <- alphaNum
   xs <- many $  alphaNum
@@ -1122,7 +1124,7 @@ referenceKey = do
   -- return enough blanks to replace key
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
 targetURI = do
   skipSpaces
   optional $ try $ newline >> notFollowedBy blankline
@@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m ()
 anonymousKey = try $ do
   oneOfStrings [".. __:", "__"]
   src <- targetURI
-  pos <- getPosition
-  let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+  -- we need to ensure that the keys are ordered by occurrence in
+  -- the document.
+  numKeys <- M.size . stateKeys <$> getState
+  let key = toKey $ "_" <> T.pack (show numKeys)
   updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
                           stateKeys s }
 
@@ -1250,13 +1254,13 @@ headerBlock = do
 -- Grid tables TODO:
 --  - column spans
 
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many (char ' ')
   return (length dashes, length $ dashes ++ sp)
 
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
 simpleDashedLines ch = try $ many1 (dashedLine ch)
 
 -- Parse a table row separator
@@ -1382,7 +1386,7 @@ hyphens = do
   -- don't want to treat endline after hyphen or dash as a space
   return $ B.str result
 
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
 escapedChar = do c <- escaped anyChar
                  return $ if c == ' ' || c == '\n' || c == '\r'
                              -- '\ ' is null in RST
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
 import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
 import qualified Data.Sequence as Seq
 import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
                   , afterConditional = False
                   }
 
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
 
 --
 -- Lexer: T.Text -> RoffToken
 --
 
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
 eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
 
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
 spacetab = char ' ' <|> char '\t'
 
 characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
   char '*'
   cs <- escapeArg <|> countChar 1 anyChar
   s <- linePartsToText <$> resolveText cs pos
-  getInput >>= setInput . (s <>)
-  return ()
+  addToInput s
 
 -- Parses: '..'
 quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
   font' <- if T.null font || font == "P"
               then prevFont <$> getState
               else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
-  modifyState $ \st -> st{ prevFont = currentFont st
+  updateState $ \st -> st{ prevFont = currentFont st
                          , currentFont = font' }
   return [Font font']
   where
@@ -372,8 +370,8 @@ lexTable pos = do
   spaces
   opts <- try tableOptions <|> [] <$ optional (char ';')
   case lookup "tab" opts of
-    Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
-    _                              -> modifyState $ \st -> st{ tableTabChar = '\t' }
+    Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+    _                              -> updateState $ \st -> st{ tableTabChar = '\t' }
   spaces
   skipMany lexComment
   spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
   ifPart <- do
       optional $ try $ char '\\' >> newline
       lexGroup
-       <|> do modifyState $ \s -> s{ afterConditional = True }
+       <|> do updateState $ \s -> s{ afterConditional = True }
               t <- manToken
-              modifyState $ \s -> s{ afterConditional = False }
+              updateState $ \s -> s{ afterConditional = False }
               return t
   case mbtest of
     Nothing    -> do
-      putState st  -- reset state, so we don't record macros in skipped section
+      setState st  -- reset state, so we don't record macros in skipped section
       report $ SkippedContent (T.cons '.' mname) pos
       return mempty
     Just True  -> return ifPart
     Just False -> do
-      putState st
+      setState st
       return mempty
 
 expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
       _    -> Nothing
   where
     returnValue v = do
-      modifyState $ \st -> st{ lastExpression = v }
+      updateState $ \st -> st{ lastExpression = v }
       return v
 
 lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
       result <- readFileFromDirs dirs $ T.unpack fp
       case result of
         Nothing  -> report $ CouldNotLoadIncludeFile fp pos
-        Just s   -> getInput >>= setInput . (s <>)
+        Just s   -> addToInput s
       return mempty
     []    -> return mempty
 
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
      (x:ys) -> do
        let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
        let stringName = linePartsToText x
-       modifyState $ \st ->
+       updateState $ \st ->
          st{ customMacros = M.insert stringName ts (customMacros st) }
    return mempty
 
 lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
 lexMacroDef args = do -- macro definition
-   modifyState $ \st -> st{ roffMode = CopyMode }
+   updateState $ \st -> st{ roffMode = CopyMode }
    (macroName, stopMacro) <-
      case args of
        (x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
          _ <- lexArgs
          return ()
    ts <- mconcat <$> manyTill manToken stop
-   modifyState $ \st ->
+   updateState $ \st ->
      st{ customMacros = M.insert macroName ts (customMacros st)
        , roffMode = NormalMode }
    return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index c4d7bcc93..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTWiki opts s = do
-  res <- readWithM parseTWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 8d7900de4..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,30 +53,34 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
 
 -- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
             => ReaderOptions -- ^ Reader options
-            -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+            -> a
             -> m Pandoc
 readTextile opts s = do
-  parsed <- readWithM parseTextile def{ stateOptions = opts }
-                (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  parsed <- readWithM parseTextile def{ stateOptions = opts } sources
   case parsed of
      Right result -> return result
      Left e       -> throwError e
 
+type TextileParser = ParserT Sources ParserState
 
 -- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
 parseTextile = do
   many blankline
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys/notes were...
-  let firstPassParser = noteBlock <|> lineClump
-  manyTill firstPassParser eof >>= setInput . T.concat
+  let firstPassParser = do
+        pos <- getPosition
+        t <- noteBlock <|> lineClump
+        return (pos, t)
+  manyTill firstPassParser eof >>= setInput . Sources
   setPosition startPos
   st' <- getState
   let reversedNotes = stateNotes st'
@@ -84,10 +88,10 @@ parseTextile = do
   -- now parse it for real...
   Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
 
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
 noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
 
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
 noteBlock = try $ do
   startPos <- getPosition
   ref <- noteMarker
@@ -102,11 +106,11 @@ noteBlock = try $ do
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
 -- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
 parseBlocks = mconcat <$> manyTill block eof
 
 -- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
 blockParsers = [ codeBlock
                , header
                , blockQuote
@@ -121,22 +125,22 @@ blockParsers = [ codeBlock
                ]
 
 -- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
 block = do
   res <- choice blockParsers <?> "block"
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
 commentBlock = try $ do
   string "###."
   manyTill anyLine blanklines
   return mempty
 
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
 codeBlock = codeBlockTextile <|> codeBlockHtml
 
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
 codeBlockTextile = try $ do
   string "bc." <|> string "pre."
   extended <- option False (True <$ char '.')
@@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
 trimTrailingNewlines = T.dropWhileEnd (=='\n')
 
 -- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
 codeBlockHtml = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
   result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -174,7 +178,7 @@ codeBlockHtml = try $ do
   return $ B.codeBlockWith (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
 header = try $ do
   char 'h'
   level <- digitToInt <$> oneOf "123456"
@@ -186,14 +190,14 @@ header = try $ do
   return $ B.headerWith attr' level name
 
 -- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
 blockQuote = try $ do
   string "bq" >> attributes >> char '.' >> whitespace
   B.blockQuote <$> para
 
 -- Horizontal rule
 
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
 hrule = try $ do
   skipSpaces
   start <- oneOf "-*"
@@ -208,39 +212,39 @@ hrule = try $ do
 -- | Can be a bullet list or an ordered list. This implementation is
 -- strict in the nesting, sublist must start at exactly "parent depth
 -- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
 anyList = try $ anyListAtDepth 1 <* blanklines
 
 -- | This allow one type of list to be nested into an other type,
 -- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                 orderedListAtDepth depth,
                                 definitionList ]
 
 -- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
 -- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListAtDepth depth = try $ do
   items <- many1 (orderedListItemAtDepth depth)
   return $ B.orderedList items
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
 genericListItemAtDepth c depth = try $ do
   count depth (char c) >> attributes >> whitespace
   contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
   return $ contents <> sublist
 
 -- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
 definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
 listStart = genericListStart '*'
         <|> () <$ genericListStart '#'
         <|> () <$ definitionListStart
 
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
 genericListStart c = () <$ try (many1 (char c) >> whitespace)
 
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
 basicDLStart = do
   char '-'
   whitespace
   notFollowedBy newline
 
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
 definitionListStart = try $ do
   basicDLStart
   trimInlines . mconcat <$>
@@ -281,15 +285,15 @@ definitionListStart = try $ do
 -- the term defined, then spaces and ":=". The definition follows, on
 -- the same single line, or spaned on multiple line, after a line
 -- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
 definitionListItem = try $ do
   term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
   def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
   return (term, def')
-  where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+  where inlineDef :: PandocMonad m => TextileParser m [Blocks]
         inlineDef = liftM (\d -> [B.plain d])
                     $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
-        multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+        multilineDef :: PandocMonad m => TextileParser m [Blocks]
         multilineDef = try $ do
           optional whitespace >> newline
           s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -300,7 +304,7 @@ definitionListItem = try $ do
 -- raw content
 
 -- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
 rawHtmlBlock = try $ do
   skipMany spaceChar
   (_,b) <- htmlTag isBlockTag
@@ -308,14 +312,14 @@ rawHtmlBlock = try $ do
   return $ B.rawBlock "html" b
 
 -- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
   B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
 para = B.para . trimInlines . mconcat <$> many1 inline
 
 -- Tables
@@ -326,7 +330,7 @@ toAlignment '>' = AlignRight
 toAlignment '=' = AlignCenter
 toAlignment _   = AlignDefault
 
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
 cellAttributes = try $ do
   isHeader <- option False (True <$ char '_')
   -- we just ignore colspan and rowspan markers:
@@ -339,7 +343,7 @@ cellAttributes = try $ do
   return (isHeader, alignment)
 
 -- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
 tableCell = try $ do
   char '|'
   (isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -350,7 +354,7 @@ tableCell = try $ do
   return ((isHeader, alignment), B.plain content)
 
 -- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
 tableRow = try $ do
   -- skip optional row attributes
   optional $ try $ do
@@ -360,7 +364,7 @@ tableRow = try $ do
   many1 tableCell <* char '|' <* blankline
 
 -- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
 table = try $ do
   -- ignore table attributes
   caption <- option mempty $ try $ do
@@ -388,7 +392,7 @@ table = try $ do
     (TableFoot nullAttr [])
 
 -- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
 ignorableRow = try $ do
   char '|'
   oneOf ":^-~"
@@ -397,7 +401,7 @@ ignorableRow = try $ do
   _ <- anyLine
   return ()
 
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
 explicitBlockStart name = try $ do
   string (T.unpack name)
   attributes
@@ -409,8 +413,8 @@ explicitBlockStart name = try $ do
 -- However, they can be used to set HTML/CSS attributes when needed.
 maybeExplicitBlock :: PandocMonad m
                    => Text  -- ^ block tag name
-                   -> ParserT Text ParserState m Blocks -- ^ implicit block
-                   -> ParserT Text ParserState m Blocks
+                   -> TextileParser m Blocks -- ^ implicit block
+                   -> TextileParser m Blocks
 maybeExplicitBlock name blk = try $ do
   optional $ explicitBlockStart name
   blk
@@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
 
 
 -- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
 inline = choice inlineParsers <?> "inline"
 
 -- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
 inlineParsers = [ str
                 , whitespace
                 , endline
@@ -447,7 +451,7 @@ inlineParsers = [ str
                 ]
 
 -- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
 inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       , simpleInline (string "**") B.strong
                       , simpleInline (string "__") B.emph
@@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       ]
 
 -- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
 mark = try $ char '(' >> (try tm <|> try reg <|> copy)
 
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
 reg = do
   oneOf "Rr"
   char ')'
   return $ B.str "\174"
 
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
 tm = do
   oneOf "Tt"
   oneOf "Mm"
   char ')'
   return $ B.str "\8482"
 
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
 copy = do
   oneOf "Cc"
   char ')'
   return $ B.str "\169"
 
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
 note = try $ do
   ref <- char '[' *> many1 digit <* char ']'
   notes <- stateNotes <$> getState
@@ -507,13 +511,13 @@ wordBoundaries :: [Char]
 wordBoundaries = markupChars <> stringBreakers
 
 -- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
 hyphenedWords = do
   x <- wordChunk
   xs <-  many (try $ char '-' >> wordChunk)
   return $ T.intercalate "-" (x:xs)
 
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
 wordChunk = try $ do
   hd <- noneOf wordBoundaries
   tl <- many ( noneOf wordBoundaries <|>
@@ -522,7 +526,7 @@ wordChunk = try $ do
   return $ T.pack $ hd:tl
 
 -- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
 str = do
   baseStr <- hyphenedWords
   -- RedCloth compliance : if parsed word is uppercase and immediately
@@ -535,11 +539,11 @@ str = do
   return $ B.str fullStr
 
 -- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
 whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -547,18 +551,18 @@ endline = try $ do
   notFollowedBy rawHtmlBlock
   return B.linebreak
 
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
 rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
 rawLaTeXInline' = try $ do
   guardEnabled Ext_raw_tex
   B.rawInline "latex" <$> rawLaTeXInline
 
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
 link = try $ do
   bracketed <- (True <$ char '[') <|> return False
   char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -578,7 +582,7 @@ link = try $ do
               else B.spanWith attr $ B.link url "" name'
 
 -- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
 image = try $ do
   char '!' >> notFollowedBy space
   (ident, cls, kvs) <- attributes
@@ -590,51 +594,51 @@ image = try $ do
   char '!'
   return $ B.imageWith attr src alt (B.str alt)
 
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
 escapedInline = escapedEqs <|> escapedTag
 
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
 escapedEqs = B.str . T.pack <$>
   try (string "==" *> manyTill anyChar' (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
 escapedTag = B.str . T.pack <$>
   try (string "<notextile>" *>
          manyTill anyChar' (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
 symbol = B.str . T.singleton <$> (notFollowedBy newline *>
                                   notFollowedBy rawHtmlBlock *>
                                   oneOf wordBoundaries)
 
 -- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
 code = code1 <|> code2
 
 -- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
 anyChar' =
   satisfy (/='\n') <|>
   try (char '\n' <* notFollowedBy blankline)
 
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
 code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
 
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
   B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
+attributes :: PandocMonad m => TextileParser m Attr
 attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
 
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 specialAttribute = do
   alignStr <- ("center" <$ char '=') <|>
     ("justify" <$ try (string "<>")) <|>
@@ -643,11 +647,11 @@ specialAttribute = do
   notFollowedBy spaceChar
   return $ addStyle $ T.pack $ "text-align:" ++ alignStr
 
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 attribute = try $
   (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
 
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 classIdAttr = try $ do -- (class class #id)
   char '('
   ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
        classes'
          -> return $ \(_,_,keyvals) -> ("",classes',keyvals)
 
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 styleAttr = do
   style <- try $ enclosed (char '{') (char '}') anyChar'
   return $ addStyle $ T.pack style
@@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) =
   where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
         style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
 
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 langAttr = do
   lang <- try $ enclosed (char '[') (char ']') alphaNum
   return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
 
 -- | Parses material surrounded by a parser.
 surrounded :: (PandocMonad m, Show t)
-           => ParserT Text st m t   -- ^ surrounding parser
-           -> ParserT Text st m a   -- ^ content parser (to be used repeatedly)
-           -> ParserT Text st m [a]
+           => ParserT Sources st m t   -- ^ surrounding parser
+           -> ParserT Sources st m a   -- ^ content parser (to be used repeatedly)
+           -> ParserT Sources st m [a]
 surrounded border =
   enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
 simpleInline :: PandocMonad m
-             => ParserT Text ParserState m t        -- ^ surrounding parser
+             => TextileParser m t        -- ^ surrounding parser
              -> (Inlines -> Inlines)                  -- ^ Inline constructor
-             -> ParserT Text ParserState m Inlines  -- ^ content parser (to be used repeatedly)
+             -> TextileParser m Inlines  -- ^ content parser (to be used repeatedly)
 simpleInline border construct = try $ do
   notAfterString
   border *> notFollowedBy (oneOf " \t\n\r")
@@ -700,7 +704,7 @@ simpleInline border construct = try $ do
            then body
            else B.spanWith attr body
 
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
 groupedInlineMarkup = try $ do
     char '['
     sp1 <- option mempty $ B.space <$ whitespace
@@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do
     char ']'
     return $ sp1 <> result <> sp2
 
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
 eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Logging (Verbosity (..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
 import Text.Pandoc.XML (fromEntities)
 import Text.Printf (printf)
 
 -- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTikiWiki opts s = do
-  res <- readWithM parseTikiWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index d355a4b55..6f92f0063 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
 
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
 
 -- | An object for the T2T macros meta information
 -- the contents of each field is simply substituted verbatim into the file
@@ -68,15 +68,15 @@ getT2TMeta = do
                      (intercalate ", " inps) outp
 
 -- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readTxt2Tags opts s = do
+  let sources = ensureFinalNewlines 2 (toSources s)
   meta <- getT2TMeta
   let parsed = flip runReader meta $
-        readWithM parseT2T (def {stateOptions = opts}) $
-        crFilter s <> "\n\n"
+        readWithM parseT2T (def {stateOptions = opts}) sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
                             many1Till, orderedListMarker, readWithM,
                             registerHeader, spaceChar, stateMeta,
                             stateOptions, uri, manyTillChar, manyChar, textStr,
-                            many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+                            many1Char, countChar, many1TillChar,
+                            alphaNum, anyChar, char, newline, noneOf, oneOf,
+                            space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
                            isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
-                         spaces, string)
 import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
                                manyTill, notFollowedBy, option, skipMany1)
 import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
 
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readVimwiki opts s = do
-  res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+  let sources = toSources s
+  res <- readWithM parseVimwiki def{ stateOptions = opts } sources
   case res of
        Left e       -> throwError e
        Right result -> return result
 
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
 
 
 -- constants
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index e389c1727..920edca7b 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -298,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines
                        (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
                        <> go (T.drop 1 s2)
 
+{-# DEPRECATED crFilter "readers filter crs automatically" #-}
 -- | Strip out DOS line endings.
 crFilter :: T.Text -> T.Text
 crFilter = T.filter (/= '\r')
diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs
new file mode 100644
index 000000000..5511ccfb8
--- /dev/null
+++ b/src/Text/Pandoc/Sources.hs
@@ -0,0 +1,195 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Sources
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Defines Sources object to be used as input to pandoc parsers and redefines Char
+parsers so they get source position information from it.
+-}
+
+module Text.Pandoc.Sources
+  ( Sources(..)
+  , ToSources(..)
+  , UpdateSourcePos(..)
+  , sourcesToText
+  , initialSourceName
+  , addToSources
+  , ensureFinalNewlines
+  , addToInput
+  , satisfy
+  , oneOf
+  , noneOf
+  , anyChar
+  , char
+  , string
+  , newline
+  , space
+  , spaces
+  , letter
+  , digit
+  , hexDigit
+  , alphaNum
+  )
+where
+import qualified Text.Parsec as P
+import Text.Parsec (Stream(..), ParsecT)
+import Text.Parsec.Pos as P
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
+import Data.String (IsString(..))
+import qualified Data.List.NonEmpty as NonEmpty
+
+-- | A list of inputs labeled with source positions.  It is assumed
+-- that the 'Text's have @\n@ line endings.
+newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
+  deriving (Show, Semigroup, Monoid)
+
+instance Monad m => Stream Sources m Char where
+  uncons (Sources []) = return Nothing
+  uncons (Sources ((pos,t):rest)) =
+    case T.uncons t of
+      Nothing -> uncons (Sources rest)
+      Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
+
+instance IsString Sources where
+  fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
+
+class ToSources a where
+  toSources :: a -> Sources
+
+instance ToSources Text where
+  toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
+
+instance ToSources [(FilePath, Text)] where
+  toSources = Sources
+            . map (\(fp,t) ->
+                    (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
+
+instance ToSources Sources where
+  toSources = id
+
+sourcesToText :: Sources -> Text
+sourcesToText (Sources xs) = mconcat $ map snd xs
+
+addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
+addToSources pos t = do
+  curpos <- P.getPosition
+  Sources xs <- P.getInput
+  let xs' = case xs of
+               [] -> []
+               ((_,t'):rest) -> (curpos,t'):rest
+  P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
+
+ensureFinalNewlines :: Int -- ^ number of trailing newlines
+                    -> Sources
+                    -> Sources
+ensureFinalNewlines n (Sources xs) =
+  case NonEmpty.nonEmpty xs of
+    Nothing -> Sources [(initialPos "", T.replicate n "\n")]
+    Just lst ->
+      case NonEmpty.last lst of
+        (spos, t) ->
+          case T.length (T.takeWhileEnd (=='\n') t) of
+            len | len >= n -> Sources xs
+                | otherwise -> Sources (NonEmpty.init lst ++
+                                        [(spos,
+                                          t <> T.replicate (n - len) "\n")])
+
+class UpdateSourcePos s c where
+  updateSourcePos :: SourcePos -> c -> s -> SourcePos
+
+instance UpdateSourcePos Text Char where
+   updateSourcePos pos c _ = updatePosChar pos c
+
+instance UpdateSourcePos Sources Char where
+   updateSourcePos pos c sources =
+     case sources of
+       Sources [] -> updatePosChar pos c
+       Sources ((_,t):(pos',_):_)
+         | T.null t  -> pos'
+       Sources _ ->
+           case c of
+             '\n' -> incSourceLine (setSourceColumn pos 1) 1
+             '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
+             _    -> incSourceColumn pos 1
+
+-- | Get name of first source in 'Sources'.
+initialSourceName :: Sources -> FilePath
+initialSourceName (Sources []) = ""
+initialSourceName (Sources ((pos,_):_)) = sourceName pos
+
+-- | Add some text to the beginning of the input sources.
+-- This simplifies code that expands macros.
+addToInput :: Monad m => Text -> ParsecT Sources u m ()
+addToInput t = do
+  Sources xs <- P.getInput
+  case xs of
+    [] -> P.setInput $ Sources [(initialPos "",t)]
+    (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
+
+-- We need to redefine the parsers in Text.Parsec.Char so that they
+-- update source positions properly from the Sources stream.
+
+satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => (Char -> Bool) -> ParsecT s u m Char
+satisfy f = P.tokenPrim show updateSourcePos matcher
+ where
+  matcher c = if f c then Just c else Nothing
+
+oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => [Char] -> ParsecT s u m Char
+oneOf cs = satisfy (`elem` cs)
+
+noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m Char
+noneOf cs = satisfy (`notElem` cs)
+
+anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+anyChar = satisfy (const True)
+
+char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+     => Char -> ParsecT s u m Char
+char c = satisfy (== c)
+
+string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m [Char]
+string = mapM char
+
+newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+newline = satisfy (== '\n')
+
+space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+space = satisfy isSpace
+
+spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m ()
+spaces = P.skipMany space P.<?> "white space"
+
+letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m Char
+letter = satisfy isLetter
+
+alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+alphaNum = satisfy isAlphaNum
+
+digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+digit = satisfy isDigit
+
+hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+hexDigit = satisfy isHexDigit
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 6e38da21a..f055ab197 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -374,8 +374,8 @@ tests = [ testGroup "inline code"
         , testGroup "lhs"
           [ test (purely $ readMarkdown def{ readerExtensions = enableExtension
                        Ext_literate_haskell pandocExtensions })
-              "inverse bird tracks and html" $
-              "> a\n\n< b\n\n<div>\n"
+              "inverse bird tracks and html"
+              $ ("> a\n\n< b\n\n<div>\n" :: Text)
               =?> codeBlockWith ("",["haskell","literate"],[]) "a"
                   <>
                   codeBlockWith ("",["haskell"],[]) "b"
-- 
cgit v1.2.3


From 05ea507bd75e0bb4bbb8f25cad5fa2f02e4f6796 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 May 2021 15:26:11 -0600
Subject: T.P.Parsing: improve include file functions.

Remove old `insertIncludedFileF`. [API change]
Give `insertIncludedFile` a more general type, allowing it
to be used where `insertIncludedFileF` was.
---
 src/Text/Pandoc/Parsing.hs             | 61 +++++++++++++++++-----------------
 src/Text/Pandoc/Readers/Org/Blocks.hs  |  3 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs |  3 +-
 3 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 11c4c7a62..cbe9993c6 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -121,7 +121,6 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              (<+?>),
                              extractIdClass,
                              insertIncludedFile,
-                             insertIncludedFileF,
                              -- * Re-exports from Text.Parsec
                              Stream,
                              runParser,
@@ -1638,12 +1637,15 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
     cls'   = maybe cls T.words $ lookup "class" kvs
     kvs'   = filter (\(k,_) -> k /= "id" || k /= "class") kvs
 
-insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT a st m (mf Blocks)
-                    -> (Text -> a)
-                    -> [FilePath] -> FilePath
-                    -> ParserT a st m (mf Blocks)
-insertIncludedFile' blocks totoks dirs f = do
+insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
+                   => ParserT a st m b -- ^ parser to apply
+                   -> (Text -> a) -- ^ convert Text to stream type
+                   -> [FilePath]  -- ^ search path (directories)
+                   -> FilePath    -- ^ path of file to include
+                   -> Maybe Int   -- ^ start line (negative counts from end)
+                   -> Maybe Int   -- ^ end line (negative counts from end)
+                   -> ParserT a st m b
+insertIncludedFile parser toStream dirs f mbstartline mbendline = do
   oldPos <- getPosition
   oldInput <- getInput
   containers <- getIncludeFiles <$> getState
@@ -1652,33 +1654,32 @@ insertIncludedFile' blocks totoks dirs f = do
   updateState $ addIncludeFile $ T.pack f
   mbcontents <- readFileFromDirs dirs f
   contents <- case mbcontents of
-                   Just s -> return s
+                   Just s -> return $ exciseLines mbstartline mbendline s
                    Nothing -> do
                      report $ CouldNotLoadIncludeFile (T.pack f) oldPos
                      return ""
-  setPosition $ newPos f 1 1
-  setInput $ totoks contents
-  bs <- blocks
+  setInput $ toStream contents
+  setPosition $ newPos f (fromMaybe 1 mbstartline) 1
+  result <- parser
   setInput oldInput
   setPosition oldPos
   updateState dropLatestIncludeFile
-  return bs
+  return result
+
+exciseLines :: Maybe Int -> Maybe Int -> Text -> Text
+exciseLines Nothing Nothing t = t
+exciseLines mbstartline mbendline t =
+  T.unlines $ take (endline' - (startline' - 1))
+            $ drop (startline' - 1) contentLines
+ where
+  contentLines = T.lines t
+  numLines = length contentLines
+  startline' = case mbstartline of
+                 Nothing -> 1
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
+  endline' = case mbendline of
+                 Nothing -> numLines
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
 
--- | Parse content of include file as blocks. Circular includes result in an
--- @PandocParseError@.
-insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
-                   => ParserT [a] st m Blocks
-                   -> (Text -> [a])
-                   -> [FilePath] -> FilePath
-                   -> ParserT [a] st m Blocks
-insertIncludedFile blocks totoks dirs f =
-  runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
-
--- TODO: replace this with something using addToSources.
--- | Parse content of include file as future blocks. Circular includes result in
--- an @PandocParseError@.
-insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT Sources st m (Future st Blocks)
-                    -> [FilePath] -> FilePath
-                    -> ParserT Sources st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 883434cdc..5e15c2c36 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -44,6 +44,7 @@ import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.Walk as Walk
+import Text.Pandoc.Sources (ToSources(..))
 
 --
 -- parsing blocks
@@ -527,7 +528,7 @@ include = try $ do
                      _ -> nullAttr
         return $ pure . B.codeBlockWith attr <$> parseRaw
       _ -> return $ return . B.fromList . blockFilter params <$> blockList
-  insertIncludedFileF blocksParser ["."] filename
+  insertIncludedFile blocksParser toSources ["."] filename Nothing Nothing
  where
   includeTarget :: PandocMonad m => OrgParser m FilePath
   includeTarget = do
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index c7ea02815..701bf3398 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -63,8 +63,7 @@ module Text.Pandoc.Readers.Org.Parsing
   , ellipses
   , citeKey
   , gridTableWith
-  , insertIncludedFileF
-  -- * Re-exports from Text.Pandoc.Parsec
+  , insertIncludedFile
   , runParser
   , runParserT
   , getInput
-- 
cgit v1.2.3


From 41a3ac9da99c2701fa7e6adbc85da91f191620f4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 May 2021 16:26:11 -0600
Subject: RST reader: use `insertIncludedFile` from T.P.Parsing...

instead of reproducing much of its code.
---
 src/Text/Pandoc/Readers/RST.hs | 94 ++++++++++++++++--------------------------
 1 file changed, 36 insertions(+), 58 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a3fcf028c..bb70b2620 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -27,8 +27,7 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem,
-                                      readFileFromDirs, getTimestamp)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, getTimestamp)
 import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -453,59 +452,38 @@ encoding
 -}
 
 includeDirective :: PandocMonad m
-                 => Text -> [(Text, Text)] -> Text
+                 => Text
+                 -> [(Text, Text)]
+                 -> Text
                  -> RSTParser m Blocks
 includeDirective top fields body = do
-  let f = trim top
-  guard $ not (T.null f)
+  let f = T.unpack $ trim top
+  guard $ not $ null f
   guard $ T.null (trim body)
-  -- options
-  let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
-  let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
-  oldPos <- getPosition
-  containers <- stateContainers <$> getState
-  when (f `elem` containers) $
-    throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
-  updateState $ \s -> s{ stateContainers = f : stateContainers s }
-  mbContents <- readFileFromDirs ["."] $ T.unpack f
-  contentLines <- case mbContents of
-                       Just s -> return $ T.lines s
-                       Nothing -> do
-                         logMessage $ CouldNotLoadIncludeFile f oldPos
-                         return []
-  let numLines = length contentLines
-  let startLine' = case startLine of
-                        Nothing            -> 1
-                        Just x | x >= 0    -> x
-                               | otherwise -> numLines + x -- negative from end
-  let endLine' = case endLine of
-                        Nothing            -> numLines + 1
-                        Just x | x >= 0    -> x
-                               | otherwise -> numLines + x -- negative from end
-  let contentLines' =   drop (startLine' - 1)
-                      $ take (endLine' - 1) contentLines
-  let contentLines'' = (case trim <$> lookup "end-before" fields of
-                             Just patt -> takeWhile (not . (patt `T.isInfixOf`))
-                             Nothing   -> id) .
-                       (case trim <$> lookup "start-after" fields of
-                             Just patt -> drop 1 .
-                                            dropWhile (not . (patt `T.isInfixOf`))
-                             Nothing   -> id) $ contentLines'
-  let contents' = T.unlines contentLines''
-  case lookup "code" fields of
-       Just lang -> do
-         let classes =  maybe [] T.words (lookup "class" fields)
-         let ident = maybe "" trimr $ lookup "name" fields
-         codeblock ident classes fields (trimr lang) contents' False
-       Nothing   -> case lookup "literal" fields of
-                         Just _  -> return $ B.rawBlock "rst" contents'
-                         Nothing -> do
-                           addToSources (initialPos (T.unpack f))
-                              (contents' <> "\n")
-                           updateState $ \s -> s{ stateContainers =
-                                         tail $ stateContainers s }
-                           return mempty
-
+  let startLine = lookup "start-line" fields >>= safeRead
+  let endLine = lookup "end-line" fields >>= safeRead
+  let classes =  maybe [] T.words (lookup "class" fields)
+  let ident = maybe "" trimr $ lookup "name" fields
+  let parser =
+       case lookup "code" fields of
+         Just lang ->
+           (codeblock ident classes fields (trimr lang) False
+            . sourcesToText) <$> getInput
+         Nothing   ->
+           case lookup "literal" fields of
+             Just _  -> B.rawBlock "rst" . sourcesToText <$> getInput
+             Nothing -> parseBlocks
+  let selectLines =
+        (case trim <$> lookup "end-before" fields of
+                         Just patt -> takeWhile (not . (patt `T.isInfixOf`))
+                         Nothing   -> id) .
+        (case trim <$> lookup "start-after" fields of
+                         Just patt -> drop 1 .
+                                        dropWhile (not . (patt `T.isInfixOf`))
+                         Nothing   -> id)
+  let toStream t =
+        toSources [(f, T.unlines . selectLines . T.lines $ t)]
+  insertIncludedFile parser toStream ["."] f startLine endLine
 
 --
 -- list blocks
@@ -734,8 +712,8 @@ directive' = do
                                      ""   -> stateRstHighlight def
                                      lang -> Just lang })
         x | x == "code" || x == "code-block" || x == "sourcecode" ->
-          codeblock name classes (map (second trimr) fields)
-             (trim top) body True
+          return $ codeblock name classes (map (second trimr) fields)
+             (trim top) True body
         "aafig" -> do
           let attribs = (name, ["aafig"], map (second trimr) fields)
           return $ B.codeBlockWith attribs $ stripTrailingNewlines body
@@ -1021,10 +999,10 @@ toChunks = dropWhile T.null
                           then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}"
                           else s
 
-codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Text -> Bool
-          -> RSTParser m Blocks
-codeblock ident classes fields lang body rmTrailingNewlines =
-  return $ B.codeBlockWith attribs $ stripTrailingNewlines' body
+codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Bool -> Text
+          -> Blocks
+codeblock ident classes fields lang rmTrailingNewlines body =
+  B.codeBlockWith attribs $ stripTrailingNewlines' body
     where stripTrailingNewlines' = if rmTrailingNewlines
                                      then stripTrailingNewlines
                                      else id
-- 
cgit v1.2.3


From b2398cd74744ce34fa10cc34f4051555f21feb2c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 May 2021 18:42:42 -0600
Subject: Org reader: Resolve org includes relative to ...

...the directory containing the file containing the
INCLUDE directive.  Closes #5501.
---
 src/Text/Pandoc/Readers/Org/Blocks.hs  | 6 ++++--
 src/Text/Pandoc/Readers/Org/Parsing.hs | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 5e15c2c36..f18d2f9a7 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -40,7 +40,7 @@ import Data.List (foldl', intersperse)
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Text (Text)
 import Data.List.NonEmpty (nonEmpty)
-
+import System.FilePath
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.Walk as Walk
@@ -528,7 +528,9 @@ include = try $ do
                      _ -> nullAttr
         return $ pure . B.codeBlockWith attr <$> parseRaw
       _ -> return $ return . B.fromList . blockFilter params <$> blockList
-  insertIncludedFile blocksParser toSources ["."] filename Nothing Nothing
+  currentDir <- takeDirectory . sourceName <$> getPosition
+  insertIncludedFile blocksParser toSources
+                     [currentDir] filename Nothing Nothing
  where
   includeTarget :: PandocMonad m => OrgParser m FilePath
   includeTarget = do
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 701bf3398..f0949e205 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -99,6 +99,7 @@ module Text.Pandoc.Readers.Org.Parsing
   , getState
   , updateState
   , SourcePos
+  , sourceName
   , getPosition
   ) where
 
-- 
cgit v1.2.3


From 2a2e08d82335d45c982f4acf14b62130365f2c8e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 May 2021 18:50:41 -0600
Subject: RST reader: seek include files in the directory...

...of the file containing the include directive, as
RST requires.

Closes #6632.
---
 src/Text/Pandoc/Readers/RST.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index bb70b2620..35292d949 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -38,6 +38,7 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Time.Format
+import System.FilePath (takeDirectory)
 
 -- TODO:
 -- [ ] .. parsed-literal
@@ -483,7 +484,8 @@ includeDirective top fields body = do
                          Nothing   -> id)
   let toStream t =
         toSources [(f, T.unlines . selectLines . T.lines $ t)]
-  insertIncludedFile parser toStream ["."] f startLine endLine
+  currentDir <- takeDirectory . sourceName <$> getPosition
+  insertIncludedFile parser toStream [currentDir] f startLine endLine
 
 --
 -- list blocks
-- 
cgit v1.2.3


From 46309319ef9eb66ea3598073df4174fb40b1b0f8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 12 May 2021 06:01:13 -0600
Subject: Fix source position reporting for YAML bibliographies.

Closes #7273.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 --
 src/Text/Pandoc/Readers/Metadata.hs | 8 ++++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 69dd51bc4..2d20ff018 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -97,12 +97,10 @@ yamlToRefs :: PandocMonad m
            -> m [MetaValue]
 yamlToRefs idpred opts mbfp bstr = do
   let parser = do
-        oldPos <- getPosition
         case mbfp of
           Nothing -> return ()
           Just fp -> setPosition $ initialPos fp
         refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
-        setPosition oldPos
         return $ runF refs defaultParserState
   parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index bbcfe62ea..45eddf25a 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -98,8 +98,12 @@ yamlBsToRefs pMetaValue idpred bstr =
        Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
                 -> return . return $ mempty
        Right _  -> Prelude.fail "expecting YAML object"
-       Left (_pos, err')
-                -> Prelude.fail err'
+       Left (yamlpos, err')
+                -> do pos <- getPosition
+                      setPosition $ incSourceLine
+                            (setSourceColumn pos (YE.posColumn yamlpos))
+                            (YE.posLine yamlpos - 1)
+                      Prelude.fail err'
 
 
 nodeToKey :: YAML.Node YE.Pos -> Maybe Text
-- 
cgit v1.2.3


From 3f09f53459b877f53072efbf57dec21fa37280b5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 13 May 2021 21:11:52 -0700
Subject: Implement curly-brace syntax for Markdown citation keys.

The change provides a way to use citation keys that contain
special characters not usable with the standard citation
key syntax.  Example: `@{foo_bar{x}'}` for the key `foo_bar{x}`.
Closes #6026.

The change requires adding a new parameter to the `citeKey`
parser from Text.Pandoc.Parsing [API change].

Markdown reader: recognize @{..} syntax for citatinos.

Markdown writer:  use @{..} syntax for citations when needed.

Update manual with curly-brace syntax for citations.

Closes #6026.
---
 MANUAL.txt                                 | 15 ++++++++++-----
 src/Text/Pandoc/Parsing.hs                 | 18 +++++++++++++-----
 src/Text/Pandoc/Readers/Markdown.hs        |  6 +++---
 src/Text/Pandoc/Readers/Org/Inlines.hs     |  8 ++++----
 src/Text/Pandoc/Writers/Markdown/Inline.hs |  9 +++++++--
 test/command/6026.md                       | 19 +++++++++++++++++++
 6 files changed, 56 insertions(+), 19 deletions(-)
 create mode 100644 test/command/6026.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index 75fe599e2..6f06d1e8a 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -4949,12 +4949,15 @@ Inline and regular footnotes may be mixed freely.
 #### Extension: `citations` ####
 
 Markdown citations go inside square brackets and are separated
-by semicolons.  Each citation must have a key, composed of '@' +
-the citation identifier from the database, and may optionally
-have a prefix, a locator, and a suffix.  The citation key must
-begin with a letter, digit, or `_`, and may contain
+by semicolons.  Each citation must have a key and may optionally
+have a prefix, a locator, and a suffix.  The citation key
+consists of `@` plus the citation identifier, possibly
+enclosed in curly braces.  If the identifier starts
+with a letter, digit, or `_`, followed by zero or more
 alphanumerics, `_`, and internal punctuation characters
-(`:.#$%&-+?<>~/`).  Here are some examples:
+(`:.#$%&-+?<>~/`), then the curly braces may be omitted.
+Identifiers may not contain whitespace characters or unbalanced
+curly braces.  Here are some examples:
 
     Blah blah [see @doe99, pp. 33-35; also @smith04, chap. 1].
 
@@ -4962,6 +4965,8 @@ alphanumerics, `_`, and internal punctuation characters
 
     Blah blah [@smith04; @doe99].
 
+    Blah blah [@{https://example.com/bib?name=foobar&date=2000}, p. 33].
+
 `pandoc` detects locator terms in the [CSL locale files].
 Either abbreviated or unabbreviated forms are accepted. In the `en-US`
 locale, locator terms can be written in either singular or plural forms,
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index cbe9993c6..0bb794ba1 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1605,19 +1605,27 @@ nested p = do
   return res
 
 citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
-        => ParserT s st m (Bool, Text)
-citeKey = try $ do
+        => Bool -- ^ If True, allow expanded @{..} syntax.
+        -> ParserT s st m (Bool, Text)
+citeKey allowBraced = try $ do
   guard =<< notAfterString
   suppress_author <- option False (True <$ char '-')
   char '@'
+  key <- simpleCiteIdentifier
+        <|> if allowBraced
+               then charsInBalanced '{' '}' (satisfy (not . isSpace))
+               else mzero
+  return (suppress_author, key)
+
+simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char)
+                      => ParserT s st m Text
+simpleCiteIdentifier = do
   firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
   let regchar = satisfy (\c -> isAlphaNum c || c == '_')
   let internal p = try $ p <* lookAhead regchar
   rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
                  try (oneOf ":/" <* lookAhead (char '/'))
-  let key = firstChar:rest
-  return (suppress_author, T.pack key)
-
+  return $ T.pack $ firstChar:rest
 
 token :: (Stream s m t)
       => (t -> Text)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2d20ff018..34f16ab4e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -2094,7 +2094,7 @@ cite = do
 
 textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
 textualCite = try $ do
-  (suppressAuthor, key) <- citeKey
+  (suppressAuthor, key) <- citeKey True
   -- If this is a reference to an earlier example list item,
   -- then don't parse it as a citation.  If the example list
   -- item comes later, we'll parse it here and figure out in
@@ -2174,7 +2174,7 @@ prefix = trimInlinesF . mconcat <$>
   manyTill inline (char ']'
    <|> lookAhead
          (try $ do optional (try (char ';' >> spnl))
-                   citeKey
+                   citeKey True
                    return ']'))
 
 citeList :: PandocMonad m => MarkdownParser m (F [Citation])
@@ -2183,7 +2183,7 @@ citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
 citation :: PandocMonad m => MarkdownParser m (F Citation)
 citation = try $ do
   pref <- prefix
-  (suppress_author, key) <- citeKey
+  (suppress_author, key) <- citeKey True
   suff <- suffix
   noteNum <- stateNoteNumber <$> getState
   return $ do
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 054f2611a..6862dd71e 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -263,7 +263,7 @@ berkeleyCitationList = try $ do
  where
    citationListPart :: PandocMonad m => OrgParser m (F Inlines)
    citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
-     notFollowedBy' citeKey
+     notFollowedBy' $ citeKey False
      notFollowedBy (oneOf ";]")
      inline
 
@@ -278,7 +278,7 @@ berkeleyBareTag' = try $ void (string "cite")
 
 berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
 berkeleyTextualCite = try $ do
-  (suppressAuthor, key) <- citeKey
+  (suppressAuthor, key) <- citeKey False
   returnF . return $ Citation
     { citationId      = key
     , citationPrefix  = mempty
@@ -351,7 +351,7 @@ citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
 citation :: PandocMonad m => OrgParser m (F Citation)
 citation = try $ do
   pref <- prefix
-  (suppress_author, key) <- citeKey
+  (suppress_author, key) <- citeKey False
   suff <- suffix
   return $ do
     x <- pref
@@ -368,7 +368,7 @@ citation = try $ do
       }
  where
    prefix = trimInlinesF . mconcat <$>
-            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+            manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
    suffix = try $ do
      hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
      skipSpaces
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index e35e1a0b9..2062050e4 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -491,11 +491,16 @@ inlineToMarkdown opts (Cite (c:cs) lst)
            rest <- mapM convertOne cs
            let inbr = suffs <+> joincits rest
                br   = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
-           return $ literal ("@" <> citationId c) <+> br
+           return $ literal ("@" <> maybeInBraces (citationId c)) <+> br
          else do
            cits <- mapM convertOne (c:cs)
            return $ literal "[" <> joincits cits <> literal "]"
   where
+        maybeInBraces key =
+          case readWith (citeKey False >> spaces >> eof)
+                 defaultParserState ("@" <> key) of
+            Left _  -> "{" <> key <> "}"
+            Right _ -> key
         joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty)
         convertOne Citation { citationId      = k
                             , citationPrefix  = pinlines
@@ -504,7 +509,7 @@ inlineToMarkdown opts (Cite (c:cs) lst)
                                = do
            pdoc <- inlineListToMarkdown opts pinlines
            sdoc <- inlineListToMarkdown opts sinlines
-           let k' = literal (modekey m <> "@" <> k)
+           let k' = literal (modekey m <> "@" <> maybeInBraces k)
                r = case sinlines of
                         Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
                         _                                         -> k' <+> sdoc
diff --git a/test/command/6026.md b/test/command/6026.md
new file mode 100644
index 000000000..5e18a5f42
--- /dev/null
+++ b/test/command/6026.md
@@ -0,0 +1,19 @@
+```
+% pandoc -t native
+@{https://openreview.net/forum?id=HkwoSDPgg}
+
+@https://openreview.net/forum?id=HkwoSDPgg
+^D
+[Para [Cite [Citation {citationId = "https://openreview.net/forum?id=HkwoSDPgg", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0}] [Str "@https://openreview.net/forum?id=HkwoSDPgg"]]
+,Para [Cite [Citation {citationId = "https://openreview.net/forum?id", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "@https://openreview.net/forum?id"],Str "=HkwoSDPgg"]]
+```
+```
+% pandoc -t markdown
+@{https://openreview.net/forum?id=HkwoSDPgg}
+
+@https://openreview.net/forum?id=HkwoSDPgg
+^D
+@{https://openreview.net/forum?id=HkwoSDPgg}
+
+@https://openreview.net/forum?id=HkwoSDPgg
+```
-- 
cgit v1.2.3


From 875f8f36545d1c21fa3d29c52c40517a667f2574 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 14 May 2021 11:58:58 +0200
Subject: HTML reader: don't fail on unmatched closing "script" tag.

Prevent the reader from crashing if the HTML input contains an unmatched
closing `</script>` tag.

Fixes: #7282
---
 src/Text/Pandoc/Readers/HTML.hs | 16 +++++++++-------
 test/command/7282.md            |  7 +++++++
 2 files changed, 16 insertions(+), 7 deletions(-)
 create mode 100644 test/command/7282.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f5c8a2277..0a9d67e35 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -942,13 +942,15 @@ getTagName (TagClose t)  = Just t
 getTagName _             = Nothing
 
 isInlineTag :: Tag Text -> Bool
-isInlineTag t =
-  isCommentTag t ||
-  case getTagName t of
-    Nothing  -> False
-    Just "script" -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
-    Just x   -> x `Set.notMember` blockTags ||
-                T.take 1 x == "?" -- processing instr.
+isInlineTag t = isCommentTag t || case t of
+  TagOpen "script" _ -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+  TagClose "script"  -> True
+  TagOpen name _     -> isInlineTagName name
+  TagClose name      -> isInlineTagName name
+  _                  -> False
+ where isInlineTagName x =
+         x `Set.notMember` blockTags ||
+         T.take 1 x == "?" -- processing instr.
 
 isBlockTag :: Tag Text -> Bool
 isBlockTag t = isBlockTagName || isTagComment t
diff --git a/test/command/7282.md b/test/command/7282.md
new file mode 100644
index 000000000..4ed6b5b3e
--- /dev/null
+++ b/test/command/7282.md
@@ -0,0 +1,7 @@
+Don't crash on unmatched closing tag.
+```
+% pandoc -f html -t native
+</script>
+^D
+[]
+```
-- 
cgit v1.2.3


From 013e4a3164115bf84f3e1964f21b7cd7f020b86a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 14 May 2021 21:31:24 +0200
Subject: HTML reader: keep h1 tags as normal headers (#7274)

The tags `<title>` and `<h1 class="title">` often contain the same
information, so the latter was dropped from the document. However, as
this can lead to loss of information, the heading is now always
retained.

Use `--shift-heading-level-by=-1` to turn the `<h1>` into the document
title, or a filter to restore the previous behavior.

Closes: #2293
---
 src/Text/Pandoc/Readers/HTML.hs | 6 +-----
 test/html-reader.native         | 3 ++-
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 0a9d67e35..b305de7b5 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -499,17 +499,13 @@ pHeader = try $ do
                            tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
                            (const True)
   let attr = toStringAttr attr'
-  let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text)
-                                               [("class","title")]
   level <- headerLevel tagtype
   contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
   let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] T.words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   attr'' <- registerHeader (ident, classes, keyvals) contents
-  return $ if bodyTitle
-              then mempty  -- skip a representation of the title in the body
-              else B.headerWith attr'' level contents
+  return $ B.headerWith attr'' level contents
 
 pHrule :: PandocMonad m => TagParser m Blocks
 pHrule = do
diff --git a/test/html-reader.native b/test/html-reader.native
index 04ec55d1e..880561a93 100644
--- a/test/html-reader.native
+++ b/test/html-reader.native
@@ -1,5 +1,6 @@
 Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
-[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
+[Header 1 ("pandoc-test-suite",["title"],[]) [Str "Pandoc",Space,Str "Test",Space,Str "Suite"]
+,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
 ,HorizontalRule
 ,Header 1 ("headers",[],[]) [Str "Headers"]
 ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link ("",[],[]) [Str "embedded",Space,Str "link"] ("/url","")]
-- 
cgit v1.2.3


From 0794862aacdb5d28616dadc3d681a63c36251212 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 15 May 2021 16:36:13 +0200
Subject: HTML writer: parse `<header>` as a Div

HTML5 `<header>` elements are treated like `<div>` elements.
---
 src/Text/Pandoc/Readers/HTML.hs |  2 ++
 test/Tests/Readers/HTML.hs      | 14 +++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b305de7b5..0a9e4addf 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -205,6 +205,7 @@ block = ((do
           | otherwise
           -> pDiv
         "section" -> pDiv
+        "header" -> pDiv
         "main" -> pDiv
         "figure" -> pFigure
         "iframe" -> pIframe
@@ -404,6 +405,7 @@ pLineBlock = try $ do
 isDivLike :: Text -> Bool
 isDivLike "div"     = True
 isDivLike "section" = True
+isDivLike "header"  = True
 isDivLike "main"    = True
 isDivLike _         = False
 
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index f23af2cb1..7f5849991 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -99,11 +99,15 @@ tests = [ testGroup "base tag"
             plain (codeWith ("",["sample"],[]) "Answer is 42")
           ]
         , testGroup "var"
-        [
-          test html "inline var block" $
-          "<var>result</var>" =?>
-          plain (codeWith ("",["variable"],[]) "result")
-        ]
+          [ test html "inline var block" $
+            "<var>result</var>" =?>
+            plain (codeWith ("",["variable"],[]) "result")
+          ]
+        , testGroup "header"
+          [ test htmlNativeDivs "<header> is parsed as a div" $
+            "<header id=\"title\">Title</header>" =?>
+            divWith ("title", mempty, mempty) (plain "Title")
+          ]
         , askOption $ \(QuickCheckTests numtests) ->
             testProperty "Round trip" $
               withMaxSuccess (if QuickCheckTests numtests == defaultValue
-- 
cgit v1.2.3


From 1843a8793a9043a45c8c427b06f100461889d7ef Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 17 May 2021 18:08:02 +0200
Subject: HTML writer: keep attributes from code nested below pre tag.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a code block is defined with `<pre><code
class="language-x">…</code></pre>`, where the `<pre>` element has no
attributes, then the attributes from the `<code>` element are used
instead. Any leading `language-` prefix is dropped in the code's *class*
attribute are dropped to improve syntax highlighting.

Closes: #7221
---
 src/Text/Pandoc/Readers/HTML.hs | 13 ++++++++++++-
 test/Tests/Readers/HTML.hs      | 11 +++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 0a9e4addf..fc4575f2d 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -557,7 +557,18 @@ pFigure = try $ do
 pCodeBlock :: PandocMonad m => TagParser m Blocks
 pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
-  let attr = toAttr attr'
+  -- if the `pre` has no attributes, try if it is followed by a `code`
+  -- element and use those attributes if possible.
+  attr <- case attr' of
+    _:_ -> pure (toAttr attr')
+    []  -> option nullAttr $ do
+      TagOpen _ codeAttr <- pSatisfy (matchTagOpen "code" [])
+      pure $ toAttr
+        [ (k, v') | (k, v) <- codeAttr
+                    -- strip language from class
+                  , let v' = if k == "class"
+                             then fromMaybe v (T.stripPrefix "language-" v)
+                             else v ]
   contents <- manyTill pAny (pCloses "pre" <|> eof)
   let rawText = T.concat $ map tagToText contents
   -- drop leading newline if any
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index 7f5849991..9bf567194 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -108,6 +108,17 @@ tests = [ testGroup "base tag"
             "<header id=\"title\">Title</header>" =?>
             divWith ("title", mempty, mempty) (plain "Title")
           ]
+        , testGroup "code block"
+          [ test html "attributes in pre > code element" $
+            "<pre><code id=\"a\" class=\"python\">\nprint('hi')\n</code></pre>"
+            =?>
+            codeBlockWith ("a", ["python"], []) "print('hi')"
+
+          , test html "attributes in pre take precendence" $
+            "<pre id=\"c\"><code id=\"d\">\nprint('hi mom!')\n</code></pre>"
+            =?>
+            codeBlockWith ("c", [], []) "print('hi mom!')"
+          ]
         , askOption $ \(QuickCheckTests numtests) ->
             testProperty "Round trip" $
               withMaxSuccess (if QuickCheckTests numtests == defaultValue
-- 
cgit v1.2.3


From 5736b331d8ecaa12cc3e2712211ada37c665a93a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 19 May 2021 16:14:49 -0700
Subject: LaTeX reader: better support for `\xspace`.

Previously we only supported it in inline contexts; now
we support it in all contexts, including math.

Partially addresses #7299.
---
 src/Text/Pandoc/Readers/LaTeX.hs         | 12 ------------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 21 +++++++++++++++++++--
 test/command/4442.md                     |  2 +-
 test/command/7299.md                     | 23 +++++++++++++++++++++++
 4 files changed, 43 insertions(+), 15 deletions(-)
 create mode 100644 test/command/7299.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f90d562ae..2ace18d1b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -232,16 +232,6 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
-
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
   Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -417,8 +407,6 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
-    -- xspace
-    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 35ce3509d..b6804a825 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -464,7 +464,7 @@ satisfyTok f = do
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
   st <- getState
-  unless (sVerbatimMode st || M.null (sMacros st)) $ do
+  unless (sVerbatimMode st) $
     getInput >>= doMacros' 1 >>= setInput
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
@@ -526,7 +526,7 @@ doMacros' n inp =
         $ throwError $ PandocMacroLoop name
       macros <- sMacros <$> getState
       case M.lookup name macros of
-           Nothing -> mzero
+           Nothing -> trySpecialMacro name ts
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
              let getargs' = do
                    args <-
@@ -554,6 +554,23 @@ doMacros' n inp =
                    ExpandWhenUsed    -> doMacros' (n' + 1) result
                    ExpandWhenDefined -> return result
 
+-- | Certain macros do low-level tex manipulations that can't
+-- be represented in our Macro type, so we handle them here.
+trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok]
+trySpecialMacro "xspace" ts = do
+  ts' <- doMacros' 1 ts
+  case ts' of
+    Tok pos Word t : _
+      | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
+    _ -> return ts'
+trySpecialMacro _ _ = mzero
+
+startsWithAlphaNum :: Text -> Bool
+startsWithAlphaNum t =
+  case T.uncons t of
+       Just (c, _) | isAlphaNum c -> True
+       _           -> False
+
 setpos :: SourcePos -> Tok -> Tok
 setpos spos (Tok _ tt txt) = Tok spos tt txt
 
diff --git a/test/command/4442.md b/test/command/4442.md
index 8574fe759..447073406 100644
--- a/test/command/4442.md
+++ b/test/command/4442.md
@@ -5,5 +5,5 @@
 ^D
 \newcommand{\myFruit}{Mango\xspace}
 
-Mango\xspace is the king of fruits.
+Mango is the king of fruits.
 ```
diff --git a/test/command/7299.md b/test/command/7299.md
new file mode 100644
index 000000000..0847c40ce
--- /dev/null
+++ b/test/command/7299.md
@@ -0,0 +1,23 @@
+```
+% pandoc -f latex -t plain
+$1-{\ensuremath{r}\xspace}$
+^D
+1 − r
+```
+
+```
+% pandoc -f latex -t plain
+\newcommand{\foo}{Foo\xspace}
+
+$\text{\foo bar}$
+^D
+Foo bar
+```
+
+```
+% pandoc -f latex -t plain
+a\xspace b
+^D
+a b
+```
+
-- 
cgit v1.2.3


From 8437a4a002210a33ee721c58f5f95605898a8e1b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 08:15:48 -0700
Subject: LaTeX reader: support `\pm` in `SI{..}`.

Closes #6620.
---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 4 +++-
 test/command/6620.md                     | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 1952f4e1a..c4fb06700 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -45,7 +45,9 @@ doSI tok = do
                       unit]
 
 doSInum :: PandocMonad m => LP m Inlines
-doSInum = skipopts *> (tonum . untokenize <$> braced)
+doSInum = skipopts *> (tonum . untokenize . map convertPM <$> braced)
+  where convertPM (Tok pos (CtrlSeq "pm") _) = Tok pos Word "\xb1\xa0"
+        convertPM t = t
 
 tonum :: Text -> Inlines
 tonum value =
diff --git a/test/command/6620.md b/test/command/6620.md
index 314200d30..00643e57c 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -7,9 +7,12 @@
 \SI{0.135(21)}{\m}
 
 \SI{12.3(60)}{\m}
+
+\SI{10.0 \pm 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
+<p>10.0 ± 3.3 ms</p>
 ```
-- 
cgit v1.2.3


From a366bd6abccd18a49f0033de850ddd53830cc5ed Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:03:29 -0700
Subject: LaTeX reader: Fix parsing of `+-` in siunitx numbers.

See #6658.
---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 11 +++++++----
 test/command/6620.md                     |  5 ++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index c4fb06700..5e140ef7a 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -45,9 +45,7 @@ doSI tok = do
                       unit]
 
 doSInum :: PandocMonad m => LP m Inlines
-doSInum = skipopts *> (tonum . untokenize . map convertPM <$> braced)
-  where convertPM (Tok pos (CtrlSeq "pm") _) = Tok pos Word "\xb1\xa0"
-        convertPM t = t
+doSInum = skipopts *> (tonum . untokenize <$> braced)
 
 tonum :: Text -> Inlines
 tonum value =
@@ -74,12 +72,16 @@ parseNumPart =
   parseDecimalNum <|>
   parseComma <|>
   parsePlusMinus <|>
+  parsePM <|>
   parseI <|>
   parseExp <|>
   parseX <|>
   parseSpace
  where
-  parseDecimalNum = do
+  parseDecimalNum, parsePlusMinus, parsePM,
+    parseComma, parseI, parseX,
+    parseExp, parseSpace :: Parser Text () Inlines
+  parseDecimalNum = try $ do
     pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
     basenum <- (pref <>) . T.pack
                 <$> many1 (satisfy (\c -> isDigit c || c == '.'))
@@ -100,6 +102,7 @@ parseNumPart =
                                                | otherwise -> "." <> t
   parseComma = str "." <$ char ','
   parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-")
+  parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm")
   parseParens =
     char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')'
   parseI = str "i" <$ char 'i'
diff --git a/test/command/6620.md b/test/command/6620.md
index 00643e57c..b19cd1972 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -9,10 +9,13 @@
 \SI{12.3(60)}{\m}
 
 \SI{10.0 \pm 3.3}{\ms}
+
+\SI{10.0 +- 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
-<p>10.0 ± 3.3 ms</p>
+<p>10.0 ± 3.3 ms</p>
+<p>10.0 ± 3.3 ms</p>
 ```
-- 
cgit v1.2.3


From 5dc917da3ed997c6e48e22bde242f0f8e1ae5333 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:11:26 -0700
Subject: LaTeX reader siunitx: add leading 0 to numbers starting with .

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 7 +++++--
 test/command/6620.md                     | 3 ---
 test/command/6658.md                     | 9 +++++++++
 3 files changed, 14 insertions(+), 5 deletions(-)
 create mode 100644 test/command/6658.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 5e140ef7a..1474329d4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -83,8 +83,11 @@ parseNumPart =
     parseExp, parseSpace :: Parser Text () Inlines
   parseDecimalNum = try $ do
     pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
-    basenum <- (pref <>) . T.pack
-                <$> many1 (satisfy (\c -> isDigit c || c == '.'))
+    basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
+    let basenum = pref <> T.pack
+                    (case basenum' of
+                      '.':_ -> '0':basenum'
+                      _ -> basenum')
     uncertainty <- option mempty $ T.pack <$> parseParens
     if T.null uncertainty
        then return $ str basenum
diff --git a/test/command/6620.md b/test/command/6620.md
index b19cd1972..e448ca6b5 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -9,13 +9,10 @@
 \SI{12.3(60)}{\m}
 
 \SI{10.0 \pm 3.3}{\ms}
-
-\SI{10.0 +- 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
 <p>10.0 ± 3.3 ms</p>
-<p>10.0 ± 3.3 ms</p>
 ```
diff --git a/test/command/6658.md b/test/command/6658.md
new file mode 100644
index 000000000..bcd174465
--- /dev/null
+++ b/test/command/6658.md
@@ -0,0 +1,9 @@
+```
+pandoc -f latex
+\SI{10.0 +- 3.3}{\ms}
+
+\num{.3e45}
+^D
+<p>10.0 ± 3.3 ms</p>
+<p>0.3 × 10<sup>45</sup></p>
+```
-- 
cgit v1.2.3


From bc5058234feab7646f58dc01379b4eadf95bf411 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:18:23 -0700
Subject: LaTeX reader sinuitx: fix + sign on ang.

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 9 ++++++---
 test/command/6658.md                     | 3 +++
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 1474329d4..72f81dcde 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -118,11 +118,14 @@ doSIang :: PandocMonad m => LP m Inlines
 doSIang = do
   skipopts
   ps <- T.splitOn ";" . untokenize <$> braced
+  let dropPlus t = case T.uncons t of
+                     Just ('+',t') -> t'
+                     _ -> t
   case ps ++ repeat "" of
     (d:m:s:_) -> return $
-      (if T.null d then mempty else str d <> str "\xb0") <>
-      (if T.null m then mempty else str m <> str "\x2032") <>
-      (if T.null s then mempty else str s <> str "\x2033")
+      (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <>
+      (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <>
+      (if T.null s then mempty else str (dropPlus s) <> str "\x2033")
     _ -> return mempty
 
 -- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms"
diff --git a/test/command/6658.md b/test/command/6658.md
index bcd174465..0a8512f85 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -3,7 +3,10 @@ pandoc -f latex
 \SI{10.0 +- 3.3}{\ms}
 
 \num{.3e45}
+
+\ang{+10;+3;}
 ^D
 <p>10.0 ± 3.3 ms</p>
 <p>0.3 × 10<sup>45</sup></p>
+<p>10°3′</p>
 ```
-- 
cgit v1.2.3


From 4e990a8cf9207f2315d6a55a45c93c2857663316 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 10:12:44 -0700
Subject: LaTeX/siunitx: fix parsing of `\cubic` etc. See #6658.

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 85 +++++++++++++++++++-------------
 test/command/6658.md                     |  3 ++
 2 files changed, 53 insertions(+), 35 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 72f81dcde..63ab7267d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Readers.LaTeX.SIunitx
   ( siunitxCommands )
@@ -154,40 +155,55 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = try (do
-  Tok _ (CtrlSeq name) _ <- anyControlSeq
-  case name of
-    "square" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "2"
-    "cubic" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "3"
-    "raisetothe" -> do
-       n <- tok
-       unit <- siUnit tok
-       return $ unit <> superscript n
-    _ ->
-       case M.lookup name siUnitMap of
-            Just il ->
-              option il $
-                choice
-                 [ (il <> superscript "2") <$ controlSeq "squared"
-                 , (il <> superscript "3") <$ controlSeq "cubed"
-                 , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok)
-                 ]
-            Nothing -> fail "not an siunit unit command")
- <|> (lookAhead anyControlSeq >> tok)
- <|> (do Tok _ Word t <- satisfyTok isWordTok
-         return $ str t)
- <|> (symbol '^' *> (superscript <$> tok))
- <|> (symbol '_' *> (subscript <$> tok))
- <|> ("\xa0" <$ symbol '.')
- <|> ("\xa0" <$ symbol '~')
- <|> tok
- <|> (do Tok _ _ t <- anyTok
-         return (str t))
+siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines
+siUnit tok = mconcat <$> many1 siUnitPart
+ where
+  siUnitPart :: LP m Inlines
+  siUnitPart =
+        (siPrefix <*> siUnitPart)
+    <|> (do u <- siBase <|> tok
+            option u $ siSuffix <*> pure u)
+  siPrefix :: LP m (Inlines -> Inlines)
+  siPrefix =
+       (do _ <- controlSeq "per"
+           skipopts -- TODO handle option
+           return (str "/" <>))
+   <|> (do _ <- controlSeq "square"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubic"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "raisetothe"
+           skipopts
+           n <- tok
+           return (<> superscript n))
+  siSuffix :: LP m (Inlines -> Inlines)
+  siSuffix =
+       (do _ <- controlSeq "squared"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubed"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "tothe"
+           skipopts
+           n <- tok
+           return (<> superscript n))
+  siBase :: LP m Inlines
+  siBase = mconcat <$> many1
+    ((try
+       (do Tok _ (CtrlSeq name) _ <- anyControlSeq
+           case M.lookup name siUnitMap of
+              Just il -> pure il
+              Nothing -> fail "not a unit command"))
+    <|> (do Tok _ Word t <- satisfyTok isWordTok
+            return $ str t)
+    <|> (symbol '^' *> (superscript <$> tok))
+    <|> (symbol '_' *> (subscript <$> tok))
+    <|> (str "\xa0" <$ symbol '.')
+    <|> (str "\xa0" <$ symbol '~')
+     )
 
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
@@ -347,7 +363,6 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("per", str "/")
   , ("peta", str "P")
   , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
diff --git a/test/command/6658.md b/test/command/6658.md
index 0a8512f85..549610992 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -5,8 +5,11 @@ pandoc -f latex
 \num{.3e45}
 
 \ang{+10;+3;}
+
+\si{\gram\per\cubic\centi\metre}
 ^D
 <p>10.0 ± 3.3 ms</p>
 <p>0.3 × 10<sup>45</sup></p>
 <p>10°3′</p>
+<p>g/cm<sup>3</sup></p>
 ```
-- 
cgit v1.2.3


From bb11f5fb86993559f9999d4795355b02ae78cc3d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 12:06:15 -0700
Subject: LaTeX reader: More siunitx improvements.  Closes #6658.

There's still one slight divergence from the siunitx behavior:
we get 'kg m/A/s' instead of 'kg m/(A s)'. At the moment I'm
not going to worry about that.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |   3 +-
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 138 +++++++++++++++++++++----------
 test/command/6658.md                     |  72 ++++++++++++++--
 3 files changed, 161 insertions(+), 52 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index b6804a825..1c77eb299 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -806,7 +806,8 @@ withRaw parser = do
 
 keyval :: PandocMonad m => LP m (Text, Text)
 keyval = try $ do
-  Tok _ Word key <- satisfyTok isWordTok
+  key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+                         (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
   sp
   val <- option mempty $ do
            symbol '='
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 63ab7267d..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -10,27 +10,32 @@ import Text.Pandoc.Class
 import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Control.Applicative ((<|>))
+import Control.Monad (void)
 import qualified Data.Map as M
 import Data.Char (isDigit)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
-
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
 
 siunitxCommands :: PandocMonad m
                  => LP m Inlines -> M.Map Text (LP m Inlines)
 siunitxCommands tok = M.fromList
-  [ ("si", skipopts *> dosi tok)
+  [ ("si", dosi tok)
   , ("SI", doSI tok)
   , ("SIrange", doSIrange True tok)
   , ("numrange", doSIrange False tok)
   , ("numlist", doSInumlist)
+  , ("SIlist", doSIlist tok)
   , ("num", doSInum)
   , ("ang", doSIang)
   ]
 
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+  options <- option [] keyvals
+  grouped (siUnit options tok) <|> siUnit options tok
 
 -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
 doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -65,9 +70,29 @@ doSInumlist = do
              mconcat (intersperse (str "," <> space) (init xs)) <>
              text ", & " <> last xs
 
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+  options <- option [] keyvals
+  nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+  unit <- grouped (siUnit options tok) <|> siUnit options tok
+  let xs = map (<> (str "\xa0" <> unit)) nums
+  case xs of
+    []  -> return mempty
+    [x] -> return x
+    _   -> return $
+             mconcat (intersperse (str "," <> space) (init xs)) <>
+             text ", & " <> last xs
+
 parseNum :: Parser Text () Inlines
 parseNum = (mconcat <$> many parseNumPart) <* eof
 
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
 parseNumPart :: Parser Text () Inlines
 parseNumPart =
   parseDecimalNum <|>
@@ -83,7 +108,7 @@ parseNumPart =
     parseComma, parseI, parseX,
     parseExp, parseSpace :: Parser Text () Inlines
   parseDecimalNum = try $ do
-    pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
+    pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
     basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
     let basenum = pref <> T.pack
                     (case basenum' of
@@ -155,20 +180,30 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = mconcat <$> many1 siUnitPart
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
  where
   siUnitPart :: LP m Inlines
-  siUnitPart =
-        (siPrefix <*> siUnitPart)
-    <|> (do u <- siBase <|> tok
-            option u $ siSuffix <*> pure u)
+  siUnitPart = try $ do
+    skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+    x <- ((siPrefix <*> siBase)
+            <|> (do u <- siBase <|> tok
+                    option u $ siSuffix <*> pure u))
+    option x (siInfix x)
+  siInfix :: Inlines -> LP m Inlines
+  siInfix u1 = try $
+       (do _ <- controlSeq "per"
+           u2 <- siUnitPart
+           let useSlash = lookup "per-mode" options == Just "symbol"
+           if useSlash
+              then return (u1 <> str "/" <> u2)
+              else return (u1 <> str "\xa0" <> negateExponent u2))
+   <|> (do _ <- symbol '/'
+           u2 <- siUnitPart
+           return (u1 <> str "/" <> u2))
   siPrefix :: LP m (Inlines -> Inlines)
   siPrefix =
-       (do _ <- controlSeq "per"
-           skipopts -- TODO handle option
-           return (str "/" <>))
-   <|> (do _ <- controlSeq "square"
+       (do _ <- controlSeq "square"
            skipopts
            return (<> superscript "2"))
    <|> (do _ <- controlSeq "cubic"
@@ -176,7 +211,7 @@ siUnit tok = mconcat <$> many1 siUnitPart
            return (<> superscript "3"))
    <|> (do _ <- controlSeq "raisetothe"
            skipopts
-           n <- tok
+           n <- walk hyphenToMinus <$> tok
            return (<> superscript n))
   siSuffix :: LP m (Inlines -> Inlines)
   siSuffix =
@@ -188,23 +223,57 @@ siUnit tok = mconcat <$> many1 siUnitPart
            return (<> superscript "3"))
    <|> (do _ <- controlSeq "tothe"
            skipopts
-           n <- tok
+           n <- walk hyphenToMinus <$> tok
            return (<> superscript n))
+   <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> superscript n)))
+   <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> subscript n)))
+  negateExponent :: Inlines -> Inlines
+  negateExponent ils =
+    case Seq.viewr (unMany ils) of
+      xs Seq.:> Superscript ss -> (Many xs) <>
+                                     superscript (str minus <> fromList ss)
+      _ -> ils <> superscript (str (minus <> "1"))
   siBase :: LP m Inlines
-  siBase = mconcat <$> many1
+  siBase =
     ((try
        (do Tok _ (CtrlSeq name) _ <- anyControlSeq
-           case M.lookup name siUnitMap of
-              Just il -> pure il
-              Nothing -> fail "not a unit command"))
+           case M.lookup name siUnitModifierMap of
+              Just il -> (il <>) <$> siBase
+              Nothing ->
+                case M.lookup name siUnitMap of
+                   Just il -> pure il
+                   Nothing -> fail "not a unit command"))
     <|> (do Tok _ Word t <- satisfyTok isWordTok
             return $ str t)
-    <|> (symbol '^' *> (superscript <$> tok))
-    <|> (symbol '_' *> (subscript <$> tok))
-    <|> (str "\xa0" <$ symbol '.')
-    <|> (str "\xa0" <$ symbol '~')
      )
 
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+  [ ("atto", str "a")
+  , ("centi", str "c")
+  , ("deca", str "d")
+  , ("deci", str "d")
+  , ("deka", str "d")
+  , ("exa", str "E")
+  , ("femto", str "f")
+  , ("giga", str "G")
+  , ("hecto", str "h")
+  , ("kilo", str "k")
+  , ("mega", str "M")
+  , ("micro", str "μ")
+  , ("milli", str "m")
+  , ("nano", str "n")
+  , ("peta", str "P")
+  , ("pico", str "p")
+  , ("tera", str "T")
+  , ("yocto", str "y")
+  , ("yotta", str "Y")
+  , ("zepto", str "z")
+  , ("zetta", str "Z")
+  ]
+
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
   [ ("fg", str "fg")
@@ -303,7 +372,6 @@ siUnitMap = M.fromList
   , ("arcsecond", str "″")
   , ("astronomicalunit", str "ua")
   , ("atomicmassunit", str "u")
-  , ("atto", str "a")
   , ("bar", str "bar")
   , ("barn", str "b")
   , ("becquerel", str "Bq")
@@ -311,51 +379,38 @@ siUnitMap = M.fromList
   , ("bohr", emph (str "a") <> subscript (str "0"))
   , ("candela", str "cd")
   , ("celsius", str "°C")
-  , ("centi", str "c")
   , ("clight", emph (str "c") <> subscript (str "0"))
   , ("coulomb", str "C")
   , ("dalton", str "Da")
   , ("day", str "d")
-  , ("deca", str "d")
-  , ("deci", str "d")
   , ("decibel", str "db")
   , ("degreeCelsius",str "°C")
   , ("degree", str "°")
-  , ("deka", str "d")
   , ("electronmass", emph (str "m") <> subscript (str "e"))
   , ("electronvolt", str "eV")
   , ("elementarycharge", emph (str "e"))
-  , ("exa", str "E")
   , ("farad", str "F")
-  , ("femto", str "f")
-  , ("giga", str "G")
   , ("gram", str "g")
   , ("gray", str "Gy")
   , ("hartree", emph (str "E") <> subscript (str "h"))
   , ("hectare", str "ha")
-  , ("hecto", str "h")
   , ("henry", str "H")
   , ("hertz", str "Hz")
   , ("hour", str "h")
   , ("joule", str "J")
   , ("katal", str "kat")
   , ("kelvin", str "K")
-  , ("kilo", str "k")
   , ("kilogram", str "kg")
   , ("knot", str "kn")
   , ("liter", str "L")
   , ("litre", str "l")
   , ("lumen", str "lm")
   , ("lux", str "lx")
-  , ("mega", str "M")
   , ("meter", str "m")
   , ("metre", str "m")
-  , ("micro", str "μ")
-  , ("milli", str "m")
   , ("minute", str "min")
   , ("mmHg", str "mmHg")
   , ("mole", str "mol")
-  , ("nano", str "n")
   , ("nauticalmile", str "M")
   , ("neper", str "Np")
   , ("newton", str "N")
@@ -363,24 +418,17 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("peta", str "P")
-  , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
   , ("radian", str "rad")
   , ("second", str "s")
   , ("siemens", str "S")
   , ("sievert", str "Sv")
   , ("steradian", str "sr")
-  , ("tera", str "T")
   , ("tesla", str "T")
   , ("tonne", str "t")
   , ("volt", str "V")
   , ("watt", str "W")
   , ("weber", str "Wb")
-  , ("yocto", str "y")
-  , ("yotta", str "Y")
-  , ("zepto", str "z")
-  , ("zetta", str "Z")
   ]
 
 
diff --git a/test/command/6658.md b/test/command/6658.md
index 549610992..96700c8fe 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -1,15 +1,75 @@
 ```
-pandoc -f latex
-\SI{10.0 +- 3.3}{\ms}
+pandoc -f latex -t html
+\num{12345,67890}
+
+\num{1+-2i}
 
 \num{.3e45}
 
-\ang{+10;+3;}
+\num{1.654 x 2.34 x 3.430}
+
+\si{kg.m.s^{-1}}
+
+\si{\kilogram\metre\per\second}
+
+\si[per-mode=symbol]{\kilogram\metre\per\second}
+
+\si[per-mode=symbol]{\kilogram\metre\per\ampere\per\second}
+
+\numlist{10;20;30}
+
+\SIlist{0.13;0.67;0.80}{\milli\metre}
+
+\numrange{10}{20}
+
+\SIrange{0.13}{0.67}{\milli\metre}
+
+\ang{10}
+
+\ang{1;2;3}
+
+\ang{;;1}
+
+\ang{+10;;}
+
+\ang{-0;1;}
+
+\si{kg.m/s^2}
+
+\si{g_{polymer}~mol_{cat}.s^{-1}}
+
+\si{\kilo\gram\metre\per\square\second}
 
 \si{\gram\per\cubic\centi\metre}
+
+\si{\square\volt\cubic\lumen\per\farad}
+
+\si{\metre\squared\per\gray\cubic\lux}
+
+\si{\henry\second}
 ^D
-<p>10.0 ± 3.3 ms</p>
+<p>12345.67890</p>
+<p>1 ± 2i</p>
 <p>0.3 × 10<sup>45</sup></p>
-<p>10°3′</p>
-<p>g/cm<sup>3</sup></p>
+<p>1.654 × 2.34 × 3.430</p>
+<p>kg m s<sup>−1</sup></p>
+<p>kg m s<sup>−1</sup></p>
+<p>kg m/s</p>
+<p>kg m/A/s</p>
+<p>10, 20, &amp; 30</p>
+<p>0.13 mm, 0.67 mm, &amp; 0.80 mm</p>
+<p>10–20</p>
+<p>0.13 mm–0.67 mm</p>
+<p>10°</p>
+<p>1°2′3″</p>
+<p>1″</p>
+<p>10°</p>
+<p>-0°1′</p>
+<p>kg m/s<sup>2</sup></p>
+<p>g<sub>polymer</sub> mol<sub>cat</sub> s<sup>−1</sup></p>
+<p>kg m s<sup>−2</sup></p>
+<p>g cm<sup>−3</sup></p>
+<p>V<sup>2</sup> lm<sup>3</sup> F<sup>−1</sup></p>
+<p>m<sup>2</sup> Gy<sup>−1</sup> lx<sup>3</sup></p>
+<p>H s</p>
 ```
-- 
cgit v1.2.3


From 07d299d353761a7c29aa7e7a51371ad7842ec767 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 18:45:39 -0700
Subject: DocBook reader: ensure that first and last names are separated.

Closes #6541.
---
 src/Text/Pandoc/Readers/DocBook.hs | 20 ++++++++++++++------
 test/command/6541.md               | 27 +++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 6 deletions(-)
 create mode 100644 test/command/6541.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3db459cfd..b01ad3252 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -600,16 +600,24 @@ addMetadataFromElement e = do
          Nothing -> return ()
          Just z  -> addMetaField "author" z
     addMetaField "subtitle" e
-    addMetaField "author" e
+    addAuthor e
     addMetaField "date" e
     addMetaField "release" e
     addMetaField "releaseinfo" e
     return mempty
-  where addMetaField fieldname elt =
-            case filterChildren (named fieldname) elt of
-                   []  -> return ()
-                   [z] -> getInlines z >>= addMeta fieldname
-                   zs  -> mapM getInlines zs >>= addMeta fieldname
+  where
+   addAuthor elt =
+     case filterChildren (named "author") elt of
+       [] -> return ()
+       [z] -> fromAuthor z >>= addMeta "author"
+       zs  -> mapM fromAuthor zs >>= addMeta "author"
+   fromAuthor elt =
+     mconcat . intersperse space <$> mapM getInlines (elChildren elt)
+   addMetaField fieldname elt =
+     case filterChildren (named fieldname) elt of
+       []  -> return ()
+       [z] -> getInlines z >>= addMeta fieldname
+       zs  -> mapM getInlines zs >>= addMeta fieldname
 
 addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m ()
 addMeta field val = modify (setMeta field val)
diff --git a/test/command/6541.md b/test/command/6541.md
new file mode 100644
index 000000000..956340d4c
--- /dev/null
+++ b/test/command/6541.md
@@ -0,0 +1,27 @@
+```
+% pandoc -f docbook -t markdown -s
+<?xml version="1.0"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+  "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+<book>
+
+<bookinfo>
+<title>Title</title>
+<author>
+<firstname>Firstname</firstname><surname>Lastname</surname>
+</author>
+<releaseinfo>1.17</releaseinfo>
+</bookinfo>
+
+<para>Text.</para>
+
+</book>
+^D
+---
+author: Firstname Lastname
+releaseinfo: 1.17
+title: Title
+---
+
+Text.
+```
-- 
cgit v1.2.3


From f76fe2ab56606528d4710cc6c40bceb5788c3906 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 22 May 2021 13:29:13 +0200
Subject: HTML reader: simplify col width parsing

---
 src/Text/Pandoc/Readers/HTML/Table.hs | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index ad0b51253..6537bbce9 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -1,6 +1,5 @@
 {-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
    Copyright   : © 2006-2021 John MacFarlane,
@@ -42,18 +41,15 @@ pCol = try $ do
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let width = case lookup "width" attribs of
-                Nothing -> case lookup "style" attribs of
-                  Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    fromMaybe 0.0 $ safeRead (T.filter
-                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
-                  _ -> 0.0
-                Just (T.unsnoc -> Just (xs, '%')) ->
-                  fromMaybe 0.0 $ safeRead xs
-                _ -> 0.0
-  if width > 0.0
-    then return $ ColWidth $ width / 100.0
-    else return ColWidthDefault
+  let toColWidth = maybe ColWidthDefault (ColWidth . (/100.0)) . safeRead
+  return $ fromMaybe ColWidthDefault $
+    (case lookup "width" attribs >>= T.unsnoc of
+       Just (xs, '%') -> Just (toColWidth xs)
+       _ -> Nothing) <|>
+    (case lookup "style" attribs >>= T.stripPrefix "width" of
+        Just xs | T.any (== '%') xs -> Just . toColWidth $
+                  T.filter (`notElem` (" \t\r\n%'\";" :: [Char])) xs
+        _ -> Nothing)
 
 pColgroup :: PandocMonad m => TagParser m [ColWidth]
 pColgroup = try $ do
-- 
cgit v1.2.3


From 80b4b3fe82a19a4ea1e76fc4a81c9c88676c7ce0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 22 May 2021 22:03:51 -0700
Subject: Revert "HTML reader: simplify col width parsing"

This reverts commit f76fe2ab56606528d4710cc6c40bceb5788c3906.
---
 src/Text/Pandoc/Readers/HTML/Table.hs | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 6537bbce9..ad0b51253 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -1,5 +1,6 @@
 {-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
    Copyright   : © 2006-2021 John MacFarlane,
@@ -41,15 +42,18 @@ pCol = try $ do
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let toColWidth = maybe ColWidthDefault (ColWidth . (/100.0)) . safeRead
-  return $ fromMaybe ColWidthDefault $
-    (case lookup "width" attribs >>= T.unsnoc of
-       Just (xs, '%') -> Just (toColWidth xs)
-       _ -> Nothing) <|>
-    (case lookup "style" attribs >>= T.stripPrefix "width" of
-        Just xs | T.any (== '%') xs -> Just . toColWidth $
-                  T.filter (`notElem` (" \t\r\n%'\";" :: [Char])) xs
-        _ -> Nothing)
+  let width = case lookup "width" attribs of
+                Nothing -> case lookup "style" attribs of
+                  Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
+                    fromMaybe 0.0 $ safeRead (T.filter
+                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+                  _ -> 0.0
+                Just (T.unsnoc -> Just (xs, '%')) ->
+                  fromMaybe 0.0 $ safeRead xs
+                _ -> 0.0
+  if width > 0.0
+    then return $ ColWidth $ width / 100.0
+    else return ColWidthDefault
 
 pColgroup :: PandocMonad m => TagParser m [ColWidth]
 pColgroup = try $ do
-- 
cgit v1.2.3


From 1af2cfb2873c5bb6ddd9fc00d076088b2e62af30 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 22 May 2021 21:56:10 -0700
Subject: Handle relative lengths (e.g. `2*`) in HTML column widths.

See <https://www.w3.org/TR/html4/types.html#h-6.6>.

"A relative length has the form "i*", where "i" is an integer. When
allotting space among elements competing for that space, user agents
allot pixel and percentage lengths first, then divide up remaining
available space among relative lengths. Each relative length receives a
portion of the available space that is proportional to the integer
preceding the "*". The value "*" is equivalent to "1*". Thus, if 60
pixels of space are available after the user agent allots pixel and
percentage space, and the competing relative lengths are 1*, 2*, and 3*,
the 1* will be alloted 10 pixels, the 2* will be alloted 20 pixels, and
the 3* will be alloted 30 pixels."

Closes #4063.
---
 src/Text/Pandoc/Readers/HTML/Table.hs | 47 ++++++++++++++++++++++++-----------
 test/command/4063.md                  | 29 +++++++++++++++++++++
 2 files changed, 62 insertions(+), 14 deletions(-)
 create mode 100644 test/command/4063.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index ad0b51253..3a569dd0a 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where
 
 import Control.Applicative ((<|>))
 import Data.Maybe (fromMaybe)
+import Data.Either (lefts, rights)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import Text.HTML.TagSoup
@@ -33,34 +34,51 @@ import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 
--- | Parses a @<col>@ element, returning the column's width. Defaults to
--- @'ColWidthDefault'@ if the width is not set or cannot be determined.
-pCol :: PandocMonad m => TagParser m ColWidth
+-- | Parses a @<col>@ element, returning the column's width.
+-- An Either value is used:  Left i means a "relative length" with
+-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6);
+-- Right w means a regular width.  Defaults to @'Right ColWidthDefault'@
+-- if the width is not set or cannot be determined.
+pCol :: PandocMonad m => TagParser m (Either Int ColWidth)
 pCol = try $ do
   TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
   let attribs = toStringAttr attribs'
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let width = case lookup "width" attribs of
+  return $ case lookup "width" attribs of
                 Nothing -> case lookup "style" attribs of
                   Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    fromMaybe 0.0 $ safeRead (T.filter
-                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
-                  _ -> 0.0
+                    maybe (Right ColWidthDefault) (Right . ColWidth)
+                      $ safeRead (T.filter
+                                   (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+                  _ -> Right ColWidthDefault
+                Just (T.unsnoc -> Just (xs, '*')) ->
+                  maybe (Left 1) Left $ safeRead xs
                 Just (T.unsnoc -> Just (xs, '%')) ->
-                  fromMaybe 0.0 $ safeRead xs
-                _ -> 0.0
-  if width > 0.0
-    then return $ ColWidth $ width / 100.0
-    else return ColWidthDefault
+                  maybe (Right ColWidthDefault)
+                        (Right . ColWidth . (/ 100.0)) $ safeRead xs
+                _ -> Right ColWidthDefault
 
-pColgroup :: PandocMonad m => TagParser m [ColWidth]
+pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth]
 pColgroup = try $ do
   pSatisfy (matchTagOpen "colgroup" [])
   skipMany pBlank
   manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
 
+resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
+resolveRelativeLengths ws =
+  let remaining = 1 - sum (map getColWidth $ rights ws)
+      relatives = sum $ lefts ws
+      relUnit = remaining / fromIntegral relatives
+      toColWidth (Right x) = x
+      toColWidth (Left i) = ColWidth (fromIntegral i * relUnit)
+  in  map toColWidth ws
+
+getColWidth :: ColWidth -> Double
+getColWidth ColWidthDefault = 0
+getColWidth (ColWidth w) = w
+
 data CellType
   = HeaderCell
   | BodyCell
@@ -182,7 +200,8 @@ pTable :: PandocMonad m
 pTable block = try $ do
   TagOpen _ attribs <- pSatisfy (matchTagOpen "table" [])  <* skipMany pBlank
   caption <- option mempty $ pInTags "caption" block       <* skipMany pBlank
-  widths  <- ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
+  widths <- resolveRelativeLengths <$>
+              ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
   thead   <- pTableHead block               <* skipMany pBlank
   topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank
   tbodies <- many (pTableBody block)        <* skipMany pBlank
diff --git a/test/command/4063.md b/test/command/4063.md
new file mode 100644
index 000000000..838472b46
--- /dev/null
+++ b/test/command/4063.md
@@ -0,0 +1,29 @@
+```
+% pandoc -f html -t native
+<table>
+<colgroup>
+  <col width="30%" />
+  <col width="*" />
+</colgroup>
+<tr>
+  <td>1</td>
+  <td>2</td>
+</tr>
+</table>
+^D
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidth 0.3)
+ ,(AlignDefault,ColWidth 0.7)]
+ (TableHead ("",[],[])
+ [])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]]])]
+ (TableFoot ("",[],[])
+ [])]
+```
-- 
cgit v1.2.3


From d46ea7d7da3f842d265f09730c90cfc3691576ef Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 25 May 2021 16:54:42 +0200
Subject: Jira: add support for "smart" links

Support has been added for the new
`[alias|https://example.com|smart-card]` syntax.
---
 pandoc.cabal                    | 2 +-
 src/Text/Pandoc/Readers/Jira.hs | 2 ++
 src/Text/Pandoc/Writers/Jira.hs | 2 ++
 stack.yaml                      | 2 +-
 test/Tests/Readers/Jira.hs      | 8 ++++++++
 test/Tests/Writers/Jira.hs      | 8 ++++++++
 6 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 4e10e01f3..241196d7c 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -470,7 +470,7 @@ library
                  http-client-tls       >= 0.2.4    && < 0.4,
                  http-types            >= 0.8      && < 0.13,
                  ipynb                 >= 0.1      && < 0.2,
-                 jira-wiki-markup      >= 1.3.5    && < 1.4,
+                 jira-wiki-markup      >= 1.4      && < 1.5,
                  mtl                   >= 2.2      && < 2.3,
                  network               >= 2.6,
                  network-uri           >= 2.6      && < 2.8,
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index a3b415f09..cf111f173 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -172,6 +172,8 @@ jiraLinkToPandoc linkType alias url =
     Jira.Email      -> link ("mailto:" <> url') "" alias'
     Jira.Attachment -> linkWith ("", ["attachment"], []) url' "" alias'
     Jira.User       -> linkWith ("", ["user-account"], []) url' "" alias'
+    Jira.SmartCard  -> linkWith ("", ["smart-card"], []) url' "" alias'
+    Jira.SmartLink  -> linkWith ("", ["smart-link"], []) url' "" alias'
 
 -- | Get unicode representation of a Jira icon.
 iconUnicode :: Jira.Icon -> Text
diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index cf4dadebc..1351814e9 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -280,6 +280,8 @@ toJiraLink (_, classes, _) (url, _) alias = do
     | Just email <- T.stripPrefix "mailto:" url' = (Jira.Email, email)
     | "user-account" `elem` classes              = (Jira.User, dropTilde url)
     | "attachment" `elem` classes                = (Jira.Attachment, url)
+    | "smart-card" `elem` classes                = (Jira.SmartCard, url)
+    | "smart-link" `elem` classes                = (Jira.SmartLink, url)
     | otherwise                                  = (Jira.External, url)
   dropTilde txt = case T.uncons txt of
     Just ('~', username) -> username
diff --git a/stack.yaml b/stack.yaml
index 809157425..16e5ad2cf 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -9,7 +9,7 @@ packages:
 extra-deps:
 - hslua-1.3.0
 - hslua-module-path-0.1.0
-- jira-wiki-markup-1.3.5
+- jira-wiki-markup-1.4.0
 - random-1.2.0
 - unicode-collation-0.1.3
 - citeproc-0.4
diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs
index cb7dde4ea..b7194a3b9 100644
--- a/test/Tests/Readers/Jira.hs
+++ b/test/Tests/Readers/Jira.hs
@@ -167,6 +167,14 @@ tests =
       , "user with description" =:
         "[John Doe|~johndoe]" =?>
         para (linkWith ("", ["user-account"], []) "~johndoe" "" "John Doe")
+
+      , "'smart' link" =:
+        "[x|http://example.com|smart-link]" =?>
+        para (linkWith ("", ["smart-link"], []) "http://example.com" "" "x")
+
+      , "'smart' card" =:
+        "[x|http://example.com|smart-card]" =?>
+        para (linkWith ("", ["smart-card"], []) "http://example.com" "" "x")
       ]
 
     , "image" =:
diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs
index d8e856e34..00a7ae931 100644
--- a/test/Tests/Writers/Jira.hs
+++ b/test/Tests/Writers/Jira.hs
@@ -61,6 +61,14 @@ tests =
       , "user link with user as description" =:
         linkWith ("", ["user-account"], []) "~johndoe" "" "~johndoe" =?>
         "[~johndoe]"
+
+      , "'smart' link" =:
+        para (linkWith ("", ["smart-link"], []) "http://example.com" "" "x") =?>
+        "[x|http://example.com|smart-link]"
+
+      , "'smart' card" =:
+        para (linkWith ("", ["smart-card"], []) "http://example.org" "" "x") =?>
+        "[x|http://example.org|smart-card]"
       ]
 
     , testGroup "spans"
-- 
cgit v1.2.3


From bb2530caa414234f00e7c89ef18a538708b2297c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 25 May 2021 17:49:48 +0200
Subject: Use haddock-library-1.10.0

---
 pandoc.cabal                       | 2 +-
 src/Text/Pandoc/Readers/Haddock.hs | 3 ++-
 stack.yaml                         | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 241196d7c..c8e49c620 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -461,7 +461,7 @@ library
                  exceptions            >= 0.8      && < 0.11,
                  file-embed            >= 0.0      && < 0.1,
                  filepath              >= 1.1      && < 1.5,
-                 haddock-library       >= 1.8      && < 1.10,
+                 haddock-library       >= 1.10     && < 1.11,
                  hslua                 >= 1.1      && < 1.4,
                  hslua-module-path     >= 0.1.0    && < 0.2.0,
                  hslua-module-system   >= 0.2      && < 0.3,
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 35eaac0a9..67b3af2d3 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -131,7 +131,8 @@ docHToInlines isCode d' =
           DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
           _               -> mempty
     DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
-    DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s
+    DocModule s -> B.codeWith ("",["haskell","module"],[]) $
+                   T.pack (modLinkName s)
     DocWarning _ -> mempty -- TODO
     DocEmphasis d -> B.emph (docHToInlines isCode d)
     DocMonospaced (DocString s) -> B.code $ T.pack s
diff --git a/stack.yaml b/stack.yaml
index 16e5ad2cf..ea48fbb88 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -7,6 +7,7 @@ flags:
 packages:
 - '.'
 extra-deps:
+- haddock-library-1.10.0
 - hslua-1.3.0
 - hslua-module-path-0.1.0
 - jira-wiki-markup-1.4.0
-- 
cgit v1.2.3


From 105a50569be6d2c1b37cc290abcbfbae1cd0fd1e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 25 May 2021 17:49:48 +0200
Subject: Allow compilation with base 4.15

---
 src/Text/Pandoc/Options.hs                   | 34 +++++-----
 src/Text/Pandoc/Readers/Docx/Combine.hs      |  6 +-
 src/Text/Pandoc/Readers/Odt/ContentReader.hs | 11 ++--
 src/Text/Pandoc/Writers/OpenDocument.hs      | 98 +++++++++++++---------------
 4 files changed, 72 insertions(+), 77 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 92bda36b2..85d9aa103 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -315,6 +315,23 @@ defaultMathJaxURL = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.j
 defaultKaTeXURL :: Text
 defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/"
 
+-- Update documentation in doc/filters.md if this is changed.
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''TrackChanges)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''WrapOption)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated . drop 8
+                           } ''TopLevelDivision)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''ReferenceLocation)
+
 -- Update documentation in doc/filters.md if this is changed.
 $(deriveJSON defaultOptions ''ReaderOptions)
 
@@ -338,20 +355,3 @@ $(deriveJSON defaultOptions{ constructorTagModifier =
                            } ''ObfuscationMethod)
 
 $(deriveJSON defaultOptions ''HTMLSlideVariant)
-
--- Update documentation in doc/filters.md if this is changed.
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''TrackChanges)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''WrapOption)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated . drop 8
-                           } ''TopLevelDivision)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''ReferenceLocation)
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 7c6d01769..6e4faa639 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -61,7 +61,7 @@ import Data.List
 import Data.Bifunctor
 import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
                      , (><), (|>) )
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder as B
 
 data Modifier a = Modifier (a -> a)
                 | AttrModifier (Attr -> a -> a) Attr
@@ -116,12 +116,12 @@ ilModifierAndInnards ils = case viewl $ unMany ils of
 
 inlinesL :: Inlines -> (Inlines, Inlines)
 inlinesL ils = case viewl $ unMany ils of
-  (s :< sq) -> (singleton s, Many sq)
+  (s :< sq) -> (B.singleton s, Many sq)
   _         -> (mempty, ils)
 
 inlinesR :: Inlines -> (Inlines, Inlines)
 inlinesR ils = case viewr $ unMany ils of
-  (sq :> s) -> (Many sq, singleton s)
+  (sq :> s) -> (Many sq, B.singleton s)
   _         -> (ils, mempty)
 
 combineInlines :: Inlines -> Inlines -> Inlines
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index c4220b0db..5520d039f 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE Arrows            #-}
+{-# LANGUAGE CPP               #-}
 {-# LANGUAGE DeriveFoldable    #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE PatternGuards     #-}
@@ -33,7 +34,7 @@ import Data.List (find)
 import qualified Data.Map as M
 import qualified Data.Text as T
 import Data.Maybe
-import Data.Semigroup (First(..), Option(..))
+import Data.Monoid (Alt (..))
 
 import Text.TeXMath (readMathML, writeTeX)
 import qualified Text.Pandoc.XML.Light as XML
@@ -505,13 +506,11 @@ type InlineMatcher = ElementMatcher Inlines
 
 type BlockMatcher  = ElementMatcher Blocks
 
-
-newtype FirstMatch a = FirstMatch (Option (First a))
-                     deriving (Foldable, Monoid, Semigroup)
+newtype FirstMatch a = FirstMatch (Alt Maybe a)
+  deriving (Foldable, Monoid, Semigroup)
 
 firstMatch :: a -> FirstMatch a
-firstMatch = FirstMatch . Option . Just . First
-
+firstMatch = FirstMatch . Alt . Just
 
 --
 matchingElement :: (Monoid e)
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 34a3a4aa5..5f3224c2f 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleContexts  #-}
+{-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE PatternGuards     #-}
 {-# LANGUAGE ViewPatterns      #-}
@@ -372,36 +373,32 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
 
 -- | Convert a Pandoc block element to OpenDocument.
 blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m (Doc Text)
-blockToOpenDocument o bs
-    | Plain          b <- bs = if null b
-                                  then return empty
-                                  else inParagraphTags =<< inlinesToOpenDocument o b
-    | Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] <- bs
-                             = figure attr c s t
-    | Para           b <- bs = if null b &&
-                                    not (isEnabled Ext_empty_paragraphs o)
-                                  then return empty
-                                  else inParagraphTags =<< inlinesToOpenDocument o b
-    | LineBlock      b <- bs = blockToOpenDocument o $ linesToPara b
-    | Div attr xs      <- bs = mkDiv attr xs
-    | Header     i (ident,_,_) b
-                       <- bs = setFirstPara >> (inHeaderTags i ident
-                                  =<< inlinesToOpenDocument o b)
-    | BlockQuote     b <- bs = setFirstPara >> mkBlockQuote b
-    | DefinitionList b <- bs = setFirstPara >> defList b
-    | BulletList     b <- bs = setFirstPara >> bulletListToOpenDocument o b
-    | OrderedList  a b <- bs = setFirstPara >> orderedList a b
-    | CodeBlock    _ s <- bs = setFirstPara >> preformatted s
-    | Table a bc s th tb tf <- bs =  setFirstPara >> table (Ann.toTable a bc s th tb tf)
-    | HorizontalRule   <- bs = setFirstPara >> return (selfClosingTag "text:p"
-                                [ ("text:style-name", "Horizontal_20_Line") ])
-    | RawBlock f     s <- bs = if f == Format "opendocument"
-                                  then return $ text $ T.unpack s
-                                  else do
-                                    report $ BlockNotRendered bs
-                                    return empty
-    | Null             <- bs = return empty
-    | otherwise              = return empty
+blockToOpenDocument o = \case
+    Plain          b -> if null b
+                        then return empty
+                        else inParagraphTags =<< inlinesToOpenDocument o b
+    Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] -> figure attr c s t
+    Para           b -> if null b &&
+                           not (isEnabled Ext_empty_paragraphs o)
+                        then return empty
+                        else inParagraphTags =<< inlinesToOpenDocument o b
+    LineBlock      b -> blockToOpenDocument o $ linesToPara b
+    Div attr xs      -> mkDiv attr xs
+    Header     i (ident,_,_) b -> do
+      setFirstPara
+      inHeaderTags i ident =<< inlinesToOpenDocument o b
+    BlockQuote     b -> setFirstPara >> mkBlockQuote b
+    DefinitionList b -> setFirstPara >> defList b
+    BulletList     b -> setFirstPara >> bulletListToOpenDocument o b
+    OrderedList  a b -> setFirstPara >> orderedList a b
+    CodeBlock    _ s -> setFirstPara >> preformatted s
+    Table a bc s th tb tf -> setFirstPara >> table (Ann.toTable a bc s th tb tf)
+    HorizontalRule   -> setFirstPara >> return (selfClosingTag "text:p"
+                         [ ("text:style-name", "Horizontal_20_Line") ])
+    b@(RawBlock f s) -> if f == Format "opendocument"
+                        then return $ text $ T.unpack s
+                        else empty <$ report (BlockNotRendered b)
+    Null             -> return empty
     where
       defList       b = do setInDefinitionList True
                            r <- vcat  <$> mapM (deflistItemToOpenDocument o) b
@@ -874,27 +871,26 @@ data TextStyle = Italic
 textStyleAttr :: Map.Map Text Text
               -> TextStyle
               -> Map.Map Text Text
-textStyleAttr m s
-    | Italic <- s = Map.insert "fo:font-style" "italic" .
-                    Map.insert "style:font-style-asian" "italic" .
-                    Map.insert "style:font-style-complex" "italic" $ m
-    | Bold   <- s = Map.insert "fo:font-weight" "bold" .
-                    Map.insert "style:font-weight-asian" "bold" .
-                    Map.insert "style:font-weight-complex" "bold" $ m
-    | Under  <- s = Map.insert "style:text-underline-style" "solid" .
-                    Map.insert "style:text-underline-width" "auto" .
-                    Map.insert "style:text-underline-color" "font-color" $ m
-    | Strike <- s = Map.insert "style:text-line-through-style" "solid" m
-    | Sub    <- s = Map.insert "style:text-position" "sub 58%" m
-    | Sup    <- s = Map.insert "style:text-position" "super 58%" m
-    | SmallC <- s = Map.insert "fo:font-variant" "small-caps" m
-    | Pre    <- s = Map.insert "style:font-name" "Courier New" .
-                    Map.insert "style:font-name-asian" "Courier New" .
-                    Map.insert "style:font-name-complex" "Courier New" $ m
-    | Language lang <- s
-                  = Map.insert "fo:language" (langLanguage lang) .
-                    maybe id (Map.insert "fo:country") (langRegion lang) $ m
-    | otherwise   = m
+textStyleAttr m = \case
+  Italic -> Map.insert "fo:font-style" "italic" .
+                Map.insert "style:font-style-asian" "italic" .
+                Map.insert "style:font-style-complex" "italic" $ m
+  Bold   -> Map.insert "fo:font-weight" "bold" .
+                Map.insert "style:font-weight-asian" "bold" .
+                Map.insert "style:font-weight-complex" "bold" $ m
+  Under  -> Map.insert "style:text-underline-style" "solid" .
+                Map.insert "style:text-underline-width" "auto" .
+                Map.insert "style:text-underline-color" "font-color" $ m
+  Strike -> Map.insert "style:text-line-through-style" "solid" m
+  Sub    -> Map.insert "style:text-position" "sub 58%" m
+  Sup    -> Map.insert "style:text-position" "super 58%" m
+  SmallC -> Map.insert "fo:font-variant" "small-caps" m
+  Pre    -> Map.insert "style:font-name" "Courier New" .
+            Map.insert "style:font-name-asian" "Courier New" .
+            Map.insert "style:font-name-complex" "Courier New" $ m
+  Language lang ->
+            Map.insert "fo:language" (langLanguage lang) .
+            maybe id (Map.insert "fo:country") (langRegion lang) $ m
 
 withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
 withLangFromAttr (_,_,kvs) action =
-- 
cgit v1.2.3


From 81eadfd99ad3e905b806cc6c80ab0fea0185286f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 26 May 2021 22:50:35 -0700
Subject: LaTeX reader: improve `\def` and implement `\newif`.

- Improve parsing of `\def` macros.  We previously set "verbatim mode"
  even for parsing the initial `\def`; this caused problems for things
  like
  ```
  \def\foo{\def\bar{BAR}}
  \foo
  \bar
  ```
- Implement `\newif`.
- Add tests.
---
 src/Text/Pandoc/Readers/LaTeX/Macro.hs   | 59 ++++++++++++++++++++++++--------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 19 +++++++++-
 test/command/newif.md                    | 55 +++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 15 deletions(-)
 create mode 100644 test/command/newif.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
index 607f5438c..5495a8e74 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -14,6 +14,7 @@ import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
 import Control.Applicative ((<|>), optional)
 import qualified Data.Map as M
 import Data.Text (Text)
+import qualified Data.Text as T
 
 macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
 macroDef constructor = do
@@ -22,9 +23,11 @@ macroDef constructor = do
       guardDisabled Ext_latex_macros)
      <|> return mempty
   where commandDef = do
-          (name, macro') <- newcommand <|> letmacro <|> defmacro
+          nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
           guardDisabled Ext_latex_macros <|>
-           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
+           mapM_ (\(name, macro') ->
+                   updateState (\s -> s{ sMacros = M.insert name macro'
+                                          (sMacros s) })) nameMacroPairs
         environmentDef = do
           mbenv <- newenvironment
           case mbenv of
@@ -40,7 +43,7 @@ macroDef constructor = do
         -- @\newcommand{\envname}[n-args][default]{begin}@
         -- @\newcommand{\endenvname}@
 
-letmacro :: PandocMonad m => LP m (Text, Macro)
+letmacro :: PandocMonad m => LP m [(Text, Macro)]
 letmacro = do
   controlSeq "let"
   (name, contents) <- withVerbatimMode $ do
@@ -53,18 +56,47 @@ letmacro = do
     contents <- bracedOrToken
     return (name, contents)
   contents' <- doMacros' 0 contents
-  return (name, Macro ExpandWhenDefined [] Nothing contents')
+  return [(name, Macro ExpandWhenDefined [] Nothing contents')]
 
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
+defmacro :: PandocMonad m => LP m [(Text, Macro)]
+defmacro = do
   -- we use withVerbatimMode, because macros are to be expanded
   -- at point of use, not point of definition
+  controlSeq "def"
   withVerbatimMode $ do
-    controlSeq "def"
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     argspecs <- many (argspecArg <|> argspecPattern)
     contents <- bracedOrToken
-    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
+    return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+
+-- \newif\iffoo' defines:
+-- \iffoo to be \iffalse
+-- \footrue to be a command that defines \iffoo to be \iftrue
+-- \foofalse to be a command that defines \iffoo to be \iffalse
+newif :: PandocMonad m => LP m [(Text, Macro)]
+newif = do
+  controlSeq "newif"
+  withVerbatimMode $ do
+    Tok pos (CtrlSeq name) _ <- anyControlSeq
+    -- \def\iffoo\iffalse
+    -- \def\footrue{\def\iffoo\iftrue}
+    -- \def\foofalse{\def\iffoo\iffalse}
+    let base = T.drop 2 name
+    return [ (name, Macro ExpandWhenUsed [] Nothing
+                    [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
+           , (base <> "true",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iftrue") "\\iftrue"
+                   ])
+           , (base <> "false",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iffalse") "\\iffalse"
+                   ])
+           ]
 
 argspecArg :: PandocMonad m => LP m ArgSpec
 argspecArg = do
@@ -77,10 +109,9 @@ argspecPattern =
                               (toktype' == Symbol || toktype' == Word) &&
                               (txt /= "{" && txt /= "\\" && txt /= "}")))
 
-newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand :: PandocMonad m => LP m [(Text, Macro)]
 newcommand = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+  Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
                              controlSeq "renewcommand" <|>
                              controlSeq "providecommand" <|>
                              controlSeq "DeclareMathOperator" <|>
@@ -112,9 +143,9 @@ newcommand = do
         Just macro
           | mtype == "newcommand" -> do
               report $ MacroAlreadyDefined txt pos
-              return (name, macro)
-          | mtype == "providecommand" -> return (name, macro)
-        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
+              return [(name, macro)]
+          | mtype == "providecommand" -> return [(name, macro)]
+        _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
 
 newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
 newenvironment = do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 1c77eb299..a17b1f324 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -113,7 +113,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Shared
 import Text.Parsec.Pos
--- import Debug.Trace
 
 newtype DottedNum = DottedNum [Int]
   deriving (Show, Eq)
@@ -563,8 +562,26 @@ trySpecialMacro "xspace" ts = do
     Tok pos Word t : _
       | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
     _ -> return ts'
+trySpecialMacro "iftrue" ts = handleIf True ts
+trySpecialMacro "iffalse" ts = handleIf False ts
 trySpecialMacro _ _ = mzero
 
+handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok]
+handleIf b ts = do
+  res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts
+  case res' of
+    Left _ -> Prelude.fail "Could not parse conditional"
+    Right ts' -> return ts'
+
+ifParser :: PandocMonad m => Bool -> LP m [Tok]
+ifParser b = do
+  ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi")
+                    *> anyTok)
+  elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi"))
+                 <|> ([] <$ controlSeq "fi")
+  rest <- getInput
+  return $ (if b then ifToks else elseToks) ++ rest
+
 startsWithAlphaNum :: Text -> Bool
 startsWithAlphaNum t =
   case T.uncons t of
diff --git a/test/command/newif.md b/test/command/newif.md
new file mode 100644
index 000000000..f444f14c9
--- /dev/null
+++ b/test/command/newif.md
@@ -0,0 +1,55 @@
+```
+% pandoc -f latex -t plain
+\iftrue
+should print
+\iftrue
+should print
+\else
+should not print
+\fi
+\else
+should not print
+\fi
+
+\iffalse
+should not print
+\else
+\iftrue
+should print
+\else
+should not print
+\fi
+\fi
+
+\newif\ifepub
+
+\ifepub
+should not print
+\fi
+
+\epubtrue
+
+\ifepub
+should print
+\else
+should not print
+\fi
+
+\epubfalse
+
+\ifepub
+should not print
+\else
+should print
+\fi
+^D
+should print
+
+should print
+
+should print
+
+should print
+
+should print
+```
-- 
cgit v1.2.3


From 834da53058069fe50da510fa86e0807a7ff7868f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 24 May 2021 10:17:37 -0700
Subject: Add `rebase_relative_paths` extension.

- Add manual entry for (non-default) extension
  `rebase_relative_paths`.
- Add constructor `Ext_rebase_relative_paths` to `Extensions`
  in Text.Pandoc.Extensions [API change]. When enabled, this
  extension rewrites relative image and link paths by prepending
  the (relative) directory of the containing file.
- Make Markdown reader sensitive to the new extension.
- Add tests for #3752.

Closes #3752.

NB. currently the extension applies to markdown and associated
readers but not commonmark/gfm.
---
 MANUAL.txt                          |  28 ++++++++++++++++++++++++++--
 pandoc.cabal                        |   4 ++++
 src/Text/Pandoc/Extensions.hs       |   3 +++
 src/Text/Pandoc/Readers/Markdown.hs |  36 +++++++++++++++++++++++++++++-------
 test/command/3752.md                |  35 +++++++++++++++++++++++++++++++++++
 test/command/chap1/spider.png       | Bin 0 -> 63531 bytes
 test/command/chap1/text.md          |  11 +++++++++++
 test/command/chap2/spider.png       | Bin 0 -> 9861 bytes
 test/command/chap2/text.md          |   3 +++
 9 files changed, 111 insertions(+), 9 deletions(-)
 create mode 100644 test/command/3752.md
 create mode 100644 test/command/chap1/spider.png
 create mode 100644 test/command/chap1/text.md
 create mode 100644 test/command/chap2/spider.png
 create mode 100644 test/command/chap2/text.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index 6dc783e8c..48bf60d6e 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3755,7 +3755,7 @@ definition:
 Note that space between items in a definition list is required.
 (A variant that loosens this requirement, but disallows "lazy"
 hard wrapping, can be activated with `compact_definition_lists`: see
-[Non-pandoc extensions], below.)
+[Non-default extensions], below.)
 
 [^3]:  I have been influenced by the suggestions of [David
   Wheeler](https://justatheory.com/2009/02/modest-markdown-proposal/).
@@ -5051,13 +5051,37 @@ author-in-text style inside notes when using a note style.
 [finding and editing styles]: https://citationstyles.org/authors/
 [CSL locale files]: https://github.com/citation-style-language/locales
 
-## Non-pandoc extensions
+## Non-default extensions
 
 The following Markdown syntax extensions are not enabled by default
 in pandoc, but may be enabled by adding `+EXTENSION` to the format
 name, where `EXTENSION` is the name of the extension.  Thus, for
 example, `markdown+hard_line_breaks` is Markdown with hard line breaks.
 
+#### Extension: `rebase_relative_paths` ####
+
+Rewrite relative paths for Markdown links and images, depending
+on the path of the file containing the link or image link.  For
+each link or image, pandoc will compute the directory of the
+containing file, relative to the working directory, and prepend
+the resulting path to the link or image path.
+
+The use of this extension is best understood by example.
+Suppose you have a a subdirectory for each chapter of a book,
+`chap1`, `chap2`, `chap3`. Each contains a file `text.md` and a
+number of images used in the chapter.  You would like to have
+`![image](spider.jpg)` in `chap1/text.md` refer to
+`chap1/spider.jpg` and `![image](spider.jpg)` in `chap2/text.md`
+refer to `chap2/spider.jpg`.  To do this, use
+
+    pandoc chap*/*.md -f markdown+rebase_relative_paths
+
+Without this extension, you would have to use
+`![image](chap1/spider.jpg)` in `chap1/text.md` and
+`![image](chap2/spider.jpg)` in `chap2/text.md`.  Links with
+relative paths will be rewritten in the same way as images.
+*This option currently only affects Markdown input.*
+
 #### Extension: `attributes` ####
 
 Allows attributes to be attached to any inline or block-level
diff --git a/pandoc.cabal b/pandoc.cabal
index c74df3e69..c8343d16e 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -214,6 +214,10 @@ extra-source-files:
                  test/command/C.txt
                  test/command/D.txt
                  test/command/01.csv
+                 test/command/chap1/spider.png
+                 test/command/chap2/spider.png
+                 test/command/chap1/text.md
+                 test/command/chap2/text.md
                  test/command/defaults1.yaml
                  test/command/defaults2.yaml
                  test/command/defaults3.yaml
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 6423d5f56..c4d54c06e 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -136,6 +136,8 @@ data Extension =
     | Ext_raw_html            -- ^ Allow raw HTML
     | Ext_raw_tex             -- ^ Allow raw TeX (other than math)
     | Ext_raw_markdown        -- ^ Parse markdown in ipynb as raw markdown
+    | Ext_rebase_relative_paths -- ^ Rebase relative image and link paths,
+                                -- relative to directory of containing file
     | Ext_shortcut_reference_links -- ^ Shortcut reference links
     | Ext_simple_tables       -- ^ Pandoc-style simple tables
     | Ext_smart               -- ^ "Smart" quotes, apostrophes, ellipses, dashes
@@ -462,6 +464,7 @@ getAllExtensions f = universalExtensions <> getAll f
        , Ext_gutenberg
        , Ext_smart
        , Ext_literate_haskell
+       , Ext_rebase_relative_paths
        ]
   getAll "markdown_strict"   = allMarkdownExtensions
   getAll "markdown_phpextra" = allMarkdownExtensions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 34f16ab4e..968c6c165 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -29,7 +29,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.ByteString.Lazy as BL
-import System.FilePath (addExtension, takeExtension)
+import System.FilePath (addExtension, takeExtension, isAbsolute, takeDirectory)
 import Text.HTML.TagSoup hiding (Row)
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
@@ -1836,9 +1836,12 @@ regLink :: PandocMonad m
         -> MarkdownParser m (F Inlines)
 regLink constructor lab = try $ do
   (src, tit) <- source
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  pos <- getPosition
+  let src' = if rebase then rebasePath pos src else src
   attr <- option nullAttr $
           guardEnabled Ext_link_attributes >> attributes
-  return $ constructor attr src tit <$> lab
+  return $ constructor attr src' tit <$> lab
 
 -- a link like [this][ref] or [this][] or [this]
 referenceLink :: PandocMonad m
@@ -1854,6 +1857,8 @@ referenceLink constructor (lab, raw) = do
                          return (mempty, "")))
       <|>
       try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  pos <- getPosition
   when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
   let labIsRef = raw' == "" || raw' == "[]"
   let key = toKey $ if labIsRef then raw else raw'
@@ -1878,7 +1883,9 @@ referenceLink constructor (lab, raw) = do
                    Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
                    Nothing              -> makeFallback
             else makeFallback
-       Just ((src,tit), attr) -> constructor attr src tit <$> lab
+       Just ((src,tit), attr) ->
+          let src' = if rebase then rebasePath pos src else src
+           in constructor attr src' tit <$> lab
 
 dropBrackets :: Text -> Text
 dropBrackets = dropRB . dropLB
@@ -1911,15 +1918,30 @@ autoLink = try $ do
   return $ return $ B.linkWith attr (src <> escapeURI extra) ""
                      (B.str $ orig <> extra)
 
+-- | Rebase a relative path, by adding the (relative) directory
+-- of the containing source position.  Absolute links and URLs
+-- are untouched.
+rebasePath :: SourcePos -> Text -> Text
+rebasePath pos path = do
+  let fp = sourceName pos
+   in if isAbsolute (T.unpack path) || isURI path
+         then path
+         else
+           case takeDirectory fp of
+             ""  -> path
+             "." -> path
+             d   -> T.pack d <> "/" <> path
+
 image :: PandocMonad m => MarkdownParser m (F Inlines)
 image = try $ do
   char '!'
   (lab,raw) <- reference
   defaultExt <- getOption readerDefaultImageExtension
-  let constructor attr' src = case takeExtension (T.unpack src) of
-                          "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
-                                                          $ T.unpack defaultExt)
-                          _  -> B.imageWith attr' src
+  let constructor attr' src =
+         case takeExtension (T.unpack src) of
+            "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
+                                            $ T.unpack defaultExt)
+            _  -> B.imageWith attr' src
   regLink constructor lab <|> referenceLink constructor (lab,raw)
 
 note :: PandocMonad m => MarkdownParser m (F Inlines)
diff --git a/test/command/3752.md b/test/command/3752.md
new file mode 100644
index 000000000..76d51989b
--- /dev/null
+++ b/test/command/3752.md
@@ -0,0 +1,35 @@
+```
+% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx | pandoc -f docx -t plain
+^D
+[INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png
+[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
+[INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png
+Chapter one
+
+A spider: [spider]
+
+The moon: [moon]
+
+Link to spider picture.
+
+URL left alone: manual.
+
+Absolute path left alone: absolute.
+
+Chapter two
+
+A spider: [spider]
+```
+
+```
+% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths -t html
+^D
+<h1 id="chapter-one">Chapter one</h1>
+<p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p>
+<p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p>
+<p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
+<p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
+<p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
+<h1 id="chapter-two">Chapter two</h1>
+<p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
+```
diff --git a/test/command/chap1/spider.png b/test/command/chap1/spider.png
new file mode 100644
index 000000000..7ee9fe339
Binary files /dev/null and b/test/command/chap1/spider.png differ
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
new file mode 100644
index 000000000..88b30313d
--- /dev/null
+++ b/test/command/chap1/text.md
@@ -0,0 +1,11 @@
+# Chapter one
+
+A spider: ![spider](spider.png)
+
+The moon: ![moon](../../lalune.jpg)
+
+Link to [spider picture](spider.png).
+
+URL left alone: [manual](https://pandoc.org/MANUAL.html).
+
+Absolute path left alone: [absolute](/foo/bar/baz.png).
diff --git a/test/command/chap2/spider.png b/test/command/chap2/spider.png
new file mode 100644
index 000000000..5377e940b
Binary files /dev/null and b/test/command/chap2/spider.png differ
diff --git a/test/command/chap2/text.md b/test/command/chap2/text.md
new file mode 100644
index 000000000..435a266d7
--- /dev/null
+++ b/test/command/chap2/text.md
@@ -0,0 +1,3 @@
+# Chapter two
+
+A spider: ![spider](spider.png)
-- 
cgit v1.2.3


From 6972a7dc9158c1d56b2dc9a06f596386f6b30860 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 27 May 2021 11:26:38 -0700
Subject: Modify rebase_reference_links treatment of reference links/images.

The directory is based on the file containing the link
reference, not the file containing the link, if these differ.
---
 MANUAL.txt                          | 6 +++++-
 src/Text/Pandoc/Readers/Markdown.hs | 9 ++++-----
 test/command/3752.md                | 5 ++++-
 test/command/chap1/text.md          | 2 ++
 test/command/chap2/text.md          | 2 ++
 5 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index 2ca50e44a..d755c0c00 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5080,7 +5080,11 @@ Without this extension, you would have to use
 `![image](chap1/spider.jpg)` in `chap1/text.md` and
 `![image](chap2/spider.jpg)` in `chap2/text.md`.  Links with
 relative paths will be rewritten in the same way as images.
-*This option currently only affects Markdown input.*
+
+Note that relative paths in reference links and images will
+be rewritten relative to the file containing the link
+reference definition, not the file containing the reference link
+or image itself, if these differ.
 
 #### Extension: `attributes` ####
 
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 968c6c165..e5cbadc94 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -336,7 +336,9 @@ referenceKey = try $ do
                     notFollowedBy' (() <$ reference)
                     many1Char $ notFollowedBy space >> litChar
   let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
-  src <- try betweenAngles <|> sourceURL
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  src <- (if rebase then rebasePath pos else id) <$>
+             (try betweenAngles <|> sourceURL)
   tit <- option "" referenceTitle
   attr   <- option nullAttr $ try $
               do guardEnabled Ext_link_attributes
@@ -1857,8 +1859,6 @@ referenceLink constructor (lab, raw) = do
                          return (mempty, "")))
       <|>
       try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
-  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
-  pos <- getPosition
   when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
   let labIsRef = raw' == "" || raw' == "[]"
   let key = toKey $ if labIsRef then raw else raw'
@@ -1884,8 +1884,7 @@ referenceLink constructor (lab, raw) = do
                    Nothing              -> makeFallback
             else makeFallback
        Just ((src,tit), attr) ->
-          let src' = if rebase then rebasePath pos src else src
-           in constructor attr src' tit <$> lab
+           constructor attr src tit <$> lab
 
 dropBrackets :: Text -> Text
 dropBrackets = dropRB . dropLB
diff --git a/test/command/3752.md b/test/command/3752.md
index 76d51989b..6d31eb450 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -2,12 +2,14 @@
 % pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx | pandoc -f docx -t plain
 ^D
 [INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png
-[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
 [INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png
+[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
 Chapter one
 
 A spider: [spider]
 
+Another spider: [another spider]
+
 The moon: [moon]
 
 Link to spider picture.
@@ -26,6 +28,7 @@ A spider: [spider]
 ^D
 <h1 id="chapter-one">Chapter one</h1>
 <p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p>
+<p>Another spider: <img src="command/chap2/spider.png" alt="another spider" /></p>
 <p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p>
 <p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
 <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
index 88b30313d..d25514241 100644
--- a/test/command/chap1/text.md
+++ b/test/command/chap1/text.md
@@ -2,6 +2,8 @@
 
 A spider: ![spider](spider.png)
 
+Another spider: ![another spider][refspider]
+
 The moon: ![moon](../../lalune.jpg)
 
 Link to [spider picture](spider.png).
diff --git a/test/command/chap2/text.md b/test/command/chap2/text.md
index 435a266d7..082a1d79e 100644
--- a/test/command/chap2/text.md
+++ b/test/command/chap2/text.md
@@ -1,3 +1,5 @@
 # Chapter two
 
 A spider: ![spider](spider.png)
+
+[refspider]: spider.png
-- 
cgit v1.2.3


From 0661ce699fe83c64dd5a5874e8ab17241b19550c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 27 May 2021 13:53:26 -0700
Subject: rebase_relative_paths extension: don't change fragment paths.

We don't want a pure fragment path to be rewritten, since
these are used for cross-referencing.
---
 MANUAL.txt                          | 3 +++
 src/Text/Pandoc/Readers/Markdown.hs | 3 ++-
 test/command/3752.md                | 3 +++
 test/command/chap1/text.md          | 2 ++
 4 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index d755c0c00..fb1a76e9a 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5081,6 +5081,9 @@ Without this extension, you would have to use
 `![image](chap2/spider.jpg)` in `chap2/text.md`.  Links with
 relative paths will be rewritten in the same way as images.
 
+Absolute paths and URLs are not changed.  Neither are paths
+consisting entirely of a fragment, e.g. `[section one](#foo)`.
+
 Note that relative paths in reference links and images will
 be rewritten relative to the file containing the link
 reference definition, not the file containing the reference link
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e5cbadc94..74f2668e4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1923,7 +1923,8 @@ autoLink = try $ do
 rebasePath :: SourcePos -> Text -> Text
 rebasePath pos path = do
   let fp = sourceName pos
-   in if isAbsolute (T.unpack path) || isURI path
+      isFragment = T.take 1 path == "#"
+   in if isFragment || isAbsolute (T.unpack path) || isURI path
          then path
          else
            case takeDirectory fp of
diff --git a/test/command/3752.md b/test/command/3752.md
index 6d31eb450..2765d79ac 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -18,6 +18,8 @@ URL left alone: manual.
 
 Absolute path left alone: absolute.
 
+Link to fragment: chapter two.
+
 Chapter two
 
 A spider: [spider]
@@ -33,6 +35,7 @@ A spider: [spider]
 <p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
 <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
 <p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
+<p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p>
 <h1 id="chapter-two">Chapter two</h1>
 <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
 ```
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
index d25514241..68a317161 100644
--- a/test/command/chap1/text.md
+++ b/test/command/chap1/text.md
@@ -11,3 +11,5 @@ Link to [spider picture](spider.png).
 URL left alone: [manual](https://pandoc.org/MANUAL.html).
 
 Absolute path left alone: [absolute](/foo/bar/baz.png).
+
+Link to fragment: [chapter two](#chapter-two).
-- 
cgit v1.2.3


From 4b16d181e7219ed161a0e03c0c5ee9dec4b526b4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 27 May 2021 14:16:37 -0700
Subject: rebase_relative_paths: leave empty paths unchanged.

---
 MANUAL.txt                          | 4 ++--
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 test/command/3752.md                | 3 +++
 test/command/chap1/text.md          | 2 ++
 4 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/MANUAL.txt b/MANUAL.txt
index fb1a76e9a..c234debc9 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5081,8 +5081,8 @@ Without this extension, you would have to use
 `![image](chap2/spider.jpg)` in `chap2/text.md`.  Links with
 relative paths will be rewritten in the same way as images.
 
-Absolute paths and URLs are not changed.  Neither are paths
-consisting entirely of a fragment, e.g. `[section one](#foo)`.
+Absolute paths and URLs are not changed.  Neither are empty
+paths or paths consisting entirely of a fragment, e.g., `#foo`.
 
 Note that relative paths in reference links and images will
 be rewritten relative to the file containing the link
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 74f2668e4..bc5e3e30f 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1924,7 +1924,7 @@ rebasePath :: SourcePos -> Text -> Text
 rebasePath pos path = do
   let fp = sourceName pos
       isFragment = T.take 1 path == "#"
-   in if isFragment || isAbsolute (T.unpack path) || isURI path
+   in if T.null path || isFragment || isAbsolute (T.unpack path) || isURI path
          then path
          else
            case takeDirectory fp of
diff --git a/test/command/3752.md b/test/command/3752.md
index 2765d79ac..df8af0ba1 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -20,6 +20,8 @@ Absolute path left alone: absolute.
 
 Link to fragment: chapter two.
 
+Empty path: empty.
+
 Chapter two
 
 A spider: [spider]
@@ -36,6 +38,7 @@ A spider: [spider]
 <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
 <p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
 <p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p>
+<p>Empty path: <a href="">empty</a>.</p>
 <h1 id="chapter-two">Chapter two</h1>
 <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
 ```
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
index 68a317161..54f0104de 100644
--- a/test/command/chap1/text.md
+++ b/test/command/chap1/text.md
@@ -13,3 +13,5 @@ URL left alone: [manual](https://pandoc.org/MANUAL.html).
 Absolute path left alone: [absolute](/foo/bar/baz.png).
 
 Link to fragment: [chapter two](#chapter-two).
+
+Empty path: [empty]().
-- 
cgit v1.2.3


From 44484d0dee1bd095240b9faf26f8d1dad8e560ea Mon Sep 17 00:00:00 2001
From: Emily Bourke <undergroundquizscene@gmail.com>
Date: Sun, 11 Apr 2021 21:42:53 +0100
Subject: Docx reader: Read table column widths.

---
 src/Text/Pandoc/Readers/Docx.hs             |   5 +-
 src/Text/Pandoc/Readers/Docx/Parse.hs       |   2 +-
 test/Tests/Writers/Docx.hs                  |   5 ++
 test/docx/0_level_headers.native            |   4 +-
 test/docx/golden/table_one_row.docx         | Bin 9840 -> 9840 bytes
 test/docx/golden/table_with_list_cell.docx  | Bin 10159 -> 10162 bytes
 test/docx/golden/tables-default-widths.docx | Bin 0 -> 10200 bytes
 test/docx/golden/tables.docx                | Bin 10200 -> 10202 bytes
 test/docx/sdt_elements.native               |   6 +-
 test/docx/table_one_row.native              |   8 +--
 test/docx/table_variable_width.native       |  12 ++--
 test/docx/table_with_list_cell.native       |   6 +-
 test/docx/tables-default-widths.native      |  92 ++++++++++++++++++++++++++++
 test/docx/tables.native                     |  18 +++---
 14 files changed, 128 insertions(+), 30 deletions(-)
 create mode 100644 test/docx/golden/tables-default-widths.docx
 create mode 100644 test/docx/tables-default-widths.native

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 22dd54193..375bb7338 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -639,7 +639,7 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
     bodyPartToBlocks $ Paragraph pPr' parparts
 bodyPartToBlocks (Tbl _ _ _ []) =
   return $ para mempty
-bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
+bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do
   let cap' = simpleCaption $ plain $ text cap
       (hdr, rows) = case firstRowFormatting look of
         True | null rs -> (Nothing, [r])
@@ -669,7 +669,8 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
       -- so should be possible. Alignment might be more difficult,
       -- since there doesn't seem to be a column entity in docx.
   let alignments = replicate width AlignDefault
-      widths = replicate width ColWidthDefault
+      totalWidth = sum grid
+      widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
 
   return $ table cap'
                  (zip alignments widths)
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 7325ff300..978d6ff3a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -563,7 +563,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid
 elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
   let cols = findChildrenByName ns "w" "gridCol" element
   in
-   mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
+   mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger))
    cols
 elemToTblGrid _ _ = throwError WrongElem
 
diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs
index 2e0f1e3fb..da25b95e0 100644
--- a/test/Tests/Writers/Docx.hs
+++ b/test/Tests/Writers/Docx.hs
@@ -111,6 +111,11 @@ tests = [ testGroup "inlines"
             def
             "docx/tables.native"
             "docx/golden/tables.docx"
+          , docxTest
+            "tables without explicit column widths"
+            def
+            "docx/tables-default-widths.native"
+            "docx/golden/tables-default-widths.docx"
           , docxTest
             "tables with lists in cells"
             def
diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native
index 7f875891e..ed589b029 100644
--- a/test/docx/0_level_headers.native
+++ b/test/docx/0_level_headers.native
@@ -1,6 +1,6 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 1.0)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -49,4 +49,4 @@
 ,Para [Strong [Str "Table",Space,Str "Page"]]
 ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
 ,Header 1 ("introduction",[],[]) [Str "Introduction"]
-,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
\ No newline at end of file
+,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index f75e567ab..a7a8f2519 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index a49f70643..1362d4609 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables-default-widths.docx b/test/docx/golden/tables-default-widths.docx
new file mode 100644
index 000000000..f24e27516
Binary files /dev/null and b/test/docx/golden/tables-default-widths.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index f24e27516..9dcbbc9d0 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native
index dca82f0a0..a072c0d39 100644
--- a/test/docx/sdt_elements.native
+++ b/test/docx/sdt_elements.native
@@ -1,8 +1,8 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.22069570301081556)
+ ,(AlignDefault,ColWidth 0.22069570301081556)
+ ,(AlignDefault,ColWidth 0.5586085939783689)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
diff --git a/test/docx/table_one_row.native b/test/docx/table_one_row.native
index e9188b145..88d5e3af5 100644
--- a/test/docx/table_one_row.native
+++ b/test/docx/table_one_row.native
@@ -1,8 +1,8 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.3333333333333333)
+ ,(AlignDefault,ColWidth 0.3333333333333333)
+ ,(AlignDefault,ColWidth 0.3333333333333333)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -15,4 +15,4 @@
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "Table"]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native
index 229cb83b1..43ac40cca 100644
--- a/test/docx/table_variable_width.native
+++ b/test/docx/table_variable_width.native
@@ -1,10 +1,10 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 2.0096205237840725e-2)
+ ,(AlignDefault,ColWidth 1.9882415820416888e-2)
+ ,(AlignDefault,ColWidth 0.22202030999465527)
+ ,(AlignDefault,ColWidth 0.4761090326028862)
+ ,(AlignDefault,ColWidth 1.0689470871191876e-4)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -42,4 +42,4 @@
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     []]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/table_with_list_cell.native b/test/docx/table_with_list_cell.native
index 06d8606da..51a35184b 100644
--- a/test/docx/table_with_list_cell.native
+++ b/test/docx/table_with_list_cell.native
@@ -1,7 +1,7 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -22,4 +22,4 @@
      ,[Para [Str "A"]]
      ,[Para [Str "Numbered",Space,Str "list."]]]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/tables-default-widths.native b/test/docx/tables-default-widths.native
new file mode 100644
index 000000000..e541e5a6e
--- /dev/null
+++ b/test/docx/tables-default-widths.native
@@ -0,0 +1,92 @@
+[Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"]
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Name"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Game"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Fame"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Blame"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Lebron",Space,Str "James"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Basketball"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Very",Space,Str "High"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Leaving",Space,Str "Cleveland"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Ryan",Space,Str "Braun"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Baseball"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Moderate"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Steroids"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Russell",Space,Str "Wilson"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Football"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "High"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Tacky",Space,Str "uniform"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Sinple"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Table"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Without"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Header"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Simple"]
+    ,Para [Str "Multiparagraph"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Table"]
+    ,Para [Str "Full"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Of"]
+    ,Para [Str "Paragraphs"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "In",Space,Str "each"]
+    ,Para [Str "Cell."]]]])]
+ (TableFoot ("",[],[])
+ [])]
\ No newline at end of file
diff --git a/test/docx/tables.native b/test/docx/tables.native
index e541e5a6e..5a89496be 100644
--- a/test/docx/tables.native
+++ b/test/docx/tables.native
@@ -1,10 +1,10 @@
 [Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"]
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -48,8 +48,8 @@
  [])
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -68,8 +68,8 @@
  [])
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -89,4 +89,4 @@
     [Para [Str "In",Space,Str "each"]
     ,Para [Str "Cell."]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
-- 
cgit v1.2.3


From 56b211120c62a01f8aba1c4512acfe4677d8c7d0 Mon Sep 17 00:00:00 2001
From: Emily Bourke <undergroundquizscene@protonmail.com>
Date: Thu, 18 Jun 2020 09:53:32 +0100
Subject: Docx reader: Support new table features.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Column spans
* Row spans
  - The spec says that if the `val` attribute is ommitted, its value
    should be assumed to be `continue`, and that its values are
    restricted to {`restart`, `continue`}. If the value has any other
    value, I think it seems reasonable to default it to `continue`. It
    might cause problems if the spec is extended in the future by adding
    a third possible value, in which case this would probably give
    incorrect behaviour, and wouldn't error.
* Allow multiple header rows
* Include table description in simple caption
  - The table description element is like alt text for a table (along
    with the table caption element). It seems like we should include
    this somewhere, but I’m not 100% sure how – I’m pairing it with the
    simple caption for the moment. (Should it maybe go in the block
    caption instead?)
* Detect table captions
  - Check for caption paragraph style /and/ either the simple or
    complex table field. This means the caption detection fails for
    captions which don’t contain a field, as in an example doc I added
    as a test. However, I think it’s better to be too conservative: a
    missed table caption will still show up as a paragraph next to the
    table, whereas if I incorrectly classify something else as a table
    caption it could cause havoc by pairing it up with a table it’s
    not at all related to, or dropping it entirely.
* Update tests and add new ones

Partially fixes: #6316
---
 src/Text/Pandoc/Readers/Docx.hs            |  92 ++++++++------
 src/Text/Pandoc/Readers/Docx/Parse.hs      | 113 +++++++++++++++--
 src/Text/Pandoc/Readers/Docx/Util.hs       |   7 ++
 test/Tests/Readers/Docx.hs                 |  16 +++
 test/docx/sdt_elements.native              |  13 +-
 test/docx/table_captions_no_field.docx     | Bin 0 -> 40482 bytes
 test/docx/table_captions_no_field.native   |  34 ++++++
 test/docx/table_captions_with_field.docx   | Bin 0 -> 41091 bytes
 test/docx/table_captions_with_field.native |  54 +++++++++
 test/docx/table_header_rowspan.docx        | Bin 0 -> 15826 bytes
 test/docx/table_header_rowspan.native      | 189 +++++++++++++++++++++++++++++
 test/docx/table_one_header_row.docx        | Bin 0 -> 12185 bytes
 test/docx/table_one_header_row.native      |  18 +++
 test/docx/table_one_row.docx               | Bin 25251 -> 12148 bytes
 test/docx/table_variable_width.native      |  19 ++-
 15 files changed, 487 insertions(+), 68 deletions(-)
 create mode 100644 test/docx/table_captions_no_field.docx
 create mode 100644 test/docx/table_captions_no_field.native
 create mode 100644 test/docx/table_captions_with_field.docx
 create mode 100644 test/docx/table_captions_with_field.native
 create mode 100644 test/docx/table_header_rowspan.docx
 create mode 100644 test/docx/table_header_rowspan.native
 create mode 100644 test/docx/table_one_header_row.docx
 create mode 100644 test/docx/table_one_header_row.native

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 375bb7338..c06adf7e3 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -61,13 +61,14 @@ module Text.Pandoc.Readers.Docx
 import Codec.Archive.Zip
 import Control.Monad.Reader
 import Control.Monad.State.Strict
+import Data.Bifunctor (bimap, first)
 import qualified Data.ByteString.Lazy as B
 import Data.Default (Default)
-import Data.List (delete, intersect)
+import Data.List (delete, intersect, foldl')
 import Data.Char (isSpace)
 import qualified Data.Map as M
 import qualified Data.Text as T
-import Data.Maybe (isJust, fromMaybe)
+import Data.Maybe (catMaybes, isJust, fromMaybe)
 import Data.Sequence (ViewL (..), viewl)
 import qualified Data.Sequence as Seq
 import qualified Data.Set as Set
@@ -113,6 +114,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
                      -- restarting
                      , docxListState :: M.Map (T.Text, T.Text) Integer
                      , docxPrevPara  :: Inlines
+                     , docxTableCaptions :: [Blocks]
                      }
 
 instance Default DState where
@@ -123,6 +125,7 @@ instance Default DState where
                , docxDropCap   = mempty
                , docxListState = M.empty
                , docxPrevPara  = mempty
+               , docxTableCaptions = []
                }
 
 data DEnv = DEnv { docxOptions       :: ReaderOptions
@@ -491,15 +494,32 @@ singleParaToPlain blks
       singleton $ Plain ils
 singleParaToPlain blks = blks
 
-cellToBlocks :: PandocMonad m => Docx.Cell -> DocxContext m Blocks
-cellToBlocks (Docx.Cell bps) = do
+cellToCell :: PandocMonad m => RowSpan -> Docx.Cell -> DocxContext m Pandoc.Cell
+cellToCell rowSpan (Docx.Cell gridSpan _ bps) = do
   blks <- smushBlocks <$> mapM bodyPartToBlocks bps
-  return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+  let blks' = singleParaToPlain $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+  return (cell AlignDefault rowSpan (ColSpan (fromIntegral gridSpan)) blks')
+
+rowsToRows :: PandocMonad m => [Docx.Row] -> DocxContext m [Pandoc.Row]
+rowsToRows rows = do
+  let rowspans = (fmap . fmap) (first RowSpan) (Docx.rowsToRowspans rows)
+  cells <- traverse (traverse (uncurry cellToCell)) rowspans
+  return (fmap (Pandoc.Row nullAttr) cells)
+
+splitHeaderRows :: Bool -> [Docx.Row] -> ([Docx.Row], [Docx.Row])
+splitHeaderRows hasFirstRowFormatting rs = bimap reverse reverse $ fst
+  $ if hasFirstRowFormatting
+    then foldl' f ((take 1 rs, []), True) (drop 1 rs)
+    else foldl' f (([], []), False) rs
+  where
+    f ((headerRows, bodyRows), previousRowWasHeader) r@(Docx.Row h cs)
+      | h == HasTblHeader || (previousRowWasHeader && any isContinuationCell cs)
+        = ((r : headerRows, bodyRows), True)
+      | otherwise
+        = ((headerRows, r : bodyRows), False)
+
+    isContinuationCell (Docx.Cell _ vm _) = vm == Docx.Continue
 
-rowToBlocksList :: PandocMonad m => Docx.Row -> DocxContext m [Blocks]
-rowToBlocksList (Docx.Row cells) = do
-  blksList <- mapM cellToBlocks cells
-  return $ map singleParaToPlain blksList
 
 -- like trimInlines, but also take out linebreaks
 trimSps :: Inlines -> Inlines
@@ -546,6 +566,11 @@ normalizeToClassName = T.map go . fromStyleName
   where go c | isSpace c = '-'
              | otherwise = c
 
+bodyPartToTableCaption :: PandocMonad m => BodyPart -> DocxContext m (Maybe Blocks)
+bodyPartToTableCaption (TblCaption pPr parparts) =
+  Just <$> bodyPartToBlocks (Paragraph pPr parparts)
+bodyPartToTableCaption _ = pure Nothing
+
 bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
 bodyPartToBlocks (Paragraph pPr parparts)
   | Just True <- pBidi pPr = do
@@ -637,50 +662,43 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
   let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
   in
     bodyPartToBlocks $ Paragraph pPr' parparts
+bodyPartToBlocks (TblCaption _ _) =
+  return $ para mempty -- collected separately
 bodyPartToBlocks (Tbl _ _ _ []) =
   return $ para mempty
-bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do
-  let cap' = simpleCaption $ plain $ text cap
-      (hdr, rows) = case firstRowFormatting look of
-        True | null rs -> (Nothing, [r])
-             | otherwise -> (Just r, rs)
-        False -> (Nothing, r:rs)
-
-  cells <- mapM rowToBlocksList rows
+bodyPartToBlocks (Tbl cap grid look parts) = do
+  captions <- gets docxTableCaptions
+  fullCaption <- case captions of
+    c : cs -> do
+      modify (\s -> s { docxTableCaptions = cs })
+      return c
+    [] -> return $ if T.null cap then mempty else plain (text cap)
+  let shortCaption = if T.null cap then Nothing else Just (toList (text cap))
+      cap' = caption shortCaption fullCaption
+      (hdr, rows) = splitHeaderRows (firstRowFormatting look) parts
 
   let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
       rowLength :: Docx.Row -> Int
-      rowLength (Docx.Row c) = length c
+      rowLength (Docx.Row _ c) = sum (fmap (\(Docx.Cell gridSpan _ _) -> fromIntegral gridSpan) c)
 
-  let toRow = Pandoc.Row nullAttr . map simpleCell
-      toHeaderRow l = [toRow l | not (null l)]
+  headerCells <- rowsToRows hdr
+  bodyCells <- rowsToRows rows
 
-  -- pad cells.  New Text.Pandoc.Builder will do that for us,
-  -- so this is for compatibility while we switch over.
-  let cells' = map (\row -> toRow $ take width (row ++ repeat mempty)) cells
-
-  hdrCells <- case hdr of
-    Just r' -> toHeaderRow <$> rowToBlocksList r'
-    Nothing -> return []
-
-      -- The two following variables (horizontal column alignment and
-      -- relative column widths) go to the default at the
-      -- moment. Width information is in the TblGrid field of the Tbl,
-      -- so should be possible. Alignment might be more difficult,
-      -- since there doesn't seem to be a column entity in docx.
+      -- Horizontal column alignment goes to the default at the moment. Getting
+      -- it might be difficult, since there doesn't seem to be a column entity
+      -- in docx.
   let alignments = replicate width AlignDefault
       totalWidth = sum grid
       widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
 
   return $ table cap'
                  (zip alignments widths)
-                 (TableHead nullAttr hdrCells)
-                 [TableBody nullAttr 0 [] cells']
+                 (TableHead nullAttr headerCells)
+                 [TableBody nullAttr 0 [] bodyCells]
                  (TableFoot nullAttr [])
 bodyPartToBlocks (OMathPara e) =
   return $ para $ displayMath (writeTeX e)
 
-
 -- replace targets with generated anchors.
 rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
 rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
@@ -716,6 +734,8 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
 bodyToOutput (Body bps) = do
   let (metabps, blkbps) = sepBodyParts bps
   meta <- bodyPartsToMeta metabps
+  captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps
+  modify (\s -> s { docxTableCaptions = captions })
   blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
   blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
   blks'' <- removeOrphanAnchors blks'
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 978d6ff3a..aaa8f4ad0 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -33,7 +33,9 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , ParStyle
                                       , CharStyle(cStyleData)
                                       , Row(..)
+                                      , TblHeader(..)
                                       , Cell(..)
+                                      , VMerge(..)
                                       , TrackedChange(..)
                                       , ChangeType(..)
                                       , ChangeInfo(..)
@@ -50,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , pHeading
                                       , constructBogusParStyleData
                                       , leftBiasedMergeRunStyle
+                                      , rowsToRowspans
                                       ) where
 import Text.Pandoc.Readers.Docx.Parse.Styles
 import Codec.Archive.Zip
@@ -225,6 +228,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
 data BodyPart = Paragraph ParagraphStyle [ParPart]
               | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
               | Tbl T.Text TblGrid TblLook [Row]
+              | TblCaption ParagraphStyle [ParPart]
               | OMathPara [Exp]
               deriving Show
 
@@ -236,12 +240,61 @@ newtype TblLook = TblLook {firstRowFormatting::Bool}
 defaultTblLook :: TblLook
 defaultTblLook = TblLook{firstRowFormatting = False}
 
-newtype Row = Row [Cell]
-           deriving Show
+data Row = Row TblHeader [Cell] deriving Show
+
+data TblHeader = HasTblHeader | NoTblHeader deriving (Show, Eq)
 
-newtype Cell = Cell [BodyPart]
+data Cell = Cell GridSpan VMerge [BodyPart]
             deriving Show
 
+type GridSpan = Integer
+
+data VMerge = Continue
+            -- ^ This cell should be merged with the one above it
+            | Restart
+            -- ^ This cell should not be merged with the one above it
+            deriving (Show, Eq)
+
+rowsToRowspans :: [Row] -> [[(Int, Cell)]]
+rowsToRowspans rows = let
+  removeMergedCells = fmap (filter (\(_, Cell _ vmerge _) -> vmerge == Restart))
+  in removeMergedCells (foldr f [] rows)
+  where
+    f :: Row -> [[(Int, Cell)]] -> [[(Int, Cell)]]
+    f (Row _ cells) acc = let
+      spans = g cells Nothing (listToMaybe acc)
+      in spans : acc
+
+    g ::
+      -- | The current row
+      [Cell] ->
+      -- | Number of columns left below
+      Maybe Integer ->
+      -- | (rowspan so far, cell) for the row below this one
+      Maybe [(Int, Cell)] ->
+      -- | (rowspan so far, cell) for this row
+      [(Int, Cell)]
+    g cells _ Nothing = zip (repeat 1) cells
+    g cells columnsLeftBelow (Just rowBelow) =
+        case cells of
+          [] -> []
+          thisCell@(Cell thisGridSpan _ _) : restOfRow -> case rowBelow of
+            [] -> zip (repeat 1) cells
+            (spanSoFarBelow, Cell gridSpanBelow vmerge _) : _ ->
+              let spanSoFar = case vmerge of
+                    Restart -> 1
+                    Continue -> 1 + spanSoFarBelow
+                  columnsToDrop = thisGridSpan + (gridSpanBelow - fromMaybe gridSpanBelow columnsLeftBelow)
+                  (newColumnsLeftBelow, restOfRowBelow) = dropColumns columnsToDrop rowBelow
+              in (spanSoFar, thisCell) : g restOfRow (Just newColumnsLeftBelow) (Just restOfRowBelow)
+
+    dropColumns :: Integer -> [(a, Cell)] -> (Integer, [(a, Cell)])
+    dropColumns n [] = (n, [])
+    dropColumns n cells@((_, Cell gridSpan _ _) : otherCells) =
+      if n < gridSpan
+      then (gridSpan - n, cells)
+      else dropColumns (n - gridSpan) otherCells
+
 leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
 leftBiasedMergeRunStyle a b = RunStyle
     { isBold = isBold a <|> isBold b
@@ -587,14 +640,31 @@ elemToRow ns element | isElem ns "w" "tr" element =
   do
     let cellElems = findChildrenByName ns "w" "tc" element
     cells <- mapD (elemToCell ns) cellElems
-    return $ Row cells
+    let hasTblHeader = maybe NoTblHeader (const HasTblHeader)
+          (findChildByName ns "w" "trPr" element
+           >>= findChildByName ns "w" "tblHeader")
+    return $ Row hasTblHeader cells
 elemToRow _ _ = throwError WrongElem
 
 elemToCell :: NameSpaces -> Element -> D Cell
 elemToCell ns element | isElem ns "w" "tc" element =
   do
+    let properties = findChildByName ns "w" "tcPr" element
+    let gridSpan = properties
+                     >>= findChildByName ns "w" "gridSpan"
+                     >>= findAttrByName ns "w" "val"
+                     >>= stringToInteger
+    let vMerge = case properties >>= findChildByName ns "w" "vMerge" of
+                   Nothing -> Restart
+                   Just e ->
+                     fromMaybe Continue $ do
+                       s <- findAttrByName ns "w" "val" e
+                       case s of
+                         "continue" -> Just Continue
+                         "restart" -> Just Restart
+                         _ -> Nothing
     cellContents <- mapD (elemToBodyPart ns) (elChildren element)
-    return $ Cell cellContents
+    return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents
 elemToCell _ _ = throwError WrongElem
 
 elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
@@ -626,10 +696,9 @@ pNumInfo = getParStyleField numInfo . pStyle
 elemToBodyPart :: NameSpaces -> Element -> D BodyPart
 elemToBodyPart ns element
   | isElem ns "w" "p" element
-  , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
-      do
-        expsLst <- eitherToD $ readOMML $ showElement c
-        return $ OMathPara expsLst
+  , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do
+      expsLst <- eitherToD $ readOMML $ showElement c
+      return $ OMathPara expsLst
 elemToBodyPart ns element
   | isElem ns "w" "p" element
   , Just (numId, lvl) <- getNumInfo ns element = do
@@ -647,13 +716,31 @@ elemToBodyPart ns element
         Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
                     levelInfo <- lookupLevel numId lvl <$> asks envNumbering
                     return $ ListItem parstyle numId lvl levelInfo parparts
-        _ -> return $ Paragraph parstyle parparts
+        _ -> let
+          hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+          hasSimpleTableField = fromMaybe False $ do
+            fldSimple <- findChildByName ns "w" "fldSimple" element
+            instr <- findAttrByName ns "w" "instr" fldSimple
+            pure ("Table" `elem` T.words instr)
+
+          hasComplexTableField = fromMaybe False $ do
+            instrText <- findElementByName ns "w" "instrText" element
+            pure ("Table" `elem` T.words (strContent instrText))
+
+          in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
+             then return $ TblCaption parstyle parparts
+             else return $ Paragraph parstyle parparts
+
 elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
-    let caption' = findChildByName ns "w" "tblPr" element
+    let tblProperties = findChildByName ns "w" "tblPr" element
+        caption = fromMaybe "" $ tblProperties
                    >>= findChildByName ns "w" "tblCaption"
                    >>= findAttrByName ns "w" "val"
-        caption = fromMaybe "" caption'
+        description = fromMaybe "" $ tblProperties
+                       >>= findChildByName ns "w" "tblDescription"
+                       >>= findAttrByName ns "w" "val"
         grid' = case findChildByName ns "w" "tblGrid" element of
           Just g  -> elemToTblGrid ns g
           Nothing -> return []
@@ -666,7 +753,7 @@ elemToBodyPart ns element
     grid <- grid'
     tblLook <- tblLook'
     rows <- mapD (elemToRow ns) (elChildren element)
-    return $ Tbl caption grid tblLook rows
+    return $ Tbl (caption <> description) grid tblLook rows
 elemToBodyPart _ _ = throwError WrongElem
 
 lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index ac331cba6..970697a2d 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -19,6 +19,7 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , elemToNameSpaces
                                       , findChildByName
                                       , findChildrenByName
+                                      , findElementByName
                                       , findAttrByName
                                       ) where
 
@@ -56,6 +57,12 @@ findChildrenByName ns pref name el =
   let ns' = ns <> elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
+-- | Like 'findChildrenByName', but searches descendants.
+findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
+findElementByName ns pref name el =
+  let ns' = ns <> elemToNameSpaces el
+  in  findElement (elemName ns' pref name) el
+
 findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
   let ns' = ns <> elemToNameSpaces el
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
index 939ff9939..220c7d9c5 100644
--- a/test/Tests/Readers/Docx.hs
+++ b/test/Tests/Readers/Docx.hs
@@ -317,14 +317,30 @@ tests = [ testGroup "document"
             "tables with lists in cells"
             "docx/table_with_list_cell.docx"
             "docx/table_with_list_cell.native"
+          , testCompare
+            "a table with a header which contains rowspans greater than 1"
+            "docx/table_header_rowspan.docx"
+            "docx/table_header_rowspan.native"
           , testCompare
             "tables with one row"
             "docx/table_one_row.docx"
             "docx/table_one_row.native"
+          , testCompare
+            "tables with just one row, which is a header"
+            "docx/table_one_header_row.docx"
+            "docx/table_one_header_row.native"
           , testCompare
             "tables with variable width"
             "docx/table_variable_width.docx"
             "docx/table_variable_width.native"
+          , testCompare
+            "tables with captions which contain a Table field"
+            "docx/table_captions_with_field.docx"
+            "docx/table_captions_with_field.native"
+          , testCompare
+            "tables with captions which don't contain a Table field"
+            "docx/table_captions_no_field.docx"
+            "docx/table_captions_no_field.native"
           , testCompare
             "code block"
             "docx/codeblock.docx"
diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native
index a072c0d39..d2aa00994 100644
--- a/test/docx/sdt_elements.native
+++ b/test/docx/sdt_elements.native
@@ -4,17 +4,16 @@
  ,(AlignDefault,ColWidth 0.22069570301081556)
  ,(AlignDefault,ColWidth 0.5586085939783689)]
  (TableHead ("",[],[])
- [])
- [(TableBody ("",[],[]) (RowHeadColumns 0)
-  []
-  [Row ("",[],[])
+ [Row ("",[],[])
    [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Strong [Str "col1Header"]]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Strong [Str "col2Header"]]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-    [Plain [Strong [Str "col3Header"]]]]
-  ,Row ("",[],[])
+    [Plain [Strong [Str "col3Header"]]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
    [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "col1",Space,Str "content"]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -22,4 +21,4 @@
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "col3",Space,Str "content"]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/table_captions_no_field.docx b/test/docx/table_captions_no_field.docx
new file mode 100644
index 000000000..1687d32a2
Binary files /dev/null and b/test/docx/table_captions_no_field.docx differ
diff --git a/test/docx/table_captions_no_field.native b/test/docx/table_captions_no_field.native
new file mode 100644
index 000000000..b8f54d541
--- /dev/null
+++ b/test/docx/table_captions_no_field.native
@@ -0,0 +1,34 @@
+[Para [Str "See",Space,Str "Table",Space,Str "5.1."]
+,Para [Str "Table",Space,Str "5.1"]
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidth 0.7605739372523825)
+ ,(AlignDefault,ColWidth 0.11971303137380876)
+ ,(AlignDefault,ColWidth 0.11971303137380876)]
+ (TableHead ("",[],[])
+[Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  []
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "Count"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "%"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "First",Space,Str "option"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "242"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "45"]]]
+,Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "Second",Space,Str "option"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "99"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "18"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Header 2 ("section", [], []) []]
diff --git a/test/docx/table_captions_with_field.docx b/test/docx/table_captions_with_field.docx
new file mode 100644
index 000000000..db6de3088
Binary files /dev/null and b/test/docx/table_captions_with_field.docx differ
diff --git a/test/docx/table_captions_with_field.native b/test/docx/table_captions_with_field.native
new file mode 100644
index 000000000..deb8afc6b
--- /dev/null
+++ b/test/docx/table_captions_with_field.native
@@ -0,0 +1,54 @@
+[Para [Str "See",Space,Str "Table",Space,Str "1."]
+,Para []
+,Table ("",[],[]) (Caption Nothing
+ [Para [Str "Table",Space,Str "1"]])
+ [(AlignDefault,ColWidth 0.7605739372523825)
+ ,(AlignDefault,ColWidth 0.11971303137380876)
+ ,(AlignDefault,ColWidth 0.11971303137380876)]
+ (TableHead ("",[],[])
+[Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  []
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "Count"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "%"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "First",Space,Str "option"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "242"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "45"]]]
+,Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "Second",Space,Str "option"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "99"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "18"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Header 2 ("section", [], []) []
+,Table ("",[],[]) (Caption Nothing
+ [Para [Str "Table",Space,Str "2"]])
+ [(AlignDefault,ColWidth 0.3332963620230701)
+ ,(AlignDefault,ColWidth 0.3332963620230701)
+ ,(AlignDefault,ColWidth 0.3334072759538598)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "One"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Two"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Three"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [])]
+ (TableFoot ("",[],[])
+ [])
+,Para []
+,Para [Str "See",Space,Str "Table",Space,Str "2."]]
diff --git a/test/docx/table_header_rowspan.docx b/test/docx/table_header_rowspan.docx
new file mode 100644
index 000000000..1cc32a105
Binary files /dev/null and b/test/docx/table_header_rowspan.docx differ
diff --git a/test/docx/table_header_rowspan.native b/test/docx/table_header_rowspan.native
new file mode 100644
index 000000000..d951f29e4
--- /dev/null
+++ b/test/docx/table_header_rowspan.native
@@ -0,0 +1,189 @@
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidth 0.30701754385964913)
+ ,(AlignDefault,ColWidth 0.1364522417153996)
+ ,(AlignDefault,ColWidth 0.10009746588693957)
+ ,(AlignDefault,ColWidth 9.707602339181287e-2)
+ ,(AlignDefault,ColWidth 7.719298245614035e-2)
+ ,(AlignDefault,ColWidth 7.085769980506823e-2)
+ ,(AlignDefault,ColWidth 7.09551656920078e-2)
+ ,(AlignDefault,ColWidth 0.14035087719298245)]
+ (TableHead ("",[],[])
+[Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Str "A"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Strong [Str "B"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Strong [Str "C"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Strong [Str "D"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 3)
+  [Plain [Str "E"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Str "F"]]]
+,Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Strong [Str "G"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Strong [Str "H"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Strong [Str "I"]]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ])]
+ (TableFoot ("",[],[])
+ [])]
diff --git a/test/docx/table_one_header_row.docx b/test/docx/table_one_header_row.docx
new file mode 100644
index 000000000..db715dda8
Binary files /dev/null and b/test/docx/table_one_header_row.docx differ
diff --git a/test/docx/table_one_header_row.native b/test/docx/table_one_header_row.native
new file mode 100644
index 000000000..4aae830ac
--- /dev/null
+++ b/test/docx/table_one_header_row.native
@@ -0,0 +1,18 @@
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidth 0.33302433371958284)
+ ,(AlignDefault,ColWidth 0.3332560834298957)
+ ,(AlignDefault,ColWidth 0.33371958285052145)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "One"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Row"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Table"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [])]
+ (TableFoot ("",[],[])
+ [])]
diff --git a/test/docx/table_one_row.docx b/test/docx/table_one_row.docx
index f7e0ebe43..d05a856b5 100644
Binary files a/test/docx/table_one_row.docx and b/test/docx/table_one_row.docx differ
diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native
index 43ac40cca..ff1cc0dc4 100644
--- a/test/docx/table_variable_width.native
+++ b/test/docx/table_variable_width.native
@@ -4,7 +4,8 @@
  ,(AlignDefault,ColWidth 1.9882415820416888e-2)
  ,(AlignDefault,ColWidth 0.22202030999465527)
  ,(AlignDefault,ColWidth 0.4761090326028862)
- ,(AlignDefault,ColWidth 1.0689470871191876e-4)]
+ ,(AlignDefault,ColWidth 1.0689470871191876e-4)
+ ,(AlignDefault,ColWidth 0.26178514163548905)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -13,33 +14,27 @@
    []
   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
    [Plain [Str "h3"]]
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2)
    [Plain [Str "h4"]]
   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
    [Plain [Str "h5"]]]])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
-   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 3)
     [Plain [Str "c11"]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-    []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-    []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2)
     []]
   ,Row ("",[],[])
    [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2)
     [Plain [Str "c22"]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "c23"]]
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-    []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2)
     []]])]
  (TableFoot ("",[],[])
  [])]
-- 
cgit v1.2.3


From b6b2331fdcee37f1bfb3fcc21816d73d6d56cfae Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 28 May 2021 13:58:44 -0700
Subject: Support `rebase_relative_paths` for commonmark based formats.

(Including `gfm`.)
---
 src/Text/Pandoc/Extensions.hs         |  1 +
 src/Text/Pandoc/Readers/CommonMark.hs |  4 +++-
 test/command/3752.md                  | 16 ++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index c4d54c06e..9c55d0a7a 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -517,6 +517,7 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_attributes
     , Ext_sourcepos
     , Ext_yaml_metadata_block
+    , Ext_rebase_relative_paths
     ]
   getAll "commonmark_x"    = getAll "commonmark"
   getAll "org"             = autoIdExtensions <>
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index b099a9b50..228e65312 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -112,5 +112,7 @@ specFor opts = foldr ($) defaultSyntaxSpec exts
            | isEnabled Ext_implicit_header_references opts ] ++
          [ (footnoteSpec <>) | isEnabled Ext_footnotes opts ] ++
          [ (definitionListSpec <>) | isEnabled Ext_definition_lists opts ] ++
-         [ (taskListSpec <>) | isEnabled Ext_task_lists opts ]
+         [ (taskListSpec <>) | isEnabled Ext_task_lists opts ] ++
+         [ (rebaseRelativePathsSpec <>)
+           | isEnabled Ext_rebase_relative_paths opts ]
 
diff --git a/test/command/3752.md b/test/command/3752.md
index df8af0ba1..2e96b531e 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -42,3 +42,19 @@ A spider: [spider]
 <h1 id="chapter-two">Chapter two</h1>
 <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
 ```
+
+```
+% pandoc command/chap1/text.md command/chap2/text.md -f commonmark+rebase_relative_paths -t html
+^D
+<h1>Chapter one</h1>
+<p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p>
+<p>Another spider: <img src="command/chap2/spider.png" alt="another spider" /></p>
+<p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p>
+<p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
+<p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
+<p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
+<p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p>
+<p>Empty path: <a href="">empty</a>.</p>
+<h1>Chapter two</h1>
+<p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
+```
-- 
cgit v1.2.3


From 0d7103de7e893002eedcc50fea7f81aae9535106 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 29 May 2021 14:41:28 -0700
Subject: In rebasePath, check for absolute paths two ways.

isAbsolute from FilePath doesn't return True on Windows
for paths beginning with `/`, so we check that separately.
---
 src/Text/Pandoc/Readers/Markdown.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index bc5e3e30f..9a5e0889e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1924,7 +1924,10 @@ rebasePath :: SourcePos -> Text -> Text
 rebasePath pos path = do
   let fp = sourceName pos
       isFragment = T.take 1 path == "#"
-   in if T.null path || isFragment || isAbsolute (T.unpack path) || isURI path
+      -- check for leading / because on Windows this won't be
+      -- recognized as absolute by isAbsolute
+      isAbsolutePath = isAbsolute (T.unpack path) || T.take 1 path == "/"
+   in if T.null path || isFragment || isAbsolutePath || isURI path
          then path
          else
            case takeDirectory fp of
-- 
cgit v1.2.3


From cc6dcf03928404c08cfb3075a90d814b25fd7c69 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 29 May 2021 17:36:30 -0700
Subject: Markdown reader: in rebasePaths, check for both Windows and Posix

absolute paths.  Previously Windows pandoc was treating
`/foo/bar.jpg` as non-absolute.
---
 src/Text/Pandoc/Readers/Markdown.hs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 9a5e0889e..1761ee2c1 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -29,7 +29,9 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.ByteString.Lazy as BL
-import System.FilePath (addExtension, takeExtension, isAbsolute, takeDirectory)
+import System.FilePath (addExtension, takeExtension, takeDirectory)
+import qualified System.FilePath.Windows as Windows
+import qualified System.FilePath.Posix as Posix
 import Text.HTML.TagSoup hiding (Row)
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
@@ -1924,9 +1926,8 @@ rebasePath :: SourcePos -> Text -> Text
 rebasePath pos path = do
   let fp = sourceName pos
       isFragment = T.take 1 path == "#"
-      -- check for leading / because on Windows this won't be
-      -- recognized as absolute by isAbsolute
-      isAbsolutePath = isAbsolute (T.unpack path) || T.take 1 path == "/"
+      path' = T.unpack path
+      isAbsolutePath = Posix.isAbsolute path' || Windows.isAbsolute path'
    in if T.null path || isFragment || isAbsolutePath || isURI path
          then path
          else
-- 
cgit v1.2.3


From fc70f44ee2814914c0d32f6dff30fb36cc51bf11 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 30 May 2021 17:15:14 -0700
Subject: HTML reader: fix column width regression.

Column widths specified with a style attribute were
off by a factor of 100 in 2.14.

Closes #7334.
---
 src/Text/Pandoc/Readers/HTML/Table.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 3a569dd0a..6e62e12f5 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -49,7 +49,7 @@ pCol = try $ do
   return $ case lookup "width" attribs of
                 Nothing -> case lookup "style" attribs of
                   Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    maybe (Right ColWidthDefault) (Right . ColWidth)
+                    maybe (Right ColWidthDefault) (Right . ColWidth . (/ 100.0))
                       $ safeRead (T.filter
                                    (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
                   _ -> Right ColWidthDefault
-- 
cgit v1.2.3


From 62f46b3995425c9a3ec87cba0eb8a4d736adec07 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 31 May 2021 21:34:51 -0600
Subject: Fix regression with commonmark/gfm yaml metdata block parsing.

A regression in 2.14 led to the document body being omitted
after YAML metadata in some cases.  This is now fixed.

Closes #7339.
---
 src/Text/Pandoc/Readers/CommonMark.hs | 10 +++++-----
 test/command/7339.md                  | 11 +++++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)
 create mode 100644 test/command/7339.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 228e65312..411d64278 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -30,7 +30,7 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
 import Data.Typeable
-import Text.Pandoc.Parsing (runParserT, getPosition,
+import Text.Pandoc.Parsing (runParserT, getInput,
                             runF, defaultParserState, option, many1, anyChar,
                             Sources(..), ToSources(..), ParserT, Future,
                             sourceName)
@@ -44,14 +44,14 @@ readCommonMark opts s
     let sources = toSources s
     let toks = concatMap sourceToToks (unSources sources)
     res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
-                          pos <- getPosition
-                          return (meta, pos))
+                          rest <- getInput
+                          return (meta, rest))
                       defaultParserState "YAML metadata" (toSources s)
     case res of
       Left _ -> readCommonMarkBody opts sources toks
-      Right (meta, pos) -> do
+      Right (meta, rest) -> do
         -- strip off metadata section and parse body
-        let body = dropWhile (\t -> tokPos t < pos) toks
+        let body = concatMap sourceToToks (unSources rest)
         Pandoc _ bs <- readCommonMarkBody opts sources body
         return $ Pandoc (runF meta defaultParserState) bs
   | otherwise = do
diff --git a/test/command/7339.md b/test/command/7339.md
new file mode 100644
index 000000000..9697c1c32
--- /dev/null
+++ b/test/command/7339.md
@@ -0,0 +1,11 @@
+```
+% pandoc -f gfm -s -t native
+---
+title: Test
+---
+
+Hi
+^D
+Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Test"])]})
+[Para [Str "Hi"]]
+```
-- 
cgit v1.2.3


From abb59bd58222c67fd36a8e447c01de3404a7ed1d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 1 Jun 2021 13:54:51 -0600
Subject: LaTeX reader: don't allow optional * on symbol control sequences.

Generally we allow optional starred variants of LaTeX commands
(since many allow them, and if we don't accept these explicitly,
ignoring the star usually gives acceptable results).  But we
don't want to do this for `\(*\)` and similar cases.

Closes #7340.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 6 ++++--
 test/command/7340.md             | 6 ++++++
 2 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 test/command/7340.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2ace18d1b..9e14c159a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -24,7 +24,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
 import Control.Applicative (many, optional, (<|>))
 import Control.Monad
 import Control.Monad.Except (throwError)
-import Data.Char (isDigit, isLetter, toUpper, chr)
+import Data.Char (isDigit, isLetter, isAlphaNum, toUpper, chr)
 import Data.Default
 import Data.List (intercalate)
 import qualified Data.Map as M
@@ -300,7 +300,9 @@ inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
   guard $ name /= "begin" && name /= "end" && name /= "and"
-  star <- option "" ("*" <$ symbol '*' <* sp)
+  star <- if T.all isAlphaNum name
+             then option "" ("*" <$ symbol '*' <* sp)
+             else pure ""
   overlay <- option "" overlaySpecification
   let name' = name <> star <> overlay
   let names = ordNub [name', name] -- check non-starred as fallback
diff --git a/test/command/7340.md b/test/command/7340.md
new file mode 100644
index 000000000..25decd732
--- /dev/null
+++ b/test/command/7340.md
@@ -0,0 +1,6 @@
+```
+% pandoc -f latex -t native
+\(*\)
+^D
+[Para [Math InlineMath "*"]]
+```
-- 
cgit v1.2.3


From 2e4ef14d9112f230e336a1f3767f87293bdfb73e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 1 Jun 2021 21:44:55 -0600
Subject: Markdown reader: fix pipe table regression in 2.11.4.

Previously pipe tables with empty headers (that is, a header
line with all empty cells) would be rendered as headerless
tables.  This broke in 2.11.4.

The fix here is to produce an AST with an empty table head
when a pipe table has all empty header cells.

Closes #7343.
---
 src/Text/Pandoc/Readers/Markdown.hs |  2 +-
 test/pipe-tables.native             | 12 ++----------
 2 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1761ee2c1..1e9867d07 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -2254,4 +2254,4 @@ toRow :: [Blocks] -> Row
 toRow = Row nullAttr . map B.simpleCell
 
 toHeaderRow :: [Blocks] -> [Row]
-toHeaderRow l = [toRow l | not (null l)]
+toHeaderRow l = [toRow l | not (null l) && not (all null l)]
diff --git a/test/pipe-tables.native b/test/pipe-tables.native
index 557cd0642..249eec17e 100644
--- a/test/pipe-tables.native
+++ b/test/pipe-tables.native
@@ -131,13 +131,7 @@
  ,(AlignLeft,ColWidthDefault)
  ,(AlignCenter,ColWidthDefault)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
@@ -213,9 +207,7 @@
  [])
  [(AlignCenter,ColWidthDefault)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
-- 
cgit v1.2.3


From c8ab8bccf25ad01b9a98b2e59c84c151c5b0c371 Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sat, 5 Jun 2021 14:26:55 +0200
Subject: DocBook reader: Add support for danger element

Added in DocBook 5.2:

- https://github.com/docbook/docbook/pull/64
- https://tdg.docbook.org/tdg/5.2/danger.html
---
 src/Text/Pandoc/Readers/DocBook.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index b01ad3252..6ac1c99f9 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -134,6 +134,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [ ] corpcredit - A corporation or organization credited in a document
 [ ] corpname - The name of a corporation
 [ ] country - The name of a country
+[x] danger - An admonition set off from the text indicating hazardous situation
 [ ] database - The name of a database, or part of a database
 [x] date - The date of publication or revision of a document
 [ ] dedication - A wrapper for the dedication section of a book
@@ -718,7 +719,7 @@ blockTags =
   ] ++ admonitionTags
 
 admonitionTags :: [Text]
-admonitionTags = ["important","caution","note","tip","warning"]
+admonitionTags = ["caution","danger","important","note","tip","warning"]
 
 -- Trim leading and trailing newline characters
 trimNl :: Text -> Text
-- 
cgit v1.2.3


From cfa26e3ca0346397f41af9aed5b4cd1d86be1220 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 12 Jun 2021 13:56:09 -0700
Subject: Docx reader: handle absolute URIs in Relationship Target.

Closes #7374.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index aaa8f4ad0..dbb16a821 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -479,20 +479,26 @@ filePathToRelType path docXmlPath =
   then Just InDocument
   else Nothing
 
-relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
-relElemToRelationship relType element | qName (elName element) == "Relationship" =
+relElemToRelationship :: FilePath -> DocumentLocation -> Element
+                      -> Maybe Relationship
+relElemToRelationship fp relType element | qName (elName element) == "Relationship" =
   do
     relId <- findAttr (QName "Id" Nothing Nothing) element
     target <- findAttr (QName "Target" Nothing Nothing) element
-    return $ Relationship relType relId target
-relElemToRelationship _ _ = Nothing
+    -- target may be relative (media/image1.jpeg) or absolute
+    -- (/word/media/image1.jpeg); we need to relativize it (see #7374)
+    let frontOfFp = T.pack $ takeWhile (/= '_') fp
+    let target' = fromMaybe target $
+           T.stripPrefix frontOfFp $ T.dropWhile (== '/') target
+    return $ Relationship relType relId target'
+relElemToRelationship _ _ _ = Nothing
 
 filePathToRelationships :: Archive -> FilePath -> FilePath ->  [Relationship]
 filePathToRelationships ar docXmlPath fp
   | Just relType <- filePathToRelType fp docXmlPath
   , Just entry <- findEntryByPath fp ar
   , Just relElems <- parseXMLFromEntry entry =
-  mapMaybe (relElemToRelationship relType) $ elChildren relElems
+  mapMaybe (relElemToRelationship fp relType) $ elChildren relElems
 filePathToRelationships _ _ _ = []
 
 archiveToRelationships :: Archive -> FilePath -> [Relationship]
-- 
cgit v1.2.3


From 0352f7845bfa2053797850c3639414978285b63e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 22:35:07 -0700
Subject: Improve emailAddress in Text.Pandoc.Parsing.

Previously the parser would accept characters in domains
that are illegal in domains, and this sometimes caused it
to gobble bits of the following text.

Closes #7398.

Note that this change, by itself, caused some txt2tag reader
tests to fail. txt2tags allows bare email addresses with
a following form query.  So, in addition to the change
to emailAddress, we modify the txt2tags parser so it can
still handle these cases.
---
 src/Text/Pandoc/Parsing.hs          |  7 +++----
 src/Text/Pandoc/Readers/Txt2Tags.hs | 22 +++++++++++++++++++++-
 2 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 0bb794ba1..082d9565b 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -693,13 +693,12 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        mailbox           = intercalate "." <$> (emailWord `sepBy1'` dot)
        domain            = intercalate "." <$> (subdomain `sepBy1'` dot)
        dot               = char '.'
-       subdomain         = many1 $ alphaNum <|> innerPunct
+       subdomain         = many1 $ alphaNum <|> innerPunct (=='-')
        -- this excludes some valid email addresses, since an
        -- email could contain e.g. '__', but gives better results
        -- for our purposes, when combined with markdown parsing:
-       innerPunct        = try (satisfy (\c -> isEmailPunct c || c == '@')
-                                 <* notFollowedBy space
-                                 <* notFollowedBy (satisfy isPunctuation))
+       innerPunct f      = try (satisfy f
+                                 <* notFollowedBy (satisfy (not . isAlphaNum)))
        -- technically an email address could begin with a symbol,
        -- but allowing this creates too many problems.
        -- See e.g. https://github.com/jgm/pandoc/issues/2940
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 6f92f0063..b5cf5a0f3 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -478,9 +478,29 @@ macro = try $ do
 -- raw URLs in text are automatically linked
 url :: T2T Inlines
 url = try $ do
-  (rawUrl, escapedUrl) <- try uri <|> emailAddress
+  (rawUrl, escapedUrl) <- try uri <|> emailAddress'
   return $ B.link rawUrl "" (B.str escapedUrl)
 
+emailAddress' :: T2T (Text, Text)
+emailAddress' = do
+  (base, mailURI) <- emailAddress
+  query <- option "" emailQuery
+  return (base <> query, mailURI <> query)
+
+emailQuery :: T2T Text
+emailQuery = do
+  char '?'
+  parts <- kv `sepBy1` (char '&')
+  return $ "?" <> T.intercalate "&" parts
+
+kv :: T2T Text
+kv = do
+  k <- T.pack <$> many1 alphaNum
+  char '='
+  let vchar = alphaNum <|> try (oneOf "%._/~:,=$@&+-;*" <* lookAhead alphaNum)
+  v <- T.pack <$> many1 vchar
+  return (k <> "=" <> v)
+
 uri :: T2T (Text, Text)
 uri = try $ do
   address <- t2tURI
-- 
cgit v1.2.3


From 1b07997f4a6870650f20702ed6d962f9471e3d40 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 22 Jun 2021 09:55:50 -0700
Subject: Fix regression with comment-only YAML metadata blocks.

Closes #7400.
---
 src/Text/Pandoc/Readers/Metadata.hs | 3 +++
 test/command/7400.md                | 9 +++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 test/command/7400.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index 45eddf25a..cbc523b25 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -45,6 +45,9 @@ yamlBsToMeta pMetaValue bstr = do
        Right [] -> return . return $ mempty
        Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
                 -> return . return $ mempty
+       -- the following is what we get from a comment:
+       Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))]
+                -> return . return $ mempty
        Right _  -> Prelude.fail "expected YAML object"
        Left (yamlpos, err')
                 -> do pos <- getPosition
diff --git a/test/command/7400.md b/test/command/7400.md
new file mode 100644
index 000000000..d4be32d72
--- /dev/null
+++ b/test/command/7400.md
@@ -0,0 +1,9 @@
+```
+% pandoc -t native -s
+---
+# Comment only
+...
+^D
+Pandoc (Meta {unMeta = fromList []})
+[]
+```
-- 
cgit v1.2.3


From 3ed37f00771d20a1b7516f2a37b7b424b3b2f1d8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 Jul 2021 10:21:59 -0700
Subject: HTML reader: add col, colgroup to 'closes' definitions

---
 src/Text/Pandoc/Readers/HTML/Parsing.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index 4c069e2c3..bd8d7c96c 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -161,10 +161,12 @@ _ `closes` "html" = False
 "li" `closes` "li" = True
 "th" `closes` t | t `elem` ["th","td"] = True
 "td" `closes` t | t `elem` ["th","td"] = True
-"tr" `closes` t | t `elem` ["th","td","tr"] = True
+"tr" `closes` t | t `elem` ["th","td","tr","colgroup"] = True
 "dd" `closes` t | t `elem` ["dt", "dd"] = True
 "dt" `closes` t | t `elem` ["dt","dd"] = True
 "rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True
+"col" `closes` "col" = True
+"colgroup" `closes` "col" = True
 "optgroup" `closes` "optgroup" = True
 "optgroup" `closes` "option" = True
 "option" `closes` "option" = True
-- 
cgit v1.2.3


From f88ebf3ebf49e00ffa12778caf6817cc34459e6a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 Jul 2021 10:22:07 -0700
Subject: Markdown reader:  don't try to read contents in self-closing HTML
 tag.

Previously we had problems parsing raw HTML with self-closing
tags like `<col/>`. The problem was that pandoc would look
for a closing tag to close the markdown contents, but the
closing tag had, in effect, already been parsed by `htmlTag`.

This fixes the issue described in
<https://groups.google.com/d/msgid/pandoc-discuss/297bc662-7841-4423-bcbb-534e99bbba09n%40googlegroups.com>.
---
 src/Text/Pandoc/Readers/Markdown.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1e9867d07..2dc7ddf52 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1121,6 +1121,7 @@ rawTeXBlock = do
 rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
 rawHtmlBlocks = do
   (TagOpen tagtype _, raw) <- htmlTag isBlockTag
+  let selfClosing = "/>" `T.isSuffixOf` raw
   -- we don't want '<td>    text' to be a code block:
   skipMany spaceChar
   indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
@@ -1134,7 +1135,9 @@ rawHtmlBlocks = do
                  gobbleAtMostSpaces indentlevel
                  notFollowedBy' closer
                  block
-  contents <- mconcat <$> many block'
+  contents <- if selfClosing
+                 then return mempty
+                 else mconcat <$> many block'
   result <-
     try
     (do gobbleAtMostSpaces indentlevel
-- 
cgit v1.2.3


From e56e2b0e0be9256ddef798d28f5d2af6e756508d Mon Sep 17 00:00:00 2001
From: Michael Hoffmann <brennan.brisad@gmail.com>
Date: Wed, 7 Jul 2021 01:06:29 +0200
Subject: Recognize data-external when reading HTML img tags (#7429)

Preserve all attributes in img tags.  If attributes have a `data-`
prefix, it will be stripped.  In particular, this preserves a
`data-external` attribute as an `external` attribute in the pandoc AST.
---
 src/Text/Pandoc/Readers/HTML.hs | 11 +++--------
 test/Tests/Readers/HTML.hs      |  6 ++++++
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fc4575f2d..fdf4f28e0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -722,17 +722,12 @@ pLink = try $ do
 
 pImage :: PandocMonad m => TagParser m Inlines
 pImage = do
-  tag <- pSelfClosing (=="img") (isJust . lookup "src")
+  tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src")
   url <- canonicalizeUrl $ fromAttrib "src" tag
   let title = fromAttrib "title" tag
   let alt = fromAttrib "alt" tag
-  let uid = fromAttrib "id" tag
-  let cls = T.words $ fromAttrib "class" tag
-  let getAtt k = case fromAttrib k tag of
-                   "" -> []
-                   v  -> [(k, v)]
-  let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
-  return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
+  let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr'
+  return $ B.imageWith attr (escapeURI url) title (B.text alt)
 
 pSvg :: PandocMonad m => TagParser m Inlines
 pSvg = do
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index 9bf567194..4ed1e44af 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -74,6 +74,12 @@ tests = [ testGroup "base tag"
           [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?>
             plain (spanWith ("anchor",[],[]) mempty)
           ]
+        , testGroup "img"
+          [ test html "data-external attribute" $ "<img data-external=\"1\" src=\"http://example.com/stickman.gif\">" =?>
+            plain (imageWith ("", [], [("external", "1")]) "http://example.com/stickman.gif" "" "")
+          , test html "title" $ "<img title=\"The title\" src=\"http://example.com/stickman.gif\">" =?>
+            plain (imageWith ("", [], []) "http://example.com/stickman.gif" "The title" "")
+          ]
         , testGroup "lang"
           [ test html "lang on <html>" $ "<html lang=\"es\">hola" =?>
             setMeta "lang" (text "es") (doc (plain (text "hola")))
-- 
cgit v1.2.3


From ae22b1e977cfb1357bb21fabc227e76a6adb0599 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 9 Jul 2021 12:27:41 -0700
Subject: RST reader: fix regression with code includes.

With the recent changes to include infrastructure,
included code blocks were getting an extra newline.

Closes #7436.  Added regression test.
---
 pandoc.cabal                   |  1 +
 src/Text/Pandoc/Readers/RST.hs |  6 +++++-
 test/command/7436.md           | 14 ++++++++++++++
 test/command/three.txt         |  3 +++
 4 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7436.md
 create mode 100644 test/command/three.txt

(limited to 'src/Text/Pandoc/Readers')

diff --git a/pandoc.cabal b/pandoc.cabal
index 84a04c6b6..e3cd7e54f 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -213,6 +213,7 @@ extra-source-files:
                  test/command/B.txt
                  test/command/C.txt
                  test/command/D.txt
+                 test/command/three.txt
                  test/command/01.csv
                  test/command/chap1/spider.png
                  test/command/chap2/spider.png
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 35292d949..3990f0cb5 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -474,6 +474,7 @@ includeDirective top fields body = do
            case lookup "literal" fields of
              Just _  -> B.rawBlock "rst" . sourcesToText <$> getInput
              Nothing -> parseBlocks
+  let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields)
   let selectLines =
         (case trim <$> lookup "end-before" fields of
                          Just patt -> takeWhile (not . (patt `T.isInfixOf`))
@@ -482,8 +483,11 @@ includeDirective top fields body = do
                          Just patt -> drop 1 .
                                         dropWhile (not . (patt `T.isInfixOf`))
                          Nothing   -> id)
+
   let toStream t =
-        toSources [(f, T.unlines . selectLines . T.lines $ t)]
+        Sources [(initialPos f,
+                   (T.unlines . selectLines . T.lines $ t) <>
+                    if isLiteral then mempty else "\n")]  -- see #7436
   currentDir <- takeDirectory . sourceName <$> getPosition
   insertIncludedFile parser toStream [currentDir] f startLine endLine
 
diff --git a/test/command/7436.md b/test/command/7436.md
new file mode 100644
index 000000000..ad4cb8c2f
--- /dev/null
+++ b/test/command/7436.md
@@ -0,0 +1,14 @@
+```
+% pandoc -f rst -t native
+.. include:: command/three.txt
+   :code:
+
+.. include:: command/three.txt
+   :literal:
+
+.. include:: command/three.txt
+^D
+[CodeBlock ("",[""],[("code","")]) "1st line.\n2nd line.\n3rd line.\n"
+,RawBlock (Format "rst") "1st line.\n2nd line.\n3rd line.\n"
+,Para [Str "1st",Space,Str "line.",SoftBreak,Str "2nd",Space,Str "line.",SoftBreak,Str "3rd",Space,Str "line."]]
+```
diff --git a/test/command/three.txt b/test/command/three.txt
new file mode 100644
index 000000000..3ca3fdd4e
--- /dev/null
+++ b/test/command/three.txt
@@ -0,0 +1,3 @@
+1st line.
+2nd line.
+3rd line.
-- 
cgit v1.2.3


From ac0a9da6d85e9b7a73973a20019caa324b2c1aff Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 Jul 2021 13:01:45 -0700
Subject: Improved parsing of raw LaTeX from Text streams (rawLaTeXParser).

We now use source positions from the token stream to tell us
how much of the text stream to consume.  Getting this to
work required a few other changes to make token source positions
accurate.

Closes #7434.
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  9 +++-----
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 39 ++++++++++++++++++++++++++++----
 test/command/7434.md                     | 15 ++++++++++++
 3 files changed, 52 insertions(+), 11 deletions(-)
 create mode 100644 test/command/7434.md

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9e14c159a..31c8d9095 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -136,8 +136,7 @@ rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
               => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -167,8 +166,7 @@ rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
                => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   raw <- snd <$>
           (   rawLaTeXParser toks True
               (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -182,8 +180,7 @@ rawLaTeXInline = do
 inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index a17b1f324..9dac4d6ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleInstances     #-}
+{-# LANGUAGE FlexibleContexts      #-}
 {-# LANGUAGE LambdaCase            #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
@@ -28,6 +29,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , applyMacros
   , tokenize
   , tokenizeSources
+  , getInputTokens
   , untokenize
   , untoken
   , totoks
@@ -246,18 +248,23 @@ withVerbatimMode parser = do
        updateState $ \st -> st{ sVerbatimMode = False }
        return result
 
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
                => [Tok] -> Bool -> LP m a -> LP m a
                -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
   let lstate' = lstate { sMacros = extractMacros pstate }
+  let setStartPos = case toks of
+                      Tok pos _ _ : _ -> setPosition pos
+                      _ -> return ()
+  let preparser = setStartPos >> parser
   let rawparser = (,) <$> withRaw valParser <*> getState
-  res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+  res' <- lift $ runParserT (withRaw (preparser >> getPosition))
+                            lstate "chunk" toks
   case res' of
        Left _    -> mzero
-       Right toks' -> do
+       Right (endpos, toks') -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
                                         ts <- many (satisfyTok (const True))
@@ -268,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                void $ count (T.length (untokenize toks')) anyChar
+                let skipTilPos stopPos = do
+                      anyChar
+                      pos <- getPosition
+                      if pos >= stopPos
+                         then return ()
+                         else skipTilPos stopPos
+                skipTilPos endpos
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -306,6 +319,17 @@ tokenizeSources = concatMap tokenizeSource . unSources
  where
    tokenizeSource (pos, t) = totoks pos t
 
+-- Return tokens from input sources. Ensure that starting position is
+-- correct.
+getInputTokens :: PandocMonad m => ParserT Sources s m [Tok]
+getInputTokens = do
+  pos <- getPosition
+  ss <- getInput
+  return $
+    case ss of
+      Sources [] -> []
+      Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
@@ -433,8 +457,13 @@ parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
 parseFromToks parser toks = do
   oldInput <- getInput
   setInput toks
+  oldpos <- getPosition
+  case toks of
+     Tok pos _ _ : _ -> setPosition pos
+     _ -> return ()
   result <- disablingWithRaw parser
   setInput oldInput
+  setPosition oldpos
   return result
 
 disablingWithRaw :: PandocMonad m => LP m a -> LP m a
@@ -458,7 +487,7 @@ satisfyTok f = do
                   | otherwise = Nothing
         updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
         updatePos _spos _ (Tok pos _ _ : _) = pos
-        updatePos spos _ []                 = incSourceColumn spos 1
+        updatePos spos (Tok _ _ t)  []      = incSourceColumn spos (T.length t)
 
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
diff --git a/test/command/7434.md b/test/command/7434.md
new file mode 100644
index 000000000..be8732bc2
--- /dev/null
+++ b/test/command/7434.md
@@ -0,0 +1,15 @@
+```
+% pandoc -f markdown -t native
+\begin{proof}
+\newcommand{\x}{\left.\right.}
+\x
+\end{proof}
+
+1234567890abcdefghi
+
+[\*\a](x)
+^D
+[RawBlock (Format "tex") "\\begin{proof}\n\\newcommand{\\x}{\\left.\\right.}\n\\left.\\right.\n\\end{proof}"
+,Para [Str "1234567890abcdefghi"]
+,Para [Link ("",[],[]) [Str "*",RawInline (Format "tex") "\\a"] ("x","")]]
+```
-- 
cgit v1.2.3


From 06408d08e5ccf06a6a04c9b77470e6a67d98e52c Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Mon, 12 Jul 2021 00:28:52 +0200
Subject: DocBook reader: add support for citerefentry (#7437)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Originally intended for referring to UNIX manual pages, either part of the same DocBook document as refentry element, or external – hence the manvolnum element.
These days, refentry is more general, for example the element documentation pages linked below are each a refentry.

As per the *Processing expectations* section of citerefentry, the element is supposed to be a hyperlink to a refentry (when in the same document) but pandoc does not support refentry tag at the moment so that is moot.

https://tdg.docbook.org/tdg/5.1/citerefentry.html
https://tdg.docbook.org/tdg/5.1/manvolnum.html
https://tdg.docbook.org/tdg/5.1/refentry.html

This roughly corresponds to a `manpage` role in rST syntax, which produces a `Code` AST node with attributes `.interpreted-text role=manpage` but that does not fit DocBook parser.

https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-manpage
---
 src/Text/Pandoc/Readers/DocBook.hs | 6 +++++-
 test/docbook-reader.docbook        | 3 +++
 test/docbook-reader.native         | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6ac1c99f9..a31819ece 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -97,7 +97,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] chapterinfo - Meta-information for a Chapter
 [ ] citation - An inline bibliographic reference to another published work
 [ ] citebiblioid - A citation of a bibliographic identifier
-[ ] citerefentry - A citation to a reference page
+[x] citerefentry - A citation to a reference page
 [ ] citetitle - The title of a cited work
 [ ] city - The name of a city in an address
 [x] classname - The name of a class, in the object-oriented programming sense
@@ -1112,6 +1112,10 @@ parseInline (Elem e) =
         "segmentedlist" -> segmentedList
         "classname" -> codeWithLang
         "code" -> codeWithLang
+        "citerefentry" -> do
+             let title = maybe mempty strContent $ filterChild (named "refentrytitle") e
+             let manvolnum = maybe mempty (\el -> "(" <> strContent el <> ")") $ filterChild (named "manvolnum") e
+             return $ codeWith ("",["citerefentry"],[]) (title <> manvolnum)
         "filename" -> codeWithLang
         "envar" -> codeWithLang
         "literal" -> codeWithLang
diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook
index 51e62942b..f021dc8be 100644
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@@ -725,6 +725,9 @@ These should not be escaped:  \$ \\ \&gt; \[ \{
   <para>
     More code: <classname>Class</classname> and <type>Type</type>
   </para>
+  <para>
+    Referencing a man page: <citerefentry><refentrytitle>nix.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+  </para>
   <para>
     <emphasis role="strikethrough">This is
     <emphasis>strikeout</emphasis>.</emphasis>
diff --git a/test/docbook-reader.native b/test/docbook-reader.native
index 1961949d9..b1f5fd085 100644
--- a/test/docbook-reader.native
+++ b/test/docbook-reader.native
@@ -188,6 +188,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Sof
 ,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."]
 ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",SoftBreak,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",SoftBreak,Code ("",[],[]) "<html>",Str "."]
 ,Para [Str "More",Space,Str "code:",Space,Code ("",[],[]) "Class",Space,Str "and",Space,Code ("",[],[]) "Type"]
+,Para [Str "Referencing",Space,Str "a",Space,Str "man",Space,Str "page:",Space,Code ("",["citerefentry"],[]) "nix.conf(5)"]
 ,Para [Strikeout [Str "This",Space,Str "is",SoftBreak,Emph [Str "strikeout"],Str "."]]
 ,Para [Str "Superscripts:",Space,Str "a",Superscript [Str "bc"],Str "d",SoftBreak,Str "a",Superscript [Emph [Str "hello"]],SoftBreak,Str "a",Superscript [Str "hello\160there"],Str "."]
 ,Para [Str "Subscripts:",Space,Str "H",Subscript [Str "2"],Str "O,",Space,Str "H",Subscript [Str "23"],Str "O,",SoftBreak,Str "H",Subscript [Str "many\160of\160them"],Str "O."]
-- 
cgit v1.2.3


From 493522c5626464fb7a27216ccee381af4a199eef Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 16 Jul 2021 12:04:43 -0700
Subject: LaTeX reader: Support `\cline` in LaTeX tables.

Closes #7442.
---
 src/Text/Pandoc/Readers/LaTeX/Table.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
index 7833da081..f56728fe1 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -41,6 +41,7 @@ hline :: PandocMonad m => LP m ()
 hline = try $ do
   spaces
   controlSeq "hline" <|>
+    (controlSeq "cline" <* braced) <|>
     -- booktabs rules:
     controlSeq "toprule" <|>
     controlSeq "bottomrule" <|>
-- 
cgit v1.2.3


From 46099e79defe662e541b12548200caf29063c1c6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 16 Jul 2021 13:10:45 -0700
Subject: DocBook reader: handle images with imageobjectco elements.

Closes #7440.
---
 src/Text/Pandoc/Readers/DocBook.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc/Readers')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index a31819ece..c49b82ccf 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -741,9 +741,9 @@ getMediaobject e = do
   figTitle <- gets dbFigureTitle
   ident <- gets dbFigureId
   (imageUrl, attr) <-
-    case filterChild (named "imageobject") e of
-      Nothing  -> return (mempty, nullAttr)
-      Just z   -> case filterChild (named "imagedata") z of
+    case filterElements (named "imageobject") e of
+      []  -> return (mempty, nullAttr)
+      (z:_) -> case filterChild (named "imagedata") z of
                     Nothing -> return (mempty, nullAttr)
                     Just i  -> let atVal a = attrValue a i
                                    w = case atVal "width" of
-- 
cgit v1.2.3