From 8c38390038edcebd55f9dec8359ef983f3813425 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 27 Nov 2020 21:21:25 +0100
Subject: HTML reader tests: improve test coverage of new features

---
 src/Text/Pandoc/Readers/HTML/Table.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 5a783988f..91639fa4c 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -214,7 +214,8 @@ normalize :: [ColWidth] -> TableHead -> [TableBody] -> TableFoot
           -> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
 normalize widths head' bodies foot = do
   let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot
-  let rowLength = length . rowCells
+  let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs
+  let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells
   let ncols = maximum (map rowLength rows)
   let tblType = tableType (map rowCells rows)
   -- fail on empty table
-- 
cgit v1.2.3


From 83d63b72e1b9eff9f2aa3b9f36b56d348f0909a2 Mon Sep 17 00:00:00 2001
From: Tassos Manganaris <tassos.manganaris@gmail.com>
Date: Fri, 27 Nov 2020 23:42:53 -0500
Subject: Fix a tiny Typo in the CSV reader module

Header comment in the CSV reader module says "RST" instead of "CSV".
---
 src/Text/Pandoc/Readers/CSV.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index f0edcaa16..45f4d88d4 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -2,7 +2,7 @@
 
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
-   Module      : Text.Pandoc.Readers.RST
+   Module      : Text.Pandoc.Readers.CSV
    Copyright   : Copyright (C) 2006-2020 John MacFarlane
    License     : GNU GPL, version 2 or above
 
-- 
cgit v1.2.3


From bff9c129c3579f928a0067759d0a784eb5c07d30 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 29 Nov 2020 10:29:51 -0800
Subject: LaTeX reader: don't parse `\rule` with width 0 as horizontal rule.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2a9bff746..15a1a19fc 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1225,6 +1225,16 @@ preamble = mconcat <$> many preambleBlock
                              anyTok
                              return mempty)
 
+rule :: PandocMonad m => LP m Blocks
+rule = do
+  skipopts
+  width <- T.takeWhile (\c -> isDigit c || c == '.') . stringify <$> tok
+  _thickness <- tok
+  -- 0-width rules are used to fix spacing issues:
+  case safeRead width of
+    Just (0 :: Double) -> return mempty
+    _ -> return horizontalRule
+
 paragraph :: PandocMonad m => LP m Blocks
 paragraph = do
   x <- trimInlines . mconcat <$> many1 inline
@@ -1595,7 +1605,7 @@ blockCommands = M.fromList
    --
    , ("hrule", pure horizontalRule)
    , ("strut", pure mempty)
-   , ("rule", skipopts *> tok *> tok $> horizontalRule)
+   , ("rule", rule)
    , ("item", looseItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
-- 
cgit v1.2.3


From 7b11cdee49ec491a33dcade6ea68c3084a425992 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 2 Dec 2020 10:46:23 -0800
Subject: Citeproc: ensure that BCP47 lang codes can be used.

We ignore the variants and just use the base lang code
and country code when passing off to citeproc.
---
 src/Text/Pandoc/Citeproc.hs | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index a48f97c3b..0de2882ae 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -16,6 +16,7 @@ import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
 import Text.Pandoc.Readers.Markdown (yamlToRefs)
 import Text.Pandoc.Class (setResourcePath, getResourcePath, getUserDataDir)
+import qualified Text.Pandoc.BCP47 as BCP47
 import Data.ByteString (ByteString)
 import qualified Data.ByteString.Lazy as L
 import Text.Pandoc.Definition as Pandoc
@@ -91,8 +92,9 @@ processCitations (Pandoc meta bs) = do
     case styleRes of
        Left err    -> throwError $ PandocAppError $ prettyCiteprocError err
        Right style -> return style{ styleAbbreviations = mbAbbrevs }
-  let mblang = parseLang <$>
-         ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= metaValueToText)
+  mblang <- maybe (return Nothing) bcp47LangToIETF
+               ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>=
+                 metaValueToText)
   let locale = Citeproc.mergeLocales mblang style
   let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs
       getCiteId _ = mempty
@@ -579,3 +581,16 @@ removeFinalPeriod ils =
   isRightQuote "\8217" = True
   isRightQuote "\187"  = True
   isRightQuote _       = False
+
+bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang)
+bcp47LangToIETF bcplang =
+  case BCP47.parseBCP47 bcplang of
+    Left _ -> do
+      report $ InvalidLang bcplang
+      return Nothing
+    Right lang ->
+      return $ Just
+             $ Lang (BCP47.langLanguage lang)
+                    (if T.null (BCP47.langRegion lang)
+                        then Nothing
+                        else Just (BCP47.langRegion lang))
-- 
cgit v1.2.3


From 5bbd5a9e80c4f4356d71b54315cf75b5ae2f3650 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 3 Dec 2020 09:51:23 -0800
Subject: Docx writer: Support bold and italic in "complex script."

Previously bold and italics didn't work properly in LTR
text.  This commit causes the w:bCs and w:iCs attributes
to be used, in addition to w:b and w:i, for bold and
italics respectively.

Closes #6911.
---
 src/Text/Pandoc/Writers/Docx.hs             |   8 ++++++--
 test/docx/golden/custom_style_preserve.docx | Bin 10573 -> 10578 bytes
 test/docx/golden/definition_list.docx       | Bin 9844 -> 9850 bytes
 test/docx/golden/inline_formatting.docx     | Bin 9959 -> 9969 bytes
 4 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index a380fd4fa..4cb879e6a 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1272,9 +1272,13 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                      $ inlinesToOpenXML opts ils
   wrapBookmark ident contents
 inlineToOpenXML' opts (Strong lst) =
-  withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
+  withTextProp (mknode "w:b" [] ()) $
+  withTextProp (mknode "w:bCs" [] ()) $ -- needed for LTR, #6911
+  inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Emph lst) =
-  withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst
+  withTextProp (mknode "w:i" [] ()) $
+  withTextProp (mknode "w:iCs" [] ()) $  -- needed for LTR, #6911
+  inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Underline lst) =
   withTextProp (mknode "w:u" [("w:val","single")] ()) $
     inlinesToOpenXML opts lst
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index ac595cdd4..17804bb81 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 005f5dbe3..21629e208 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index ce1d16daa..e12e3b38d 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
-- 
cgit v1.2.3


From 9c6cc79c11d191c74aae8646279a2b94ae253330 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 3 Dec 2020 21:24:27 -0800
Subject: EPUB writer: add frontmatter type on body element for nav.xhtml.

Closes #6918.
---
 src/Text/Pandoc/Writers/EPUB.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 12004889f..eadd1100f 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -845,7 +845,9 @@ pandocToEPUB version opts doc = do
                     [ unode "ol" landmarkItems ]
                   | not (null landmarkItems)]
   navData <- lift $ writeHtml opts'{ writerVariables =
-                     Context (M.fromList [("navpage", toVal' "true")])
+                     Context (M.fromList [("navpage", toVal' "true")
+                                         ,("body-type",  toVal' "frontmatter")
+                                         ])
                      <> cssvars False <> vars }
             (Pandoc (setMeta "title"
                      (walk removeNote $ fromList $ docTitle' meta) nullMeta)
-- 
cgit v1.2.3


From 7199d68ba078148ff76a38f2c483da73edd62747 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 3 Dec 2020 21:39:19 -0800
Subject: EPUB writer: include title page in landmarks.

Closes #6919.

Note that the toc is also included if `--toc` is specified.
---
 src/Text/Pandoc/Writers/EPUB.hs | 9 +++++++--
 test/command/5986.md            | 3 ++-
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index eadd1100f..5867b7c80 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -825,7 +825,12 @@ pandocToEPUB version opts doc = do
                     [ unode "h1" ! [("id","toc-title")] $ tocTitle
                     , unode "ol" ! [("class","toc")] $ tocBlocks ]]
   let landmarkItems = if epub3
-                         then [ unode "li"
+                         then unode "li"
+                                [ unode "a" ! [("href",
+                                                  "text/title_page.xhtml")
+                                               ,("epub:type", "titlepage")] $
+                                  ("Title Page" :: String) ] :
+                              [ unode "li"
                                 [ unode "a" ! [("href", "text/cover.xhtml")
                                               ,("epub:type", "cover")] $
                                   ("Cover" :: String)] |
@@ -834,7 +839,7 @@ pandocToEPUB version opts doc = do
                               [ unode "li"
                                 [ unode "a" ! [("href", "#toc")
                                               ,("epub:type", "toc")] $
-                                    ("Table of contents" :: String)
+                                    ("Table of Contents" :: String)
                                 ] | writerTableOfContents opts
                               ]
                          else []
diff --git a/test/command/5986.md b/test/command/5986.md
index e24aebb64..ea0ca70c1 100644
--- a/test/command/5986.md
+++ b/test/command/5986.md
@@ -6,7 +6,8 @@
 <p><span id="nav.xhtml"></span></p>
 <nav type="landmarks" id="landmarks" hidden="hidden">
 <ol>
-<li><a href="#nav.xhtml#toc">Table of contents</a></li>
+<li><a href="text/title_page.xhtml">Title Page</a></li>
+<li><a href="#nav.xhtml#toc">Table of Contents</a></li>
 </ol>
 </nav>
 <p><span id="ch001.xhtml"></span></p>
-- 
cgit v1.2.3


From 171d3db3848a8ca79480688748d0ffff67ed2039 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 4 Dec 2020 09:47:56 -0800
Subject: HTML writer: Fix handling of nested csl- display spans.

Previously inner Spans used to represent
CSL display attributes were not rendered as div tags.

See #6921.
---
 src/Text/Pandoc/Writers/HTML.hs | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index c92131d5a..76f17f77a 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -695,12 +695,12 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs)
   let fragmentClass = case slideVariant of
                            RevealJsSlides -> "fragment"
                            _              -> "incremental"
-  let inDiv zs = RawBlock (Format "html") ("<div class=\""
+  let inDiv' zs = RawBlock (Format "html") ("<div class=\""
                        <> fragmentClass <> "\">") :
                    (zs ++ [RawBlock (Format "html") "</div>"])
   let breakOnPauses zs = case splitBy isPause zs of
                            []   -> []
-                           y:ys -> y ++ concatMap inDiv ys
+                           y:ys -> y ++ concatMap inDiv' ys
   let (titleBlocks, innerSecs) =
         if titleSlide
            -- title slides have no content of their own
@@ -783,9 +783,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
                       -- a newline between the column divs, which throws
                       -- off widths! see #4028
                       mconcat <$> mapM (blockToHtml opts) bs
-                 else if isCslBibEntry
-                         then mconcat <$> mapM (cslEntryToHtml opts') bs
-                         else blockListToHtml opts' bs
+                 else blockListToHtml opts' bs
   let contents' = nl opts >> contents >> nl opts
   let (divtag, classes'') = if html5 && "section" `elem` classes'
                             then (H5.section, filter (/= "section") classes')
@@ -1213,6 +1211,10 @@ inlineToHtml opts inline = do
     LineBreak      -> return $ do
                         if html5 then H5.br else H.br
                         strToHtml "\n"
+    (Span ("",[cls],[]) ils)
+        | cls == "csl-block" || cls == "csl-left-margin" ||
+          cls == "csl-right-inline" || cls == "csl-indent"
+        -> inlineListToHtml opts ils >>= inDiv cls
 
     (Span (id',classes,kvs) ils) ->
                         let spanLikeTag = case classes of
@@ -1462,22 +1464,12 @@ blockListToNote opts ref blocks = do
                        _          -> noteItem
   return $ nl opts >> noteItem'
 
-cslEntryToHtml :: PandocMonad m
-               => WriterOptions
-               -> Block
-               -> StateT WriterState m Html
-cslEntryToHtml opts (Para xs) = do
+inDiv :: PandocMonad m=> Text -> Html -> StateT WriterState m Html
+inDiv cls x = do
   html5 <- gets stHtml5
-  let inDiv :: Text -> Html -> Html
-      inDiv cls x = (if html5 then H5.div else H.div)
-                      x ! A.class_ (toValue cls)
-  let go (Span ("",[cls],[]) ils)
-        | cls == "csl-block" || cls == "csl-left-margin" ||
-          cls == "csl-right-inline" || cls == "csl-indent"
-        = inDiv cls <$> inlineListToHtml opts ils
-      go il = inlineToHtml opts il
-  mconcat <$> mapM go xs
-cslEntryToHtml opts x = blockToHtml opts x
+  return $
+    (if html5 then H5.div else H.div)
+                x ! A.class_ (toValue cls)
 
 isMathEnvironment :: Text -> Bool
 isMathEnvironment s = "\\begin{" `T.isPrefixOf` s &&
-- 
cgit v1.2.3


From 68bcddeb2136242aabee716420ecad251fc38920 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 4 Dec 2020 10:14:19 -0800
Subject: LaTeX writer: Fix bug with nested csl- display Spans.

See #6921.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 68 +++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 36 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index d665269ab..e02cc2833 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -553,9 +553,7 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
                              Just s  -> braces (literal s))
                           $$ inner
                           $+$ "\\end{CSLReferences}"
-               else if "csl-entry" `elem` classes
-                       then vcat <$> mapM cslEntryToLaTeX bs
-                       else blockListToLaTeX bs
+               else blockListToLaTeX bs
   modify $ \st -> st{ stIncremental = oldIncremental }
   linkAnchor' <- hypertarget True identifier empty
   -- see #2704 for the motivation for adding \leavevmode:
@@ -1180,23 +1178,6 @@ isQuoted :: Inline -> Bool
 isQuoted (Quoted _ _) = True
 isQuoted _            = False
 
-cslEntryToLaTeX :: PandocMonad m
-                => Block
-                -> LW m (Doc Text)
-cslEntryToLaTeX (Para xs) =
-  mconcat <$> mapM go xs
- where
-   go (Span ("",["csl-block"],[]) ils) =
-     (cr <>) . inCmd "CSLBlock" <$> inlineListToLaTeX ils
-   go (Span ("",["csl-left-margin"],[]) ils) =
-     inCmd "CSLLeftMargin" <$> inlineListToLaTeX ils
-   go (Span ("",["csl-right-inline"],[]) ils) =
-     (cr <>) . inCmd "CSLRightInline" <$> inlineListToLaTeX ils
-   go (Span ("",["csl-indent"],[]) ils) =
-     (cr <>) . inCmd "CSLIndent" <$> inlineListToLaTeX ils
-   go il = inlineToLaTeX il
-cslEntryToLaTeX x = blockToLaTeX x
-
 -- | Convert inline element to LaTeX
 inlineToLaTeX :: PandocMonad m
               => Inline    -- ^ Inline to convert
@@ -1204,23 +1185,38 @@ inlineToLaTeX :: PandocMonad m
 inlineToLaTeX (Span (id',classes,kvs) ils) = do
   linkAnchor <- hypertarget False id' empty
   lang <- toLang $ lookup "lang" kvs
-  let cmds = ["textup" | "csl-no-emph" `elem` classes] ++
-             ["textnormal" | "csl-no-strong" `elem` classes ||
-                             "csl-no-smallcaps" `elem` classes] ++
-             ["RL" | ("dir", "rtl") `elem` kvs] ++
-             ["LR" | ("dir", "ltr") `elem` kvs] ++
-             (case lang of
-                Just lng -> let (l, o) = toPolyglossia lng
-                                ops = if T.null o then "" else "[" <> o <> "]"
-                            in  ["text" <> l <> ops]
-                Nothing  -> [])
+  let classToCmd "csl-no-emph" = Just "textup"
+      classToCmd "csl-no-strong" = Just "textnormal"
+      classToCmd "csl-no-smallcaps" = Just "textnormal"
+      classToCmd "csl-block" = Just "CSLBlock"
+      classToCmd "csl-left-margin" = Just "CSLLeftMargin"
+      classToCmd "csl-right-inline" = Just "CSLRightInline"
+      classToCmd "csl-indent" = Just "CSLIndent"
+      classToCmd _ = Nothing
+      kvToCmd ("dir","rtl") = Just "RL"
+      kvToCmd ("dir","ltr") = Just "LR"
+      kvToCmd _ = Nothing
+      langCmds =
+        case lang of
+           Just lng -> let (l, o) = toPolyglossia lng
+                           ops = if T.null o then "" else "[" <> o <> "]"
+                       in  ["text" <> l <> ops]
+           Nothing  -> []
+  let cmds = mapMaybe classToCmd classes ++ mapMaybe kvToCmd kvs ++ langCmds
   contents <- inlineListToLaTeX ils
-  return $ (if T.null id'
-               then empty
-               else "\\protect" <> linkAnchor) <>
-           (if null cmds
-               then braces contents
-               else foldr inCmd contents cmds)
+  return $
+    (case classes of
+              ["csl-block"] -> (cr <>)
+              ["csl-left-margin"] -> (cr <>)
+              ["csl-right-inline"] -> (cr <>)
+              ["csl-indent"] -> (cr <>)
+              _ -> id) $
+    (if T.null id'
+        then empty
+        else "\\protect" <> linkAnchor) <>
+    (if null cmds
+        then braces contents
+        else foldr inCmd contents cmds)
 inlineToLaTeX (Emph lst) = inCmd "emph" <$> inlineListToLaTeX lst
 inlineToLaTeX (Underline lst) = inCmd "underline" <$> inlineListToLaTeX lst
 inlineToLaTeX (Strong lst) = inCmd "textbf" <$> inlineListToLaTeX lst
-- 
cgit v1.2.3


From dc3ef5201f9531bc405ac07e763d9f004bb6bc91 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 4 Dec 2020 10:55:48 -0800
Subject: Markdown writer: ensure that a new csl-block begins on a new line.

This just looks better and doesn't affect the semantics.
See #6921.
---
 src/Text/Pandoc/Writers/Markdown.hs | 7 ++++++-
 test/command/pandoc-citeproc-53.md  | 6 ++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 6aec6b244..5eb47b261 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -1058,7 +1058,12 @@ inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) =
 inlineToMarkdown opts (Span attrs ils) = do
   variant <- asks envVariant
   contents <- inlineListToMarkdown opts ils
-  return $ case variant of
+  return $ case attrs of
+             (_,["csl-block"],_) -> (cr <>)
+             (_,["csl-left-margin"],_) -> (cr <>)
+             (_,["csl-indent"],_) -> (cr <>)
+             _ -> id
+         $ case variant of
                 PlainText -> contents
                 _     | attrs == nullAttr -> contents
                       | isEnabled Ext_bracketed_spans opts ->
diff --git a/test/command/pandoc-citeproc-53.md b/test/command/pandoc-citeproc-53.md
index 295f52049..fb8d5c35e 100644
--- a/test/command/pandoc-citeproc-53.md
+++ b/test/command/pandoc-citeproc-53.md
@@ -28,11 +28,13 @@ Doe[^1] Doe[^2] Roe[^3] Roe[^4] Doe[^5] Doe[^6] Roe[^7] Roe[^8]
 
 ::: {#refs .references .csl-bib-body}
 ::: {#ref-a .csl-entry}
-[[Doe J.]{.smallcaps} ]{.csl-block}[2000, *Work A*,.]{.csl-left-margin}
+[[Doe J.]{.smallcaps} ]{.csl-block}
+[2000, *Work A*,.]{.csl-left-margin}
 :::
 
 ::: {#ref-b .csl-entry}
-[[Roe J.]{.smallcaps} ]{.csl-block}[1990, *Work B*,.]{.csl-left-margin}
+[[Roe J.]{.smallcaps} ]{.csl-block}
+[1990, *Work B*,.]{.csl-left-margin}
 :::
 :::
 
-- 
cgit v1.2.3


From 6f35600204132c5a0907bb7cccb2e10ffb27506a Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sat, 5 Dec 2020 06:00:21 +0100
Subject: Docbook writer: add XML namespaces to top-level elements (#6923)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, we only added xmlns attributes to chapter elements,
even when running with --top-level-division=section.
Let’s add the namespaces to part and section elements too,
when they are the selected top-level divisions.

We do not need to add namespaces to documents produced with
--standalone flag, since those will already have xmlns attribute
on the root element in the template.
---
 src/Text/Pandoc/Writers/Docbook.hs | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 408d8cc0c..3f4c67f10 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -15,6 +15,7 @@ Conversion of 'Pandoc' documents to Docbook XML.
 module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where
 import Control.Monad.Reader
 import Data.Generics (everywhere, mkT)
+import Data.Maybe (isNothing)
 import Data.Monoid (Any (..))
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -40,6 +41,18 @@ data DocBookVersion = DocBook4 | DocBook5
 
 type DB = ReaderT DocBookVersion
 
+-- | Get level of the top-level headers based on the configured top-level division.
+-- The header level can then be used to determine appropriate DocBook element
+-- for each subdivision associated with a header.
+-- The numbering here follows LaTeX's internal numbering
+getStartLvl :: WriterOptions -> Int
+getStartLvl opts =
+  case writerTopLevelDivision opts of
+       TopLevelPart    -> -1
+       TopLevelChapter -> 0
+       TopLevelSection -> 1
+       TopLevelDefault -> 1
+
 -- | Convert list of authors to a docbook <author> section
 authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines
 authorToDocbook opts name' = do
@@ -79,12 +92,7 @@ writeDocbook opts (Pandoc meta blocks) = do
   let colwidth = if writerWrapText opts == WrapAuto
                     then Just $ writerColumns opts
                     else Nothing
-  -- The numbering here follows LaTeX's internal numbering
-  let startLvl = case writerTopLevelDivision opts of
-                   TopLevelPart    -> -1
-                   TopLevelChapter -> 0
-                   TopLevelSection -> 1
-                   TopLevelDefault -> 1
+  let startLvl = getStartLvl opts
   let fromBlocks = blocksToDocbook opts .
                    makeSections False (Just startLvl)
   auths' <- mapM (authorToDocbook opts) $ docAuthors meta
@@ -170,8 +178,12 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do
                  then "xml:id"
                  else "id"
       idAttr = [(idName, writerIdentifierPrefix opts <> id') | not (T.null id')]
-      nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
-                                      else []
+      -- We want to add namespaces to the root (top-level) element.
+      nsAttr = if version == DocBook5 && lvl == getStartLvl opts && isNothing (writerTemplate opts)
+      -- Though, DocBook 4 does not support namespaces and
+      -- standalone documents will include them in the template.
+                 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
+                 else []
       attribs = nsAttr <> idAttr
   title' <- inlinesToDocbook opts ils
   contents <- blocksToDocbook opts bs
-- 
cgit v1.2.3


From ddb76cb356a82f6a9e51a6f3626dd154816e9205 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 5 Dec 2020 09:53:39 -0800
Subject: LaTeX reader: don't apply theorem default styling to a figure inside.

If we put an image in italics, then when rendering to Markdown
we no longer get an implicit figure.

Closes #6925.
---
 src/Text/Pandoc/Readers/LaTeX.hs |  1 +
 test/command/6925.md             | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 test/command/6925.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 15a1a19fc..afe960454 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1812,6 +1812,7 @@ theoremEnvironment name = do
                   _          -> bs
 
 italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
 italicize (Para ils) = Para [Emph ils]
 italicize (Plain ils) = Plain [Emph ils]
 italicize x = x
diff --git a/test/command/6925.md b/test/command/6925.md
new file mode 100644
index 000000000..458a0b91d
--- /dev/null
+++ b/test/command/6925.md
@@ -0,0 +1,34 @@
+```
+% pandoc -f latex -t markdown
+\documentclass{amsart}
+\newtheorem{thm}{Theorem}[section]
+\theoremstyle{definition}
+\newtheorem{thm2}[section]{Theorem}
+\begin{document}
+\begin{thm}
+a
+\begin{figure}
+\includegraphics[]{1.png}
+\end{figure}
+\end{thm}
+
+\begin{thm2}
+a
+\begin{figure}
+\includegraphics[]{1.png}
+\end{figure}
+\end{thm2}
+\end{document}
+^D
+::: {.thm}
+**Theorem 1**. *a*
+
+![image](1.png)
+:::
+
+::: {.thm2}
+**Theorem 1**. a
+
+![image](1.png)
+:::
+```
-- 
cgit v1.2.3


From c161893f442a3e001b64af1421e9f62376d71c92 Mon Sep 17 00:00:00 2001
From: Nils Carlson <nils@nilscarlson.se>
Date: Sat, 5 Dec 2020 18:00:04 +0000
Subject: OpenDocument writer: Allow references for internal links (#6774)

This commit adds two extensions to the OpenDocument writer,
`xrefs_name` and `xrefs_number`.

Links to headings, figures and tables inside the document are
substituted with cross-references that will use the name or caption
of the referenced item for `xrefs_name` or the number for `xrefs_number`.

For the `xrefs_number` to be useful heading numbers must be enabled
in the generated document and table and figure captions must be enabled using for example the `native_numbering` extension.

In order for numbers and reference text to be updated the generated
document must be refreshed.

Co-authored-by: Nils Carlson <nils.carlson@ludd.ltu.se>
---
 MANUAL.txt                              | 37 ++++++++++++++
 src/Text/Pandoc/Extensions.hs           |  4 ++
 src/Text/Pandoc/Writers/OpenDocument.hs | 91 ++++++++++++++++++++++++++-------
 test/command/6774.md                    | 63 +++++++++++++++++++++++
 4 files changed, 177 insertions(+), 18 deletions(-)
 create mode 100644 test/command/6774.md

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index fad885fed..4e1615ff2 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3027,6 +3027,43 @@ This extension can be enabled/disabled for the following formats:
 output formats
 :  `odt`, `opendocument`
 
+#### Extension: `xrefs_name` ####
+
+Links to headings, figures and tables inside the document are
+substituted with cross-references that will use the name or caption
+of the referenced item. The original link text is replaced once
+the generated document is refreshed. This extension can be combined
+with `xrefs_number` in which case numbers will appear before the
+name.
+
+Text in cross-references is only made consistent with the referenced
+item once the document has been refreshed.
+
+This extension can be enabled/disabled for the following formats:
+
+output formats
+:  `odt`, `opendocument`
+
+#### Extension: `xrefs_number` ####
+
+Links to headings, figures and tables inside the document are
+substituted with cross-references that will use the number
+of the referenced item. The original link text is discarded.
+This extension can be combined with `xrefs_name` in which case
+the name or caption numbers will appear after the number.
+
+For the `xrefs_number` to be useful heading numbers must be enabled
+in the generated document, also table and figure captions must be enabled
+using for example the `native_numbering` extension.
+
+Numbers in cross-references are only visible in the final document once
+it has been refreshed.
+
+This extension can be enabled/disabled for the following formats:
+
+output formats
+:  `odt`, `opendocument`
+
 #### Extension: `styles` #### {#ext-styles}
 
 When converting from docx, read all docx styles as divs (for
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 646f7abfb..a94e24e2c 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -149,6 +149,8 @@ data Extension =
     | Ext_tex_math_dollars    -- ^ TeX math between $..$ or $$..$$
     | Ext_tex_math_double_backslash  -- ^ TeX math btw \\(..\\) \\[..\\]
     | Ext_tex_math_single_backslash  -- ^ TeX math btw \(..\) \[..\]
+    | Ext_xrefs_name          -- ^ Use xrefs with names
+    | Ext_xrefs_number        -- ^ Use xrefs with numbers
     | Ext_yaml_metadata_block -- ^ YAML metadata block
     | Ext_gutenberg           -- ^ Use Project Gutenberg conventions for plain
     | Ext_attributes          -- ^ Generic attribute syntax
@@ -465,6 +467,8 @@ getAllExtensions f = universalExtensions <> getAll f
   getAll "opendocument"    = extensionsFromList
     [ Ext_empty_paragraphs
     , Ext_native_numbering
+    , Ext_xrefs_name
+    , Ext_xrefs_number
     ]
   getAll "odt"             = getAll "opendocument" <> autoIdExtensions
   getAll "muse"            = autoIdExtensions <>
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 071a5542f..cf42f2228 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where
 import Control.Arrow ((***), (>>>))
 import Control.Monad.State.Strict hiding (when)
 import Data.Char (chr)
+import Data.Foldable (find)
 import Data.List (sortOn, sortBy, foldl')
 import qualified Data.Map as Map
 import Data.Maybe (fromMaybe, isNothing)
@@ -35,6 +36,7 @@ import Text.DocLayout
 import Text.Pandoc.Shared (linesToPara, tshow, blocksToInlines)
 import Text.Pandoc.Templates (renderTemplate)
 import qualified Text.Pandoc.Translations as Term (Term(Figure, Table))
+import Text.Pandoc.Walk
 import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
 import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
@@ -54,6 +56,11 @@ plainToPara x         = x
 
 type OD m = StateT WriterState m
 
+data ReferenceType
+  = HeaderRef
+  | TableRef
+  | ImageRef
+
 data WriterState =
     WriterState { stNotes          :: [Doc Text]
                 , stTableStyles    :: [Doc Text]
@@ -69,6 +76,7 @@ data WriterState =
                 , stImageId        :: Int
                 , stTableCaptionId :: Int
                 , stImageCaptionId :: Int
+                , stIdentTypes     :: [(Text,ReferenceType)]
                 }
 
 defaultWriterState :: WriterState
@@ -86,6 +94,7 @@ defaultWriterState =
                 , stImageId        = 1
                 , stTableCaptionId = 1
                 , stImageCaptionId = 1
+                , stIdentTypes     = []
                 }
 
 when :: Bool -> Doc Text -> Doc Text
@@ -243,6 +252,12 @@ writeOpenDocument opts (Pandoc meta blocks) = do
                         meta
   ((body, metadata),s) <- flip runStateT
         defaultWriterState $ do
+           let collectInlineIdent (Image (ident,_,_) _ _) = [(ident,ImageRef)]
+               collectInlineIdent _                       = []
+           let collectBlockIdent (Header _ (ident,_,_) _)      = [(ident,HeaderRef)]
+               collectBlockIdent (Table (ident,_,_) _ _ _ _ _) = [(ident,TableRef)]
+               collectBlockIdent _                             = []
+           modify $ \s -> s{ stIdentTypes = query collectBlockIdent blocks ++ query collectInlineIdent blocks }
            m <- metaToContext opts
                   (blocksToOpenDocument opts)
                   (fmap chomp . inlinesToOpenDocument opts)
@@ -411,7 +426,7 @@ blockToOpenDocument o bs
                            inTags True "text:list" [ ("text:style-name", "L" <> tshow ln)]
                                       <$> orderedListToOpenDocument o pn b
       table :: PandocMonad m => Ann.Table -> OD m (Doc Text)
-      table (Ann.Table _ (Caption _ c) colspecs thead tbodies _) = do
+      table (Ann.Table (ident, _, _) (Caption _ c) colspecs thead tbodies _) = do
         tn <- length <$> gets stTableStyles
         pn <- length <$> gets stParaStyles
         let  genIds      = map chr [65..]
@@ -433,7 +448,7 @@ blockToOpenDocument o bs
                       then return empty
                       else inlinesToOpenDocument o (blocksToInlines c) >>=
                              if isEnabled Ext_native_numbering o
-                                then numberedTableCaption
+                                then numberedTableCaption ident
                                 else unNumberedCaption "TableCaption"
         th <- colHeadsToOpenDocument o (map fst paraHStyles) thead
         tr <- mapM (tableBodyToOpenDocument o (map fst paraStyles)) tbodies
@@ -442,36 +457,39 @@ blockToOpenDocument o bs
                           , ("table:style-name", name)
                           ] (vcat columns $$ th $$ vcat tr)
         return $ captionDoc $$ tableDoc
-      figure attr caption source title | null caption =
+      figure attr@(ident, _, _) caption source title | null caption =
         withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]]
                                   | otherwise    = do
         imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]]
         captionDoc <- inlinesToOpenDocument o caption >>=
                          if isEnabled Ext_native_numbering o
-                            then numberedFigureCaption
+                            then numberedFigureCaption ident
                             else unNumberedCaption "FigureCaption"
         return $ imageDoc $$ captionDoc
 
 
-numberedTableCaption :: PandocMonad m => Doc Text -> OD m (Doc Text)
-numberedTableCaption caption = do
+numberedTableCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text)
+numberedTableCaption ident caption = do
     id' <- gets stTableCaptionId
     modify (\st -> st{ stTableCaptionId = id' + 1 })
     capterm <- translateTerm Term.Table
-    return $ numberedCaption "TableCaption" capterm "Table" id' caption
+    return $ numberedCaption "TableCaption" capterm "Table" id' ident caption
 
-numberedFigureCaption :: PandocMonad m => Doc Text -> OD m (Doc Text)
-numberedFigureCaption caption = do
+numberedFigureCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text)
+numberedFigureCaption ident caption = do
     id' <- gets stImageCaptionId
     modify (\st -> st{ stImageCaptionId = id' + 1 })
     capterm <- translateTerm Term.Figure
-    return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption
+    return $ numberedCaption "FigureCaption" capterm  "Illustration" id' ident caption
 
-numberedCaption :: Text -> Text -> Text -> Int -> Doc Text -> Doc Text
-numberedCaption style term name num caption =
+numberedCaption :: Text -> Text -> Text -> Int -> Text -> Doc Text -> Doc Text
+numberedCaption style term name num ident caption =
     let t = text $ T.unpack term
         r = num - 1
-        s = inTags False "text:sequence" [ ("text:ref-name", "ref" <> name <> tshow r),
+        ident' = case ident of
+          "" -> "ref" <> name <> tshow r
+          _ -> ident
+        s = inTags False "text:sequence" [ ("text:ref-name", ident'),
                                            ("text:name", name),
                                            ("text:formula", "ooow:" <> name <> "+1"),
                                            ("style:num-format", "1") ] $ text $ show num
@@ -607,7 +625,9 @@ inlineToOpenDocument o ils
                        else do
                          report $ InlineNotRendered ils
                          return empty
-    Link _ l (s,t) ->  mkLink s t <$> inlinesToOpenDocument o l
+    Link _ l (s,t) -> do
+      identTypes <- gets stIdentTypes
+      mkLink o identTypes s t <$> inlinesToOpenDocument o l
     Image attr _ (s,t) -> mkImg attr s t
     Note        l  -> mkNote l
     where
@@ -619,10 +639,6 @@ inlineToOpenDocument o ils
       unhighlighted s = inlinedCode $ preformatted s
       preformatted s = handleSpaces $ escapeStringForXML s
       inlinedCode s = return $ inTags False "text:span" [("text:style-name", "Source_Text")] s
-      mkLink   s t = inTags False "text:a" [ ("xlink:type" , "simple")
-                                           , ("xlink:href" , s       )
-                                           , ("office:name", t       )
-                                           ] . inSpanTags "Definition"
       mkImg (_, _, kvs) s _ = do
                id' <- gets stImageId
                modify (\st -> st{ stImageId = id' + 1 })
@@ -659,6 +675,45 @@ inlineToOpenDocument o ils
         addNote nn
         return nn
 
+mkLink :: WriterOptions -> [(Text,ReferenceType)] -> Text -> Text -> Doc Text -> Doc Text
+mkLink o identTypes s t d =
+  let maybeIdentAndType = case T.uncons s of
+                            Just ('#', ident) -> find ((ident ==) . fst) identTypes
+                            _                 -> Nothing
+      d' = inSpanTags "Definition" d
+      ref refType format ident       = inTags False refType
+                                       [ ("text:reference-format", format ),
+                                         ("text:ref-name", ident) ]
+      inlineSpace                    = selfClosingTag "text:s" []
+      bookmarkRef                    = ref "text:bookmark-ref"
+      bookmarkRefNumber ident        = bookmarkRef "number" ident mempty
+      bookmarkRefName ident          = bookmarkRef "text" ident d
+      bookmarkRefNameNumber ident    = bookmarkRefNumber ident <> inlineSpace <> bookmarkRefName ident
+      bookmarkRef'
+        | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = bookmarkRefNameNumber
+        | isEnabled Ext_xrefs_name o                                 = bookmarkRefName
+        | otherwise                                                  = bookmarkRefNumber
+      sequenceRef                    = ref "text:sequence-ref"
+      sequenceRefNumber ident        = sequenceRef "value" ident mempty
+      sequenceRefName ident          = sequenceRef "caption" ident d
+      sequenceRefNameNumber ident    = sequenceRefNumber ident <> inlineSpace <> sequenceRefName ident
+      sequenceRef'
+        | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = sequenceRefNameNumber
+        | isEnabled Ext_xrefs_name o                                 = sequenceRefName
+        | otherwise                                                  = sequenceRefNumber
+      link = inTags False "text:a" [ ("xlink:type" , "simple")
+                                          , ("xlink:href" , s       )
+                                          , ("office:name", t       )
+                                          ] d'
+      linkOrReference = case maybeIdentAndType of
+                          Just (ident, HeaderRef) -> bookmarkRef' ident
+                          Just (ident, TableRef)  -> sequenceRef' ident
+                          Just (ident, ImageRef)  -> sequenceRef' ident
+                          _                       -> link
+      in if isEnabled Ext_xrefs_name o || isEnabled Ext_xrefs_number o
+            then linkOrReference
+            else link
+
 bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc Text]))
 bulletListStyle l = do
   let doStyles  i = inTags True "text:list-level-style-bullet"
diff --git a/test/command/6774.md b/test/command/6774.md
new file mode 100644
index 000000000..66549c0f2
--- /dev/null
+++ b/test/command/6774.md
@@ -0,0 +1,63 @@
+```
+% pandoc -f native -t opendocument --quiet
+[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"]
+,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")]]
+^D
+<text:h text:style-name="Heading_20_1" text:outline-level="1"><text:bookmark-start text:name="chapter1" />The
+Chapter<text:bookmark-end text:name="chapter1" /></text:h>
+<text:p text:style-name="First_20_paragraph">Chapter 1 references
+<text:a xlink:type="simple" xlink:href="#chapter1" office:name=""><text:span text:style-name="Definition">The
+Chapter</text:span></text:a></text:p>
+```
+```
+% pandoc -f native -t opendocument+xrefs_name --quiet
+[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"]
+,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")]
+,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")]
+,Para [Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]]
+^D
+<text:h text:style-name="Heading_20_1" text:outline-level="1"><text:bookmark-start text:name="chapter1" />The
+Chapter<text:bookmark-end text:name="chapter1" /></text:h>
+<text:p text:style-name="First_20_paragraph">Chapter 1 references
+<text:bookmark-ref text:reference-format="text" text:ref-name="chapter1">The
+Chapter</text:bookmark-ref></text:p>
+<text:p text:style-name="FigureWithCaption"><draw:frame draw:name="img1"><draw:image xlink:href="lalune.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame></text:p>
+<text:p text:style-name="FigureCaption">lalune</text:p>
+<text:p text:style-name="Text_20_body">Image 1 references
+<text:sequence-ref text:reference-format="caption" text:ref-name="lalune">La
+Lune</text:sequence-ref></text:p>
+```
+```
+% pandoc -f native -t opendocument+xrefs_number --quiet
+[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"]
+,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")]
+,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")]
+,Para [Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]]
+^D
+<text:h text:style-name="Heading_20_1" text:outline-level="1"><text:bookmark-start text:name="chapter1" />The
+Chapter<text:bookmark-end text:name="chapter1" /></text:h>
+<text:p text:style-name="First_20_paragraph">Chapter 1 references
+<text:bookmark-ref text:reference-format="number" text:ref-name="chapter1"></text:bookmark-ref></text:p>
+<text:p text:style-name="FigureWithCaption"><draw:frame draw:name="img1"><draw:image xlink:href="lalune.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame></text:p>
+<text:p text:style-name="FigureCaption">lalune</text:p>
+<text:p text:style-name="Text_20_body">Image 1 references
+<text:sequence-ref text:reference-format="value" text:ref-name="lalune"></text:sequence-ref></text:p>
+```
+```
+% pandoc -f native -t opendocument+xrefs_number+xrefs_name --quiet
+[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"]
+,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")]
+,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")]
+,Para [Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]]
+^D
+<text:h text:style-name="Heading_20_1" text:outline-level="1"><text:bookmark-start text:name="chapter1" />The
+Chapter<text:bookmark-end text:name="chapter1" /></text:h>
+<text:p text:style-name="First_20_paragraph">Chapter 1 references
+<text:bookmark-ref text:reference-format="number" text:ref-name="chapter1"></text:bookmark-ref><text:s /><text:bookmark-ref text:reference-format="text" text:ref-name="chapter1">The
+Chapter</text:bookmark-ref></text:p>
+<text:p text:style-name="FigureWithCaption"><draw:frame draw:name="img1"><draw:image xlink:href="lalune.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame></text:p>
+<text:p text:style-name="FigureCaption">lalune</text:p>
+<text:p text:style-name="Text_20_body">Image 1 references
+<text:sequence-ref text:reference-format="value" text:ref-name="lalune"></text:sequence-ref><text:s /><text:sequence-ref text:reference-format="caption" text:ref-name="lalune">La
+Lune</text:sequence-ref></text:p>
+```
-- 
cgit v1.2.3


From acf932825bfe40d9a18046c9d304f4f14363a88a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 5 Dec 2020 22:05:37 +0100
Subject: Org reader: preserve targets of spurious links

Links with (internal) targets that the reader doesn't know about are
converted into emphasized text. Information on the link target is now
preserved by wrapping the text in a Span of class `spurious-link`, with
an attribute `target` set to the link's original target. This allows to
recover and fix broken or unknown links with filters.

See: #6916
---
 src/Text/Pandoc/Readers/Org/Inlines.hs | 9 ++++-----
 test/Tests/Readers/Org/Meta.hs         | 6 ++++--
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index b234bee58..0330cf55f 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -477,17 +477,17 @@ linkToInlinesF linkStr =
 
 internalLink :: Text -> Inlines -> F Inlines
 internalLink link title = do
-  anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
-  if anchorB
+  ids <- asksF orgStateAnchorIds
+  if link `elem` ids
     then return $ B.link ("#" <> link) "" title
-    else return $ B.emph title
+    else let attr' = ("", ["spurious-link"] , [("target", link)])
+         in return $ B.spanWith attr' (B.emph title)
 
 -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
 -- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
 -- @org-target-regexp@, which is fairly liberal.  Since no link is created if
 -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
 -- an anchor.
-
 anchor :: PandocMonad m => OrgParser m (F Inlines)
 anchor =  try $ do
   anchorId <- parseAnchor
@@ -501,7 +501,6 @@ anchor =  try $ do
 
 -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'.  This mirrors
 -- the org function @org-export-solidify-link-text@.
-
 solidify :: Text -> Text
 solidify = T.map replaceSpecialChar
  where replaceSpecialChar c
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index 041016f64..bc167f2a5 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -270,7 +270,8 @@ tests =
 
   , "Search links are read as emph" =:
       "[[Wally][Where's Wally?]]" =?>
-      para (emph $ "Where's" <> space <> "Wally?")
+      para (spanWith ("", ["spurious-link"], [("target", "Wally")])
+                     (emph $ "Where's" <> space <> "Wally?"))
 
   , "Link to nonexistent anchor" =:
       T.unlines [ "<<link-here>> Target."
@@ -278,5 +279,6 @@ tests =
                 , "[[link$here][See here!]]"
                 ] =?>
       (para (spanWith ("link-here", [], []) mempty <> "Target.") <>
-       para (emph ("See" <> space <> "here!")))
+       para (spanWith ("", ["spurious-link"], [("target", "link$here")])
+                      (emph ("See" <> space <> "here!"))))
   ]
-- 
cgit v1.2.3


From dc6856530c2cb6ca58ed82721ab895b86cfe0c1c Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Fri, 4 Dec 2020 09:16:56 +0100
Subject: Docbook writer: handle admonitions

Similarly to https://github.com/jgm/pandoc/commit/d6fdfe6f2bba2a8ed25d6c9f11861774001f7a91,
we should handle admonitions.
---
 src/Text/Pandoc/Writers/Docbook.hs | 42 +++++++++++++++++++++---------
 test/Tests/Writers/Docbook.hs      | 52 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 3f4c67f10..da111cbc5 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -188,18 +188,36 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do
   title' <- inlinesToDocbook opts ils
   contents <- blocksToDocbook opts bs
   return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents
-blockToDocbook opts (Div (ident,_,_) [Para lst]) =
-  let attribs = [("id", ident) | not (T.null ident)] in
-  if hasLineBreaks lst
-     then flush . nowrap . inTags False "literallayout" attribs
-                         <$> inlinesToDocbook opts lst
-     else inTags True "para" attribs <$> inlinesToDocbook opts lst
-blockToDocbook opts (Div (ident,_,_) bs) = do
-  contents <- blocksToDocbook opts (map plainToPara bs)
-  return $
-    (if T.null ident
-        then mempty
-        else selfClosingTag "anchor" [("id", ident)]) $$ contents
+blockToDocbook opts (Div (ident,classes,_) bs) =
+  let identAttribs = [("id", ident) | not (T.null ident)]
+      admonitions = ["attention","caution","danger","error","hint",
+                     "important","note","tip","warning"]
+  in case classes of
+    (l:_) | l `elem` admonitions -> do
+        let (mTitleBs, bodyBs) =
+                case bs of
+                  -- Matches AST produced by the Docbook reader.
+                  (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocbook opts ts), rest)
+                  _ -> (Nothing, bs)
+        admonitionTitle <- case mTitleBs of
+                              Nothing -> return mempty
+                              -- id will be attached to the admonition so let’s pass empty identAttrs.
+                              Just titleBs -> inTags False "title" [] <$> titleBs
+        admonitionBody <- handleDivBody [] bodyBs
+        return (inTags True l identAttribs (admonitionTitle $$ admonitionBody))
+    _ -> handleDivBody identAttribs bs
+  where
+    handleDivBody identAttribs [Para lst] =
+      if hasLineBreaks lst
+         then flush . nowrap . inTags False "literallayout" identAttribs
+                             <$> inlinesToDocbook opts lst
+         else inTags True "para" identAttribs <$> inlinesToDocbook opts lst
+    handleDivBody identAttribs bodyBs = do
+      contents <- blocksToDocbook opts (map plainToPara bodyBs)
+      return $
+        (if null identAttribs
+            then mempty
+            else selfClosingTag "anchor" identAttribs) $$ contents
 blockToDocbook _ h@Header{} = do
   -- should be handled by Div section above, except inside lists/blockquotes
   report $ BlockNotRendered h
diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs
index f6a047b0b..1d53dcfe7 100644
--- a/test/Tests/Writers/Docbook.hs
+++ b/test/Tests/Writers/Docbook.hs
@@ -70,6 +70,58 @@ tests = [ testGroup "line blocks"
                                       , "</para>" ]
                                     )
           ]
+        , testGroup "divs"
+          [ "admonition" =: divWith ("foo", ["warning"], []) (para "This is a test")
+                              =?> unlines
+                                    [ "<warning id=\"foo\">"
+                                    , "  <para>"
+                                    , "    This is a test"
+                                    , "  </para>"
+                                    , "</warning>"
+                                    ]
+          , "admonition-with-title" =:
+                            divWith ("foo", ["attention"], []) (
+                              divWith ("foo", ["title"], [])
+                                (plain (text "This is title")) <>
+                              para "This is a test"
+                            )
+                              =?> unlines
+                                    [ "<attention id=\"foo\">"
+                                    , "  <title>This is title</title>"
+                                    , "  <para>"
+                                    , "    This is a test"
+                                    , "  </para>"
+                                    , "</attention>"
+                                    ]
+          , "single-child" =:
+                            divWith ("foo", [], []) (para "This is a test")
+                              =?> unlines
+                                    [ "<para id=\"foo\">"
+                                    , "  This is a test"
+                                    , "</para>"
+                                    ]
+          , "single-literal-child" =:
+                            divWith ("foo", [], []) lineblock
+                              =?> unlines
+                                    [ "<literallayout id=\"foo\">some text"
+                                    , "and more lines"
+                                    , "and again</literallayout>"
+                                    ]
+          , "multiple-children" =:
+                            divWith ("foo", [], []) (
+                              para "This is a test" <>
+                              para "This is an another test"
+                            )
+                              =?> unlines
+                                    [ "<anchor id=\"foo\" />"
+                                    , "<para>"
+                                    , "  This is a test"
+                                    , "</para>"
+                                    , "<para>"
+                                    , "  This is an another test"
+                                    , "</para>"
+                                    ]
+          ]
         , testGroup "compact lists"
           [ testGroup "bullet"
             [ "compact"    =: bulletList [plain "a", plain "b", plain "c"]
-- 
cgit v1.2.3


From 16ef87745702f69d5aa948fbe6d2101577dee8f4 Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Fri, 4 Dec 2020 09:28:32 +0100
Subject: Docbook writer: Use correct id attribute consistently
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DocBook5 should always use xml:id instead of id so let’s use it everywhere.
---
 src/Text/Pandoc/Writers/Docbook.hs | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index da111cbc5..398920839 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -53,6 +53,13 @@ getStartLvl opts =
        TopLevelSection -> 1
        TopLevelDefault -> 1
 
+-- | Get correct name for the id attribute based on DocBook version.
+-- DocBook 4 used custom id attribute but DocBook 5 adopted the xml:id specification.
+-- https://www.w3.org/TR/xml-id/
+idName :: DocBookVersion -> Text
+idName DocBook5 = "xml:id"
+idName DocBook4 = "id"
+
 -- | Convert list of authors to a docbook <author> section
 authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines
 authorToDocbook opts name' = do
@@ -174,10 +181,7 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do
                                               then "section"
                                               else "sect" <> tshow n
                  _                    -> "simplesect"
-      idName = if version == DocBook5
-                 then "xml:id"
-                 else "id"
-      idAttr = [(idName, writerIdentifierPrefix opts <> id') | not (T.null id')]
+      idAttr = [(idName version, writerIdentifierPrefix opts <> id') | not (T.null id')]
       -- We want to add namespaces to the root (top-level) element.
       nsAttr = if version == DocBook5 && lvl == getStartLvl opts && isNothing (writerTemplate opts)
       -- Though, DocBook 4 does not support namespaces and
@@ -188,11 +192,12 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do
   title' <- inlinesToDocbook opts ils
   contents <- blocksToDocbook opts bs
   return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents
-blockToDocbook opts (Div (ident,classes,_) bs) =
-  let identAttribs = [("id", ident) | not (T.null ident)]
+blockToDocbook opts (Div (ident,classes,_) bs) = do
+  version <- ask
+  let identAttribs = [(idName version, ident) | not (T.null ident)]
       admonitions = ["attention","caution","danger","error","hint",
                      "important","note","tip","warning"]
-  in case classes of
+  case classes of
     (l:_) | l `elem` admonitions -> do
         let (mTitleBs, bodyBs) =
                 case bs of
@@ -371,11 +376,12 @@ inlineToDocbook opts (Quoted _ lst) =
   inTagsSimple "quote" <$> inlinesToDocbook opts lst
 inlineToDocbook opts (Cite _ lst) =
   inlinesToDocbook opts lst
-inlineToDocbook opts (Span (ident,_,_) ils) =
+inlineToDocbook opts (Span (ident,_,_) ils) = do
+  version <- ask
   ((if T.null ident
        then mempty
-       else selfClosingTag "anchor" [("id", ident)]) <>) <$>
-  inlinesToDocbook opts ils
+       else selfClosingTag "anchor" [(idName version, ident)]) <>) <$>
+    inlinesToDocbook opts ils
 inlineToDocbook _ (Code _ str) =
   return $ inTagsSimple "literal" $ literal (escapeStringForXML str)
 inlineToDocbook opts (Math t str)
-- 
cgit v1.2.3


From 70c7c5703afcbd1cbf2a80c2be515e038abcd419 Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Mon, 7 Dec 2020 07:28:39 +0100
Subject: Docbook writer: Handle admonition titles from Markdown reader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Docbook reader produces a `Div` with `title` class for `<title>` element
within an “admonition” element. Markdown writer then turns this
into a fenced div with `title` class attribute. Since fenced divs
are block elements, their content is recognized as a paragraph
by the Markdown reader. This is an issue for Docbook writer because
it would produce an invalid DocBook document from such AST –
the `<title>` element can only contain “inline” elements.

Let’s handle this invalid special case separately by unwrapping
the paragraph before creating the `<title>` element.
---
 src/Text/Pandoc/Writers/Docbook.hs |  2 ++
 test/Tests/Writers/Docbook.hs      | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 398920839..affa0de04 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -201,6 +201,8 @@ blockToDocbook opts (Div (ident,classes,_) bs) = do
     (l:_) | l `elem` admonitions -> do
         let (mTitleBs, bodyBs) =
                 case bs of
+                  -- Matches AST produced by the DocBook reader → Markdown writer → Markdown reader chain.
+                  (Div (_,["title"],_) [Para ts] : rest) -> (Just (inlinesToDocbook opts ts), rest)
                   -- Matches AST produced by the Docbook reader.
                   (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocbook opts ts), rest)
                   _ -> (Nothing, bs)
diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs
index 1d53dcfe7..621c1280b 100644
--- a/test/Tests/Writers/Docbook.hs
+++ b/test/Tests/Writers/Docbook.hs
@@ -93,6 +93,20 @@ tests = [ testGroup "line blocks"
                                     , "  </para>"
                                     , "</attention>"
                                     ]
+          , "admonition-with-title-in-para" =:
+                            divWith ("foo", ["attention"], []) (
+                              divWith ("foo", ["title"], [])
+                                (para "This is title") <>
+                              para "This is a test"
+                            )
+                              =?> unlines
+                                    [ "<attention id=\"foo\">"
+                                    , "  <title>This is title</title>"
+                                    , "  <para>"
+                                    , "    This is a test"
+                                    , "  </para>"
+                                    , "</attention>"
+                                    ]
           , "single-child" =:
                             divWith ("foo", [], []) (para "This is a test")
                               =?> unlines
-- 
cgit v1.2.3


From 501ea7f0c4735acdf1457da44fe04d811ac776d7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 7 Dec 2020 12:15:14 -0800
Subject: Dokuwiki reader:  handle unknown interwiki links better.

DokuWiki lets the user define his own Interwiki links.
Previously pandoc reacted to these by emitting a
google search link, which is not helpful. Instead,
we now just emit the full URL including the
wikilink prefix, e.g. `faquk>FAQ-mathml`.
This at least gives users the ability to
modify the links using filters.

Closes #6932.
---
 src/Text/Pandoc/Readers/DokuWiki.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index 336be09e5..dedc1f03f 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -317,7 +317,7 @@ interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page
 interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page
 interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page
 interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page
-interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky"
+interwikiToUrl unknown page = unknown <> ">" <> page
 
 linkText :: PandocMonad m => DWParser m B.Inlines
 linkText = parseLink fromRaw "[[" "]]"
-- 
cgit v1.2.3


From f2749ba6cd0ec2473332394bfbb6f479b667f35c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 7 Dec 2020 12:56:03 -0800
Subject: Parsing: in nonspaceChar use satisfy instead of oneOf.

For efficiency.
---
 src/Text/Pandoc/Parsing.hs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4bae8942b..1d9e182c5 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -443,7 +443,13 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 
 -- | Parses a nonspace, nonnewline character.
 nonspaceChar :: Stream s m Char => ParserT s st m Char
-nonspaceChar = noneOf ['\t', '\n', ' ', '\r']
+nonspaceChar = satisfy (not . isSpaceChar)
+ where
+  isSpaceChar ' '  = True
+  isSpaceChar '\t' = True
+  isSpaceChar '\n' = True
+  isSpaceChar '\r' = True
+  isSpaceChar _    = False
 
 -- | Skips zero or more spaces or tabs.
 skipSpaces :: Stream s m Char => ParserT s st m ()
-- 
cgit v1.2.3


From 2f9b684b3a793896bc28a79a07722415cfdc075e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 7 Dec 2020 13:01:30 -0800
Subject: Bibtex parser: avoid noneOf.

---
 src/Text/Pandoc/Citeproc/BibTeX.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 552339df0..ed723a11c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -601,7 +601,7 @@ inBraces :: BibParser Text
 inBraces = do
   char '{'
   res <- manyTill
-         (  (T.pack <$> many1 (noneOf "{}\\"))
+         (  take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\')
         <|> (char '\\' >> (  (char '{' >> return "\\{")
                          <|> (char '}' >> return "\\}")
                          <|> return "\\"))
@@ -616,7 +616,7 @@ inQuotes :: BibParser Text
 inQuotes = do
   char '"'
   T.concat <$> manyTill
-             (  (T.pack <$> many1 (noneOf "\"\\{"))
+             ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\')
                <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
                <|> braced <$> inBraces
             ) (char '"')
-- 
cgit v1.2.3


From ce1791913da713ec35f514006bb532cb9c9c8d22 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 7 Dec 2020 13:24:19 -0800
Subject: Small efficiency improvement in uri parser

---
 src/Text/Pandoc/Parsing.hs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 1d9e182c5..1e8518c90 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -654,7 +654,20 @@ uri = try $ do
   let uri' = scheme <> ":" <> fromEntities str'
   return (uri', escapeURI uri')
   where
-    wordChar = alphaNum <|> oneOf "#$%+/@\\_-&="
+    isWordChar '#' = True
+    isWordChar '$' = True
+    isWordChar '%' = True
+    isWordChar '+' = True
+    isWordChar '/' = True
+    isWordChar '@' = True
+    isWordChar '\\' = True
+    isWordChar '_' = True
+    isWordChar '-' = True
+    isWordChar '&' = True
+    isWordChar '=' = True
+    isWordChar c   = isAlphaNum c
+
+    wordChar = satisfy isWordChar
     percentEscaped = try $ (:) <$> char '%' <*> many1 hexDigit
     entity = try $ pure <$> characterReference
     punct = try $ many1 (char ',') <|> fmap pure (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>'))
-- 
cgit v1.2.3


From 0fa1023b9ef0f39be013befa9ba3d61b81d95928 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 7 Dec 2020 18:57:09 -0800
Subject: Parsing: More minor performance improvements.

---
 src/Text/Pandoc/Parsing.hs | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 1e8518c90..4ea0d788c 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -190,7 +190,7 @@ where
 
 import Control.Monad.Identity
 import Control.Monad.Reader
-import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper,
+import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
                   isPunctuation, isSpace, ord, toLower, toUpper)
 import Data.Default
 import Data.Functor (($>))
@@ -444,12 +444,13 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 -- | Parses a nonspace, nonnewline character.
 nonspaceChar :: Stream s m Char => ParserT s st m Char
 nonspaceChar = satisfy (not . isSpaceChar)
- where
-  isSpaceChar ' '  = True
-  isSpaceChar '\t' = True
-  isSpaceChar '\n' = True
-  isSpaceChar '\r' = True
-  isSpaceChar _    = False
+
+isSpaceChar :: Char -> Bool
+isSpaceChar ' '  = True
+isSpaceChar '\t' = True
+isSpaceChar '\n' = True
+isSpaceChar '\r' = True
+isSpaceChar _    = False
 
 -- | Skips zero or more spaces or tabs.
 skipSpaces :: Stream s m Char => ParserT s st m ()
@@ -682,7 +683,9 @@ mathInlineWith :: Stream s m Char  => Text -> Text -> ParserT s st m Text
 mathInlineWith op cl = try $ do
   textStr op
   when (op == "$") $ notFollowedBy space
-  words' <- many1Till (countChar 1 (noneOf " \t\n\\")
+  words' <- many1Till (
+                       (T.singleton <$>
+                          satisfy (\c -> not (isSpaceChar c || c == '\\')))
                    <|> (char '\\' >>
                            -- This next clause is needed because \text{..} can
                            -- contain $, \(\), etc.
@@ -840,13 +843,13 @@ defaultNum = do
 -- | Parses a lowercase letter and returns (LowerAlpha, number).
 lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
 lowerAlpha = do
-  ch <- oneOf ['a'..'z']
+  ch <- satisfy isAsciiLower
   return (LowerAlpha, ord ch - ord 'a' + 1)
 
 -- | Parses an uppercase letter and returns (UpperAlpha, number).
 upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
 upperAlpha = do
-  ch <- oneOf ['A'..'Z']
+  ch <- satisfy isAsciiUpper
   return (UpperAlpha, ord ch - ord 'A' + 1)
 
 -- | Parses a roman numeral i or I
-- 
cgit v1.2.3


From 5990cbb150f3581ab9b2f3d3a726c73dcadfd793 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 7 Dec 2020 21:34:23 -0800
Subject: Parsing: Small code improvements.

---
 src/Text/Pandoc/Parsing.hs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4ea0d788c..979344f63 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -641,7 +641,7 @@ uri = try $ do
   scheme <- uriScheme
   char ':'
   -- Avoid parsing e.g. "**Notes:**" as a raw URI:
-  notFollowedBy (oneOf "*_]")
+  notFollowedBy $ satisfy (\c -> c == '*' || c == '_' || c == ']')
   -- We allow sentence punctuation except at the end, since
   -- we don't want the trailing '.' in 'http://google.com.' We want to allow
   -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
@@ -693,7 +693,7 @@ mathInlineWith op cl = try $ do
                                  (("\\text" <>) <$> inBalancedBraces 0 ""))
                             <|>  (\c -> T.pack ['\\',c]) <$> anyChar))
                    <|> do (blankline <* notFollowedBy' blankline) <|>
-                             (oneOf " \t" <* skipMany (oneOf " \t"))
+                             (spaceChar <* skipMany spaceChar)
                           notFollowedBy (char '$')
                           return " "
                     ) (try $ textStr cl)
@@ -723,7 +723,8 @@ mathInlineWith op cl = try $ do
 mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
 mathDisplayWith op cl = try $ fmap T.pack $ do
   textStr op
-  many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl)
+  many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
+            (try $ textStr cl)
 
 mathDisplay :: (HasReaderOptions st, Stream s m Char)
             => ParserT s st m Text
-- 
cgit v1.2.3


From 8c9010864cd818031d7eff161a57459709751517 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 9 Dec 2020 21:05:40 -0800
Subject: Commonmark reader: refactor specFor, set input name to "".

---
 src/Text/Pandoc/Readers/CommonMark.hs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index c1773eaab..d32a38342 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CommonMark
@@ -27,15 +28,20 @@ import Text.Pandoc.Options
 import Text.Pandoc.Error
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
+import Data.Typeable
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
 readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readCommonMark opts s = do
-  let res = runIdentity $
-              commonmarkWith (foldr ($) defaultSyntaxSpec exts) "input" s
+  let res = runIdentity $ commonmarkWith (specFor opts) "" s
   case res of
     Left err -> throwError $ PandocParsecError s err
     Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+
+specFor :: (Monad m, Typeable m, Typeable a,
+            Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))
+        => ReaderOptions -> SyntaxSpec m (Cm a Inlines) (Cm a Blocks)
+specFor opts = foldr ($) defaultSyntaxSpec exts
  where
   exts = [ (hardLineBreaksSpec <>) | isEnabled Ext_hard_line_breaks opts ] ++
          [ (smartPunctuationSpec <>) | isEnabled Ext_smart opts ] ++
-- 
cgit v1.2.3


From a3eb87b2eab9def3e28364b43300043f5e13268d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 9 Dec 2020 21:14:11 -0800
Subject: Add sourcepos extension for commonmarke

* Add `Ext_sourcepos` constructor for `Extension`.
* Add `sourcepos` extension (only for commonmark).
* Bump to 2.11.3

With the `sourcepos` extension set set, `data-pos` attributes are added
to the AST by the commonmark reader. No other readers are affected.  The
`data-pos` attributes are put on elements that accept attributes; for
other elements, an enlosing Div or Span is added to hold the attributes.

Closes #4565.
---
 MANUAL.txt                            |  7 +++++++
 pandoc.cabal                          |  2 +-
 src/Text/Pandoc/Extensions.hs         |  2 ++
 src/Text/Pandoc/Options.hs            |  1 +
 src/Text/Pandoc/Readers/CommonMark.hs | 14 +++++++++-----
 5 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 0a1300947..461ebf54d 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5127,6 +5127,13 @@ for regular emphasis, add extra blank space around headings.
 
   [Project Gutenberg]: https://www.gutenberg.org
 
+#### Extension: `sourcepos` ####
+
+Include source position attributes when parsing `commonmark`.
+For elements that accept attributes, a `data-pos` attribute
+is added; other elements are placed in a surrounding
+Div or Span elemnet with a `data-pos` attribute.
+
 ## Markdown variants
 
 In addition to pandoc's extended Markdown, the following Markdown
diff --git a/pandoc.cabal b/pandoc.cabal
index c02dfeb38..5829856da 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -1,6 +1,6 @@
 cabal-version:   2.2
 name:            pandoc
-version:         2.11.2
+version:         2.11.3
 build-type:      Simple
 license:         GPL-2.0-or-later
 license-file:    COPYING.md
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index a94e24e2c..9865f897b 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -154,6 +154,7 @@ data Extension =
     | Ext_yaml_metadata_block -- ^ YAML metadata block
     | Ext_gutenberg           -- ^ Use Project Gutenberg conventions for plain
     | Ext_attributes          -- ^ Generic attribute syntax
+    | Ext_sourcepos           -- ^ Include source position attributes
     deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)
 
 -- | Extensions to be used with pandoc-flavored markdown.
@@ -503,6 +504,7 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_implicit_header_references
     , Ext_attributes
     , Ext_fenced_code_attributes
+    , Ext_sourcepos
     ]
   getAll "commonmark_x"    = getAll "commonmark"
   getAll "org"             = autoIdExtensions <>
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index c7f1a56fa..ecd65a54d 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -65,6 +65,7 @@ data ReaderOptions = ReaderOptions{
        , readerDefaultImageExtension :: Text -- ^ Default extension for images
        , readerTrackChanges          :: TrackChanges -- ^ Track changes setting for docx
        , readerStripComments         :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML
+                                             -- (only implemented in commonmark)
 } deriving (Show, Read, Data, Typeable, Generic)
 
 instance HasSyntaxExtensions ReaderOptions where
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index d32a38342..9eef498e1 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -32,11 +32,15 @@ import Data.Typeable
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
 readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMark opts s = do
-  let res = runIdentity $ commonmarkWith (specFor opts) "" s
-  case res of
-    Left err -> throwError $ PandocParsecError s err
-    Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
+readCommonMark opts s
+  | isEnabled Ext_sourcepos opts =
+    case runIdentity (commonmarkWith (specFor opts) "" s) of
+      Left err -> throwError $ PandocParsecError s err
+      Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
+  | otherwise =
+    case runIdentity (commonmarkWith (specFor opts) "" s) of
+      Left err -> throwError $ PandocParsecError s err
+      Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
 
 specFor :: (Monad m, Typeable m, Typeable a,
             Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))
-- 
cgit v1.2.3


From 0a502e5ff52b251bbf3da69fd1f9a88d5e0fe92c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 10 Dec 2020 15:44:10 -0800
Subject: HTML reader:  retain attribute prefixes and avoid duplicates.

Previously we stripped attribute prefixes, reading
`xml:lang` as `lang` for example. This resulted in
two duplicate `lang` attributes when `xml:lang` and
`lang` were both used.  This commit causes the prefixes
to be retained, and also avoids invald duplicate
attributes.

Closes #6938.
---
 src/Text/Pandoc/Readers/HTML.hs         | 28 +++++++++++-----------------
 src/Text/Pandoc/Readers/HTML/Parsing.hs | 20 +++++++++++++-------
 test/command/5986.md                    |  2 +-
 test/epub/wasteland.native              |  8 ++++----
 4 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index eb78979a3..f870a241d 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -74,7 +74,7 @@ readHtml :: PandocMonad m
          -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
          -> m Pandoc
 readHtml opts inp = do
-  let tags = stripPrefixes . canonicalizeTags $
+  let tags = stripPrefixes $ canonicalizeTags $
              parseTagsOptions parseOptions{ optTagPosition = True }
              (crFilter inp)
       parseDoc = do
@@ -95,6 +95,15 @@ readHtml opts inp = do
     Right doc -> return doc
     Left  err -> throwError $ PandocParseError $ T.pack $ getError err
 
+-- Strip namespace prefixes on tags (not attributes)
+stripPrefixes :: [Tag Text] -> [Tag Text]
+stripPrefixes = map stripPrefix
+
+stripPrefix :: Tag Text -> Tag Text
+stripPrefix (TagOpen s as) = TagOpen (T.takeWhileEnd (/=':') s) as
+stripPrefix (TagClose s)   = TagClose (T.takeWhileEnd (/=':') s)
+stripPrefix x = x
+
 replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
 replaceNotes bs = do
   st <- getState
@@ -114,7 +123,7 @@ setInPlain = local (\s -> s {inPlain = True})
 pHtml :: PandocMonad m => TagParser m Blocks
 pHtml = try $ do
   (TagOpen "html" attr) <- lookAhead pAny
-  for_ (lookup "lang" attr) $
+  for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
     updateState . B.setMeta "lang" . B.text
   pInTags "html" block
 
@@ -1024,21 +1033,6 @@ htmlTag f = try $ do
          handleTag tagname
        _ -> mzero
 
--- Strip namespace prefixes
-stripPrefixes :: [Tag Text] -> [Tag Text]
-stripPrefixes = map stripPrefix
-
-stripPrefix :: Tag Text -> Tag Text
-stripPrefix (TagOpen s as) =
-    TagOpen (stripPrefix' s) (map (first stripPrefix') as)
-stripPrefix (TagClose s) = TagClose (stripPrefix' s)
-stripPrefix x = x
-
-stripPrefix' :: Text -> Text
-stripPrefix' s =
-  if T.null t then s else T.drop 1 t
-  where (_, t) = T.span (/= ':') s
-
 -- Utilities
 
 -- | Adjusts a url according to the document's base URL.
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index 2d58319da..e28ebe77b 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -193,14 +193,20 @@ t1 `closes` t2 |
 _ `closes` _ = False
 
 toStringAttr :: [(Text, Text)] -> [(Text, Text)]
-toStringAttr = map go
+toStringAttr = foldr go []
   where
-   go (x,y) =
-     case T.stripPrefix "data-" x of
-       Just x' | x' `Set.notMember` (html5Attributes <>
-                                     html4Attributes <> rdfaAttributes)
-         -> (x',y)
-       _ -> (x,y)
+   go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)]
+   -- treat xml:lang as lang
+   go ("xml:lang",y) ats = go ("lang",y) ats
+   -- prevent duplicate attributes
+   go (x,y) ats
+     | any (\(x',_) -> x == x') ats = ats
+     | otherwise      =
+        case T.stripPrefix "data-" x of
+          Just x' | x' `Set.notMember` (html5Attributes <>
+                                        html4Attributes <> rdfaAttributes)
+            -> go (x',y) ats
+          _ -> (x,y):ats
 
 -- Unlike fromAttrib from tagsoup, this distinguishes
 -- between a missing attribute and an attribute with empty content.
diff --git a/test/command/5986.md b/test/command/5986.md
index ea0ca70c1..ed8dd30c9 100644
--- a/test/command/5986.md
+++ b/test/command/5986.md
@@ -4,7 +4,7 @@
 ^D
 <p><span id="title_page.xhtml"></span></p>
 <p><span id="nav.xhtml"></span></p>
-<nav type="landmarks" id="landmarks" hidden="hidden">
+<nav epub:type="landmarks" id="landmarks" hidden="hidden">
 <ol>
 <li><a href="text/title_page.xhtml">Title Page</a></li>
 <li><a href="#nav.xhtml#toc">Table of Contents</a></li>
diff --git a/test/epub/wasteland.native b/test/epub/wasteland.native
index 0ed23eba2..ff59182a6 100644
--- a/test/epub/wasteland.native
+++ b/test/epub/wasteland.native
@@ -1,8 +1,8 @@
 [Para [Image ("",[],[]) [] ("wasteland-cover.jpg","")]
 ,Para [Span ("wasteland-content.xhtml",[],[]) []]
-,Div ("wasteland-content.xhtml#frontmatter",["section"],[("type","frontmatter")])
+,Div ("wasteland-content.xhtml#frontmatter",["section","frontmatter"],[])
  []
-,Div ("wasteland-content.xhtml#bodymatter",["section"],[("type","bodymatter")])
+,Div ("wasteland-content.xhtml#bodymatter",["section","bodymatter"],[])
  [Div ("wasteland-content.xhtml#ch1",["section"],[])
   [Header 2 ("",[],[]) [Str "I.",Space,Str "THE",Space,Str "BURIAL",Space,Str "OF",Space,Str "THE",Space,Str "DEAD"]
   ,Div ("",["linegroup"],[])
@@ -922,8 +922,8 @@
     [Plain [Str "Datta.",Space,Str "Dayadhvam.",Space,Str "Damyata."]]
    ,Div ("wasteland-content.xhtml#ln434",["linegroup","indent"],[])
     [Plain [Span ("",[],[("lang","sa")]) [Str "Shantih",Space,Str "shantih",Space,Str "shantih",Note [Para [Link ("",[],[]) [Str "434."] ("#wasteland-content.xhtml#ln434",""),Space,Str "Shantih.",Space,Str "Repeated",Space,Str "as",Space,Str "here,",Space,Str "a",Space,Str "formal",Space,Str "ending",Space,Str "to",Space,Str "an",Space,Str "Upanishad.",Space,Str "'The",SoftBreak,Str "Peace",Space,Str "which",Space,Str "passeth",Space,Str "understanding'",Space,Str "is",Space,Str "a",Space,Str "feeble",Space,Str "translation",Space,Str "of",Space,Str "the",SoftBreak,Str "content",Space,Str "of",Space,Str "this",Space,Str "word."]],SoftBreak]]]]]]
-,Div ("wasteland-content.xhtml#backmatter",["section"],[("type","backmatter")])
- [Div ("wasteland-content.xhtml#rearnotes",["section"],[("type","rearnotes")])
+,Div ("wasteland-content.xhtml#backmatter",["section","backmatter"],[])
+ [Div ("wasteland-content.xhtml#rearnotes",["section","rearnotes"],[])
   [Header 2 ("",[],[]) [Str "NOTES",Space,Str "ON",Space,Str "\"THE",Space,Str "WASTE",Space,Str "LAND\""]
   ,Para [Str "Not",Space,Str "only",Space,Str "the",Space,Str "title,",Space,Str "but",Space,Str "the",Space,Str "plan",Space,Str "and",Space,Str "a",Space,Str "good",Space,Str "deal",Space,Str "of",Space,Str "the",Space,Str "incidental",Space,Str "symbolism",Space,Str "of",SoftBreak,Str "the",Space,Str "poem",Space,Str "were",Space,Str "suggested",Space,Str "by",Space,Str "Miss",Space,Str "Jessie",Space,Str "L.",Space,Str "Weston's",Space,Str "book",Space,Str "on",Space,Str "the",Space,Str "Grail",Space,Str "legend:",SoftBreak,Str "From",Space,Str "Ritual",Space,Str "to",Space,Str "Romance"]
   ,Para [Str "Indeed,",Space,Str "so",Space,Str "deeply",Space,Str "am",Space,Str "I",Space,Str "indebted,",Space,Str "Miss",Space,Str "Weston's",Space,Str "book",Space,Str "will",Space,Str "elucidate",Space,Str "the",SoftBreak,Str "difficulties",Space,Str "of",Space,Str "the",Space,Str "poem",Space,Str "much",Space,Str "better",Space,Str "than",Space,Str "my",Space,Str "notes",Space,Str "can",Space,Str "do;",Space,Str "and",Space,Str "I",Space,Str "recommend",Space,Str "it",SoftBreak,Str "(apart",Space,Str "from",Space,Str "the",Space,Str "great",Space,Str "interest",Space,Str "of",Space,Str "the",Space,Str "book",Space,Str "itself)",Space,Str "to",Space,Str "any",Space,Str "who",Space,Str "think",Space,Str "such",SoftBreak,Str "elucidation",Space,Str "of",Space,Str "the",Space,Str "poem",Space,Str "worth",Space,Str "the",Space,Str "trouble.",Space,Str "To",Space,Str "another",Space,Str "work",Space,Str "of",Space,Str "anthropology",Space,Str "I",Space,Str "am",SoftBreak,Str "indebted",Space,Str "in",Space,Str "general,",Space,Str "one",Space,Str "which",Space,Str "has",Space,Str "influenced",Space,Str "our",Space,Str "generation",Space,Str "profoundly;",Space,Str "I",Space,Str "mean",SoftBreak,Str "The",Space,Str "Golden",Space,Str "Bough;",Space,Str "I",Space,Str "have",Space,Str "used",Space,Str "especially",Space,Str "the",Space,Str "two",Space,Str "volumes",Space,Str "Adonis,",Space,Str "Attis,",Space,Str "Osiris.",SoftBreak,Str "Anyone",Space,Str "who",Space,Str "is",Space,Str "acquainted",Space,Str "with",Space,Str "these",Space,Str "works",Space,Str "will",Space,Str "immediately",Space,Str "recognise",Space,Str "in",Space,Str "the",Space,Str "poem",SoftBreak,Str "certain",Space,Str "references",Space,Str "to",Space,Str "vegetation",Space,Str "ceremonies."]
-- 
cgit v1.2.3


From fcd065818901e57f01aca4c919f6102f9a047ba0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 10 Dec 2020 15:51:20 -0800
Subject: HTML reader: pay attention to lang attributes on body.

These (as well as lang attributes on html) should update
lang in metadata. See #6938.
---
 src/Text/Pandoc/Readers/HTML.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f870a241d..f8a17bb78 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -25,7 +25,6 @@ module Text.Pandoc.Readers.HTML ( readHtml
                                 ) where
 
 import Control.Applicative ((<|>))
-import Control.Arrow (first)
 import Control.Monad (guard, msum, mzero, unless, void)
 import Control.Monad.Except (throwError)
 import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
@@ -121,14 +120,18 @@ setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a
 setInPlain = local (\s -> s {inPlain = True})
 
 pHtml :: PandocMonad m => TagParser m Blocks
-pHtml = try $ do
+pHtml = do
   (TagOpen "html" attr) <- lookAhead pAny
   for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
     updateState . B.setMeta "lang" . B.text
   pInTags "html" block
 
 pBody :: PandocMonad m => TagParser m Blocks
-pBody = pInTags "body" block
+pBody = do
+  (TagOpen "body" attr) <- lookAhead pAny
+  for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $
+    updateState . B.setMeta "lang" . B.text
+  pInTags "body" block
 
 pHead :: PandocMonad m => TagParser m Blocks
 pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
-- 
cgit v1.2.3


From 208cb9619632ee9490b75158e44213ee6d4ab93e Mon Sep 17 00:00:00 2001
From: mb21 <mb21@users.noreply.github.com>
Date: Sat, 12 Dec 2020 14:12:39 +0100
Subject: ICML writer: fix image bounding box for custom widths/heights

fixes #6936
---
 src/Text/Pandoc/Writers/ICML.hs | 7 ++++++-
 test/command/5541-nesting.md    | 1 +
 test/command/svg.md             | 4 ++++
 test/writer.icml                | 2 ++
 4 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
index dcf5acfef..c254fbc58 100644
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -621,7 +621,12 @@ imageICML opts style attr (src, _) = do
       image  = inTags True "Image"
                    [("Self","ue6"), ("ItemTransform", scale<>" -"<>hw<>" -"<>hh)]
                  $ vcat [
-                     inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded"
+                     inTags True "Properties" [] $ vcat [
+                         inTags True "Profile" [("type","string")] $ text "$ID/Embedded"
+                       , selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0")
+                         , ("Right",  showFl $ ow*ow / imgWidth)
+                         , ("Bottom", showFl $ oh*oh / imgHeight)]
+                       ]
                    , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')]
                    ]
       doc    = inTags True "CharacterStyleRange" attrs
diff --git a/test/command/5541-nesting.md b/test/command/5541-nesting.md
index 194b79164..5abe41a9a 100644
--- a/test/command/5541-nesting.md
+++ b/test/command/5541-nesting.md
@@ -80,6 +80,7 @@
           <Profile type="string">
             $ID/Embedded
           </Profile>
+          <GraphicBounds Left="0" Top="0" Right="150" Bottom="150" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:lalune.jpg" />
       </Image>
diff --git a/test/command/svg.md b/test/command/svg.md
index 26a8213f6..57506570b 100644
--- a/test/command/svg.md
+++ b/test/command/svg.md
@@ -23,6 +23,7 @@
           <Profile type="string">
             $ID/Embedded
           </Profile>
+          <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:command/corrupt.svg" />
       </Image>
@@ -56,6 +57,7 @@
           <Profile type="string">
             $ID/Embedded
           </Profile>
+          <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:command/SVG_logo.svg" />
       </Image>
@@ -89,6 +91,7 @@
           <Profile type="string">
             $ID/Embedded
           </Profile>
+          <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:command/SVG_logo-without-xml-declaration.svg" />
       </Image>
@@ -122,6 +125,7 @@
           <Profile type="string">
             $ID/Embedded
           </Profile>
+          <GraphicBounds Left="0" Top="0" Right="109.5" Bottom="130.5" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:command/inkscape-cube.svg" />
       </Image>
diff --git a/test/writer.icml b/test/writer.icml
index a4b40056a..0601ba494 100644
--- a/test/writer.icml
+++ b/test/writer.icml
@@ -2800,6 +2800,7 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</Content>
           <Profile type="string">
             $ID/Embedded
           </Profile>
+          <GraphicBounds Left="0" Top="0" Right="150" Bottom="150" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:lalune.jpg" />
       </Image>
@@ -2836,6 +2837,7 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</Content>
           <Profile type="string">
             $ID/Embedded
           </Profile>
+          <GraphicBounds Left="0" Top="0" Right="20" Bottom="22" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:movie.jpg" />
       </Image>
-- 
cgit v1.2.3


From ccd235e31ff00c4741ba52552ba58669f700bbdc Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 12 Dec 2020 16:45:50 +0100
Subject: LaTeX writer: extract table handling into separate module.

---
 pandoc.cabal                             |   4 +
 src/Text/Pandoc/Writers/LaTeX.hs         | 249 ++-----------------------------
 src/Text/Pandoc/Writers/LaTeX/Caption.hs |  48 ++++++
 src/Text/Pandoc/Writers/LaTeX/Notes.hs   |  34 +++++
 src/Text/Pandoc/Writers/LaTeX/Table.hs   | 181 ++++++++++++++++++++++
 src/Text/Pandoc/Writers/LaTeX/Types.hs   |  80 ++++++++++
 6 files changed, 359 insertions(+), 237 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/LaTeX/Caption.hs
 create mode 100644 src/Text/Pandoc/Writers/LaTeX/Notes.hs
 create mode 100644 src/Text/Pandoc/Writers/LaTeX/Table.hs
 create mode 100644 src/Text/Pandoc/Writers/LaTeX/Types.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 7f8df272f..335ccc720 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -630,6 +630,10 @@ library
                    Text.Pandoc.Writers.Docx.StyleMap,
                    Text.Pandoc.Writers.JATS.Table,
                    Text.Pandoc.Writers.JATS.Types,
+                   Text.Pandoc.Writers.LaTeX.Caption,
+                   Text.Pandoc.Writers.LaTeX.Notes,
+                   Text.Pandoc.Writers.LaTeX.Table,
+                   Text.Pandoc.Writers.LaTeX.Types,
                    Text.Pandoc.Writers.Roff,
                    Text.Pandoc.Writers.Powerpoint.Presentation,
                    Text.Pandoc.Writers.Powerpoint.Output,
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e02cc2833..6a4e3ba69 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Writers.LaTeX (
   ) where
 import Control.Applicative ((<|>))
 import Control.Monad.State.Strict
-import Data.Monoid (Any(..))
 import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace,
                   isPunctuation, ord)
 import Data.List (foldl', intersperse, nubBy, (\\), uncons)
@@ -42,69 +41,14 @@ import Text.Pandoc.Options
 import Text.DocLayout
 import Text.Pandoc.Shared
 import Text.Pandoc.Slides
-import Text.Pandoc.Walk
+import Text.Pandoc.Walk (query, walk, walkM)
+import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
+import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX)
+import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState)
 import Text.Pandoc.Writers.Shared
 import Text.Printf (printf)
 import qualified Data.Text.Normalize as Normalize
 
-data WriterState =
-  WriterState { stInNote        :: Bool          -- true if we're in a note
-              , stInQuote       :: Bool          -- true if in a blockquote
-              , stExternalNotes :: Bool          -- true if in context where
-                                                 -- we need to store footnotes
-              , stInMinipage    :: Bool          -- true if in minipage
-              , stInHeading     :: Bool          -- true if in a section heading
-              , stInItem        :: Bool          -- true if in \item[..]
-              , stNotes         :: [Doc Text]    -- notes in a minipage
-              , stOLLevel       :: Int           -- level of ordered list nesting
-              , stOptions       :: WriterOptions -- writer options, so they don't have to be parameter
-              , stVerbInNote    :: Bool          -- true if document has verbatim text in note
-              , stTable         :: Bool          -- true if document has a table
-              , stStrikeout     :: Bool          -- true if document has strikeout
-              , stUrl           :: Bool          -- true if document has visible URL link
-              , stGraphics      :: Bool          -- true if document contains images
-              , stLHS           :: Bool          -- true if document has literate haskell code
-              , stHasChapters   :: Bool          -- true if document has chapters
-              , stCsquotes      :: Bool          -- true if document uses csquotes
-              , stHighlighting  :: Bool          -- true if document has highlighted code
-              , stIncremental   :: Bool          -- true if beamer lists should be displayed bit by bit
-              , stInternalLinks :: [Text]      -- list of internal link targets
-              , stBeamer        :: Bool          -- produce beamer
-              , stEmptyLine     :: Bool          -- true if no content on line
-              , stHasCslRefs    :: Bool          -- has a Div with class refs
-              , stIsFirstInDefinition :: Bool    -- first block in a defn list
-              }
-
-startingState :: WriterOptions -> WriterState
-startingState options = WriterState {
-                  stInNote = False
-                , stInQuote = False
-                , stExternalNotes = False
-                , stInHeading = False
-                , stInMinipage = False
-                , stInItem = False
-                , stNotes = []
-                , stOLLevel = 1
-                , stOptions = options
-                , stVerbInNote = False
-                , stTable = False
-                , stStrikeout = False
-                , stUrl = False
-                , stGraphics = False
-                , stLHS = False
-                , stHasChapters = case writerTopLevelDivision options of
-                                    TopLevelPart    -> True
-                                    TopLevelChapter -> True
-                                    _               -> False
-                , stCsquotes = False
-                , stHighlighting = False
-                , stIncremental = writerIncremental options
-                , stInternalLinks = []
-                , stBeamer = False
-                , stEmptyLine = True
-                , stHasCslRefs = False
-                , stIsFirstInDefinition = False }
-
 -- | Convert Pandoc to LaTeX.
 writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeLaTeX options document =
@@ -117,8 +61,6 @@ writeBeamer options document =
   evalStateT (pandocToLaTeX options document) $
     (startingState options){ stBeamer = True }
 
-type LW m = StateT WriterState m
-
 pandocToLaTeX :: PandocMonad m
               => WriterOptions -> Pandoc -> LW m Text
 pandocToLaTeX options (Pandoc meta blocks) = do
@@ -573,7 +515,7 @@ blockToLaTeX (Plain lst) =
 blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,tgt)])
   | Just tit <- T.stripPrefix "fig:" tgt
   = do
-      (capt, captForLof, footnotes) <- getCaption True txt
+      (capt, captForLof, footnotes) <- getCaption inlineListToLaTeX True txt
       lab <- labelFor ident
       let caption = "\\caption" <> captForLof <> braces capt <> lab
       img <- inlineToLaTeX (Image attr txt (src,tit))
@@ -774,181 +716,14 @@ blockToLaTeX (Header level (id',classes,_) lst) = do
   hdr <- sectionHeader classes id' level lst
   modify $ \s -> s{stInHeading = False}
   return hdr
-blockToLaTeX (Table _ blkCapt specs thead tbody tfoot) = do
-  let (caption, aligns, widths, heads, rows) = toLegacyTable blkCapt specs thead tbody tfoot
-  -- simple tables have to have simple cells:
-  let isSimple [Plain _] = True
-      isSimple [Para  _] = True
-      isSimple []        = True
-      isSimple _         = False
-  let widths' = if all (== 0) widths && not (all (all isSimple) rows)
-                   then replicate (length aligns)
-                          (1 / fromIntegral (length aligns))
-                   else widths
-  (captionText, captForLof, captNotes) <- getCaption False caption
-  let toHeaders hs = do contents <- tableRowToLaTeX True aligns hs
-                        return ("\\toprule" $$ contents $$ "\\midrule")
-  let removeNote (Note _) = Span ("", [], []) []
-      removeNote x        = x
-  firsthead <- if isEmpty captionText || all null heads
-                  then return empty
-                  else ($$ text "\\endfirsthead") <$> toHeaders heads
-  head' <- if all null heads
-              then return "\\toprule"
-              -- avoid duplicate notes in head and firsthead:
-              else toHeaders (if isEmpty firsthead
-                                 then heads
-                                 else walk removeNote heads)
-  let capt = if isEmpty captionText
-                then empty
-                else "\\caption" <> captForLof <> braces captionText
-                         <> "\\tabularnewline"
-  rows' <- mapM (tableRowToLaTeX False aligns) rows
-  let colDescriptors =
-         (if all (== 0) widths'
-             then hcat . map literal
-             else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $
-         zipWith (toColDescriptor (length widths')) aligns widths'
-  modify $ \s -> s{ stTable = True }
-  notes <- notesToLaTeX <$> gets stNotes
-  return $ "\\begin{longtable}[]" <>
-              braces ("@{}" <> colDescriptors <> "@{}")
-              -- the @{} removes extra space at beginning and end
-         $$ capt
-         $$ firsthead
-         $$ head'
-         $$ "\\endhead"
-         $$ vcat rows'
-         $$ "\\bottomrule"
-         $$ "\\end{longtable}"
-         $$ captNotes
-         $$ notes
-
-getCaption :: PandocMonad m
-           => Bool -> [Inline] -> LW m (Doc Text, Doc Text, Doc Text)
-getCaption externalNotes txt = do
-  oldExternalNotes <- gets stExternalNotes
-  modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] }
-  capt <- inlineListToLaTeX txt
-  footnotes <- if externalNotes
-                  then notesToLaTeX <$> gets stNotes
-                  else return empty
-  modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] }
-  -- We can't have footnotes in the list of figures/tables, so remove them:
-  let getNote (Note _) = Any True
-      getNote _        = Any False
-  let hasNotes = getAny . query getNote
-  captForLof <- if hasNotes txt
-                   then brackets <$> inlineListToLaTeX (walk deNote txt)
-                   else return empty
-  return (capt, captForLof, footnotes)
-
-toColDescriptor :: Int -> Alignment -> Double -> Text
-toColDescriptor _numcols align 0 =
-  case align of
-         AlignLeft    -> "l"
-         AlignRight   -> "r"
-         AlignCenter  -> "c"
-         AlignDefault -> "l"
-toColDescriptor numcols align width =
-  T.pack $ printf
-     ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}"
-     align'
-     ((numcols - 1) * 2)
-     width
- where
-   align' :: String
-   align' = case align of
-              AlignLeft    -> "\\raggedright"
-              AlignRight   -> "\\raggedleft"
-              AlignCenter  -> "\\centering"
-              AlignDefault -> "\\raggedright"
+blockToLaTeX (Table _ blkCapt specs thead tbodies tfoot) =
+  tableToLaTeX inlineListToLaTeX blockListToLaTeX
+               blkCapt specs thead tbodies tfoot
 
 blockListToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text)
 blockListToLaTeX lst =
   vsep `fmap` mapM (\b -> setEmptyLine True >> blockToLaTeX b) lst
 
-tableRowToLaTeX :: PandocMonad m
-                => Bool
-                -> [Alignment]
-                -> [[Block]]
-                -> LW m (Doc Text)
-tableRowToLaTeX header aligns cols = do
-  cells <- mapM (tableCellToLaTeX header) $ zip aligns cols
-  return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace"
-
--- For simple latex tables (without minipages or parboxes),
--- we need to go to some lengths to get line breaks working:
--- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}.
-fixLineBreaks :: Block -> Block
-fixLineBreaks (Para ils)  = Para $ fixLineBreaks' ils
-fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils
-fixLineBreaks x           = x
-
-fixLineBreaks' :: [Inline] -> [Inline]
-fixLineBreaks' ils = case splitBy (== LineBreak) ils of
-                       []     -> []
-                       [xs]   -> xs
-                       chunks -> RawInline "tex" "\\vtop{" :
-                                 concatMap tohbox chunks <>
-                                 [RawInline "tex" "}"]
-  where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <>
-                    [RawInline "tex" "}"]
-
--- We also change display math to inline math, since display
--- math breaks in simple tables.
-displayMathToInline :: Inline -> Inline
-displayMathToInline (Math DisplayMath x) = Math InlineMath x
-displayMathToInline x                    = x
-
-tableCellToLaTeX :: PandocMonad m
-                 => Bool -> (Alignment, [Block])
-                 -> LW m (Doc Text)
-tableCellToLaTeX header (align, blocks) = do
-  beamer <- gets stBeamer
-  externalNotes <- gets stExternalNotes
-  inMinipage <- gets stInMinipage
-  -- See #5367 -- footnotehyper/footnote don't work in beamer,
-  -- so we need to produce the notes outside the table...
-  modify $ \st -> st{ stExternalNotes = beamer }
-  let isPlainOrPara Para{}  = True
-      isPlainOrPara Plain{} = True
-      isPlainOrPara _       = False
-  result <-
-    if all isPlainOrPara blocks
-       then
-         blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
-       else do
-         modify $ \st -> st{ stInMinipage = True }
-         cellContents <- blockListToLaTeX blocks
-         modify $ \st -> st{ stInMinipage = inMinipage }
-         let valign = text $ if header then "[b]" else "[t]"
-         let halign = case align of
-                      AlignLeft    -> "\\raggedright"
-                      AlignRight   -> "\\raggedleft"
-                      AlignCenter  -> "\\centering"
-                      AlignDefault -> "\\raggedright"
-         return $ "\\begin{minipage}" <> valign <>
-                  braces "\\linewidth" <> halign <> cr <>
-                  cellContents <> cr <>
-                  "\\end{minipage}"
-  modify $ \st -> st{ stExternalNotes = externalNotes }
-  return result
-
-
-notesToLaTeX :: [Doc Text] -> Doc Text
-notesToLaTeX [] = empty
-notesToLaTeX ns = (case length ns of
-                              n | n > 1 -> "\\addtocounter" <>
-                                           braces "footnote" <>
-                                           braces (text $ show $ 1 - n)
-                                | otherwise -> empty)
-                   $$
-                   vcat (intersperse
-                     ("\\addtocounter" <> braces "footnote" <> braces "1")
-                     $ map (\x -> "\\footnotetext" <> braces x)
-                     $ reverse ns)
-
 listItemToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text)
 listItemToLaTeX lst
   -- we need to put some text before a header if it's the first
@@ -1081,7 +856,7 @@ mapAlignment a = case a of
                    "top-baseline" -> "t"
                    "bottom" -> "b"
                    "center" -> "c"
-                   _ -> a 
+                   _ -> a
 
 wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text)
 wrapDiv (_,classes,kvs) t = do
@@ -1095,7 +870,7 @@ wrapDiv (_,classes,kvs) t = do
                                  (lookup "totalwidth" kvs)
                                onlytextwidth = filter ("onlytextwidth" ==) classes
                                options = text $ T.unpack $ T.intercalate "," $
-                                 valign : totalwidth ++ onlytextwidth 
+                                 valign : totalwidth ++ onlytextwidth
                            in inCmd "begin" "columns" <> brackets options
                               $$ contents
                               $$ inCmd "end" "columns"
@@ -1106,8 +881,8 @@ wrapDiv (_,classes,kvs) t = do
                                  maybe ""
                                  (brackets . text . T.unpack . mapAlignment)
                                  (lookup "align" kvs)
-                               w = maybe "0.48" fromPct (lookup "width" kvs) 
-                           in  inCmd "begin" "column" <> 
+                               w = maybe "0.48" fromPct (lookup "width" kvs)
+                           in  inCmd "begin" "column" <>
                                valign <>
                                braces (literal w <> "\\textwidth")
                                $$ contents
diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
new file mode 100644
index 000000000..61ca41fb1
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
@@ -0,0 +1,48 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Caption
+   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Write figure or table captions as LaTeX.
+-}
+module Text.Pandoc.Writers.LaTeX.Caption
+  ( getCaption
+  ) where
+
+import Control.Monad.State.Strict
+import Data.Monoid (Any(..))
+import Data.Text (Text)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Text.DocLayout (Doc, brackets, empty)
+import Text.Pandoc.Shared
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX)
+import Text.Pandoc.Writers.LaTeX.Types
+  ( LW, WriterState (stExternalNotes, stNotes) )
+
+getCaption :: PandocMonad m
+           => ([Inline] -> LW m (Doc Text))
+           -> Bool -> [Inline]
+           -> LW m (Doc Text, Doc Text, Doc Text)
+getCaption inlineListToLaTeX externalNotes txt = do
+  oldExternalNotes <- gets stExternalNotes
+  modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] }
+  capt <- inlineListToLaTeX txt
+  footnotes <- if externalNotes
+                  then notesToLaTeX <$> gets stNotes
+                  else return empty
+  modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] }
+  -- We can't have footnotes in the list of figures/tables, so remove them:
+  let getNote (Note _) = Any True
+      getNote _        = Any False
+  let hasNotes = getAny . query getNote
+  captForLof <- if hasNotes txt
+                   then brackets <$> inlineListToLaTeX (walk deNote txt)
+                   else return empty
+  return (capt, captForLof, footnotes)
diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
new file mode 100644
index 000000000..216a7bfc3
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
@@ -0,0 +1,34 @@
+{-# LANGUAGE LambdaCase        #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Notes
+   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Output tables as LaTeX.
+-}
+module Text.Pandoc.Writers.LaTeX.Notes
+  ( notesToLaTeX
+  ) where
+
+import Data.List (intersperse)
+import Text.DocLayout ( Doc, braces, empty, text, vcat, ($$))
+import Data.Text (Text)
+
+notesToLaTeX :: [Doc Text] -> Doc Text
+notesToLaTeX = \case
+  [] -> empty
+  ns -> (case length ns of
+            n | n > 1 -> "\\addtocounter" <>
+                         braces "footnote" <>
+                         braces (text $ show $ 1 - n)
+              | otherwise -> empty)
+        $$
+        vcat (intersperse
+               ("\\addtocounter" <> braces "footnote" <> braces "1")
+               $ map (\x -> "\\footnotetext" <> braces x)
+               $ reverse ns)
diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
new file mode 100644
index 000000000..5299efa37
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -0,0 +1,181 @@
+{-# LANGUAGE LambdaCase        #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Table
+   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Output LaTeX formatted tables.
+-}
+module Text.Pandoc.Writers.LaTeX.Table
+  ( tableToLaTeX
+  ) where
+import Control.Monad.State.Strict
+import Data.List (intersperse)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Text.DocLayout
+  ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest
+  , text, vcat, ($$) )
+import Text.Pandoc.Shared (splitBy)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Writers.Shared (toLegacyTable)
+import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
+import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX)
+import Text.Pandoc.Writers.LaTeX.Types
+  ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stNotes, stTable) )
+import Text.Printf (printf)
+
+tableToLaTeX :: PandocMonad m
+             => ([Inline] -> LW m (Doc Text))
+             -> ([Block]  -> LW m (Doc Text))
+             -> Caption -> [ColSpec] -> TableHead -> [TableBody] -> TableFoot
+             -> LW m (Doc Text)
+tableToLaTeX inlnsToLaTeX blksToLaTeX blkCapt specs thead tbody tfoot = do
+  let (caption, aligns, widths, heads, rows) =
+        toLegacyTable blkCapt specs thead tbody tfoot
+  -- simple tables have to have simple cells:
+  let isSimple = \case
+        [Plain _] -> True
+        [Para  _] -> True
+        []        -> True
+        _         -> False
+  let widths' = if all (== 0) widths && not (all (all isSimple) rows)
+                   then replicate (length aligns)
+                          (1 / fromIntegral (length aligns))
+                   else widths
+  (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption
+  let toHeaders hs = do contents <- tableRowToLaTeX blksToLaTeX True aligns hs
+                        return ("\\toprule" $$ contents $$ "\\midrule")
+  let removeNote (Note _) = Span ("", [], []) []
+      removeNote x        = x
+  firsthead <- if isEmpty captionText || all null heads
+                  then return empty
+                  else ($$ text "\\endfirsthead") <$> toHeaders heads
+  head' <- if all null heads
+              then return "\\toprule"
+              -- avoid duplicate notes in head and firsthead:
+              else toHeaders (if isEmpty firsthead
+                                 then heads
+                                 else walk removeNote heads)
+  let capt = if isEmpty captionText
+                then empty
+                else "\\caption" <> captForLof <> braces captionText
+                         <> "\\tabularnewline"
+  rows' <- mapM (tableRowToLaTeX blksToLaTeX False aligns) rows
+  let colDescriptors =
+         (if all (== 0) widths'
+             then hcat . map literal
+             else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $
+         zipWith (toColDescriptor (length widths')) aligns widths'
+  modify $ \s -> s{ stTable = True }
+  notes <- notesToLaTeX <$> gets stNotes
+  return $ "\\begin{longtable}[]" <>
+              braces ("@{}" <> colDescriptors <> "@{}")
+              -- the @{} removes extra space at beginning and end
+         $$ capt
+         $$ firsthead
+         $$ head'
+         $$ "\\endhead"
+         $$ vcat rows'
+         $$ "\\bottomrule"
+         $$ "\\end{longtable}"
+         $$ captNotes
+         $$ notes
+
+toColDescriptor :: Int -> Alignment -> Double -> Text
+toColDescriptor _numcols align 0 =
+  case align of
+         AlignLeft    -> "l"
+         AlignRight   -> "r"
+         AlignCenter  -> "c"
+         AlignDefault -> "l"
+toColDescriptor numcols align width =
+  T.pack $ printf
+     ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}"
+     align'
+     ((numcols - 1) * 2)
+     width
+ where
+   align' :: String
+   align' = case align of
+              AlignLeft    -> "\\raggedright"
+              AlignRight   -> "\\raggedleft"
+              AlignCenter  -> "\\centering"
+              AlignDefault -> "\\raggedright"
+
+tableRowToLaTeX :: PandocMonad m
+                => ([Block] -> LW m (Doc Text))
+                -> Bool
+                -> [Alignment]
+                -> [[Block]]
+                -> LW m (Doc Text)
+tableRowToLaTeX blockListToLaTeX header aligns cols = do
+  cells <- mapM (tableCellToLaTeX blockListToLaTeX header) $ zip aligns cols
+  return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace"
+
+-- For simple latex tables (without minipages or parboxes),
+-- we need to go to some lengths to get line breaks working:
+-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}.
+fixLineBreaks :: Block -> Block
+fixLineBreaks (Para ils)  = Para $ fixLineBreaks' ils
+fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils
+fixLineBreaks x           = x
+
+fixLineBreaks' :: [Inline] -> [Inline]
+fixLineBreaks' ils = case splitBy (== LineBreak) ils of
+                       []     -> []
+                       [xs]   -> xs
+                       chunks -> RawInline "tex" "\\vtop{" :
+                                 concatMap tohbox chunks <>
+                                 [RawInline "tex" "}"]
+  where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <>
+                    [RawInline "tex" "}"]
+
+-- We also change display math to inline math, since display
+-- math breaks in simple tables.
+displayMathToInline :: Inline -> Inline
+displayMathToInline (Math DisplayMath x) = Math InlineMath x
+displayMathToInline x                    = x
+
+tableCellToLaTeX :: PandocMonad m
+                 => ([Block] -> LW m (Doc Text))
+                 -> Bool -> (Alignment, [Block])
+                 -> LW m (Doc Text)
+tableCellToLaTeX blockListToLaTeX header (align, blocks) = do
+  beamer <- gets stBeamer
+  externalNotes <- gets stExternalNotes
+  inMinipage <- gets stInMinipage
+  -- See #5367 -- footnotehyper/footnote don't work in beamer,
+  -- so we need to produce the notes outside the table...
+  modify $ \st -> st{ stExternalNotes = beamer }
+  let isPlainOrPara = \case
+        Para{}  -> True
+        Plain{} -> True
+        _       -> False
+  result <-
+    if all isPlainOrPara blocks
+       then
+         blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
+       else do
+         modify $ \st -> st{ stInMinipage = True }
+         cellContents <- blockListToLaTeX blocks
+         modify $ \st -> st{ stInMinipage = inMinipage }
+         let valign = text $ if header then "[b]" else "[t]"
+         let halign = case align of
+                        AlignLeft    -> "\\raggedright"
+                        AlignRight   -> "\\raggedleft"
+                        AlignCenter  -> "\\centering"
+                        AlignDefault -> "\\raggedright"
+         return $ "\\begin{minipage}" <> valign <>
+                  braces "\\linewidth" <> halign <> cr <>
+                  cellContents <> cr <>
+                  "\\end{minipage}"
+  modify $ \st -> st{ stExternalNotes = externalNotes }
+  return result
diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs
new file mode 100644
index 000000000..a76388729
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs
@@ -0,0 +1,80 @@
+module Text.Pandoc.Writers.LaTeX.Types
+  ( LW
+  , WriterState (..)
+  , startingState
+  ) where
+
+import Control.Monad.State.Strict (StateT)
+import Data.Text (Text)
+import Text.DocLayout (Doc)
+import Text.Pandoc.Options
+  ( WriterOptions (writerIncremental, writerTopLevelDivision)
+  , TopLevelDivision (..)
+  )
+
+-- | LaTeX writer type. The type constructor @m@ will typically be an
+-- instance of PandocMonad.
+type LW m = StateT WriterState m
+
+data WriterState =
+  WriterState
+  { stInNote        :: Bool          -- ^ true if we're in a note
+  , stInQuote       :: Bool          -- ^ true if in a blockquote
+  , stExternalNotes :: Bool          -- ^ true if in context where
+                                     --   we need to store footnotes
+  , stInMinipage    :: Bool          -- ^ true if in minipage
+  , stInHeading     :: Bool          -- ^ true if in a section heading
+  , stInItem        :: Bool          -- ^ true if in \item[..]
+  , stNotes         :: [Doc Text]    -- ^ notes in a minipage
+  , stOLLevel       :: Int           -- ^ level of ordered list nesting
+  , stOptions       :: WriterOptions -- ^ writer options, so they don't have to
+                                     --   be parameter
+  , stVerbInNote    :: Bool          -- ^ true if document has verbatim text in note
+  , stTable         :: Bool          -- ^ true if document has a table
+  , stStrikeout     :: Bool          -- ^ true if document has strikeout
+  , stUrl           :: Bool          -- ^ true if document has visible URL link
+  , stGraphics      :: Bool          -- ^ true if document contains images
+  , stLHS           :: Bool          -- ^ true if document has literate haskell code
+  , stHasChapters   :: Bool          -- ^ true if document has chapters
+  , stCsquotes      :: Bool          -- ^ true if document uses csquotes
+  , stHighlighting  :: Bool          -- ^ true if document has highlighted code
+  , stIncremental   :: Bool          -- ^ true if beamer lists should be
+                                     --   displayed bit by bit
+  , stInternalLinks :: [Text]        -- ^ list of internal link targets
+  , stBeamer        :: Bool          -- ^ produce beamer
+  , stEmptyLine     :: Bool          -- ^ true if no content on line
+  , stHasCslRefs    :: Bool          -- ^ has a Div with class refs
+  , stIsFirstInDefinition :: Bool    -- ^ first block in a defn list
+  }
+
+startingState :: WriterOptions -> WriterState
+startingState options =
+  WriterState
+  { stInNote = False
+  , stInQuote = False
+  , stExternalNotes = False
+  , stInHeading = False
+  , stInMinipage = False
+  , stInItem = False
+  , stNotes = []
+  , stOLLevel = 1
+  , stOptions = options
+  , stVerbInNote = False
+  , stTable = False
+  , stStrikeout = False
+  , stUrl = False
+  , stGraphics = False
+  , stLHS = False
+  , stHasChapters = case writerTopLevelDivision options of
+                      TopLevelPart    -> True
+                      TopLevelChapter -> True
+                      _               -> False
+  , stCsquotes = False
+  , stHighlighting = False
+  , stIncremental = writerIncremental options
+  , stInternalLinks = []
+  , stBeamer = False
+  , stEmptyLine = True
+  , stHasCslRefs = False
+  , stIsFirstInDefinition = False
+  }
-- 
cgit v1.2.3


From 8cf58d96e0801d8073e118f05279a9f473efcee0 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 13 Dec 2020 14:09:53 +0100
Subject: Docx writer: use Content instead of Element.

---
 src/Text/Pandoc/Writers/Docx.hs | 134 ++++++++++++++++++++++------------------
 1 file changed, 75 insertions(+), 59 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 4cb879e6a..97048e980 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -441,7 +441,7 @@ writeDocx opts doc = do
         Nothing      -> mknode "w:sectPr" [] ()
 
   -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr'
-  let contents' = contents ++ [sectpr]
+  let contents' = contents ++ [Elem sectpr]
   let docContents = mknode "w:document" stdAttributes
                     $ mknode "w:body" [] contents'
 
@@ -538,7 +538,8 @@ writeDocx opts doc = do
 
   -- docProps/custom.xml
   let customProperties :: [(String, String)]
-      customProperties = [(T.unpack k, T.unpack $ lookupMetaString k meta) | k <- M.keys (unMeta meta)
+      customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta)
+                         | k <- M.keys (unMeta meta)
                          , k `notElem` (["title", "author", "keywords"]
                                        ++ extraCoreProps)]
   let mkCustomProp (k, v) pid = mknode "property"
@@ -788,7 +789,7 @@ makeTOC opts = do
           mknode "w:docPartUnique" [] ()]
          -- w:docPartObj
       ), -- w:sdtPr
-      mknode "w:sdtContent" [] (title++[
+      mknode "w:sdtContent" [] (title ++ [ Elem $
         mknode "w:p" [] (
           mknode "w:r" [] [
             mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (),
@@ -802,7 +803,9 @@ makeTOC opts = do
 
 -- | Convert Pandoc document to two lists of
 -- OpenXML elements (the main document and footnotes).
-writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element],[Element])
+writeOpenXML :: (PandocMonad m)
+             => WriterOptions -> Pandoc
+             -> WS m ([Content], [Element], [Element])
 writeOpenXML opts (Pandoc meta blocks) = do
   let tit = docTitle meta
   let auths = docAuthors meta
@@ -830,6 +833,7 @@ writeOpenXML opts (Pandoc meta blocks) = do
         return $
           mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs]
             [ mknode "w:p" [] $
+              map Elem
               [ mknode "w:pPr" []
                 [ mknode "w:pStyle" [("w:val", "CommentText")] () ]
               , mknode "w:r" []
@@ -844,11 +848,11 @@ writeOpenXML opts (Pandoc meta blocks) = do
   toc <- if includeTOC
             then makeTOC opts
             else return []
-  let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc
+  let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ map Elem toc
   return (meta' ++ doc', notes', comments')
 
 -- | Convert a list of Pandoc blocks to OpenXML.
-blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element]
+blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Content]
 blocksToOpenXML opts = fmap concat . mapM (blockToOpenXML opts) . separateTables
 
 -- Word combines adjacent tables unless you put an empty paragraph between
@@ -884,10 +888,10 @@ dynamicStyleKey :: T.Text
 dynamicStyleKey = "custom-style"
 
 -- | Convert a Pandoc block element to OpenXML.
-blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
+blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content]
 blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk
 
-blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element]
+blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content]
 blockToOpenXML' _ Null = return []
 blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do
   stylemod <- case lookup dynamicStyleKey kvs of
@@ -921,18 +925,18 @@ blockToOpenXML' opts (Header lev (ident,_,kvs) lst) = do
                 Just n -> do
                    num <- withTextPropM (rStyleM "SectionNumber")
                             (inlineToOpenXML opts (Str n))
-                   return $ num ++ [mknode "w:r" [] [mknode "w:tab" [] ()]]
+                   return $ num ++ [Elem $ mknode "w:r" [] [mknode "w:tab" [] ()]]
                 Nothing -> return []
            else return []
   contents <- (number ++) <$> inlinesToOpenXML opts lst
   if T.null ident
-     then return [mknode "w:p" [] (paraProps ++ contents)]
+     then return [Elem $ mknode "w:p" [] (map Elem paraProps ++ contents)]
      else do
        let bookmarkName = ident
        modify $ \s -> s{ stSectionIds = Set.insert bookmarkName
                                       $ stSectionIds s }
        bookmarkedContents <- wrapBookmark bookmarkName contents
-       return [mknode "w:p" [] (paraProps ++ bookmarkedContents)]
+       return [Elem $ mknode "w:p" [] (map Elem paraProps ++ bookmarkedContents)]
 blockToOpenXML' opts (Plain lst) = do
   isInTable <- gets stInTable
   isInList <- gets stInList
@@ -952,7 +956,9 @@ blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit
   contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
   captionNode <- withParaPropM (pStyleM "Image Caption")
                  $ blockToOpenXML opts (Para alt)
-  return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
+  return $
+    Elem (mknode "w:p" [] (map Elem paraProps ++ contents))
+    : captionNode
 blockToOpenXML' opts (Para lst)
   | null lst && not (isEnabled Ext_empty_paragraphs opts) = return []
   | otherwise = do
@@ -969,10 +975,10 @@ blockToOpenXML' opts (Para lst)
             ps               -> ps
       modify $ \s -> s { stFirstPara = False }
       contents <- inlinesToOpenXML opts lst
-      return [mknode "w:p" [] (paraProps' ++ contents)]
+      return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)]
 blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
 blockToOpenXML' _ b@(RawBlock format str)
-  | format == Format "openxml" = return [ x | Elem x <- parseXML str ]
+  | format == Format "openxml" = return (parseXML str)
   | otherwise                  = do
       report $ BlockNotRendered b
       return []
@@ -987,7 +993,7 @@ blockToOpenXML' opts (CodeBlock attrs@(ident, _, _) str) = do
   wrapBookmark ident p
 blockToOpenXML' _ HorizontalRule = do
   setFirstPara
-  return [
+  return [ Elem $
     mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
     $ mknode "v:rect" [("style","width:0;height:1.5pt"),
                        ("o:hralign","center"),
@@ -1006,26 +1012,28 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
   -- Not in the spec but in Word 2007, 2010. See #4953.
   let cellToOpenXML (al, cell) = do
         es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell
-        return $ if any (\e -> qName (elName e) == "p") es
+        return $ if any (\e -> qName (elName e) == "p") (onlyElems es)
            then es
-           else es ++ [mknode "w:p" [] ()]
+           else es ++ [Elem $ mknode "w:p" [] ()]
   headers' <- mapM cellToOpenXML $ zip aligns headers
   rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
-  let borderProps = mknode "w:tcPr" []
+  let borderProps = Elem $ mknode "w:tcPr" []
                     [ mknode "w:tcBorders" []
                       $ mknode "w:bottom" [("w:val","single")] ()
                     , mknode "w:vAlign" [("w:val","bottom")] () ]
   compactStyle <- pStyleM "Compact"
-  let emptyCell' = [mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
+  let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
   let mkcell border contents = mknode "w:tc" []
                             $ [ borderProps | border ] ++
                             if null contents
                                then emptyCell'
                                else contents
-  let mkrow border cells = mknode "w:tr" [] $
-                        [mknode "w:trPr" [] [
-                          mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border]
-                        ++ map (mkcell border) cells
+  let mkrow border cells =
+         mknode "w:tr" [] $
+         [ mknode "w:trPr" []
+           [ mknode "w:cnfStyle" [("w:firstRow","1")] ()]
+         | border]
+         ++ map (mkcell border) cells
   let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
   let fullrow = 5000 -- 100% specified in pct
   let rowwidth = fullrow * sum widths
@@ -1035,7 +1043,8 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
   modify $ \s -> s { stInTable = False }
   return $
     caption' ++
-    [mknode "w:tbl" []
+    [Elem $
+     mknode "w:tbl" []
       ( mknode "w:tblPr" []
         (   mknode "w:tblStyle" [("w:val","Table")] () :
             mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
@@ -1070,7 +1079,9 @@ blockToOpenXML' opts (DefinitionList items) = do
   setFirstPara
   return l
 
-definitionListItemToOpenXML  :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element]
+definitionListItemToOpenXML  :: (PandocMonad m)
+                             => WriterOptions -> ([Inline],[[Block]])
+                             -> WS m [Content]
 definitionListItemToOpenXML opts (term,defs) = do
   term' <- withParaPropM (pStyleM "Definition Term")
            $ blockToOpenXML opts (Para term)
@@ -1083,8 +1094,11 @@ addList marker = do
   lists <- gets stLists
   modify $ \st -> st{ stLists = lists ++ [marker] }
 
-listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element]
-listItemToOpenXML _ _ []                   = return []
+listItemToOpenXML :: (PandocMonad m)
+                  => WriterOptions
+                  -> Int -> [Block]
+                  -> WS m [Content]
+listItemToOpenXML _ _ []                  = return []
 listItemToOpenXML opts numid (first:rest) = do
   oldInList <- gets stInList
   modify $ \st -> st{ stInList = True }
@@ -1111,7 +1125,7 @@ alignmentToString alignment = case alignment of
                                  AlignDefault -> "left"
 
 -- | Convert a list of inline elements to OpenXML.
-inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element]
+inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content]
 inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
 
 withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a
@@ -1186,12 +1200,12 @@ setFirstPara :: PandocMonad m => WS m ()
 setFirstPara =  modify $ \s -> s { stFirstPara = True }
 
 -- | Convert an inline element to OpenXML.
-inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
+inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]
 inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il
 
-inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element]
+inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]
 inlineToOpenXML' _ (Str str) =
-  formattedString str
+  map Elem <$> formattedString str
 inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
 inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
 inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) =
@@ -1199,10 +1213,11 @@ inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) =
 inlineToOpenXML' opts (Span ("",["csl-left-margin"],[]) ils) =
   inlinesToOpenXML opts ils
 inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) =
-  ([mknode "w:r" []
-    (mknode "w:t"
-      [("xml:space","preserve")]
-      ("\t" :: String))] ++)
+   ([Elem $
+     mknode "w:r" []
+     (mknode "w:t"
+       [("xml:space","preserve")]
+       ("\t" :: String))] ++)
     <$> inlinesToOpenXML opts ils
 inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) =
   inlinesToOpenXML opts ils
@@ -1212,18 +1227,18 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
   let ident' = fromMaybe ident (lookup "id" kvs)
       kvs' = filter (("id" /=) . fst) kvs
   modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st }
-  return [ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ]
+  return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ]
 inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) =
   -- prefer the "id" in kvs, since that is the one produced by the docx
   -- reader.
   let ident' = fromMaybe ident (lookup "id" kvs)
-  in
-    return [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] ()
-           , mknode "w:r" []
-             [ mknode "w:rPr" []
-               [ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
-             , mknode "w:commentReference" [("w:id", T.unpack ident')] () ]
-           ]
+  in return . map Elem $
+     [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] ()
+     , mknode "w:r" []
+       [ mknode "w:rPr" []
+         [ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
+       , mknode "w:commentReference" [("w:id", T.unpack ident')] () ]
+     ]
 inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
   stylemod <- case lookup dynamicStyleKey kvs of
                    Just (fromString . T.unpack -> sty) -> do
@@ -1255,8 +1270,9 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                  modify $ \s -> s{stInsId = insId + 1}
                  return $ \f -> do
                    x <- f
-                   return [ mknode "w:ins"
-                              (("w:id", show insId) : changeAuthorDate) x]
+                   return [Elem $
+                           mknode "w:ins"
+                             (("w:id", show insId) : changeAuthorDate) x]
                else return id
   delmod <- if "deletion" `elem` classes
                then do
@@ -1265,8 +1281,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                  modify $ \s -> s{stDelId = delId + 1}
                  return $ \f -> local (\env->env{envInDel=True}) $ do
                    x <- f
-                   return [mknode "w:del"
-                           (("w:id", show delId) : changeAuthorDate) x]
+                   return [Elem $ mknode "w:del"
+                             (("w:id", show delId) : changeAuthorDate) x]
                else return id
   contents <- insmod $ delmod $ dirmod $ stylemod $ pmod
                      $ inlinesToOpenXML opts ils
@@ -1294,9 +1310,9 @@ inlineToOpenXML' opts (SmallCaps lst) =
 inlineToOpenXML' opts (Strikeout lst) =
   withTextProp (mknode "w:strike" [] ())
   $ inlinesToOpenXML opts lst
-inlineToOpenXML' _ LineBreak = return [br]
+inlineToOpenXML' _ LineBreak = return [Elem br]
 inlineToOpenXML' _ il@(RawInline f str)
-  | f == Format "openxml" = return [ x | Elem x <- parseXML str ]
+  | f == Format "openxml" = return (parseXML str)
   | otherwise             = do
       report $ InlineNotRendered il
       return []
@@ -1309,13 +1325,13 @@ inlineToOpenXML' opts (Math mathType str) = do
   when (mathType == DisplayMath) setFirstPara
   res <- (lift . lift) (convertMath writeOMML mathType str)
   case res of
-       Right r -> return [r]
+       Right r -> return [Elem r]
        Left il -> inlineToOpenXML' opts il
 inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Code attrs str) = do
   let alltoktypes = [KeywordTok ..]
   tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes
-  let unhighlighted = intercalate [br] `fmap`
+  let unhighlighted = (map Elem . intercalate [br]) `fmap`
                        mapM formattedString (T.lines str)
       formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok)
       toHlTok (toktype,tok) =
@@ -1328,7 +1344,7 @@ inlineToOpenXML' opts (Code attrs str) = do
           then unhighlighted
           else case highlight (writerSyntaxMap opts)
                       formatOpenXML attrs str of
-                    Right h  -> return h
+                    Right h  -> return (map Elem h)
                     Left msg -> do
                       unless (T.null msg) $ report $ CouldNotHighlight msg
                       unhighlighted
@@ -1351,14 +1367,14 @@ inlineToOpenXML' opts (Note bs) = do
                 $ insertNoteRef bs)
   let newnote = mknode "w:footnote" [("w:id", notenum)] contents
   modify $ \s -> s{ stFootnotes = newnote : notes }
-  return [ mknode "w:r" []
+  return [ Elem $ mknode "w:r" []
            [ mknode "w:rPr" [] footnoteStyle
            , mknode "w:footnoteReference" [("w:id", notenum)] () ] ]
 -- internal link:
 inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   return
-    [ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ]
+    [ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ]
 -- external link:
 inlineToOpenXML' opts (Link _ txt (src,_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
@@ -1370,7 +1386,7 @@ inlineToOpenXML' opts (Link _ txt (src,_)) = do
               modify $ \st -> st{ stExternalLinks =
                         M.insert (T.unpack src) i extlinks }
               return i
-  return [ mknode "w:hyperlink" [("r:id",id')] contents ]
+  return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ]
 inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
   pageWidth <- asks envPrintWidth
   imgs <- gets stImages
@@ -1434,7 +1450,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
         imgElt
 
   wrapBookmark imgident =<< case stImage of
-    Just imgData -> return [generateImgElt imgData]
+    Just imgData -> return [Elem $ generateImgElt imgData]
     Nothing -> ( do --try
       (img, mt) <- P.fetchItem src
       ident <- ("rId"++) `fmap` getUniqueId
@@ -1462,7 +1478,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
          else do
            -- insert mime type to use in constructing [Content_Types].xml
            modify $ \st -> st { stImages = M.insert (T.unpack src) imgData $ stImages st }
-           return [generateImgElt imgData]
+           return [Elem $ generateImgElt imgData]
       )
       `catchError` ( \e -> do
         report $ CouldNotFetchResource src $ T.pack (show e)
@@ -1512,7 +1528,7 @@ withDirection x = do
                                     , envTextProperties = EnvProps textStyle textProps'
                                     }
 
-wrapBookmark :: (PandocMonad m) => T.Text -> [Element] -> WS m [Element]
+wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content]
 wrapBookmark "" contents = return contents
 wrapBookmark ident contents = do
   id' <- getUniqueId
@@ -1520,7 +1536,7 @@ wrapBookmark ident contents = do
                        [("w:id", id')
                        ,("w:name", T.unpack $ toBookmarkName ident)] ()
       bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
-  return $ bookmarkStart : contents ++ [bookmarkEnd]
+  return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd]
 
 -- Word imposes a 40 character limit on bookmark names and requires
 -- that they begin with a letter.  So we just use a hash of the
-- 
cgit v1.2.3


From 00031fc809117cb436397aba83a41ca1d4056f61 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 13 Dec 2020 14:09:59 +0100
Subject: Docx writer: keep raw openxml strings verbatim.

Closes: #6933
---
 src/Text/Pandoc/Writers/Docx.hs     |   7 +++++--
 test/Tests/Writers/Docx.hs          |  10 ++++++++++
 test/docx/golden/raw-blocks.docx    | Bin 0 -> 9888 bytes
 test/docx/golden/raw-bookmarks.docx | Bin 0 -> 10023 bytes
 test/docx/raw-blocks.native         |   6 ++++++
 test/docx/raw-bookmarks.native      |   3 +++
 6 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 test/docx/golden/raw-blocks.docx
 create mode 100644 test/docx/golden/raw-bookmarks.docx
 create mode 100644 test/docx/raw-blocks.native
 create mode 100644 test/docx/raw-bookmarks.native

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 97048e980..0174a8501 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -978,7 +978,9 @@ blockToOpenXML' opts (Para lst)
       return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)]
 blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
 blockToOpenXML' _ b@(RawBlock format str)
-  | format == Format "openxml" = return (parseXML str)
+  | format == Format "openxml" = return [
+        Text (CData CDataRaw (T.unpack str) Nothing)
+      ]
   | otherwise                  = do
       report $ BlockNotRendered b
       return []
@@ -1312,7 +1314,8 @@ inlineToOpenXML' opts (Strikeout lst) =
   $ inlinesToOpenXML opts lst
 inlineToOpenXML' _ LineBreak = return [Elem br]
 inlineToOpenXML' _ il@(RawInline f str)
-  | f == Format "openxml" = return (parseXML str)
+  | f == Format "openxml" = return
+                            [Text (CData CDataRaw (T.unpack str) Nothing)]
   | otherwise             = do
       report $ InlineNotRendered il
       return []
diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs
index 8f051b4b7..66a5c3d36 100644
--- a/test/Tests/Writers/Docx.hs
+++ b/test/Tests/Writers/Docx.hs
@@ -128,6 +128,16 @@ tests = [ testGroup "inlines"
             def
             "docx/codeblock.native"
             "docx/golden/codeblock.docx"
+          , docxTest
+            "raw OOXML blocks"
+            def
+            "docx/raw-blocks.native"
+            "docx/golden/raw-blocks.docx"
+          , docxTest
+            "raw bookmark markers"
+            def
+            "docx/raw-bookmarks.native"
+            "docx/golden/raw-bookmarks.docx"
           ]
         , testGroup "track changes"
           [ docxTest
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
new file mode 100644
index 000000000..ae7f8f1f0
Binary files /dev/null and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
new file mode 100644
index 000000000..5e433b736
Binary files /dev/null and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/raw-blocks.native b/test/docx/raw-blocks.native
new file mode 100644
index 000000000..d7f985bc3
--- /dev/null
+++ b/test/docx/raw-blocks.native
@@ -0,0 +1,6 @@
+[Para [Str "Cell",Space,Str "compartments"]
+,RawBlock (Format "openxml") "<w:tbl>\n<w:tblPr>\n<w:tblW w:w=\"2000\" w:type=\"pct\"/>\n<w:tblBorders>\n<w:top w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:start w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:bottom w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:end w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n</w:tblBorders>\n</w:tblPr>\n<w:tblGrid>\n<w:gridCol w:w=\"1871\" />\n<w:gridCol w:w=\"1872\" />\n</w:tblGrid>\n<w:tr>\n<w:tc>"
+,Para [Str "Ribosome"]
+,RawBlock (Format "openxml") "</w:tc>\n<w:tc>"
+,Para [Str "Lysosome"]
+,RawBlock (Format "openxml") "</w:tc>\n</w:tr>\n</w:tbl>"]
diff --git a/test/docx/raw-bookmarks.native b/test/docx/raw-bookmarks.native
new file mode 100644
index 000000000..1e76655d6
--- /dev/null
+++ b/test/docx/raw-bookmarks.native
@@ -0,0 +1,3 @@
+[Para [Str "Manual",Space,Str "endnotes."]
+,Para [Str "Nullam",Space,Str "eu",Space,Str "ante",Space,Str "vel",Space,Str "est",Space,Str "convallis",Space,Str "dignissim.",Space,Str "Nunc",Space,Str "porta",Space,Str "vulputate",Space,Str "tellus.",Space,Str "Nunc",Space,Str "rutrum",Space,Str "turpis",Space,Str "sed",Space,Str "pede.",Space,Str "Sed",Space,Str "bibendum.",RawInline (Format "openxml") "<w:bookmarkStart w:id=\"0\" w:name=\"Aliquam\"/>",Str "Aliquam",Space,Str "posuere."]
+,Para [Str "Nunc",Space,Str "aliquet,",Space,Str "augue",Space,Str "nec",Space,Str "adipiscing",Space,Str "interdum,",Space,Str "lacus",Space,Str "tellus",Space,Str "malesuada",Space,Str "massa,",Space,Str "quis",Space,Str "varius",Space,Str "mi",Space,Str "purus",Space,Str "non",Space,Str "odio.",RawInline (Format "openxml") "<w:bookmarkEnd w:id=\"0\"/>",Str "Pellentesque",Space,Str "condimentum,",Space,Str "magna",Space,Str "ut",Space,Str "suscipit",Space,Str "hendrerit,",Space,Str "ipsum",Space,Str "augue",Space,Str "ornare",Space,Str "nulla,",Space,Str "non",Space,Str "luctus",Space,Str "diam",Space,Str "neque",Space,Str "sit",Space,Str "amet",Space,Str "urna.",Space,Str "Curabitur",Space,Str "vulputate",Space,Str "vestibulum",Space,Str "lorem."]]
-- 
cgit v1.2.3


From ef62b70646c501f8b9fd7f94ede4da9a79b63fa0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 13 Dec 2020 10:18:29 -0800
Subject: ImageSize: use JuicyPixels to determine png size.

---
 src/Text/Pandoc/ImageSize.hs | 50 +++++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 31 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index fc9e1854b..8517d07c2 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -54,6 +54,8 @@ import Control.Monad.Except
 import Control.Applicative
 import Data.Maybe (fromMaybe)
 import qualified Data.Attoparsec.ByteString.Char8 as A
+import qualified Codec.Picture.Metadata as Metadata
+import Codec.Picture (decodeImageWithMetadata)
 
 -- quick and dirty functions to get image sizes
 -- algorithms borrowed from wwwis.pl
@@ -122,7 +124,7 @@ findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img
 imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize
 imageSize opts img = checkDpi <$>
   case imageType img of
-       Just Png  -> mbToEither "could not determine PNG size" $ pngSize img
+       Just Png  -> pngSize img
        Just Gif  -> mbToEither "could not determine GIF size" $ gifSize img
        Just Jpeg -> jpegSize img
        Just Svg  -> mbToEither "could not determine SVG size" $ svgSize opts img
@@ -300,36 +302,22 @@ pPdfSize = do
             , dpiY = 72 }
    ) <|> pPdfSize
 
-pngSize :: ByteString -> Maybe ImageSize
-pngSize img = do
-  let (h, rest) = B.splitAt 8 img
-  guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" ||
-          h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
-  let (i, rest') = B.splitAt 4 $ B.drop 4 rest
-  guard $ i == "MHDR" || i == "IHDR"
-  let (sizes, rest'') = B.splitAt 8 rest'
-  (x,y) <- case map fromIntegral $unpack sizes of
-                ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
-                    (shift w1 24 + shift w2 16 + shift w3 8 + w4,
-                     shift h1 24 + shift h2 16 + shift h3 8 + h4)
-                _ -> Nothing -- "PNG parse error"
-  (dpix, dpiy) <- findpHYs rest''
-  return ImageSize { pxX  = x, pxY = y, dpiX = dpix, dpiY = dpiy }
-
-findpHYs :: ByteString -> Maybe (Integer, Integer)
-findpHYs x
-  | B.null x || "IDAT" `B.isPrefixOf` x = return (72,72)
-  | "pHYs" `B.isPrefixOf` x =
-    case map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x of
-         [x1,x2,x3,x4,y1,y2,y3,y4,u] -> do
-           let factor = if u == 1 -- dots per meter
-                          then \z -> z * 254 `div` 10000
-                          else const 72
-           return
-              ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4,
-                factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 )
-         _ -> mzero
-  | otherwise = findpHYs $ B.drop 1 x  -- read another byte
+pngSize :: ByteString -> Either T.Text ImageSize
+pngSize img =
+  case decodeImageWithMetadata img of
+    Left e -> Left (T.pack e)
+    Right (_, meta) -> do
+      pxx <- maybe (Left "Could not determine width") Right $
+                   Metadata.lookup Metadata.Width meta
+      pxy <- maybe (Left "Could not determine height") Right $
+                  Metadata.lookup Metadata.Height meta
+      dpix <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiX meta
+      dpiy <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiY meta
+      return $ ImageSize
+                { pxX = fromIntegral pxx
+                , pxY = fromIntegral pxy
+                , dpiX = fromIntegral dpix
+                , dpiY = fromIntegral dpiy }
 
 gifSize :: ByteString -> Maybe ImageSize
 gifSize img = do
-- 
cgit v1.2.3


From c3aa90b57a8b9bdafb588098b115280044531bba Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 13 Dec 2020 10:31:57 -0800
Subject: ImageSize: use JuicyPixels to extract size...

...for png, jpeg, gif, instead of doing our own binary parsing.
See #6936.
---
 src/Text/Pandoc/ImageSize.hs | 313 ++-----------------------------------------
 1 file changed, 8 insertions(+), 305 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 8517d07c2..665a94690 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
+{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
 {-# LANGUAGE ViewPatterns      #-}
 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
 {- |
@@ -32,14 +32,12 @@ module Text.Pandoc.ImageSize ( ImageType(..)
                              , showInPixel
                              , showFl
                              ) where
-import Data.ByteString (ByteString, unpack)
+import Data.ByteString (ByteString)
 import qualified Data.ByteString.Char8 as B
 import qualified Data.ByteString.Lazy as BL
+import Data.Binary.Get
 import Data.Char (isDigit)
 import Control.Monad
-import Data.Bits
-import Data.Binary
-import Data.Binary.Get
 import Text.Pandoc.Shared (safeRead)
 import Data.Default (Default)
 import Numeric (showFFloat)
@@ -47,12 +45,9 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Text.XML.Light as Xml
-import qualified Data.Map as M
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as TE
-import Control.Monad.Except
 import Control.Applicative
-import Data.Maybe (fromMaybe)
 import qualified Data.Attoparsec.ByteString.Char8 as A
 import qualified Codec.Picture.Metadata as Metadata
 import Codec.Picture (decodeImageWithMetadata)
@@ -124,9 +119,9 @@ findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img
 imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize
 imageSize opts img = checkDpi <$>
   case imageType img of
-       Just Png  -> pngSize img
-       Just Gif  -> mbToEither "could not determine GIF size" $ gifSize img
-       Just Jpeg -> jpegSize img
+       Just Png  -> getSize img
+       Just Gif  -> getSize img
+       Just Jpeg -> getSize img
        Just Svg  -> mbToEither "could not determine SVG size" $ svgSize opts img
        Just Eps  -> mbToEither "could not determine EPS size" $ epsSize img
        Just Pdf  -> mbToEither "could not determine PDF size" $ pdfSize img
@@ -141,9 +136,6 @@ imageSize opts img = checkDpi <$>
               , dpiY = if dpiY size == 0 then 72 else dpiY size }
 
 
-defaultSize :: (Integer, Integer)
-defaultSize = (72, 72)
-
 sizeInPixels :: ImageSize -> (Integer, Integer)
 sizeInPixels s = (pxX s, pxY s)
 
@@ -302,8 +294,8 @@ pPdfSize = do
             , dpiY = 72 }
    ) <|> pPdfSize
 
-pngSize :: ByteString -> Either T.Text ImageSize
-pngSize img =
+getSize :: ByteString -> Either T.Text ImageSize
+getSize img =
   case decodeImageWithMetadata img of
     Left e -> Left (T.pack e)
     Right (_, meta) -> do
@@ -319,18 +311,6 @@ pngSize img =
                 , dpiX = fromIntegral dpix
                 , dpiY = fromIntegral dpiy }
 
-gifSize :: ByteString -> Maybe ImageSize
-gifSize img = do
-  let (h, rest) = B.splitAt 6 img
-  guard $ h == "GIF87a" || h == "GIF89a"
-  case map fromIntegral $ unpack $ B.take 4 rest of
-       [w2,w1,h2,h1] -> return ImageSize {
-                          pxX  = shift w1 8 + w2,
-                          pxY  = shift h1 8 + h2,
-                          dpiX = 72,
-                          dpiY = 72
-                          }
-       _             -> Nothing -- "GIF parse error"
 
 svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
 svgSize opts img = do
@@ -378,280 +358,3 @@ emfSize img =
     case parseheader . BL.fromStrict $ img of
       Left _ -> Nothing
       Right (_, _, size) -> Just size
-
-
-jpegSize :: ByteString -> Either T.Text ImageSize
-jpegSize img =
-  let (hdr, rest) = B.splitAt 4 img
-  in if B.length rest < 14
-        then Left "unable to determine JPEG size"
-        else case hdr of
-               "\xff\xd8\xff\xe0" -> jfifSize rest
-               "\xff\xd8\xff\xe1" -> exifSize rest
-               _                  -> Left "unable to determine JPEG size"
-
-jfifSize :: ByteString -> Either T.Text ImageSize
-jfifSize rest =
-  case map fromIntegral $ unpack $ B.take 5 $ B.drop 9 rest of
-    [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] ->
-      let factor = case dpiDensity of
-                        1 -> id
-                        2 -> \x -> x * 254 `div` 10
-                        _ -> const 72
-          dpix = factor (shift dpix1 8 + dpix2)
-          dpiy = factor (shift dpiy1 8 + dpiy2)
-      in case findJfifSize rest of
-         Left msg    -> Left msg
-         Right (w,h) -> Right ImageSize { pxX = w
-                                        , pxY = h
-                                        , dpiX = dpix
-                                        , dpiY = dpiy }
-    _ -> Left "unable to determine JFIF size"
-
-findJfifSize :: ByteString -> Either T.Text (Integer,Integer)
-findJfifSize bs =
-  let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
-  in case B.uncons bs' of
-       Just (c,bs'') | c >= '\xc0' && c <= '\xc3' ->
-         case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of
-              [h1,h2,w1,w2] -> Right (shift w1 8 + w2, shift h1 8 + h2)
-              _             -> Left "JFIF parse error"
-       Just (_,bs'') ->
-         case map fromIntegral $ unpack $ B.take 2 bs'' of
-              [c1,c2] ->
-                let len = shift c1 8 + c2
-                -- skip variables
-                in  findJfifSize $ B.drop len bs''
-              _       -> Left "JFIF parse error"
-       Nothing -> Left "Did not find JFIF length record"
-
-runGet' :: Get (Either T.Text a) -> BL.ByteString -> Either T.Text a
-runGet' p bl =
-#if MIN_VERSION_binary(0,7,0)
-  case runGetOrFail p bl of
-       Left (_,_,msg) -> Left $ T.pack msg
-       Right (_,_,x)  -> x
-#else
-  runGet p bl
-#endif
-
-exifSize :: ByteString -> Either T.Text ImageSize
-exifSize bs = runGet' header bl
-  where bl = BL.fromChunks [bs]
-        header = runExceptT $ exifHeader bl
--- NOTE:  It would be nicer to do
--- runGet ((Just <$> exifHeader) <|> return Nothing)
--- which would prevent pandoc from raising an error when an exif header can't
--- be parsed.  But we only get an Alternative instance for Get in binary 0.6,
--- and binary 0.5 ships with ghc 7.6.
-
-exifHeader :: BL.ByteString -> ExceptT T.Text Get ImageSize
-exifHeader hdr = do
-  _app1DataSize <- lift getWord16be
-  exifHdr <- lift getWord32be
-  unless (exifHdr == 0x45786966) $ throwError "Did not find exif header"
-  zeros <- lift getWord16be
-  unless (zeros == 0) $ throwError "Expected zeros after exif header"
-  -- beginning of tiff header -- we read whole thing to use
-  -- in getting data from offsets:
-  let tiffHeader = BL.drop 8 hdr
-  byteAlign <- lift getWord16be
-  let bigEndian = byteAlign == 0x4d4d
-  let (getWord16, getWord32, getWord64) =
-        if bigEndian
-           then (getWord16be, getWord32be, getWord64be)
-           else (getWord16le, getWord32le, getWord64le)
-  let getRational = do
-        num <- getWord32
-        den <- getWord32
-        return $ fromIntegral num / fromIntegral den
-  tagmark <- lift getWord16
-  unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check"
-  ifdOffset <- lift getWord32
-  lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF
-  numentries <- lift  getWord16
-  let ifdEntry :: ExceptT T.Text Get (TagType, DataFormat)
-      ifdEntry = do
-       tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable
-                <$> lift getWord16
-       dataFormat <- lift getWord16
-       numComponents <- lift getWord32
-       (fmt, bytesPerComponent) <-
-             case dataFormat of
-                  1  -> return (UnsignedByte <$> getWord8, 1)
-                  2  -> return (AsciiString <$>
-                                getLazyByteString
-                                (fromIntegral numComponents), 1)
-                  3  -> return (UnsignedShort <$> getWord16, 2)
-                  4  -> return (UnsignedLong <$> getWord32, 4)
-                  5  -> return (UnsignedRational <$> getRational, 8)
-                  6  -> return (SignedByte <$> getWord8, 1)
-                  7  -> return (Undefined <$> getLazyByteString
-                                (fromIntegral numComponents), 1)
-                  8  -> return (SignedShort <$> getWord16, 2)
-                  9  -> return (SignedLong <$> getWord32, 4)
-                  10 -> return (SignedRational <$> getRational, 8)
-                  11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4)
-                  12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8)
-                  _  -> throwError $ "Unknown data format " <> T.pack (show dataFormat)
-       let totalBytes = fromIntegral $ numComponents * bytesPerComponent
-       payload <- if totalBytes <= 4 -- data is right here
-                     then lift $ fmt <* skip (4 - totalBytes)
-                     else do  -- get data from offset
-                          offs <- lift getWord32
-                          let bytesAtOffset =
-                                 BL.take (fromIntegral totalBytes)
-                                 $ BL.drop (fromIntegral offs) tiffHeader
-                          case runGet' (Right <$> fmt) bytesAtOffset of
-                               Left msg -> throwError msg
-                               Right x  -> return x
-       return (tag, payload)
-  entries <- replicateM (fromIntegral numentries) ifdEntry
-  subentries <- case lookup ExifOffset entries of
-                      Just (UnsignedLong offset') -> do
-                        pos <- lift bytesRead
-                        lift $ skip (fromIntegral offset' - (fromIntegral pos - 8))
-                        numsubentries <- lift getWord16
-                        replicateM (fromIntegral numsubentries) ifdEntry
-                      _ -> return []
-  let allentries = entries ++ subentries
-  (wdth, hght) <- case (lookup ExifImageWidth allentries,
-                        lookup ExifImageHeight allentries) of
-                       (Just (UnsignedLong w), Just (UnsignedLong h)) ->
-                         return (fromIntegral w, fromIntegral h)
-                       _ -> return defaultSize
-                            -- we return a default width and height when
-                            -- the exif header doesn't contain these
-  let resfactor = case lookup ResolutionUnit allentries of
-                        Just (UnsignedShort 1) -> 100 / 254
-                        _ -> 1
-  let xres = case lookup XResolution allentries of
-                  Just (UnsignedRational x) -> floor (x * resfactor)
-                  _ -> 72
-  let yres = case lookup YResolution allentries of
-                  Just (UnsignedRational y) -> floor (y * resfactor)
-                  _ -> 72
-  return ImageSize{
-                    pxX  = wdth
-                  , pxY  = hght
-                  , dpiX = xres
-                  , dpiY = yres }
-
-data DataFormat = UnsignedByte Word8
-                | AsciiString BL.ByteString
-                | UnsignedShort Word16
-                | UnsignedLong Word32
-                | UnsignedRational Rational
-                | SignedByte Word8
-                | Undefined BL.ByteString
-                | SignedShort Word16
-                | SignedLong Word32
-                | SignedRational Rational
-                | SingleFloat Word32
-                | DoubleFloat Word64
-                deriving (Show)
-
-data TagType = ImageDescription
-             | Make
-             | Model
-             | Orientation
-             | XResolution
-             | YResolution
-             | ResolutionUnit
-             | Software
-             | DateTime
-             | WhitePoint
-             | PrimaryChromaticities
-             | YCbCrCoefficients
-             | YCbCrPositioning
-             | ReferenceBlackWhite
-             | Copyright
-             | ExifOffset
-             | ExposureTime
-             | FNumber
-             | ExposureProgram
-             | ISOSpeedRatings
-             | ExifVersion
-             | DateTimeOriginal
-             | DateTimeDigitized
-             | ComponentConfiguration
-             | CompressedBitsPerPixel
-             | ShutterSpeedValue
-             | ApertureValue
-             | BrightnessValue
-             | ExposureBiasValue
-             | MaxApertureValue
-             | SubjectDistance
-             | MeteringMode
-             | LightSource
-             | Flash
-             | FocalLength
-             | MakerNote
-             | UserComment
-             | FlashPixVersion
-             | ColorSpace
-             | ExifImageWidth
-             | ExifImageHeight
-             | RelatedSoundFile
-             | ExifInteroperabilityOffset
-             | FocalPlaneXResolution
-             | FocalPlaneYResolution
-             | FocalPlaneResolutionUnit
-             | SensingMethod
-             | FileSource
-             | SceneType
-             | UnknownTagType
-             deriving (Show, Eq, Ord)
-
-tagTypeTable :: M.Map Word16 TagType
-tagTypeTable = M.fromList
-  [ (0x010e, ImageDescription)
-  , (0x010f, Make)
-  , (0x0110, Model)
-  , (0x0112, Orientation)
-  , (0x011a, XResolution)
-  , (0x011b, YResolution)
-  , (0x0128, ResolutionUnit)
-  , (0x0131, Software)
-  , (0x0132, DateTime)
-  , (0x013e, WhitePoint)
-  , (0x013f, PrimaryChromaticities)
-  , (0x0211, YCbCrCoefficients)
-  , (0x0213, YCbCrPositioning)
-  , (0x0214, ReferenceBlackWhite)
-  , (0x8298, Copyright)
-  , (0x8769, ExifOffset)
-  , (0x829a, ExposureTime)
-  , (0x829d, FNumber)
-  , (0x8822, ExposureProgram)
-  , (0x8827, ISOSpeedRatings)
-  , (0x9000, ExifVersion)
-  , (0x9003, DateTimeOriginal)
-  , (0x9004, DateTimeDigitized)
-  , (0x9101, ComponentConfiguration)
-  , (0x9102, CompressedBitsPerPixel)
-  , (0x9201, ShutterSpeedValue)
-  , (0x9202, ApertureValue)
-  , (0x9203, BrightnessValue)
-  , (0x9204, ExposureBiasValue)
-  , (0x9205, MaxApertureValue)
-  , (0x9206, SubjectDistance)
-  , (0x9207, MeteringMode)
-  , (0x9208, LightSource)
-  , (0x9209, Flash)
-  , (0x920a, FocalLength)
-  , (0x927c, MakerNote)
-  , (0x9286, UserComment)
-  , (0xa000, FlashPixVersion)
-  , (0xa001, ColorSpace)
-  , (0xa002, ExifImageWidth)
-  , (0xa003, ExifImageHeight)
-  , (0xa004, RelatedSoundFile)
-  , (0xa005, ExifInteroperabilityOffset)
-  , (0xa20e, FocalPlaneXResolution)
-  , (0xa20f, FocalPlaneYResolution)
-  , (0xa210, FocalPlaneResolutionUnit)
-  , (0xa217, SensingMethod)
-  , (0xa300, FileSource)
-  , (0xa301, SceneType)
-  ]
-- 
cgit v1.2.3


From c43e2dc0f4ce639c8d0bd156afea2fd07ffc91ff Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 13 Dec 2020 10:45:03 -0800
Subject: RST writer: better image handling.

- An image alone in its paragraph (but not a figure) is now
  rendered as an independent image, with an `alt` attribute
  if a description is supplied.
- An inline image that is not alone in its paragraph will
  be rendered, as before, using a substitution.
  Such an image cannot have a "center", "left", or
  "right" alignment, so the classes `align-center`,
  `align-left`, or `align-right` are ignored.
  However, `align-top`, `align-middle`, `align-bottom`
  will generate a corresponding `align` attribute.

Closes #6948.
---
 src/Text/Pandoc/Writers/RST.hs | 30 +++++++++++++++++++++---------
 test/command/4420.md           | 11 -----------
 test/command/6948.md           | 31 +++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 20 deletions(-)
 create mode 100644 test/command/6948.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 43bf382b7..8beeef46a 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -143,9 +143,12 @@ pictToRST (label, (attr, src, _, mbtarget)) = do
   let (_, cls, _) = attr
       classes = case cls of
                    []               -> empty
-                   ["align-right"]  -> ":align: right"
-                   ["align-left"]   -> ":align: left"
-                   ["align-center"] -> ":align: center"
+                   ["align-top"]    -> ":align: top"
+                   ["align-middle"] -> ":align: middle"
+                   ["align-bottom"] -> ":align: bottom"
+                   ["align-center"] -> empty
+                   ["align-right"]  -> empty
+                   ["align-left"]   -> empty
                    _                -> ":class: " <> literal (T.unwords cls)
   return $ nowrap
          $ ".. |" <> label' <> "| image:: " <> literal src $$ hang 3 empty (classes $$ dims)
@@ -215,19 +218,28 @@ blockToRST (Div (ident,classes,_kvs) bs) = do
            nest 3 contents $$
            blankline
 blockToRST (Plain inlines) = inlineListToRST inlines
--- title beginning with fig: indicates that the image is a figure
-blockToRST (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do
-  capt <- inlineListToRST txt
+blockToRST (Para [Image attr txt (src, rawtit)]) = do
+  description <- inlineListToRST txt
   dims <- imageDimsToRST attr
-  let fig = "figure:: " <> literal src
-      alt = ":alt: " <> if T.null tit then capt else literal tit
+  -- title beginning with fig: indicates that the image is a figure
+  let (isfig, tit) = case T.stripPrefix "fig:" rawtit of
+                          Nothing   -> (False, rawtit)
+                          Just tit' -> (True, tit')
+  let fig | isfig = "figure:: " <> literal src
+          | otherwise = "image:: " <> literal src
+      alt | isfig = ":alt: " <> if T.null tit then description else literal tit
+          | null txt = empty
+          | otherwise = ":alt: " <> description
+      capt | isfig = description
+           | otherwise = empty
       (_,cls,_) = attr
       classes = case cls of
                    []               -> empty
                    ["align-right"]  -> ":align: right"
                    ["align-left"]   -> ":align: left"
                    ["align-center"] -> ":align: center"
-                   _                -> ":figclass: " <> literal (T.unwords cls)
+                   _ | isfig        -> ":figclass: " <> literal (T.unwords cls)
+                     | otherwise    -> ":class: " <> literal (T.unwords cls)
   return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline
 blockToRST (Para inlines)
   | LineBreak `elem` inlines =
diff --git a/test/command/4420.md b/test/command/4420.md
index 3e7008935..36d697234 100644
--- a/test/command/4420.md
+++ b/test/command/4420.md
@@ -1,14 +1,3 @@
-```
-% pandoc -f native -t rst
-[Image ("",["align-right"],[("width","100px")]) [Str "image"] ("foo.png","")]
-^D
-|image|
-
-.. |image| image:: foo.png
-   :align: right
-   :width: 100px
-```
-
 ```
 % pandoc -f native -t rst
 [Para [Image ("",["align-right"],[("width","100px")]) [Str "image"] ("foo.png","fig:test")]]
diff --git a/test/command/6948.md b/test/command/6948.md
new file mode 100644
index 000000000..8803aebe9
--- /dev/null
+++ b/test/command/6948.md
@@ -0,0 +1,31 @@
+Treat an image alone in its paragraph (but not a figure)
+as an independent image:
+```
+% pandoc -f native -t rst
+[Para [Image ("",["align-center"],[]) [Str "https://pandoc.org/diagram.jpg"] ("https://pandoc.org/diagram.jpg","")]]
+^D
+.. image:: https://pandoc.org/diagram.jpg
+   :alt: https://pandoc.org/diagram.jpg
+   :align: center
+```
+
+Here we just omit the center attribute as it's not valid:
+```
+% pandoc -f native -t rst
+[Para [Str "hi",Space,Image ("",["align-center"],[]) [Str "https://pandoc.org/diagram.jpg"] ("https://pandoc.org/diagram.jpg","")]]
+^D
+hi |https://pandoc.org/diagram.jpg|
+
+.. |https://pandoc.org/diagram.jpg| image:: https://pandoc.org/diagram.jpg
+```
+
+But we can use top, middle, or bottom alignment:
+```
+% pandoc -f native -t rst
+[Para [Str "hi",Space,Image ("",["align-top"],[]) [Str "https://pandoc.org/diagram.jpg"] ("https://pandoc.org/diagram.jpg","")]]
+^D
+hi |https://pandoc.org/diagram.jpg|
+
+.. |https://pandoc.org/diagram.jpg| image:: https://pandoc.org/diagram.jpg
+   :align: top
+```
-- 
cgit v1.2.3


From 39153ea6e2a77604e3302c5591fbb65496c3457a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 14 Dec 2020 09:39:07 -0800
Subject: ImageSize: use exif width and height when available.

After the move to JuicyPixels, we were getting incorrect
width and heigh information for some images (see #6936, test-3.jpg).

The correct information was encoded in Exif tags that
JuicyPixels seemed to ignore. So we check these first
before looking at the Width and Height identified by
JuicyPixels.

Closes #6936.
---
 src/Text/Pandoc/ImageSize.hs | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 665a94690..9ce5c668d 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -50,6 +50,7 @@ import qualified Data.Text.Encoding as TE
 import Control.Applicative
 import qualified Data.Attoparsec.ByteString.Char8 as A
 import qualified Codec.Picture.Metadata as Metadata
+import qualified Codec.Picture.Metadata.Exif as Exif
 import Codec.Picture (decodeImageWithMetadata)
 
 -- quick and dirty functions to get image sizes
@@ -300,8 +301,16 @@ getSize img =
     Left e -> Left (T.pack e)
     Right (_, meta) -> do
       pxx <- maybe (Left "Could not determine width") Right $
+                   -- first look for exif image width, then width
+                   (Metadata.lookup
+                     (Metadata.Exif (Exif.TagUnknown 0xA002)) meta >>=
+                       exifDataToWord) <|>
                    Metadata.lookup Metadata.Width meta
       pxy <- maybe (Left "Could not determine height") Right $
+                  -- first look for exif image height, then height
+                  (Metadata.lookup
+                     (Metadata.Exif (Exif.TagUnknown 0xA003)) meta >>=
+                       exifDataToWord) <|>
                   Metadata.lookup Metadata.Height meta
       dpix <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiX meta
       dpiy <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiY meta
@@ -310,6 +319,10 @@ getSize img =
                 , pxY = fromIntegral pxy
                 , dpiX = fromIntegral dpix
                 , dpiY = fromIntegral dpiy }
+ where
+  exifDataToWord (Exif.ExifLong x) = Just $ fromIntegral x
+  exifDataToWord (Exif.ExifShort x) = Just $ fromIntegral x
+  exifDataToWord _ = Nothing
 
 
 svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
-- 
cgit v1.2.3


From 7d799bfcda749e4a3ad6fcae59ac5ccb80b77ffd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 15 Dec 2020 08:51:43 -0800
Subject: Allow both inline and external references to be used

with `--citeproc`.  This fixes a regression, since pandoc-citeproc
allowed these to be combined.

Closes #6951.
---
 src/Text/Pandoc/Citeproc.hs | 29 +++++++++++++++--------------
 test/command/6951.md        | 18 ++++++++++++++++++
 2 files changed, 33 insertions(+), 14 deletions(-)
 create mode 100644 test/command/6951.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 0de2882ae..82d1dc32f 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -105,20 +105,21 @@ processCitations (Pandoc meta bs) = do
   let idpred = if "*" `Set.member` nocites
                   then const True
                   else (`Set.member` citeIds)
-  refs <- map (linkifyVariables . legacyDateRanges) <$>
-          case lookupMeta "references" meta of
-            Just (MetaList rs) -> return $ mapMaybe metaValueToReference rs
-            _                  ->
-              case lookupMeta "bibliography" meta of
-                 Just (MetaList xs) ->
-                   mconcat <$>
-                     mapM (getRefsFromBib locale idpred)
-                       (mapMaybe metaValueToText xs)
-                 Just x ->
-                   case metaValueToText x of
-                     Just fp -> getRefsFromBib locale idpred fp
-                     Nothing -> return []
-                 Nothing -> return []
+  let inlineRefs = case lookupMeta "references" meta of
+                    Just (MetaList rs) -> mapMaybe metaValueToReference rs
+                    _                  -> []
+  externalRefs <- case lookupMeta "bibliography" meta of
+                    Just (MetaList xs) ->
+                      mconcat <$>
+                        mapM (getRefsFromBib locale idpred)
+                          (mapMaybe metaValueToText xs)
+                    Just x ->
+                      case metaValueToText x of
+                        Just fp -> getRefsFromBib locale idpred fp
+                        Nothing -> return []
+                    Nothing -> return []
+  let refs = map (linkifyVariables . legacyDateRanges)
+                 (inlineRefs ++ externalRefs)
   let otherIdsMap = foldr (\ref m ->
                              case T.words . extractText <$>
                                   M.lookup "other-ids"
diff --git a/test/command/6951.md b/test/command/6951.md
new file mode 100644
index 000000000..4a6301d80
--- /dev/null
+++ b/test/command/6951.md
@@ -0,0 +1,18 @@
+```
+% pandoc --citeproc -t plain --bibliography command/biblio.bib
+---
+references:
+- id: foo
+  title: Crazy
+  type: book
+...
+
+[@foo; @item1]
+
+^D
+(Crazy, n.d.; Doe 2005)
+
+Crazy. n.d.
+
+Doe, John. 2005. First Book. Cambridge: Cambridge University Press.
+```
-- 
cgit v1.2.3


From 87033b285669901505f6da8b2969911a29c448fd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 15 Dec 2020 09:09:51 -0800
Subject: Use fetchItem to get external bibliography.

This means that:

- a URL may be provided, and pandoc will fetch the resource.
- Pandoc will search the resource path for the bibliography
  if it is not found relative to the working directory.

Closes #6940.
---
 src/Text/Pandoc/Citeproc.hs | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 82d1dc32f..770d571a6 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -190,24 +190,23 @@ insertSpace ils =
 
 getRefsFromBib :: PandocMonad m
                => Locale -> (Text -> Bool) -> Text -> m [Reference Inlines]
-getRefsFromBib locale idpred t = do
-  let fp = T.unpack t
-  raw <- readFileStrict fp
-  case formatFromExtension fp of
+getRefsFromBib locale idpred fp = do
+  (raw, _) <- fetchItem fp
+  case formatFromExtension (T.unpack fp) of
     Just f -> getRefs locale f idpred (Just fp) raw
     Nothing -> throwError $ PandocAppError $
-                 "Could not determine bibliography format for " <> t
+                 "Could not determine bibliography format for " <> fp
 
 getRefs :: PandocMonad m
         => Locale
         -> BibFormat
         -> (Text -> Bool)
-        -> Maybe FilePath
+        -> Maybe Text
         -> ByteString
         -> m [Reference Inlines]
 getRefs locale format idpred mbfp raw = do
   let err' = throwError .
-             PandocBibliographyError (maybe mempty T.pack mbfp)
+             PandocBibliographyError (fromMaybe mempty mbfp)
   case format of
     Format_bibtex ->
       either (err' . tshow) return .
@@ -222,7 +221,7 @@ getRefs locale format idpred mbfp raw = do
     Format_yaml -> do
       rs <- yamlToRefs idpred
               def{ readerExtensions = pandocExtensions }
-              mbfp
+              (T.unpack <$> mbfp)
               (L.fromStrict raw)
       return $ mapMaybe metaValueToReference rs
 
-- 
cgit v1.2.3


From b4b4e32307499aafa2e6c1c713ee41f1c787ea76 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 15 Dec 2020 23:45:34 -0800
Subject: Properly handle boolean values in writing YAML metadata.

(Markdown writer.)
This requires doctemplates >= 0.9.
Closes #6388.
---
 pandoc.cabal                        |  4 ++--
 src/Text/Pandoc/Writers/Markdown.hs |  2 ++
 src/Text/Pandoc/Writers/Shared.hs   |  3 +--
 stack.yaml                          |  5 ++++-
 test/command/6388.md                | 16 ++++++++++++++++
 5 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 test/command/6388.md

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 6483690b9..9244d25cd 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -410,7 +410,7 @@ library
                  deepseq               >= 1.3      && < 1.5,
                  directory             >= 1.2.3    && < 1.4,
                  doclayout             >= 0.3      && < 0.4,
-                 doctemplates          >= 0.8.2    && < 0.9,
+                 doctemplates          >= 0.9      && < 0.10,
                  emojis                >= 0.1      && < 0.2,
                  exceptions            >= 0.8      && < 0.11,
                  file-embed            >= 0.0      && < 0.1,
@@ -767,7 +767,7 @@ test-suite test-pandoc
                   bytestring        >= 0.9     && < 0.12,
                   containers        >= 0.4.2.1 && < 0.7,
                   directory         >= 1.2.3   && < 1.4,
-                  doctemplates      >= 0.8.2   && < 0.9,
+                  doctemplates      >= 0.9     && < 0.10,
                   exceptions        >= 0.8     && < 0.11,
                   executable-path   >= 0.0     && < 0.1,
                   filepath          >= 1.1     && < 1.5,
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 5eb47b261..c349fd713 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -174,6 +174,8 @@ valToYaml :: Val Text -> Doc Text
 valToYaml (ListVal xs) =
   vcat $ map (\v -> hang 2 "- " (valToYaml v)) xs
 valToYaml (MapVal c) = contextToYaml c
+valToYaml (BoolVal True) = "true"
+valToYaml (BoolVal False) = "false"
 valToYaml (SimpleVal x)
   | isEmpty x = empty
   | otherwise =
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index b399afbf3..129e45e9d 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -109,8 +109,7 @@ metaValueToVal blockWriter inlineWriter (MetaMap metamap) =
   MapVal . Context <$> mapM (metaValueToVal blockWriter inlineWriter) metamap
 metaValueToVal blockWriter inlineWriter (MetaList xs) = ListVal <$>
   mapM (metaValueToVal blockWriter inlineWriter) xs
-metaValueToVal _ _ (MetaBool True) = return $ SimpleVal "true"
-metaValueToVal _ _ (MetaBool False) = return NullVal
+metaValueToVal _ _ (MetaBool b) = return $ BoolVal b
 metaValueToVal _ inlineWriter (MetaString s) =
    SimpleVal <$> inlineWriter (Builder.toList (Builder.text s))
 metaValueToVal blockWriter _ (MetaBlocks bs) = SimpleVal <$> blockWriter bs
diff --git a/stack.yaml b/stack.yaml
index ef0bc7a83..7c1bde8ae 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -16,7 +16,10 @@ extra-deps:
 - commonmark-0.1.1.2
 - commonmark-extensions-0.2.0.4
 - commonmark-pandoc-0.2.0.1
-- doctemplates-0.8.3
+# - doctemplates-0.8.3
+- doctemplates:
+  git: https://github.com/jgm/doctemplates.git
+  commit: 6cb8d09264f7c1f4b59ac67ff98b1a664a85e36a
 - citeproc-0.2.0.1
 # - citeproc:
 #   git: https://github.com/jgm/citeproc.git
diff --git a/test/command/6388.md b/test/command/6388.md
new file mode 100644
index 000000000..29a9156d7
--- /dev/null
+++ b/test/command/6388.md
@@ -0,0 +1,16 @@
+```
+% pandoc -t markdown -s
+---
+nvalue: false
+value: true
+---
+
+text
+^D
+---
+nvalue: false
+value: true
+---
+
+text
+```
-- 
cgit v1.2.3


From 57241e201a637d32b5d37b197463a4b96825a6ea Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 16 Dec 2020 07:56:07 -0800
Subject: Support Lua marshalling of doctemplates BoolVal.

This updates T.P.Lua.Marshaling.Context for doctemplates >= 0.9.
---
 src/Text/Pandoc/Lua/Marshaling/Context.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Lua/Marshaling/Context.hs b/src/Text/Pandoc/Lua/Marshaling/Context.hs
index effcc675d..c0e7aef60 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Context.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Context.hs
@@ -22,6 +22,7 @@ instance (TemplateTarget a, Pushable a) => Pushable (Context a) where
 
 instance (TemplateTarget a, Pushable a) => Pushable (Val a) where
   push NullVal = Lua.push ()
+  push (BoolVal b) = Lua.push b
   push (MapVal ctx) = Lua.push ctx
   push (ListVal xs) = Lua.push xs
   push (SimpleVal d) = Lua.push $ render Nothing d
-- 
cgit v1.2.3


From 914cf0b602f0585cfd1b401b956c8edfd129d47d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 16 Dec 2020 15:37:40 -0800
Subject: Fix citeproc regression with duplicate references.

- Use dev version of citeproc, which handles duplicate
  ids better, preferring the last one in the list
  and discarding the rest.
- Ensure that inline citations take priority over external
  ones.

See jgm/citeproc#36.

This restores the behavior of pandoc-citeproc.
---
 cabal.project               | 8 ++++----
 pandoc.cabal                | 2 +-
 src/Text/Pandoc/Citeproc.hs | 3 ++-
 stack.yaml                  | 8 ++++----
 4 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/cabal.project b/cabal.project
index 091c78ce6..55d4abaa2 100644
--- a/cabal.project
+++ b/cabal.project
@@ -9,8 +9,8 @@ source-repository-package
   location: https://github.com/jgm/doctemplates.git
   tag: 7ccbf7df16edbc7c5d835d955b242c61fd4e6601
 
--- source-repository-package
---   type: git
---   location: https://github.com/jgm/citeproc
---   tag:  42b1d154b02435229acbe98ae0f17d01b757ee93
+source-repository-package
+  type: git
+  location: https://github.com/jgm/citeproc
+  tag: a8193fe375fa2354049bf9a967ba3bad4b1ba053
 
diff --git a/pandoc.cabal b/pandoc.cabal
index 9244d25cd..d21d5e03c 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -400,7 +400,7 @@ library
                  blaze-markup          >= 0.8      && < 0.9,
                  bytestring            >= 0.9      && < 0.12,
                  case-insensitive      >= 1.2      && < 1.3,
-                 citeproc              >= 0.2.0.1  && < 0.3,
+                 citeproc              >= 0.3      && < 0.4,
                  commonmark            >= 0.1.1.2  && < 0.2,
                  commonmark-extensions >= 0.2.0.4  && < 0.3,
                  commonmark-pandoc     >= 0.2      && < 0.3,
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 770d571a6..ca1ab9f96 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -119,7 +119,8 @@ processCitations (Pandoc meta bs) = do
                         Nothing -> return []
                     Nothing -> return []
   let refs = map (linkifyVariables . legacyDateRanges)
-                 (inlineRefs ++ externalRefs)
+                 (externalRefs ++ inlineRefs)
+                 -- note that inlineRefs can override externalRefs
   let otherIdsMap = foldr (\ref m ->
                              case T.words . extractText <$>
                                   M.lookup "other-ids"
diff --git a/stack.yaml b/stack.yaml
index 518c24b63..92a80c4ff 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -20,10 +20,10 @@ extra-deps:
 - doctemplates:
   git: https://github.com/jgm/doctemplates.git
   commit: 7ccbf7df16edbc7c5d835d955b242c61fd4e6601
-- citeproc-0.2.0.1
-# - citeproc:
-#   git: https://github.com/jgm/citeproc.git
-#   commit: 42b1d154b02435229acbe98ae0f17d01b757ee93
+# - citeproc-0.2.0.1
+- citeproc:
+  git: https://github.com/jgm/citeproc.git
+  commit: a8193fe375fa2354049bf9a967ba3bad4b1ba053
 ghc-options:
    "$locals": -fhide-source-paths -Wno-missing-home-modules
 resolver: lts-16.23
-- 
cgit v1.2.3


From 8f402beab922646d4c428b40a75fe4d140ab5e9e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 21 Dec 2020 03:04:54 +0100
Subject: LaTeX writer: support colspans and rowspans in tables. (#6950)

Note that the multirow package is needed for rowspans.
It is included in the latex template under a variable,
so that it won't be used unless needed for a table.
---
 data/templates/default.latex              |   3 +
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/Writers/AnnotatedTable.hs |  23 +++
 src/Text/Pandoc/Writers/LaTeX.hs          |   6 +-
 src/Text/Pandoc/Writers/LaTeX/Table.hs    | 301 ++++++++++++++++++++----------
 src/Text/Pandoc/Writers/LaTeX/Types.hs    |   2 +
 test/Tests/Old.hs                         |   2 +-
 test/tables/nordics.latex                 |  26 +++
 test/tables/planets.latex                 |  36 ++++
 test/tables/students.latex                |  23 +++
 10 files changed, 326 insertions(+), 97 deletions(-)
 create mode 100644 test/tables/nordics.latex
 create mode 100644 test/tables/planets.latex
 create mode 100644 test/tables/students.latex

(limited to 'src')

diff --git a/data/templates/default.latex b/data/templates/default.latex
index 169661582..c567278e3 100644
--- a/data/templates/default.latex
+++ b/data/templates/default.latex
@@ -255,6 +255,9 @@ $highlighting-macros$
 $endif$
 $if(tables)$
 \usepackage{longtable,booktabs,array}
+$if(multirow)$
+\usepackage{multirow}
+$endif$
 \usepackage{calc} % for calculating minipage widths
 $if(beamer)$
 \usepackage{caption}
diff --git a/pandoc.cabal b/pandoc.cabal
index 8a4faa3e1..4a893d672 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -299,6 +299,7 @@ extra-source-files:
                  test/tables.xwiki
                  test/tables/*.html4
                  test/tables/*.html5
+                 test/tables/*.latex
                  test/tables/*.native
                  test/tables/*.jats_archiving
                  test/testsuite.txt
diff --git a/src/Text/Pandoc/Writers/AnnotatedTable.hs b/src/Text/Pandoc/Writers/AnnotatedTable.hs
index 48c9d61f2..3f69496a9 100644
--- a/src/Text/Pandoc/Writers/AnnotatedTable.hs
+++ b/src/Text/Pandoc/Writers/AnnotatedTable.hs
@@ -1,8 +1,12 @@
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE DeriveDataTypeable #-}
 {-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE TupleSections #-}
+{-# LANGUAGE UndecidableInstances #-}
 
 {- |
    Module      : Text.Pandoc.Writers.AnnotatedTable
@@ -45,6 +49,7 @@ import           Data.Generics                  ( Data
 import           Data.List.NonEmpty             ( NonEmpty(..) )
 import           GHC.Generics                   ( Generic )
 import qualified Text.Pandoc.Builder           as B
+import           Text.Pandoc.Walk               ( Walkable (..) )
 
 -- | An annotated table type, corresponding to the Pandoc 'B.Table'
 -- constructor and the HTML @\<table\>@ element. It records the data
@@ -298,3 +303,21 @@ fromBodyRow (BodyRow attr _ rh rb) =
 
 fromCell :: Cell -> B.Cell
 fromCell (Cell _ _ c) = c
+
+--
+-- Instances
+--
+instance Walkable a B.Cell => Walkable a Cell where
+  walkM f (Cell colspecs colnum cell) =
+    Cell colspecs colnum <$> walkM f cell
+  query f (Cell _colspecs _colnum cell) = query f cell
+
+instance Walkable a B.Cell => Walkable a HeaderRow where
+  walkM f (HeaderRow attr rownum cells) =
+    HeaderRow attr rownum <$> walkM f cells
+  query f (HeaderRow _attr _rownum cells) = query f cells
+
+instance Walkable a B.Cell => Walkable a TableHead where
+  walkM f (TableHead attr rows) =
+    TableHead attr <$> walkM f rows
+  query f (TableHead _attr rows) = query f rows
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 6a4e3ba69..2281290c0 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -48,6 +48,7 @@ import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState)
 import Text.Pandoc.Writers.Shared
 import Text.Printf (printf)
 import qualified Data.Text.Normalize as Normalize
+import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
 
 -- | Convert Pandoc to LaTeX.
 writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@@ -154,6 +155,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                   defField "documentclass" documentClass $
                   defField "verbatim-in-note" (stVerbInNote st) $
                   defField "tables" (stTable st) $
+                  defField "multirow" (stMultiRow st) $
                   defField "strikeout" (stStrikeout st) $
                   defField "url" (stUrl st) $
                   defField "numbersections" (writerNumberSections options) $
@@ -716,9 +718,9 @@ blockToLaTeX (Header level (id',classes,_) lst) = do
   hdr <- sectionHeader classes id' level lst
   modify $ \s -> s{stInHeading = False}
   return hdr
-blockToLaTeX (Table _ blkCapt specs thead tbodies tfoot) =
+blockToLaTeX (Table attr blkCapt specs thead tbodies tfoot) =
   tableToLaTeX inlineListToLaTeX blockListToLaTeX
-               blkCapt specs thead tbodies tfoot
+               (Ann.toTable attr blkCapt specs thead tbodies tfoot)
 
 blockListToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text)
 blockListToLaTeX lst =
diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index 5299efa37..9dd66c8a3 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -16,6 +16,7 @@ module Text.Pandoc.Writers.LaTeX.Table
   ) where
 import Control.Monad.State.Strict
 import Data.List (intersperse)
+import Data.List.NonEmpty (NonEmpty ((:|)))
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -23,102 +24,196 @@ import Text.Pandoc.Definition
 import Text.DocLayout
   ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest
   , text, vcat, ($$) )
-import Text.Pandoc.Shared (splitBy)
+import Text.Pandoc.Shared (blocksToInlines, splitBy, tshow)
 import Text.Pandoc.Walk (walk)
-import Text.Pandoc.Writers.Shared (toLegacyTable)
 import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
 import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX)
 import Text.Pandoc.Writers.LaTeX.Types
-  ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stNotes, stTable) )
+  ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stMultiRow
+                    , stNotes, stTable) )
 import Text.Printf (printf)
+import qualified Text.Pandoc.Builder as B
+import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
 
 tableToLaTeX :: PandocMonad m
              => ([Inline] -> LW m (Doc Text))
              -> ([Block]  -> LW m (Doc Text))
-             -> Caption -> [ColSpec] -> TableHead -> [TableBody] -> TableFoot
+             -> Ann.Table
              -> LW m (Doc Text)
-tableToLaTeX inlnsToLaTeX blksToLaTeX blkCapt specs thead tbody tfoot = do
-  let (caption, aligns, widths, heads, rows) =
-        toLegacyTable blkCapt specs thead tbody tfoot
-  -- simple tables have to have simple cells:
-  let isSimple = \case
-        [Plain _] -> True
-        [Para  _] -> True
-        []        -> True
-        _         -> False
-  let widths' = if all (== 0) widths && not (all (all isSimple) rows)
-                   then replicate (length aligns)
-                          (1 / fromIntegral (length aligns))
-                   else widths
-  (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption
-  let toHeaders hs = do contents <- tableRowToLaTeX blksToLaTeX True aligns hs
-                        return ("\\toprule" $$ contents $$ "\\midrule")
+tableToLaTeX inlnsToLaTeX blksToLaTeX tbl = do
+  let (Ann.Table _attr caption _specs thead tbodies tfoot) = tbl
+  CaptionDocs capt captNotes <- captionToLaTeX inlnsToLaTeX caption
   let removeNote (Note _) = Span ("", [], []) []
       removeNote x        = x
-  firsthead <- if isEmpty captionText || all null heads
-                  then return empty
-                  else ($$ text "\\endfirsthead") <$> toHeaders heads
-  head' <- if all null heads
-              then return "\\toprule"
-              -- avoid duplicate notes in head and firsthead:
-              else toHeaders (if isEmpty firsthead
-                                 then heads
-                                 else walk removeNote heads)
-  let capt = if isEmpty captionText
-                then empty
-                else "\\caption" <> captForLof <> braces captionText
-                         <> "\\tabularnewline"
-  rows' <- mapM (tableRowToLaTeX blksToLaTeX False aligns) rows
-  let colDescriptors =
-         (if all (== 0) widths'
-             then hcat . map literal
-             else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $
-         zipWith (toColDescriptor (length widths')) aligns widths'
+  firsthead <- if isEmpty capt || isEmptyHead thead
+               then return empty
+               else ($$ text "\\endfirsthead") <$>
+                    headToLaTeX blksToLaTeX thead
+  head' <- if isEmptyHead thead
+           then return "\\toprule"
+           -- avoid duplicate notes in head and firsthead:
+           else headToLaTeX blksToLaTeX
+                (if isEmpty firsthead
+                 then thead
+                 else walk removeNote thead)
+  rows' <- mapM (rowToLaTeX blksToLaTeX BodyCell) $
+                mconcat (map bodyRows tbodies) <> footRows tfoot
   modify $ \s -> s{ stTable = True }
   notes <- notesToLaTeX <$> gets stNotes
-  return $ "\\begin{longtable}[]" <>
-              braces ("@{}" <> colDescriptors <> "@{}")
-              -- the @{} removes extra space at beginning and end
-         $$ capt
-         $$ firsthead
-         $$ head'
-         $$ "\\endhead"
-         $$ vcat rows'
-         $$ "\\bottomrule"
-         $$ "\\end{longtable}"
-         $$ captNotes
-         $$ notes
-
-toColDescriptor :: Int -> Alignment -> Double -> Text
-toColDescriptor _numcols align 0 =
-  case align of
-         AlignLeft    -> "l"
-         AlignRight   -> "r"
-         AlignCenter  -> "c"
-         AlignDefault -> "l"
-toColDescriptor numcols align width =
-  T.pack $ printf
-     ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}"
-     align'
-     ((numcols - 1) * 2)
-     width
- where
-   align' :: String
-   align' = case align of
-              AlignLeft    -> "\\raggedright"
-              AlignRight   -> "\\raggedleft"
-              AlignCenter  -> "\\centering"
-              AlignDefault -> "\\raggedright"
-
-tableRowToLaTeX :: PandocMonad m
-                => ([Block] -> LW m (Doc Text))
-                -> Bool
-                -> [Alignment]
-                -> [[Block]]
-                -> LW m (Doc Text)
-tableRowToLaTeX blockListToLaTeX header aligns cols = do
-  cells <- mapM (tableCellToLaTeX blockListToLaTeX header) $ zip aligns cols
-  return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace"
+  return
+    $  "\\begin{longtable}[]" <>
+          braces ("@{}" <> colDescriptors tbl <> "@{}")
+          -- the @{} removes extra space at beginning and end
+    $$ capt
+    $$ firsthead
+    $$ head'
+    $$ "\\endhead"
+    $$ vcat rows'
+    $$ "\\bottomrule"
+    $$ "\\end{longtable}"
+    $$ captNotes
+    $$ notes
+
+-- | Creates column descriptors for the table.
+colDescriptors :: Ann.Table -> Doc Text
+colDescriptors (Ann.Table _attr _caption specs thead tbodies tfoot) =
+  let (aligns, widths) = unzip specs
+
+      defaultWidthsOnly = all (== ColWidthDefault) widths
+      isSimpleTable = all (all isSimpleCell) $ mconcat
+                      [ headRows thead
+                      , concatMap bodyRows tbodies
+                      , footRows tfoot
+                      ]
+
+      relativeWidths = if defaultWidthsOnly
+                       then replicate (length specs)
+                            (1 / fromIntegral (length specs))
+                       else map toRelWidth widths
+  in if defaultWidthsOnly && isSimpleTable
+     then hcat $ map (literal . colAlign) aligns
+     else (cr <>) . nest 2 . vcat . map literal $
+          zipWith (toColDescriptor (length specs))
+                  aligns
+                  relativeWidths
+  where
+    toColDescriptor :: Int -> Alignment -> Double -> Text
+    toColDescriptor numcols align width =
+      T.pack $ printf
+      ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}"
+      (T.unpack (alignCommand align))
+      ((numcols - 1) * 2)
+      width
+
+    isSimpleCell (Ann.Cell _ _ (Cell _attr _align _rowspan _colspan blocks)) =
+      case blocks of
+        [Para _]  -> True
+        [Plain _] -> True
+        []        -> True
+        _         -> False
+
+    toRelWidth ColWidthDefault = 0
+    toRelWidth (ColWidth w)    = w
+
+alignCommand :: Alignment -> Text
+alignCommand = \case
+  AlignLeft    -> "\\raggedright"
+  AlignRight   -> "\\raggedleft"
+  AlignCenter  -> "\\centering"
+  AlignDefault -> "\\raggedright"
+
+colAlign :: Alignment -> Text
+colAlign = \case
+  AlignLeft    -> "l"
+  AlignRight   -> "r"
+  AlignCenter  -> "c"
+  AlignDefault -> "l"
+
+data CaptionDocs =
+  CaptionDocs
+  { captionCommand :: Doc Text
+  , captionNotes :: Doc Text
+  }
+
+captionToLaTeX :: PandocMonad m
+               => ([Inline] -> LW m (Doc Text))
+               -> Caption
+               -> LW m CaptionDocs
+captionToLaTeX inlnsToLaTeX (Caption _maybeShort longCaption) = do
+  let caption = blocksToInlines longCaption
+  (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption
+  return $ CaptionDocs
+    { captionNotes = captNotes
+    , captionCommand = if isEmpty captionText
+                       then empty
+                       else "\\caption" <> captForLof <>
+                            braces captionText <> "\\tabularnewline"
+    }
+
+type BlocksWriter m = [Block] -> LW m (Doc Text)
+
+headToLaTeX :: PandocMonad m
+            => BlocksWriter m
+            -> Ann.TableHead
+            -> LW m (Doc Text)
+headToLaTeX blocksWriter (Ann.TableHead _attr headerRows) = do
+  rowsContents <- mapM (rowToLaTeX blocksWriter HeaderCell . headerRowCells)
+                       headerRows
+  return ("\\toprule" $$ vcat rowsContents $$ "\\midrule")
+
+-- | Converts a row of table cells into a LaTeX row.
+rowToLaTeX :: PandocMonad m
+           => BlocksWriter m
+           -> CellType
+           -> [Ann.Cell]
+           -> LW m (Doc Text)
+rowToLaTeX blocksWriter celltype row = do
+  cellsDocs <- mapM (cellToLaTeX blocksWriter celltype) (fillRow row)
+  return $ hsep (intersperse "&" cellsDocs) <> " \\\\ \\addlinespace"
+
+-- | Pads row with empty cells to adjust for rowspans above this row.
+fillRow :: [Ann.Cell] -> [Ann.Cell]
+fillRow = go 0
+  where
+    go _ [] = []
+    go n (acell@(Ann.Cell _spec (Ann.ColNumber colnum) cell):cells) =
+      let (Cell _ _ _ (ColSpan colspan) _) = cell
+      in map mkEmptyCell [n .. colnum - 1] ++
+         acell : go (colnum + colspan) cells
+
+    mkEmptyCell :: Int -> Ann.Cell
+    mkEmptyCell colnum =
+      Ann.Cell ((AlignDefault, ColWidthDefault):|[])
+               (Ann.ColNumber colnum)
+               B.emptyCell
+
+isEmptyHead :: Ann.TableHead -> Bool
+isEmptyHead (Ann.TableHead _attr []) = True
+isEmptyHead (Ann.TableHead _attr rows) = all (null . headerRowCells) rows
+
+-- | Gets all cells in a header row.
+headerRowCells :: Ann.HeaderRow -> [Ann.Cell]
+headerRowCells (Ann.HeaderRow _attr _rownum cells) = cells
+
+-- | Gets all cells in a body row.
+bodyRowCells :: Ann.BodyRow -> [Ann.Cell]
+bodyRowCells (Ann.BodyRow _attr _rownum rowhead cells) = rowhead <> cells
+
+-- | Gets a list of rows of the table body, where a row is a simple
+-- list of cells.
+bodyRows :: Ann.TableBody -> [[Ann.Cell]]
+bodyRows (Ann.TableBody _attr _rowheads headerRows rows) =
+  map headerRowCells headerRows <> map bodyRowCells rows
+
+-- | Gets a list of rows of the table head, where a row is a simple
+-- list of cells.
+headRows :: Ann.TableHead -> [[Ann.Cell]]
+headRows (Ann.TableHead _attr rows) = map headerRowCells rows
+
+-- | Gets a list of rows from the foot, where a row is a simple list
+-- of cells.
+footRows :: Ann.TableFoot -> [[Ann.Cell]]
+footRows (Ann.TableFoot _attr rows) = map headerRowCells rows
 
 -- For simple latex tables (without minipages or parboxes),
 -- we need to go to some lengths to get line breaks working:
@@ -144,11 +239,14 @@ displayMathToInline :: Inline -> Inline
 displayMathToInline (Math DisplayMath x) = Math InlineMath x
 displayMathToInline x                    = x
 
-tableCellToLaTeX :: PandocMonad m
-                 => ([Block] -> LW m (Doc Text))
-                 -> Bool -> (Alignment, [Block])
-                 -> LW m (Doc Text)
-tableCellToLaTeX blockListToLaTeX header (align, blocks) = do
+cellToLaTeX :: PandocMonad m
+            => BlocksWriter m
+            -> CellType
+            -> Ann.Cell
+            -> LW m (Doc Text)
+cellToLaTeX blockListToLaTeX celltype annotatedCell = do
+  let (Ann.Cell _specs _colnum cell) = annotatedCell
+  let (Cell _attr align rowspan colspan blocks) = cell
   beamer <- gets stBeamer
   externalNotes <- gets stExternalNotes
   inMinipage <- gets stInMinipage
@@ -167,15 +265,30 @@ tableCellToLaTeX blockListToLaTeX header (align, blocks) = do
          modify $ \st -> st{ stInMinipage = True }
          cellContents <- blockListToLaTeX blocks
          modify $ \st -> st{ stInMinipage = inMinipage }
-         let valign = text $ if header then "[b]" else "[t]"
-         let halign = case align of
-                        AlignLeft    -> "\\raggedright"
-                        AlignRight   -> "\\raggedleft"
-                        AlignCenter  -> "\\centering"
-                        AlignDefault -> "\\raggedright"
+         let valign = text $ case celltype of
+                               HeaderCell -> "[b]"
+                               BodyCell   -> "[t]"
+         let halign = literal $ alignCommand align
          return $ "\\begin{minipage}" <> valign <>
                   braces "\\linewidth" <> halign <> cr <>
                   cellContents <> cr <>
                   "\\end{minipage}"
   modify $ \st -> st{ stExternalNotes = externalNotes }
-  return result
+  when (rowspan /= RowSpan 1) $
+    modify (\st -> st{ stMultiRow = True })
+  let inMultiColumn x = case colspan of
+                          (ColSpan 1) -> x
+                          (ColSpan n) -> "\\multicolumn"
+                                         <> braces (literal (tshow n))
+                                         <> braces (literal $ colAlign align)
+                                         <> braces x
+  let inMultiRow x = case rowspan of
+                       (RowSpan 1) -> x
+                       (RowSpan n) -> let nrows = literal (tshow n)
+                                      in "\\multirow" <> braces nrows
+                                         <> braces "*" <> braces x
+  return . inMultiColumn . inMultiRow $ result
+
+data CellType
+  = HeaderCell
+  | BodyCell
diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs
index a76388729..d598794ad 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs
@@ -31,6 +31,7 @@ data WriterState =
                                      --   be parameter
   , stVerbInNote    :: Bool          -- ^ true if document has verbatim text in note
   , stTable         :: Bool          -- ^ true if document has a table
+  , stMultiRow      :: Bool          -- ^ true if document has multirow cells
   , stStrikeout     :: Bool          -- ^ true if document has strikeout
   , stUrl           :: Bool          -- ^ true if document has visible URL link
   , stGraphics      :: Bool          -- ^ true if document contains images
@@ -61,6 +62,7 @@ startingState options =
   , stOptions = options
   , stVerbInNote = False
   , stTable = False
+  , stMultiRow = False
   , stStrikeout = False
   , stUrl = False
   , stGraphics = False
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index aca2d05d0..cf0396d0a 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -58,7 +58,7 @@ tests pandocPath =
     ]
   , testGroup "latex"
     [ testGroup "writer"
-        (writerTests' "latex" ++ lhsWriterTests' "latex")
+        (extWriterTests' "latex" ++ lhsWriterTests' "latex")
     , testGroup "reader"
       [ test' "basic" ["-r", "latex+raw_tex", "-w", "native", "-s"]
         "latex-reader.latex" "latex-reader.native"
diff --git a/test/tables/nordics.latex b/test/tables/nordics.latex
new file mode 100644
index 000000000..d1a0b4aed
--- /dev/null
+++ b/test/tables/nordics.latex
@@ -0,0 +1,26 @@
+\begin{longtable}[]{@{}
+  >{\centering\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.30}}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.30}}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}}@{}}
+\caption{States belonging to the \emph{Nordics.}}\tabularnewline
+\toprule
+Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} &
+\vtop{\hbox{\strut Area}\hbox{\strut (in
+km\textsuperscript{2})}} \\ \addlinespace
+\midrule
+\endfirsthead
+\toprule
+Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} &
+\vtop{\hbox{\strut Area}\hbox{\strut (in
+km\textsuperscript{2})}} \\ \addlinespace
+\midrule
+\endhead
+Denmark & Copenhagen & 5,809,502 & 43,094 \\ \addlinespace
+Finland & Helsinki & 5,537,364 & 338,145 \\ \addlinespace
+Iceland & Reykjavik & 343,518 & 103,000 \\ \addlinespace
+Norway & Oslo & 5,372,191 & 323,802 \\ \addlinespace
+Sweden & Stockholm & 10,313,447 & 450,295 \\ \addlinespace
+Total & & 27,376,022 & 1,258,336 \\ \addlinespace
+\bottomrule
+\end{longtable}
diff --git a/test/tables/planets.latex b/test/tables/planets.latex
new file mode 100644
index 000000000..9457b6821
--- /dev/null
+++ b/test/tables/planets.latex
@@ -0,0 +1,36 @@
+\begin{longtable}[]{@{}cclrrrrrrrrl@{}}
+\caption{Data about the planets of our solar system.}\tabularnewline
+\toprule
+\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
+(kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun
+(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace
+\midrule
+\endfirsthead
+\toprule
+\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
+(kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun
+(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace
+\midrule
+\endhead
+\multicolumn{2}{l}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 &
+4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the
+Sun \\ \addlinespace
+& & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0
+& \\ \addlinespace
+& & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our
+world \\ \addlinespace
+& & Mars & 0.642 & 6,792 & 3933 & 3.7 & 24.7 & 227.9 & -65 & 2 & The red
+planet \\ \addlinespace
+\multirow{4}{*}{Jovian planets} & \multirow{2}{*}{Gas giants} & Jupiter & 1898
+& 142,984 & 1326 & 23.1 & 9.9 & 778.6 & -110 & 67 & The largest
+planet \\ \addlinespace
+& & Saturn & 568 & 120,536 & 687 & 9.0 & 10.7 & 1433.5 & -140 & 62
+& \\ \addlinespace
+& \multirow{2}{*}{Ice giants} & Uranus & 86.8 & 51,118 & 1271 & 8.7 & 17.2 &
+2872.5 & -195 & 27 & \\ \addlinespace
+& & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14
+& \\ \addlinespace
+\multicolumn{2}{l}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 &
+153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\ \addlinespace
+\bottomrule
+\end{longtable}
diff --git a/test/tables/students.latex b/test/tables/students.latex
new file mode 100644
index 000000000..680a539d3
--- /dev/null
+++ b/test/tables/students.latex
@@ -0,0 +1,23 @@
+\begin{longtable}[]{@{}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}@{}}
+\caption{List of Students}\tabularnewline
+\toprule
+Student ID & Name \\ \addlinespace
+\midrule
+\endfirsthead
+\toprule
+Student ID & Name \\ \addlinespace
+\midrule
+\endhead
+\multicolumn{2}{l}{Computer Science} \\ \addlinespace
+3741255 & Jones, Martha \\ \addlinespace
+4077830 & Pierce, Benjamin \\ \addlinespace
+5151701 & Kirk, James \\ \addlinespace
+\multicolumn{2}{l}{Russian Literature} \\ \addlinespace
+3971244 & Nim, Victor \\ \addlinespace
+\multicolumn{2}{l}{Astrophysics} \\ \addlinespace
+4100332 & Petrov, Alexandra \\ \addlinespace
+4100332 & Toyota, Hiroko \\ \addlinespace
+\bottomrule
+\end{longtable}
-- 
cgit v1.2.3


From 9cbbf18fe18d740e12f564506079f3a2cb853be6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 20 Dec 2020 22:33:26 -0800
Subject: HTML writer: don't include p tags in CSL bibliography entries.

Fixes a regression in 2.11.3.
Closes #6966
---
 src/Text/Pandoc/Writers/HTML.hs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 76f17f77a..9f9a1013c 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -778,12 +778,17 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
       classes' = case slideVariant of
         NoSlides -> classes
         _ -> filter (\k -> k /= "incremental" && k /= "nonincremental") classes
+  let paraToPlain (Para ils) = Plain ils
+      paraToPlain x          = x
+  let bs' = if "csl-entry" `elem` classes'
+               then walk paraToPlain bs
+               else bs
   contents <- if "columns" `elem` classes'
                  then -- we don't use blockListToHtml because it inserts
                       -- a newline between the column divs, which throws
                       -- off widths! see #4028
-                      mconcat <$> mapM (blockToHtml opts) bs
-                 else blockListToHtml opts' bs
+                      mconcat <$> mapM (blockToHtml opts) bs'
+                 else blockListToHtml opts' bs'
   let contents' = nl opts >> contents >> nl opts
   let (divtag, classes'') = if html5 && "section" `elem` classes'
                             then (H5.section, filter (/= "section") classes')
-- 
cgit v1.2.3


From 47f435276a62b3eddac496de6a7e365631d57197 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 24 Dec 2020 09:56:20 -0800
Subject: Citeproc: fix handling of empty URL variables (`DOI`, etc.).

The `linkifyVariables` function was changing these to links
which then got treated as non-empty by citeproc, leading
to wrong results (e.g. ignoring nonempty URL when empty DOI is present).

Addresses part 2 of jgm/citeproc#41.
---
 src/Text/Pandoc/Citeproc.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index ca1ab9f96..e87ddcbcd 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -507,7 +507,9 @@ linkifyVariables ref =
                       x'' = if "://" `T.isInfixOf` x'
                                then x'
                                else pref <> x'
-                  in  FancyVal (B.link x'' "" (B.str x'))
+                  in  if T.null x'
+                         then x
+                         else FancyVal (B.link x'' "" (B.str x'))
 
 extractText :: Val Inlines -> Text
 extractText (TextVal x)  = x
-- 
cgit v1.2.3


From dcd89413f3d4412c7a17b0b2f1019ea592a35858 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 26 Dec 2020 22:51:22 +0100
Subject: Powerpoint writer: allow arbitrary OOXML in raw inline elements

The raw text is now included verbatim in the output. Previously is was parsed
into XML elements, which prevented the inclusion of partial XML snippets.
---
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 49 +++++++++++++++-------------
 test/pptx/raw_ooxml.native                   |  2 +-
 2 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 603a84acc..8554db622 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -437,10 +437,10 @@ getContentShapeSize ns layout master
 getContentShapeSize _ _ _ = throwError $ PandocSomeError
                             "Attempted to find content shape size in non-layout"
 
-buildSpTree :: NameSpaces -> Element -> [Element] -> Element
+buildSpTree :: NameSpaces -> Element -> [Content] -> Element
 buildSpTree ns spTreeElem newShapes =
   emptySpTreeElem { elContent = newContent }
-  where newContent = elContent emptySpTreeElem <> map Elem newShapes
+  where newContent = elContent emptySpTreeElem <> newShapes
         emptySpTreeElem = spTreeElem { elContent = filter fn (elContent spTreeElem) }
         fn :: Content -> Bool
         fn (Elem e) = isElem ns "p" "nvGrpSpPr" e ||
@@ -744,8 +744,8 @@ makePicElements layout picProps mInfo alt = do
     else return [picShape]
 
 
-paraElemToElements :: PandocMonad m => ParaElem -> P m [Element]
-paraElemToElements Break = return [mknode "a:br" [] ()]
+paraElemToElements :: PandocMonad m => ParaElem -> P m [Content]
+paraElemToElements Break = return [Elem $ mknode "a:br" [] ()]
 paraElemToElements (Run rpr s) = do
   sizeAttrs <- fontSizeAttributes rpr
   let attrs = sizeAttrs <>
@@ -801,19 +801,20 @@ paraElemToElements (Run rpr s) = do
   let codeContents =
         [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr]
   let propContents = linkProps <> colorContents <> codeContents
-  return [mknode "a:r" [] [ mknode "a:rPr" attrs propContents
-                          , mknode "a:t" [] $ T.unpack s
-                          ]]
+  return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents
+                                 , mknode "a:t" [] $ T.unpack s
+                                 ]]
 paraElemToElements (MathElem mathType texStr) = do
   isInSpkrNotes <- asks envInSpeakerNotes
   if isInSpkrNotes
     then paraElemToElements $ Run def $ unTeXString texStr
     else do res <- convertMath writeOMML mathType (unTeXString texStr)
             case res of
-              Right r -> return [mknode "a14:m" [] $ addMathInfo r]
+              Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r]
               Left (Str s) -> paraElemToElements (Run def s)
               Left _       -> throwError $ PandocShouldNeverHappenError "non-string math fallback"
-paraElemToElements (RawOOXMLParaElem str) = return [ x | Elem x <- parseXML str ]
+paraElemToElements (RawOOXMLParaElem str) = return
+  [Text (CData CDataRaw (T.unpack str) Nothing)]
 
 
 -- This is a bit of a kludge -- really requires adding an option to
@@ -875,8 +876,9 @@ paragraphToElement par = do
                  [mknode "a:buAutoNum" (autoNumAttrs attrs') ()]
                Nothing -> [mknode "a:buNone" [] ()]
             )
-  paras <- concat <$> mapM paraElemToElements (paraElems par)
-  return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] <> paras
+  paras <- mapM paraElemToElements (paraElems par)
+  return $ mknode "a:p" [] $
+    [Elem $ mknode "a:pPr" attrs props] <> concat paras
 
 shapeToElement :: PandocMonad m => Element -> Shape -> P m Element
 shapeToElement layout (TextBox paras)
@@ -896,21 +898,22 @@ shapeToElement layout (TextBox paras)
 -- GraphicFrame and Pic should never reach this.
 shapeToElement _ _ = return $ mknode "p:sp" [] ()
 
-shapeToElements :: PandocMonad m => Element -> Shape -> P m [Element]
+shapeToElements :: PandocMonad m => Element -> Shape -> P m [Content]
 shapeToElements layout (Pic picProps fp alt) = do
   mInfo <- registerMedia fp alt
   case mInfoExt mInfo of
-    Just _ ->
+    Just _ -> map Elem <$>
       makePicElements layout picProps mInfo alt
     Nothing -> shapeToElements layout $ TextBox [Paragraph def alt]
-shapeToElements layout (GraphicFrame tbls cptn) =
+shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$>
   graphicFrameToElements layout tbls cptn
-shapeToElements _ (RawOOXMLShape str) = return [ x | Elem x <- parseXML str ]
+shapeToElements _ (RawOOXMLShape str) = return
+  [Text (CData CDataRaw (T.unpack str) Nothing)]
 shapeToElements layout shp = do
   element <- shapeToElement layout shp
-  return [element]
+  return [Elem element]
 
-shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Element]
+shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Content]
 shapesToElements layout shps =
  concat <$> mapM (shapeToElements layout) shps
 
@@ -1083,7 +1086,7 @@ contentToElement layout hdrShape shapes
   , Just cSld <- findChild (elemName ns "p" "cSld") layout
   , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
       element <- nonBodyTextToElement layout [PHType "title"] hdrShape
-      let hdrShapeElements = [element | not (null hdrShape)]
+      let hdrShapeElements = [Elem element | not (null hdrShape)]
       contentElements <- local
                          (\env -> env {envContentType = NormalContent})
                          (shapesToElements layout shapes)
@@ -1096,7 +1099,7 @@ twoColumnToElement layout hdrShape shapesL shapesR
   , Just cSld <- findChild (elemName ns "p" "cSld") layout
   , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
       element <- nonBodyTextToElement layout [PHType "title"] hdrShape
-      let hdrShapeElements = [element | not (null hdrShape)]
+      let hdrShapeElements = [Elem element | not (null hdrShape)]
       contentElementsL <- local
                           (\env -> env {envContentType =TwoColumnLeftContent})
                           (shapesToElements layout shapesL)
@@ -1105,7 +1108,8 @@ twoColumnToElement layout hdrShape shapesL shapesR
                           (shapesToElements layout shapesR)
       -- let contentElementsL' = map (setIdx ns "1") contentElementsL
       --     contentElementsR' = map (setIdx ns "2") contentElementsR
-      return $ buildSpTree ns spTree (hdrShapeElements <> contentElementsL <> contentElementsR)
+      return $ buildSpTree ns spTree $
+        hdrShapeElements <> contentElementsL <> contentElementsR
 twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] ()
 
 
@@ -1115,7 +1119,7 @@ titleToElement layout titleElems
   , Just cSld <- findChild (elemName ns "p" "cSld") layout
   , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do
       element <- nonBodyTextToElement layout [PHType "title", PHType "ctrTitle"] titleElems
-      let titleShapeElements = [element | not (null titleElems)]
+      let titleShapeElements = [Elem element | not (null titleElems)]
       return $ buildSpTree ns spTree titleShapeElements
 titleToElement _ _ = return $ mknode "p:sp" [] ()
 
@@ -1135,7 +1139,8 @@ metadataToElement layout titleElems subtitleElems authorsElems dateElems
       dateShapeElements <- if null dateElems
                            then return []
                            else sequence [nonBodyTextToElement layout [PHType "dt"] dateElems]
-      return $ buildSpTree ns spTree (titleShapeElements <> subtitleShapeElements <> dateShapeElements)
+      return . buildSpTree ns spTree . map Elem $
+        (titleShapeElements <> subtitleShapeElements <> dateShapeElements)
 metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] ()
 
 slideToElement :: PandocMonad m => Slide -> P m Element
diff --git a/test/pptx/raw_ooxml.native b/test/pptx/raw_ooxml.native
index aa86ad076..ae5bdd140 100644
--- a/test/pptx/raw_ooxml.native
+++ b/test/pptx/raw_ooxml.native
@@ -1,3 +1,3 @@
 [Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "text,",Space,Str "written",Space,Str "as",Space,Str "a",Space,Str "raw",Space,Str "inline:",Space,RawInline (Format "openxml") "<a:r><a:rPr /><a:t>Here are examples of </a:t></a:r><a:r><a:rPr i=\"1\" /><a:t>italics</a:t></a:r><a:r><a:rPr /><a:t>, </a:t></a:r><a:r><a:rPr b=\"1\" /><a:t>bold</a:t></a:r>"]
 ,HorizontalRule
-,RawBlock (Format "openxml") "      <p:sp>\n        <p:nvSpPr>\n          <p:cNvPr id=\"3\" name=\"Content Placeholder 2\"/>\n          <p:cNvSpPr>\n            <a:spLocks noGrp=\"1\"/>\n          </p:cNvSpPr>\n          <p:nvPr>\n            <p:ph idx=\"1\"/>\n          </p:nvPr>\n        </p:nvSpPr>\n        <p:spPr/>\n        <p:txBody>\n          <a:bodyPr/>\n          <a:lstStyle/>\n          <a:p>\n            <a:pPr lvl=\"1\"/>\n            <a:r>\n              <a:rPr/>\n              <a:t>Bulleted bulleted lists.</a:t>\n            </a:r>\n          </a:p>\n          <a:p>\n            <a:pPr lvl=\"1\"/>\n            <a:r>\n              <a:rPr/>\n              <a:t>And go to arbitrary depth.</a:t>\n            </a:r>\n          </a:p>\n          <a:p>\n            <a:pPr lvl=\"2\"/>\n            <a:r>\n              <a:rPr/>\n              <a:t>Like this</a:t>\n            </a:r>\n          </a:p>\n          <a:p>\n            <a:pPr lvl=\"3\"/>\n            <a:r>\n              <a:rPr/>\n              <a:t>Or this</a:t>\n            </a:r>\n          </a:p>\n          <a:p>\n            <a:pPr lvl=\"2\"/>\n            <a:r>\n              <a:rPr/>\n              <a:t>Back to here.</a:t>\n            </a:r>\n          </a:p>\n        </p:txBody>\n      </p:sp>"]
+,RawBlock (Format "openxml") "<p:sp>\n        <p:nvSpPr>\n          <p:cNvPr id=\"3\" name=\"Content Placeholder 2\" />\n          <p:cNvSpPr>\n            <a:spLocks noGrp=\"1\" />\n          </p:cNvSpPr>\n          <p:nvPr>\n            <p:ph idx=\"1\" />\n          </p:nvPr>\n        </p:nvSpPr>\n        <p:spPr />\n        <p:txBody>\n          <a:bodyPr />\n          <a:lstStyle />\n          <a:p>\n            <a:pPr lvl=\"1\" />\n            <a:r>\n              <a:rPr />\n              <a:t>Bulleted bulleted lists.</a:t>\n            </a:r>\n          </a:p>\n          <a:p>\n            <a:pPr lvl=\"1\" />\n            <a:r>\n              <a:rPr />\n              <a:t>And go to arbitrary depth.</a:t>\n            </a:r>\n          </a:p>\n          <a:p>\n            <a:pPr lvl=\"2\" />\n            <a:r>\n              <a:rPr />\n              <a:t>Like this</a:t>\n            </a:r>\n          </a:p>\n          <a:p>\n            <a:pPr lvl=\"3\" />\n            <a:r>\n              <a:rPr />\n              <a:t>Or this</a:t>\n            </a:r>\n          </a:p>\n          <a:p>\n            <a:pPr lvl=\"2\" />\n            <a:r>\n              <a:rPr />\n              <a:t>Back to here.</a:t>\n            </a:r>\n          </a:p>\n        </p:txBody>\n      </p:sp>"]
-- 
cgit v1.2.3


From 668596cc89f8f6bddafd35fd031638bc0c416183 Mon Sep 17 00:00:00 2001
From: timo-a <timo-a@gmx.ch>
Date: Mon, 28 Dec 2020 03:42:28 +0100
Subject: Add support for writing nested tables to asciidoc (#6972)

Added field to WriterState that denotes the current nesting level for traversing tables.
Depending on the value of that field nested tables are recognized and written.
Asciidoc supports one level of nesting. If deeper tables are to be written, they are
omitted and a warning is issued.
---
 src/Text/Pandoc/Writers/AsciiDoc.hs           | 39 ++++++++++---
 test/command/nested-table-to-asciidoc-6942.md | 82 +++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 7 deletions(-)
 create mode 100644 test/command/nested-table-to-asciidoc-6942.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index e742577b6..0a312d1d1 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -37,6 +37,7 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Templates (renderTemplate)
 import Text.Pandoc.Writers.Shared
 
+
 data WriterState = WriterState { defListMarker       :: Text
                                , orderedListLevel    :: Int
                                , bulletListLevel     :: Int
@@ -45,6 +46,10 @@ data WriterState = WriterState { defListMarker       :: Text
                                , asciidoctorVariant  :: Bool
                                , inList              :: Bool
                                , hasMath             :: Bool
+                               -- |0 is no table
+                               -- 1 is top level table
+                               -- 2 is a table in a table
+                               , tableNestingLevel   :: Int
                                }
 
 defaultWriterState :: WriterState
@@ -56,6 +61,7 @@ defaultWriterState = WriterState { defListMarker      = "::"
                                  , asciidoctorVariant = False
                                  , inList             = False
                                  , hasMath            = False
+                                 , tableNestingLevel  = 0
                                  }
 
 -- | Convert Pandoc to AsciiDoc.
@@ -194,7 +200,7 @@ blockToAsciiDoc opts (BlockQuote blocks) = do
                      else contents
   let bar = text "____"
   return $ bar $$ chomp contents' $$ bar <> blankline
-blockToAsciiDoc opts (Table _ blkCapt specs thead tbody tfoot) = do
+blockToAsciiDoc opts block@(Table _ blkCapt specs thead tbody tfoot) = do
   let (caption, aligns, widths, headers, rows) =
         toLegacyTable blkCapt specs thead tbody tfoot
   caption' <- inlineListToAsciiDoc opts caption
@@ -236,23 +242,42 @@ blockToAsciiDoc opts (Table _ blkCapt specs thead tbody tfoot) = do
              $ zipWith colspec aligns widths')
          <> text ","
          <> headerspec <> text "]"
+         
+  -- construct cells and recurse in case of nested tables
+  parentTableLevel <- gets tableNestingLevel
+  let currentNestingLevel = parentTableLevel + 1
+  
+  modify $ \st -> st{ tableNestingLevel = currentNestingLevel }
+  
+  let separator = text (if parentTableLevel == 0
+                          then "|"  -- top level separator
+                          else "!") -- nested separator
+
   let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x]
-                              return $ text "|" <> chomp d
+                              return $ separator <> chomp d
       makeCell [Para x]  = makeCell [Plain x]
-      makeCell []        = return $ text "|"
-      makeCell bs        = do d <- blockListToAsciiDoc opts bs
-                              return $ text "a|" $$ d
+      makeCell []        = return separator
+      makeCell bs        = if currentNestingLevel == 2
+                             then do
+                               --asciidoc only supports nesting once
+                               report $ BlockNotRendered block
+                               return separator
+                             else do
+                               d <- blockListToAsciiDoc opts bs
+                               return $ (text "a" <> separator) $$ d
+
   let makeRow cells = hsep `fmap` mapM makeCell cells
   rows' <- mapM makeRow rows
   head' <- makeRow headers
+  modify $ \st -> st{ tableNestingLevel = parentTableLevel }
   let head'' = if all null headers then empty else head'
   let colwidth = if writerWrapText opts == WrapAuto
                     then writerColumns opts
                     else 100000
   let maxwidth = maximum $ map offset (head':rows')
   let body = if maxwidth > colwidth then vsep rows' else vcat rows'
-  let border = text "|==="
-  return $
+  let border = separator <> text "==="
+  return $ 
     caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline
 blockToAsciiDoc opts (BulletList items) = do
   inlist <- gets inList
diff --git a/test/command/nested-table-to-asciidoc-6942.md b/test/command/nested-table-to-asciidoc-6942.md
new file mode 100644
index 000000000..baf11fdf7
--- /dev/null
+++ b/test/command/nested-table-to-asciidoc-6942.md
@@ -0,0 +1,82 @@
+A table within a table should be convertet into a table within table
+
+```
+% pandoc -f html -t asciidoc
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<title> NestedTables </title>
+</head>
+<body>
+<table>
+ <tr>
+  <td >
+   <table>  <tr> <td> a1 </td> <td> a2 </td> </tr>  </table>
+  </td>
+  <td>b</td>
+ </tr>
+ <tr>
+   <td>c</td> <td>d </td>
+ </tr>
+</table>
+</body>
+</html>
+^D
+[width="100%",cols="50%,50%",]
+|===
+a|
+[cols=",",]
+!===
+!a1 !a2
+!===
+
+|b
+|c |d
+|===
+```
+
+A table within a table within a table cannot be converted because asciidoc only
+supports two levels of tables.
+The table on level 3 is thus converted to level 2 and a warning is produced
+```
+% pandoc -f html -t asciidoc --verbose
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<title> NestedTables </title>
+</head>
+<body>
+<table>
+ <tr>
+  <td>
+    <table>  <tr>
+	<td> a1 </td>
+	<td>
+	  <table>  <tr> <td> 1 </td> <td> 2 </td> </tr>  </table>
+	</td>
+    </tr>  </table>
+  </td>
+  <td>b</td>
+ </tr>
+ <tr>
+   <td>c</td> <td>d </td>
+ </tr>
+</table>
+</body>
+</html>
+^D
+[INFO] Not rendering Table ("",[],[]) (Caption Nothing []) [(AlignDefault,ColWidth 0.5),(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) []) [TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "a1"]],Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Table ("",[],[]) (Caption Nothing []) [(AlignDefault,ColWidthDefault),(AlignDefault,ColWidthDefault)] (TableHead ("",[],[]) []) [TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "1"]],Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "2"]]]]] (TableFoot ("",[],[]) [])]]]] (TableFoot ("",[],[]) [])
+[width="100%",cols="50%,50%",]
+|===
+a|
+[width="100%",cols="50%,50%",]
+!===
+!a1 !
+!===
+
+|b
+|c |d
+|===
+```
-- 
cgit v1.2.3


From 99e1b67b74ddcbf7ffc8add640493f73471cf50a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 27 Dec 2020 23:19:14 -0800
Subject: Use meta-description instead of description in templates.

Since this is an attribute value, we need to prepare it
in the writer.
---
 data/templates/default.html4    | 4 ++--
 data/templates/default.html5    | 4 ++--
 src/Text/Pandoc/Writers/HTML.hs | 3 +++
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/data/templates/default.html4 b/data/templates/default.html4
index deba57a7b..d54c48ee4 100644
--- a/data/templates/default.html4
+++ b/data/templates/default.html4
@@ -13,8 +13,8 @@ $endif$
 $if(keywords)$
   <meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$" />
 $endif$
-$if(description)$
-  <meta name="description" content="$description$" />
+$if(description-meta)$
+  <meta name="description" content="$description-meta$" />
 $endif$
   <title>$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$</title>
   <style type="text/css">
diff --git a/data/templates/default.html5 b/data/templates/default.html5
index dd7dc3212..9699b8504 100644
--- a/data/templates/default.html5
+++ b/data/templates/default.html5
@@ -13,8 +13,8 @@ $endif$
 $if(keywords)$
   <meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$" />
 $endif$
-$if(description)$
-  <meta name="description" content="$description$" />
+$if(description-meta)$
+  <meta name="description" content="$description-meta$" />
 $endif$
   <title>$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$</title>
   <style>
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 9f9a1013c..c93322953 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -254,6 +254,8 @@ pandocToHtml opts (Pandoc meta blocks) = do
   let stringifyHTML = escapeStringForXML . stringify
   let authsMeta = map stringifyHTML $ docAuthors meta
   let dateMeta  = stringifyHTML $ docDate meta
+  let descriptionMeta = escapeStringForXML $
+                          lookupMetaString "description" meta
   slideVariant <- gets stSlideVariant
   let sects = adjustNumbers opts $
               makeSections (writerNumberSections opts) Nothing $
@@ -352,6 +354,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
                   defField "author-meta" authsMeta .
                   maybe id (defField "date-meta")
                     (normalizeDate dateMeta) .
+                  defField "description-meta" descriptionMeta .
                   defField "pagetitle"
                       (stringifyHTML . docTitle $ meta) .
                   defField "idprefix" (writerIdentifierPrefix opts) .
-- 
cgit v1.2.3


From e837ed772efe14a1c05880ec9fbe025862bb303a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 28 Dec 2020 14:48:55 +0100
Subject: HTML reader: use renderTags' from Text.Pandoc.Shared.

The `renderTags'` function was duplicated when the reader used `Text` as
its string type. The duplication is no longer necessary.

A side effect of this change is that empty `<col>` elements are written
as self-closing tags in raw HTML blocks.
---
 src/Text/Pandoc/Readers/HTML.hs | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f8a17bb78..8e94a0812 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -61,8 +61,9 @@ import Text.Pandoc.Options (
     ReaderOptions (readerExtensions, readerStripComments),
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
-import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
-                           extractSpaces, htmlSpanLikeElements, safeRead, tshow)
+import Text.Pandoc.Shared (
+    addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
+    htmlSpanLikeElements, renderTags', safeRead, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
@@ -1045,26 +1046,3 @@ canonicalizeUrl url = do
   return $ case (parseURIReference (T.unpack url), mbBaseHref) of
                 (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
                 _                   -> url
-
--- For now we need a special version here; the one in Shared has String type
-renderTags' :: [Tag Text] -> Text
-renderTags' = renderTagsOptions
-               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
-                                                       "meta", "link"]
-                            , optRawTag   = matchTags ["script", "style"] }
-              where matchTags tags = flip elem tags . T.toLower
-
-
--- EPUB Specific
---
---
-{-
-
-types :: [(String, ([String], Int))]
-types =  -- Document divisions
-   map (\s -> (s, (["section", "body"], 0)))
-    ["volume", "part", "chapter", "division"]
-  <> -- Document section and components
-  [
-    ("abstract",  ([], 0))]
--}
-- 
cgit v1.2.3


From 55f9b59af181f52f20f3e9eb8f9df3046d3cb536 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 28 Dec 2020 14:41:28 -0800
Subject: Docx writer: fix nested tables with captions.

Previously we got unreadable content, because docx seems
to want a `<w:p>` element (even an empty one) at the end of
every table cell.  Closes #6983.
---
 src/Text/Pandoc/Writers/Docx.hs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 0174a8501..13c4edb3c 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1011,12 +1011,14 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
                       $ blockToOpenXML opts (Para caption)
   let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
   -- Table cells require a <w:p> element, even an empty one!
-  -- Not in the spec but in Word 2007, 2010. See #4953.
+  -- Not in the spec but in Word 2007, 2010. See #4953. And
+  -- apparently the last element must be a <w:p>, see #6983.
   let cellToOpenXML (al, cell) = do
         es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell
-        return $ if any (\e -> qName (elName e) == "p") (onlyElems es)
-           then es
-           else es ++ [Elem $ mknode "w:p" [] ()]
+        return $
+          case reverse (onlyElems es) of
+            e:_ | qName (elName e) == "p" -> es
+            _ -> es ++ [Elem $ mknode "w:p" [] ()]
   headers' <- mapM cellToOpenXML $ zip aligns headers
   rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
   let borderProps = Elem $ mknode "w:tcPr" []
-- 
cgit v1.2.3


From 3cd21c5f6e1b9ddb1e6ff35d418f6df25c56cb63 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 29 Dec 2020 08:44:11 -0800
Subject: Improve fix to #6983.

If we have a paragraph then a bookmarkEnd, we don't need to
insert the empty paragraph (and in fact it alters the spacing).

Closes #6983.
---
 src/Text/Pandoc/Writers/Docx.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 13c4edb3c..65946ec88 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1017,7 +1017,9 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
         es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell
         return $
           case reverse (onlyElems es) of
-            e:_ | qName (elName e) == "p" -> es
+            b:e:_ | qName (elName b) == "bookmarkEnd"
+                  , qName (elName e) == "p" -> es
+            e:_   | qName (elName e) == "p" -> es
             _ -> es ++ [Elem $ mknode "w:p" [] ()]
   headers' <- mapM cellToOpenXML $ zip aligns headers
   rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
-- 
cgit v1.2.3


From 49286a25df24bd3bf6d5c20b47e819d83d6d8207 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Dec 2020 13:36:18 -0800
Subject: Ms writer: don't justify inside table cells.

---
 src/Text/Pandoc/Writers/Ms.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 96914d3c6..0fc333bc2 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -263,8 +263,10 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) =
                          return $ makeRow cols) rows
   setFirstPara
   return $ literal ".PP" $$ caption' $$
+           literal ".na" $$ -- we don't want justification in table cells
            literal ".TS" $$ literal "delim(@@) tab(\t);" $$ coldescriptions $$
-           colheadings' $$ vcat body $$ literal ".TE"
+           colheadings' $$ vcat body $$ literal ".TE" $$
+           literal ".ad"
 
 blockToMs opts (BulletList items) = do
   contents <- mapM (bulletListItemToMs opts) items
-- 
cgit v1.2.3


From 419190213a7e6b07e2ac66f47d53e9f023af757d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Dec 2020 15:38:48 -0800
Subject: Hlint fixes

---
 src/Text/Pandoc/Readers/Txt2Tags.hs | 2 +-
 src/Text/Pandoc/Writers/Docx.hs     | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 474e4fac0..08083b177 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -564,7 +564,7 @@ getTarget = do
               _                               -> "html"
 
 atStart :: T2T ()
-atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
+atStart = getPosition >>= guard . (== 1) . sourceColumn
 
 ignoreSpacesCap :: T2T Text -> T2T Text
 ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 65946ec88..1818ed81d 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -28,6 +28,7 @@ import Data.Char (isSpace, isLetter)
 import Data.List (intercalate, isPrefixOf, isSuffixOf)
 import Data.String (fromString)
 import qualified Data.Map as M
+import Data.Either (fromRight)
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
 import qualified Data.Set as Set
 import qualified Data.Text as T
@@ -1402,7 +1403,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
     generateImgElt (ident, _, _, img) =
       let
         (xpt,ypt) = desiredSizeInPoints opts attr
-               (either (const def) id (imageSize opts img))
+               (fromRight def (imageSize opts img))
         -- 12700 emu = 1 pt
         (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700)
                                 (pageWidth * 12700)
-- 
cgit v1.2.3


From 0782d5882c8c04fbdffcb7026457b3cc79150692 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Dec 2020 16:04:09 -0800
Subject: Undo the "Use fromRight" hlint hint.

---
 .hlint.yaml                     | 4 ++++
 src/Text/Pandoc/Writers/Docx.hs | 3 +--
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/.hlint.yaml b/.hlint.yaml
index 09fd9baf7..6b74014d4 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -104,5 +104,9 @@
       - Text.Pandoc.Readers.Markdown
       - Text.Pandoc.Readers.RST
 
+# fromRight is only in base >= 4.10
+- ignore:
+    name: "Use fromRight"
+
 # Define some custom infix operators
 # - fixity: infixr 3 ~^#^~
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 1818ed81d..65946ec88 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -28,7 +28,6 @@ import Data.Char (isSpace, isLetter)
 import Data.List (intercalate, isPrefixOf, isSuffixOf)
 import Data.String (fromString)
 import qualified Data.Map as M
-import Data.Either (fromRight)
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
 import qualified Data.Set as Set
 import qualified Data.Text as T
@@ -1403,7 +1402,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
     generateImgElt (ident, _, _, img) =
       let
         (xpt,ypt) = desiredSizeInPoints opts attr
-               (fromRight def (imageSize opts img))
+               (either (const def) id (imageSize opts img))
         -- 12700 emu = 1 pt
         (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700)
                                 (pageWidth * 12700)
-- 
cgit v1.2.3


From 23f964b9076f9795b00e740d20feebeb1d6d91db Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 30 Dec 2020 21:31:28 -0800
Subject: Mediawiki reader: allow space around storng/emph delimiters.

Closes #6993.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 10 ++++------
 test/command/6993.md                 | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)
 create mode 100644 test/command/6993.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index e8985ab2c..d712b1120 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -678,19 +678,17 @@ url = do
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines
 inlinesBetween start end =
-  trimInlines . mconcat <$> try (start >> many1Till inner end)
-    where inner      = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
-          innerSpace = try $ whitespace <* notFollowedBy' end
+  trimInlines . mconcat <$> try (start >> many1Till inline end)
 
 emph :: PandocMonad m => MWParser m Inlines
 emph = B.emph <$> nested (inlinesBetween start end)
-    where start = sym "''" >> lookAhead nonspaceChar
+    where start = sym "''"
           end   = try $ notFollowedBy' (() <$ strong) >> sym "''"
 
 strong :: PandocMonad m => MWParser m Inlines
 strong = B.strong <$> nested (inlinesBetween start end)
-    where start = sym "'''" >> lookAhead nonspaceChar
-          end   = try $ sym "'''"
+    where start = sym "'''"
+          end   = sym "'''"
 
 doubleQuotes :: PandocMonad m => MWParser m Inlines
 doubleQuotes = do
diff --git a/test/command/6993.md b/test/command/6993.md
new file mode 100644
index 000000000..b7f7f5384
--- /dev/null
+++ b/test/command/6993.md
@@ -0,0 +1,21 @@
+```
+% pandoc -f mediawiki -t native
+'''Should be bold '''
+^D
+[Para [Strong [Str "Should",Space,Str "be",Space,Str "bold"]]]
+```
+
+```
+% pandoc -f mediawiki -t native
+''' Should be bold'''
+^D
+[Para [Strong [Str "Should",Space,Str "be",Space,Str "bold"]]]
+```
+
+```
+% pandoc -f mediawiki -t native
+'' Should be emph ''
+^D
+[Para [Emph [Str "Should",Space,Str "be",Space,Str "emph"]]]
+```
+
-- 
cgit v1.2.3


From 17e3efc785fa8b0680ec6d4ebaac1ea6bdb57e1a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 1 Jan 2021 10:50:58 +0100
Subject: Org reader: restructure output of captioned code blocks

The Div wrapper of code blocks with captions now has the class
"captioned-content". The caption itself is added as a Plain block
inside a Div of class "caption". This makes it easier to write filters
which match on captioned code blocks. Existing filters will need to be
updated.

Closes: #6977
---
 src/Text/Pandoc/Readers/Org/Blocks.hs     | 26 ++++++++++++--------------
 test/Tests/Readers/Org/Block/CodeBlock.hs |  6 +++---
 2 files changed, 15 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index f2e8b1ab6..17e3ff986 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -294,24 +294,22 @@ verseBlock blockType = try $ do
 codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
 codeBlock blockAttrs blockType = do
   skipSpaces
-  (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
-  content           <- rawBlockContent blockType
-  resultsContent    <- option mempty babelResultsBlock
-  let id'            = fromMaybe mempty $ blockAttrName blockAttrs
-  let codeBlck       = B.codeBlockWith ( id', classes, kv ) content
-  let labelledBlck   = maybe (pure codeBlck)
-                             (labelDiv codeBlck)
-                             (blockAttrCaption blockAttrs)
+  (classes, kv)  <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+  content        <- rawBlockContent blockType
+  resultsContent <- option mempty babelResultsBlock
+  let identifier = fromMaybe mempty $ blockAttrName blockAttrs
+  let codeBlk    = B.codeBlockWith (identifier, classes, kv) content
+  let wrap       = maybe pure addCaption (blockAttrCaption blockAttrs)
   return $
-    (if exportsCode kv    then labelledBlck   else mempty) <>
+    (if exportsCode kv    then wrap codeBlk   else mempty) <>
     (if exportsResults kv then resultsContent else mempty)
  where
-   labelDiv :: Blocks -> F Inlines -> F Blocks
-   labelDiv blk value =
-     B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
+   addCaption :: F Inlines -> Blocks -> F Blocks
+   addCaption caption blk = B.divWith ("", ["captioned-content"], [])
+                         <$> (mkCaptionBlock caption <> pure blk)
 
-   labelledBlock :: F Inlines -> F Blocks
-   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+   mkCaptionBlock :: F Inlines -> F Blocks
+   mkCaptionBlock = fmap (B.divWith ("", ["caption"], []) . B.plain)
 
    exportsResults :: [(Text, Text)] -> Bool
    exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs
index adf6661ca..2648a6e1f 100644
--- a/test/Tests/Readers/Org/Block/CodeBlock.hs
+++ b/test/Tests/Readers/Org/Block/CodeBlock.hs
@@ -185,10 +185,10 @@ tests =
                 , "#+end_src"
                 ] =?>
       divWith
-         nullAttr
+         ("", ["captioned-content"], [] )
          (mappend
-          (plain $ spanWith ("", ["label"], [])
-                            (spcSep [ "Functor", "laws", "in", "Haskell" ]))
+          (divWith ("", ["caption"], []) $
+           plain (spcSep [ "Functor", "laws", "in", "Haskell" ]))
           (codeBlockWith ("functor-laws", ["haskell"], [])
                          (T.unlines [ "fmap id = id"
                                     , "fmap (p . q) = (fmap p) . (fmap q)"
-- 
cgit v1.2.3


From 9a18cf4b591364e7f0dc9b976628e91f8b31a6cf Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 2 Jan 2021 07:56:07 -0800
Subject: LaTeX writer: revert table line height increase in 2.11.3.

In 2.11.3 we started adding `\addlinespace`, which produced less
dense tables.  This wasn't an intentional change; I misunderstood
a comment in the discussion leading up to the change. This commit
restores the earlier default table appearance.

Note that if you want a less dense table, you can use something like
`\def\arraystretch{1.5}` in your header.

Closes #6996.
---
 src/Text/Pandoc/Writers/LaTeX/Table.hs |  2 +-
 test/command/2378.md                   |  6 ++--
 test/command/5367.md                   |  6 ++--
 test/tables.latex                      | 61 +++++++++++++++-------------------
 test/tables/nordics.latex              | 18 +++++-----
 test/tables/planets.latex              | 28 ++++++----------
 test/tables/students.latex             | 22 ++++++------
 7 files changed, 63 insertions(+), 80 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index 9dd66c8a3..8411d9f80 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -169,7 +169,7 @@ rowToLaTeX :: PandocMonad m
            -> LW m (Doc Text)
 rowToLaTeX blocksWriter celltype row = do
   cellsDocs <- mapM (cellToLaTeX blocksWriter celltype) (fillRow row)
-  return $ hsep (intersperse "&" cellsDocs) <> " \\\\ \\addlinespace"
+  return $ hsep (intersperse "&" cellsDocs) <> " \\\\"
 
 -- | Pads row with empty cells to adjust for rowspans above this row.
 fillRow :: [Ann.Cell] -> [Ann.Cell]
diff --git a/test/command/2378.md b/test/command/2378.md
index e81855367..59359c653 100644
--- a/test/command/2378.md
+++ b/test/command/2378.md
@@ -14,14 +14,14 @@ is used.
 \begin{longtable}[]{@{}ll@{}}
 \caption{a table}\tabularnewline
 \toprule
-x & y\footnote{a footnote} \\ \addlinespace
+x & y\footnote{a footnote} \\
 \midrule
 \endfirsthead
 \toprule
-x & y{} \\ \addlinespace
+x & y{} \\
 \midrule
 \endhead
-1 & 2 \\ \addlinespace
+1 & 2 \\
 \bottomrule
 \end{longtable}
 ```
diff --git a/test/command/5367.md b/test/command/5367.md
index 4dbcca771..2d3a5e52e 100644
--- a/test/command/5367.md
+++ b/test/command/5367.md
@@ -24,14 +24,14 @@ hello\footnote{doc footnote}
   >{\centering\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{0.17}}@{}}
 \caption[Sample table.]{Sample table.\footnote{caption footnote}}\tabularnewline
 \toprule
-Fruit\footnote{header footnote} \\ \addlinespace
+Fruit\footnote{header footnote} \\
 \midrule
 \endfirsthead
 \toprule
-Fruit{} \\ \addlinespace
+Fruit{} \\
 \midrule
 \endhead
-Bans\footnote{table cell footnote} \\ \addlinespace
+Bans\footnote{table cell footnote} \\
 \bottomrule
 \end{longtable}
 
diff --git a/test/tables.latex b/test/tables.latex
index 38c571ba6..afa14d845 100644
--- a/test/tables.latex
+++ b/test/tables.latex
@@ -3,16 +3,16 @@ Simple table with caption:
 \begin{longtable}[]{@{}rlcl@{}}
 \caption{Demonstration of simple table syntax.}\tabularnewline
 \toprule
-Right & Left & Center & Default \\ \addlinespace
+Right & Left & Center & Default \\
 \midrule
 \endfirsthead
 \toprule
-Right & Left & Center & Default \\ \addlinespace
+Right & Left & Center & Default \\
 \midrule
 \endhead
-12 & 12 & 12 & 12 \\ \addlinespace
-123 & 123 & 123 & 123 \\ \addlinespace
-1 & 1 & 1 & 1 \\ \addlinespace
+12 & 12 & 12 & 12 \\
+123 & 123 & 123 & 123 \\
+1 & 1 & 1 & 1 \\
 \bottomrule
 \end{longtable}
 
@@ -20,12 +20,12 @@ Simple table without caption:
 
 \begin{longtable}[]{@{}rlcl@{}}
 \toprule
-Right & Left & Center & Default \\ \addlinespace
+Right & Left & Center & Default \\
 \midrule
 \endhead
-12 & 12 & 12 & 12 \\ \addlinespace
-123 & 123 & 123 & 123 \\ \addlinespace
-1 & 1 & 1 & 1 \\ \addlinespace
+12 & 12 & 12 & 12 \\
+123 & 123 & 123 & 123 \\
+1 & 1 & 1 & 1 \\
 \bottomrule
 \end{longtable}
 
@@ -34,16 +34,16 @@ Simple table indented two spaces:
 \begin{longtable}[]{@{}rlcl@{}}
 \caption{Demonstration of simple table syntax.}\tabularnewline
 \toprule
-Right & Left & Center & Default \\ \addlinespace
+Right & Left & Center & Default \\
 \midrule
 \endfirsthead
 \toprule
-Right & Left & Center & Default \\ \addlinespace
+Right & Left & Center & Default \\
 \midrule
 \endhead
-12 & 12 & 12 & 12 \\ \addlinespace
-123 & 123 & 123 & 123 \\ \addlinespace
-1 & 1 & 1 & 1 \\ \addlinespace
+12 & 12 & 12 & 12 \\
+123 & 123 & 123 & 123 \\
+1 & 1 & 1 & 1 \\
 \bottomrule
 \end{longtable}
 
@@ -56,19 +56,15 @@ Multiline table with caption:
   >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}}
 \caption{Here's the caption. It may span multiple lines.}\tabularnewline
 \toprule
-Centered Header & Left Aligned & Right Aligned & Default
-aligned \\ \addlinespace
+Centered Header & Left Aligned & Right Aligned & Default aligned \\
 \midrule
 \endfirsthead
 \toprule
-Centered Header & Left Aligned & Right Aligned & Default
-aligned \\ \addlinespace
+Centered Header & Left Aligned & Right Aligned & Default aligned \\
 \midrule
 \endhead
-First & row & 12.0 & Example of a row that spans multiple
-lines. \\ \addlinespace
-Second & row & 5.0 & Here's another one. Note the blank line between
-rows. \\ \addlinespace
+First & row & 12.0 & Example of a row that spans multiple lines. \\
+Second & row & 5.0 & Here's another one. Note the blank line between rows. \\
 \bottomrule
 \end{longtable}
 
@@ -80,14 +76,11 @@ Multiline table without caption:
   >{\raggedleft\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.16}}
   >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}}
 \toprule
-Centered Header & Left Aligned & Right Aligned & Default
-aligned \\ \addlinespace
+Centered Header & Left Aligned & Right Aligned & Default aligned \\
 \midrule
 \endhead
-First & row & 12.0 & Example of a row that spans multiple
-lines. \\ \addlinespace
-Second & row & 5.0 & Here's another one. Note the blank line between
-rows. \\ \addlinespace
+First & row & 12.0 & Example of a row that spans multiple lines. \\
+Second & row & 5.0 & Here's another one. Note the blank line between rows. \\
 \bottomrule
 \end{longtable}
 
@@ -96,9 +89,9 @@ Table without column headers:
 \begin{longtable}[]{@{}rlcr@{}}
 \toprule
 \endhead
-12 & 12 & 12 & 12 \\ \addlinespace
-123 & 123 & 123 & 123 \\ \addlinespace
-1 & 1 & 1 & 1 \\ \addlinespace
+12 & 12 & 12 & 12 \\
+123 & 123 & 123 & 123 \\
+1 & 1 & 1 & 1 \\
 \bottomrule
 \end{longtable}
 
@@ -111,9 +104,7 @@ Multiline table without column headers:
   >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}}
 \toprule
 \endhead
-First & row & 12.0 & Example of a row that spans multiple
-lines. \\ \addlinespace
-Second & row & 5.0 & Here's another one. Note the blank line between
-rows. \\ \addlinespace
+First & row & 12.0 & Example of a row that spans multiple lines. \\
+Second & row & 5.0 & Here's another one. Note the blank line between rows. \\
 \bottomrule
 \end{longtable}
diff --git a/test/tables/nordics.latex b/test/tables/nordics.latex
index d1a0b4aed..1b5929bad 100644
--- a/test/tables/nordics.latex
+++ b/test/tables/nordics.latex
@@ -6,21 +6,19 @@
 \caption{States belonging to the \emph{Nordics.}}\tabularnewline
 \toprule
 Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} &
-\vtop{\hbox{\strut Area}\hbox{\strut (in
-km\textsuperscript{2})}} \\ \addlinespace
+\vtop{\hbox{\strut Area}\hbox{\strut (in km\textsuperscript{2})}} \\
 \midrule
 \endfirsthead
 \toprule
 Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} &
-\vtop{\hbox{\strut Area}\hbox{\strut (in
-km\textsuperscript{2})}} \\ \addlinespace
+\vtop{\hbox{\strut Area}\hbox{\strut (in km\textsuperscript{2})}} \\
 \midrule
 \endhead
-Denmark & Copenhagen & 5,809,502 & 43,094 \\ \addlinespace
-Finland & Helsinki & 5,537,364 & 338,145 \\ \addlinespace
-Iceland & Reykjavik & 343,518 & 103,000 \\ \addlinespace
-Norway & Oslo & 5,372,191 & 323,802 \\ \addlinespace
-Sweden & Stockholm & 10,313,447 & 450,295 \\ \addlinespace
-Total & & 27,376,022 & 1,258,336 \\ \addlinespace
+Denmark & Copenhagen & 5,809,502 & 43,094 \\
+Finland & Helsinki & 5,537,364 & 338,145 \\
+Iceland & Reykjavik & 343,518 & 103,000 \\
+Norway & Oslo & 5,372,191 & 323,802 \\
+Sweden & Stockholm & 10,313,447 & 450,295 \\
+Total & & 27,376,022 & 1,258,336 \\
 \bottomrule
 \end{longtable}
diff --git a/test/tables/planets.latex b/test/tables/planets.latex
index 9457b6821..8238c43f3 100644
--- a/test/tables/planets.latex
+++ b/test/tables/planets.latex
@@ -3,34 +3,28 @@
 \toprule
 \multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
 (kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun
-(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace
+(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\
 \midrule
 \endfirsthead
 \toprule
 \multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
 (kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun
-(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace
+(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\
 \midrule
 \endhead
 \multicolumn{2}{l}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 &
-4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the
-Sun \\ \addlinespace
-& & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0
-& \\ \addlinespace
-& & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our
-world \\ \addlinespace
+4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the Sun \\
+& & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0 & \\
+& & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our world \\
 & & Mars & 0.642 & 6,792 & 3933 & 3.7 & 24.7 & 227.9 & -65 & 2 & The red
-planet \\ \addlinespace
+planet \\
 \multirow{4}{*}{Jovian planets} & \multirow{2}{*}{Gas giants} & Jupiter & 1898
-& 142,984 & 1326 & 23.1 & 9.9 & 778.6 & -110 & 67 & The largest
-planet \\ \addlinespace
-& & Saturn & 568 & 120,536 & 687 & 9.0 & 10.7 & 1433.5 & -140 & 62
-& \\ \addlinespace
+& 142,984 & 1326 & 23.1 & 9.9 & 778.6 & -110 & 67 & The largest planet \\
+& & Saturn & 568 & 120,536 & 687 & 9.0 & 10.7 & 1433.5 & -140 & 62 & \\
 & \multirow{2}{*}{Ice giants} & Uranus & 86.8 & 51,118 & 1271 & 8.7 & 17.2 &
-2872.5 & -195 & 27 & \\ \addlinespace
-& & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14
-& \\ \addlinespace
+2872.5 & -195 & 27 & \\
+& & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14 & \\
 \multicolumn{2}{l}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 &
-153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\ \addlinespace
+153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\
 \bottomrule
 \end{longtable}
diff --git a/test/tables/students.latex b/test/tables/students.latex
index 680a539d3..87efb0851 100644
--- a/test/tables/students.latex
+++ b/test/tables/students.latex
@@ -3,21 +3,21 @@
   >{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}@{}}
 \caption{List of Students}\tabularnewline
 \toprule
-Student ID & Name \\ \addlinespace
+Student ID & Name \\
 \midrule
 \endfirsthead
 \toprule
-Student ID & Name \\ \addlinespace
+Student ID & Name \\
 \midrule
 \endhead
-\multicolumn{2}{l}{Computer Science} \\ \addlinespace
-3741255 & Jones, Martha \\ \addlinespace
-4077830 & Pierce, Benjamin \\ \addlinespace
-5151701 & Kirk, James \\ \addlinespace
-\multicolumn{2}{l}{Russian Literature} \\ \addlinespace
-3971244 & Nim, Victor \\ \addlinespace
-\multicolumn{2}{l}{Astrophysics} \\ \addlinespace
-4100332 & Petrov, Alexandra \\ \addlinespace
-4100332 & Toyota, Hiroko \\ \addlinespace
+\multicolumn{2}{l}{Computer Science} \\
+3741255 & Jones, Martha \\
+4077830 & Pierce, Benjamin \\
+5151701 & Kirk, James \\
+\multicolumn{2}{l}{Russian Literature} \\
+3971244 & Nim, Victor \\
+\multicolumn{2}{l}{Astrophysics} \\
+4100332 & Petrov, Alexandra \\
+4100332 & Toyota, Hiroko \\
 \bottomrule
 \end{longtable}
-- 
cgit v1.2.3


From 260aaaacc640cc458259e292c42e15c3d2f34b3f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 2 Jan 2021 08:19:00 -0800
Subject: LaTeX reader: put contents of unknown environments in a Div...

when `raw_tex` is not enabled. (When `raw_tex` is enabled,
the whole environment is parsed as a raw block.)
The class name is the name of the environment.
Previously, we just included the contents without the
surrounding Div, but having a record of the environment's
boundaries and name can be useful.

Closes #6997.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index afe960454..d9fe0f502 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1835,7 +1835,7 @@ rawEnv name = do
        report $ SkippedContent beginCommand pos1
        pos2 <- getPosition
        report $ SkippedContent ("\\end{" <> name <> "}") pos2
-       return bs
+       return $ divWith ("",[name],[]) bs
 
 rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
 rawVerbEnv name = do
-- 
cgit v1.2.3


From 57b10941521d9db808e3892ea9893878065b7c50 Mon Sep 17 00:00:00 2001
From: Dimitri Sabadie <dimitri.sabadie@gmail.com>
Date: Sun, 3 Jan 2021 08:57:47 +0100
Subject: Org reader: mark verbatim code with class "verbatim". (#6998)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Replace org-mode’s verbatim from code to codeWith.

This adds the `"verbatim"` class so that exporters can apply a specific
style on it. For instance, it will be possible for HTML to add a CSS
rule for code + verbatim class.

* Alter test for org-mode’s verbatim change.

See previous commit for further detail on the new implementation.
---
 src/Text/Pandoc/Readers/Org/Inlines.hs | 2 +-
 test/Tests/Readers/Org/Inline.hs       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 0330cf55f..b819a34a3 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -572,7 +572,7 @@ underline :: PandocMonad m => OrgParser m (F Inlines)
 underline = fmap B.underline    <$> emphasisBetween '_'
 
 verbatim  :: PandocMonad m => OrgParser m (F Inlines)
-verbatim  = return . B.code     <$> verbatimBetween '='
+verbatim  = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '='
 
 code      :: PandocMonad m => OrgParser m (F Inlines)
 code      = return . B.code     <$> verbatimBetween '~'
diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs
index 9edd328c3..b5d0caa26 100644
--- a/test/Tests/Readers/Org/Inline.hs
+++ b/test/Tests/Readers/Org/Inline.hs
@@ -56,7 +56,7 @@ tests =
 
   , "Verbatim" =:
       "=Robot.rock()=" =?>
-      para (code "Robot.rock()")
+      para (codeWith ("", ["verbatim"], []) "Robot.rock()")
 
   , "Code" =:
       "~word for word~" =?>
@@ -190,7 +190,7 @@ tests =
                   ])
   , "Verbatim text can contain equal signes (=)" =:
       "=is_subst = True=" =?>
-      para (code "is_subst = True")
+      para (codeWith ("", ["verbatim"], []) "is_subst = True")
 
   , testGroup "Images"
     [ "Image" =:
-- 
cgit v1.2.3


From 21ee2d80c25d6e7fe3be00b58de553b638c90abb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 3 Jan 2021 11:27:01 -0800
Subject: EPUB writer: adjust internal links to images, links, and tables...

after splitting into chapters. Previously we only did this for
Div and Span and Header elements.  See #7000.
---
 src/Text/Pandoc/Writers/EPUB.hs | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 5867b7c80..a24a872ef 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -557,6 +557,10 @@ pandocToEPUB version opts doc = do
   let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)]
       extractLinkURL' num (Span (ident, _, _) _)
         | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+      extractLinkURL' num (Link (ident, _, _) _ _)
+        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+      extractLinkURL' num (Image (ident, _, _) _ _)
+        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
       extractLinkURL' _ _ = []
 
   let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)]
@@ -564,6 +568,8 @@ pandocToEPUB version opts doc = do
         | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
       extractLinkURL num (Header _ (ident, _, _) _)
         | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+      extractLinkURL num (Table (ident,_,_) _ _ _ _ _)
+        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
       extractLinkURL num b = query (extractLinkURL' num) b
 
   let reftable = concat $ zipWith (\(Chapter bs) num ->
-- 
cgit v1.2.3


From f04e02d8d5ff893475c129aaf69ee3f175bf8d8f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 3 Jan 2021 11:35:36 -0800
Subject: EPUB writer: recognize `Format "html4"`, `Format "html5"` as raw
 HTML.

---
 src/Text/Pandoc/Writers/EPUB.hs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index a24a872ef..6222b0690 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1059,11 +1059,17 @@ getMediaNextNewName ext = do
   let nextName = "file" ++ show nextId ++ ext
   (P.fetchItem (TS.pack nextName) >> getMediaNextNewName ext) `catchError` const (return nextName)
 
+isHtmlFormat :: Format -> Bool
+isHtmlFormat (Format "html") = True
+isHtmlFormat (Format "html4") = True
+isHtmlFormat (Format "html5") = True
+isHtmlFormat _ = False
+
 transformBlock  :: PandocMonad m
                 => Block
                 -> E m Block
 transformBlock (RawBlock fmt raw)
-  | fmt == Format "html" = do
+  | isHtmlFormat fmt = do
   let tags = parseTags raw
   tags' <- mapM transformTag tags
   return $ RawBlock fmt (renderTags' tags')
@@ -1083,7 +1089,7 @@ transformInline opts x@(Math t m)
     return $ Span ("",["math",mathclass],[])
                 [Image nullAttr [x] ("../" <> newsrc, "")]
 transformInline _opts (RawInline fmt raw)
-  | fmt == Format "html" = do
+  | isHtmlFormat fmt = do
   let tags = parseTags raw
   tags' <- mapM transformTag tags
   return $ RawInline fmt (renderTags' tags')
-- 
cgit v1.2.3


From 1ce7db1fa67b6a9f1d354d377e3bfa548d854ee9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 4 Jan 2021 11:38:18 -0800
Subject: EPUB writer: adjust internal links to identifiers...

defined in raw HTML sections after splitting into
chapters.

Closes #7000.
---
 src/Text/Pandoc/Writers/EPUB.hs | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 6222b0690..c8f5ff60d 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -561,6 +561,16 @@ pandocToEPUB version opts doc = do
         | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
       extractLinkURL' num (Image (ident, _, _) _ _)
         | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+      extractLinkURL' num (RawInline fmt raw)
+        | isHtmlFormat fmt
+        = foldr (\tag ->
+                   case tag of
+                     TagOpen{} ->
+                       case fromAttrib "id" tag of
+                         "" -> id
+                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
+                     _ -> id)
+            [] (parseTags raw)
       extractLinkURL' _ _ = []
 
   let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)]
@@ -570,6 +580,16 @@ pandocToEPUB version opts doc = do
         | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
       extractLinkURL num (Table (ident,_,_) _ _ _ _ _)
         | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+      extractLinkURL num (RawBlock fmt raw)
+        | isHtmlFormat fmt
+        = foldr (\tag ->
+                   case tag of
+                     TagOpen{} ->
+                       case fromAttrib "id" tag of
+                         "" -> id
+                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
+                     _ -> id)
+            [] (parseTags raw)
       extractLinkURL num b = query (extractLinkURL' num) b
 
   let reftable = concat $ zipWith (\(Chapter bs) num ->
-- 
cgit v1.2.3


From ea479bf28a4031f408af12ea92d3e19f9a838820 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 4 Jan 2021 14:05:03 -0800
Subject: LaTeX reader: handle filecontents environment.

Closes #7003.
---
 src/Text/Pandoc/Readers/LaTeX.hs         | 32 +++++++++++++++++++++------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  2 ++
 test/command/7003.md                     | 37 ++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 6 deletions(-)
 create mode 100644 test/command/7003.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index d9fe0f502..14a41a911 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1219,6 +1219,7 @@ preamble :: PandocMonad m => LP m Blocks
 preamble = mconcat <$> many preambleBlock
   where preambleBlock =  (mempty <$ spaces1)
                      <|> macroDef (rawBlock "latex")
+                     <|> filecontents
                      <|> (mempty <$ blockCommand)
                      <|> (mempty <$ braced)
                      <|> (do notFollowedBy (begin_ "document")
@@ -1272,6 +1273,16 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
 insertIncluded :: PandocMonad m
                => FilePath
                -> FilePath
@@ -1281,13 +1292,12 @@ insertIncluded defaultExtension f' = do
                 ".tex" -> f'
                 ".sty" -> f'
                 _      -> addExtension f' defaultExtension
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
   pos <- getPosition
   containers <- getIncludeFiles <$> getState
   when (T.pack f `elem` containers) $
     throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
   updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromDirs dirs f
+  mbcontents <- readFileFromTexinputs f
   contents <- case mbcontents of
                    Just s -> return s
                    Nothing -> do
@@ -1695,6 +1705,18 @@ environments = M.fromList
    , ("iftoggle", try $ ifToggle >> block)
    ]
 
+filecontents :: PandocMonad m => LP m Blocks
+filecontents = try $ do
+  controlSeq "begin"
+  name <- untokenize <$> braced
+  guard $ name == "filecontents" || name == "filecontents*"
+  skipopts
+  fp <- untokenize <$> braced
+  txt <- verbEnv name
+  updateState $ \st ->
+    st{ sFileContents = M.insert fp txt (sFileContents st) }
+  return mempty
+
 theoremstyle :: PandocMonad m => LP m Blocks
 theoremstyle = do
   stylename <- untokenize <$> braced
@@ -1894,8 +1916,7 @@ inputMinted = do
   pos <- getPosition
   attr <- mintedAttr
   f <- T.filter (/='"') . untokenize <$> braced
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
-  mbCode <- readFileFromDirs dirs (T.unpack f)
+  mbCode <- readFileFromTexinputs (T.unpack f)
   rawcode <- case mbCode of
                   Just s -> return s
                   Nothing -> do
@@ -1981,8 +2002,7 @@ inputListing = do
   pos <- getPosition
   options <- option [] keyvals
   f <- T.filter (/='"') . untokenize <$> braced
-  dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS"
-  mbCode <- readFileFromDirs dirs (T.unpack f)
+  mbCode <- readFileFromTexinputs (T.unpack f)
   codeLines <- case mbCode of
                       Just s -> return $ T.lines s
                       Nothing -> do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 563d32883..e92ed387c 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -152,6 +152,7 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sHasChapters   :: Bool
                             , sToggles       :: M.Map Text Bool
                             , sExpanded      :: Bool
+                            , sFileContents  :: M.Map Text Text
                             }
      deriving Show
 
@@ -177,6 +178,7 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sHasChapters   = False
                               , sToggles       = M.empty
                               , sExpanded      = False
+                              , sFileContents  = M.empty
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
diff --git a/test/command/7003.md b/test/command/7003.md
new file mode 100644
index 000000000..1f6275943
--- /dev/null
+++ b/test/command/7003.md
@@ -0,0 +1,37 @@
+```
+% pandoc -f latex -t native
+\documentclass{article}
+\usepackage{listings}
+
+\lstset{basicstyle=\ttfamily}
+
+\begin{filecontents*}[overwrite]{example.tex}
+\documentclass{article}
+\begin{document}
+\section{Bar}
+This a Bar section
+\end{document}
+\end{filecontents*}
+
+\begin{document}
+
+\section{With lstlisting environment}
+
+\begin{lstlisting}
+\documentclass{article}
+\begin{document}
+\section{Foo}
+This a Foo section
+\end{document}
+\end{lstlisting}
+
+\section{With lstinputlisting command}
+
+\lstinputlisting{example.tex}
+\end{document}
+^D
+[Header 1 ("with-lstlisting-environment",[],[]) [Str "With",Space,Str "lstlisting",Space,Str "environment"]
+,CodeBlock ("",[],[]) "\\documentclass{article}\n\\begin{document}\n\\section{Foo}\nThis a Foo section\n\\end{document}"
+,Header 1 ("with-lstinputlisting-command",[],[]) [Str "With",Space,Str "lstinputlisting",Space,Str "command"]
+,CodeBlock ("",["latex"],[]) "\\documentclass{article}\n\\begin{document}\n\\section{Bar}\nThis a Bar section\n\\end{document}"]
+```
-- 
cgit v1.2.3


From 385b6a3b215124fd2dfa044b8847d69a6cf14a73 Mon Sep 17 00:00:00 2001
From: David Martschenko <62178322+davidmrt98@users.noreply.github.com>
Date: Tue, 5 Jan 2021 19:15:59 +0100
Subject: Implement defaults file inheritance (#6924)

Allow defaults files to inherit options from other defaults files by
specifying them with the following syntax:
`defaults: [list of defaults files or single defaults file]`.
---
 MANUAL.txt                                |  10 +++
 pandoc.cabal                              |   7 ++
 src/Text/Pandoc/App/CommandLineOptions.hs |  36 +++-----
 src/Text/Pandoc/App/Opt.hs                | 136 ++++++++++++++++++++++++++++--
 test/command/defaults-inheritance-1.md    |   6 ++
 test/command/defaults-inheritance-2.md    |   5 ++
 test/command/defaults-inheritance-3.md    |   6 ++
 test/command/defaults3.yaml               |   4 +
 test/command/defaults4.yaml               |   3 +
 test/command/defaults5.yaml               |   2 +
 test/command/defaults6.yaml               |   2 +
 test/command/defaults7.yaml               |   2 +
 test/command/defaults8.yaml               |   2 +
 test/command/defaults9.yaml               |   1 +
 14 files changed, 189 insertions(+), 33 deletions(-)
 create mode 100644 test/command/defaults-inheritance-1.md
 create mode 100644 test/command/defaults-inheritance-2.md
 create mode 100644 test/command/defaults-inheritance-3.md
 create mode 100644 test/command/defaults3.yaml
 create mode 100644 test/command/defaults4.yaml
 create mode 100644 test/command/defaults5.yaml
 create mode 100644 test/command/defaults6.yaml
 create mode 100644 test/command/defaults7.yaml
 create mode 100644 test/command/defaults8.yaml
 create mode 100644 test/command/defaults9.yaml

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 53cd54e5f..831ffb0b0 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -1507,6 +1507,16 @@ input-files:
 - content.md
 # or you may use input-file: with a single value
 
+# Include options from the specified defaults files.
+# The files will be searched for first in the working directory
+# and then in the defaults subdirectory of the user data directory.
+# The files are included in the same order in which they appear in
+# the list. Options specified in this defaults file always have
+# priority over the included ones.
+defaults:
+- defsA
+- defsB
+
 template: letter
 standalone: true
 self-contained: false
diff --git a/pandoc.cabal b/pandoc.cabal
index 794eef91d..cce3c1a58 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -216,6 +216,13 @@ extra-source-files:
                  test/command/01.csv
                  test/command/defaults1.yaml
                  test/command/defaults2.yaml
+                 test/command/defaults3.yaml
+                 test/command/defaults4.yaml
+                 test/command/defaults5.yaml
+                 test/command/defaults6.yaml
+                 test/command/defaults7.yaml
+                 test/command/defaults8.yaml
+                 test/command/defaults9.yaml
                  test/command/3533-rst-csv-tables.csv
                  test/command/3880.txt
                  test/command/5182.txt
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 906fcc4c0..21ee47b7b 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -25,6 +25,7 @@ module Text.Pandoc.App.CommandLineOptions (
 import Control.Monad
 import Control.Monad.Trans
 import Control.Monad.Except (throwError)
+import Control.Monad.State.Strict
 import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder,
          defConfig, Indent(..), NumberFormat(..))
 import Data.Bifunctor (second)
@@ -46,10 +47,12 @@ import System.FilePath
 import System.IO (stdout)
 import Text.DocTemplates (Context (..), ToContext (toVal), Val (..))
 import Text.Pandoc
-import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..), addMeta)
+import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..),
+                            DefaultsState (..), addMeta, applyDefaults,
+                            fullDefaultsPath)
 import Text.Pandoc.Filter (Filter (..))
 import Text.Pandoc.Highlighting (highlightingStyles)
-import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs, findM)
+import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs)
 import Text.Printf
 
 #ifdef EMBED_DATA_FILES
@@ -64,7 +67,6 @@ import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as B
 import qualified Data.Map as M
 import qualified Data.Text as T
-import qualified Data.YAML as Y
 import qualified Text.Pandoc.UTF8 as UTF8
 
 parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt
@@ -166,7 +168,11 @@ options =
 
     , Option "d" ["defaults"]
                  (ReqArg
-                  (\arg opt -> applyDefaults opt arg
+                  (\arg opt -> runIOorExplode $ do
+                     let defsState = DefaultsState { curDefaults = Nothing,
+                                                     inheritanceGraph = [] }
+                     fp <- fullDefaultsPath (optDataDir opt) arg
+                     evalStateT (applyDefaults opt fp) defsState
                   )
                   "FILE")
                 ""
@@ -1012,28 +1018,6 @@ writersNames = sort
 splitField :: String -> (String, String)
 splitField = second (tailDef "true") . break (`elemText` ":=")
 
--- | Apply defaults from --defaults file.
-applyDefaults :: Opt -> FilePath -> IO Opt
-applyDefaults opt file = runIOorExplode $ do
-  let fp = if null (takeExtension file)
-              then addExtension file "yaml"
-              else file
-  setVerbosity $ optVerbosity opt
-  dataDirs <- liftIO defaultUserDataDirs
-  let fps = fp : case optDataDir opt of
-              Nothing -> map (</> ("defaults" </> fp))
-                               dataDirs
-              Just dd -> [dd </> "defaults" </> fp]
-  fp' <- fromMaybe fp <$> findM fileExists fps
-  inp <- readFileLazy fp'
-  case Y.decode1 inp of
-      Right (f :: Opt -> Opt) -> return $ f opt
-      Left (errpos, errmsg)  -> throwError $
-         PandocParseError $ T.pack $
-         "Error parsing " ++ fp' ++ " line " ++
-          show (Y.posLine errpos) ++ " column " ++
-          show (Y.posColumn errpos) ++ ":\n" ++ errmsg
-
 lookupHighlightStyle :: PandocMonad m => String -> m Style
 lookupHighlightStyle s
   | takeExtension s == ".theme" = -- attempt to load KDE theme
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 00b4b5523..6dd19758e 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -20,10 +20,17 @@ module Text.Pandoc.App.Opt (
             Opt(..)
           , LineEnding (..)
           , IpynbOutput (..)
+          , DefaultsState (..)
           , defaultOpts
           , addMeta
+          , applyDefaults
+          , fullDefaultsPath
           ) where
+import Control.Monad.Except (MonadIO, liftIO, throwError, (>=>), foldM)
+import Control.Monad.State.Strict (StateT, modify, gets)
+import System.FilePath ( addExtension, (</>), takeExtension )
 import Data.Char (isLower, toLower)
+import Data.Maybe (fromMaybe)
 import GHC.Generics hiding (Meta)
 import Text.Pandoc.Builder (setMeta)
 import Text.Pandoc.Filter (Filter (..))
@@ -34,7 +41,9 @@ import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault),
                             ReferenceLocation (EndOfDocument),
                             ObfuscationMethod (NoObfuscation),
                             CiteMethod (Citeproc))
-import Text.Pandoc.Shared (camelCaseStrToHyphenated)
+import Text.Pandoc.Class (readFileLazy, fileExists, setVerbosity, PandocMonad)
+import Text.Pandoc.Error (PandocError (PandocParseError, PandocSomeError))
+import Text.Pandoc.Shared (camelCaseStrToHyphenated, defaultUserDataDirs, findM, ordNub)
 import qualified Text.Pandoc.Parsing as P
 import Text.Pandoc.Readers.Metadata (yamlMap)
 import Text.Pandoc.Class.PandocPure
@@ -150,16 +159,77 @@ data Opt = Opt
     } deriving (Generic, Show)
 
 instance FromYAML (Opt -> Opt) where
-  parseYAML (Mapping _ _ m) =
-    foldr (.) id <$> mapM doOpt (M.toList m)
+  parseYAML (Mapping _ _ m) = chain doOpt (M.toList m)
   parseYAML n = failAtNode n "Expected a mapping"
 
+data DefaultsState = DefaultsState
+    {
+      curDefaults      :: Maybe FilePath -- currently parsed file
+    , inheritanceGraph :: [[FilePath]]   -- defaults file inheritance graph
+    } deriving (Show)
+
+instance (PandocMonad m, MonadIO m)
+      => FromYAML (Opt -> StateT DefaultsState m Opt) where
+  parseYAML (Mapping _ _ m) = do
+    let opts = M.mapKeys toText m
+    dataDir <- case M.lookup "data-dir" opts of
+      Nothing -> return Nothing
+      Just v -> Just . unpack <$> parseYAML v
+    f <- parseOptions $ M.toList m
+    case M.lookup "defaults" opts of
+      Just v -> do
+        g <- parseDefaults v dataDir
+        return  $ g >=> f
+      Nothing -> return f
+    where
+      toText (Scalar _ (SStr s)) = s
+      toText _ = ""
+  parseYAML n = failAtNode n "Expected a mapping"
+
+parseDefaults :: (PandocMonad m, MonadIO m)
+              => Node Pos
+              -> Maybe FilePath
+              -> Parser (Opt -> StateT DefaultsState m Opt)
+parseDefaults n dataDir = parseDefsNames n >>= \ds -> return $ \o -> do
+  -- get parent defaults:
+  defsParent <- gets $ fromMaybe "" . curDefaults
+  -- get child defaults:
+  defsChildren <- mapM (fullDefaultsPath dataDir) ds
+  -- expand parent in defaults inheritance graph by children:
+  defsGraph <- gets inheritanceGraph
+  let defsGraphExp = expand defsGraph defsChildren defsParent
+  modify $ \defsState -> defsState{ inheritanceGraph = defsGraphExp }
+  -- check for cyclic inheritance:
+  if cyclic defsGraphExp
+    then throwError $
+      PandocSomeError $ T.pack $
+        "Error: Circular defaults file reference in " ++
+        "'" ++ defsParent ++ "'"
+    else foldM applyDefaults o defsChildren
+  where parseDefsNames x = (parseYAML x >>= \xs -> return $ map unpack xs)
+                       <|> (parseYAML x >>= \x' -> return [unpack x'])
+
+parseOptions :: Monad m
+             => [(Node Pos, Node Pos)]
+             -> Parser (Opt -> StateT DefaultsState m Opt)
+parseOptions ns = do
+  f <- chain doOpt' ns
+  return $ return . f
+
+chain :: Monad m => (a -> m (b -> b)) -> [a] -> m (b -> b)
+chain f = foldM g id
+  where g o n = f n >>= \o' -> return $ o' . o
+
+doOpt' :: (Node Pos, Node Pos) -> Parser (Opt -> Opt)
+doOpt' (k',v) = do
+  k <- parseStringKey k'
+  case k of
+    "defaults" -> return id
+    _ -> doOpt (k',v)
+
 doOpt :: (Node Pos, Node Pos) -> Parser (Opt -> Opt)
 doOpt (k',v) = do
-  k <- case k' of
-         Scalar _ (SStr t) -> return t
-         Scalar _ _ -> failAtNode k' "Non-string key"
-         _ -> failAtNode k' "Non-scalar key"
+  k <- parseStringKey k'
   case k of
     "tab-stop" ->
       parseYAML v >>= \x -> return (\o -> o{ optTabStop = x })
@@ -494,6 +564,12 @@ defaultOpts = Opt
     , optStripComments         = False
     }
 
+parseStringKey ::  Node Pos -> Parser Text
+parseStringKey k = case k of
+  Scalar _ (SStr t) -> return t
+  Scalar _ _ -> failAtNode k "Non-string key"
+  _ -> failAtNode k "Non-scalar key"
+
 yamlToMeta :: Node Pos -> Parser Meta
 yamlToMeta (Mapping _ _ m) =
     either (fail . show) return $ runEverything (yamlMap pMetaString m)
@@ -524,6 +600,52 @@ readMetaValue s
   | s == "FALSE" = MetaBool False
   | otherwise    = MetaString $ T.pack s
 
+-- | Apply defaults from --defaults file.
+applyDefaults :: (PandocMonad m, MonadIO m)
+              => Opt
+              -> FilePath
+              -> StateT DefaultsState m Opt
+applyDefaults opt file = do
+  setVerbosity $ optVerbosity opt
+  modify $ \defsState -> defsState{ curDefaults = Just file }
+  inp <- readFileLazy file
+  case decode1 inp of
+      Right f -> f opt
+      Left (errpos, errmsg)  -> throwError $
+         PandocParseError $ T.pack $
+         "Error parsing " ++ file ++ " line " ++
+          show (posLine errpos) ++ " column " ++
+          show (posColumn errpos) ++ ":\n" ++ errmsg
+
+fullDefaultsPath :: (PandocMonad m, MonadIO m)
+                 => Maybe FilePath
+                 -> FilePath
+                 -> m FilePath
+fullDefaultsPath dataDir file = do
+  let fp = if null (takeExtension file)
+              then addExtension file "yaml"
+              else file
+  dataDirs <- liftIO defaultUserDataDirs
+  let fps = fp : case dataDir of
+                   Nothing -> map (</> ("defaults" </> fp))
+                                    dataDirs
+                   Just dd -> [dd </> "defaults" </> fp]
+  fromMaybe fp <$> findM fileExists fps
+
+-- | In a list of lists, append another list in front of every list which
+-- starts with specific element.
+expand :: Ord a => [[a]] -> [a] -> a -> [[a]]
+expand [] ns n = fmap (\x -> x : [n]) ns
+expand ps ns n = concatMap (ext n ns) ps
+  where
+    ext x xs p = case p of
+      (l : _) | x == l -> fmap (: p) xs
+      _ -> [p]
+
+cyclic :: Ord a => [[a]] -> Bool
+cyclic = any hasDuplicate
+  where
+    hasDuplicate xs = length (ordNub xs) /= length xs
 
 -- see https://github.com/jgm/pandoc/pull/4083
 -- using generic deriving caused long compilation times
diff --git a/test/command/defaults-inheritance-1.md b/test/command/defaults-inheritance-1.md
new file mode 100644
index 000000000..0760e2ec0
--- /dev/null
+++ b/test/command/defaults-inheritance-1.md
@@ -0,0 +1,6 @@
+```
+% pandoc -d command/defaults3
+# Header
+^D
+# Header
+```
diff --git a/test/command/defaults-inheritance-2.md b/test/command/defaults-inheritance-2.md
new file mode 100644
index 000000000..8b26a2613
--- /dev/null
+++ b/test/command/defaults-inheritance-2.md
@@ -0,0 +1,5 @@
+```
+% pandoc -d command/defaults6
+^D
+Error: Circular defaults file reference in 'command/defaults7.yaml'
+```
diff --git a/test/command/defaults-inheritance-3.md b/test/command/defaults-inheritance-3.md
new file mode 100644
index 000000000..81cac7baa
--- /dev/null
+++ b/test/command/defaults-inheritance-3.md
@@ -0,0 +1,6 @@
+```
+% pandoc -d command/defaults8
+<h1>Header</h1>
+^D
+# Header
+```
diff --git a/test/command/defaults3.yaml b/test/command/defaults3.yaml
new file mode 100644
index 000000000..d8b6f9144
--- /dev/null
+++ b/test/command/defaults3.yaml
@@ -0,0 +1,4 @@
+defaults:
+  - command/defaults4
+  - command/defaults5
+to: markdown
diff --git a/test/command/defaults4.yaml b/test/command/defaults4.yaml
new file mode 100644
index 000000000..a6caa985b
--- /dev/null
+++ b/test/command/defaults4.yaml
@@ -0,0 +1,3 @@
+from: html
+defaults:
+  - command/defaults5
diff --git a/test/command/defaults5.yaml b/test/command/defaults5.yaml
new file mode 100644
index 000000000..bb48b9708
--- /dev/null
+++ b/test/command/defaults5.yaml
@@ -0,0 +1,2 @@
+from: markdown
+to: html
diff --git a/test/command/defaults6.yaml b/test/command/defaults6.yaml
new file mode 100644
index 000000000..ac4a819e5
--- /dev/null
+++ b/test/command/defaults6.yaml
@@ -0,0 +1,2 @@
+defaults:
+  - command/defaults7
diff --git a/test/command/defaults7.yaml b/test/command/defaults7.yaml
new file mode 100644
index 000000000..19ca8f09e
--- /dev/null
+++ b/test/command/defaults7.yaml
@@ -0,0 +1,2 @@
+defaults:
+  - command/defaults6
diff --git a/test/command/defaults8.yaml b/test/command/defaults8.yaml
new file mode 100644
index 000000000..e418d33b2
--- /dev/null
+++ b/test/command/defaults8.yaml
@@ -0,0 +1,2 @@
+from: html
+defaults: command/defaults9
diff --git a/test/command/defaults9.yaml b/test/command/defaults9.yaml
new file mode 100644
index 000000000..d732eb066
--- /dev/null
+++ b/test/command/defaults9.yaml
@@ -0,0 +1 @@
+to: markdown
-- 
cgit v1.2.3


From 15ba184e6e7825898ebae5a59e2c3a60e0992da0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 5 Jan 2021 12:07:02 -0800
Subject: HTML writer: fix implicit_figure at end of footnotes.

Closes #7006.
---
 src/Text/Pandoc/Writers/HTML.hs | 10 +++++++---
 test/command/7006.md            | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 test/command/7006.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index c93322953..0b051c8f0 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -1455,11 +1455,15 @@ blockListToNote opts ref blocks = do
                     else let lastBlock   = last blocks
                              otherBlocks = init blocks
                          in  case lastBlock of
-                                  (Para lst)  -> otherBlocks ++
+                                  Para [Image _ _ (_,tit)]
+                                      | "fig:" `T.isPrefixOf` tit
+                                            -> otherBlocks ++ [lastBlock,
+                                                  Plain backlink]
+                                  Para lst  -> otherBlocks ++
                                                  [Para (lst ++ backlink)]
-                                  (Plain lst) -> otherBlocks ++
+                                  Plain lst -> otherBlocks ++
                                                  [Plain (lst ++ backlink)]
-                                  _           -> otherBlocks ++ [lastBlock,
+                                  _         -> otherBlocks ++ [lastBlock,
                                                  Plain backlink]
   contents <- blockListToHtml opts blocks'
   let noteItem = H.li ! prefixedId opts ("fn" <> ref) $ contents
diff --git a/test/command/7006.md b/test/command/7006.md
new file mode 100644
index 000000000..e7951fb1a
--- /dev/null
+++ b/test/command/7006.md
@@ -0,0 +1,20 @@
+```
+% pandoc -t html
+Test.[^fn]
+
+[^fn]: Foo:
+
+    ![Caption.](/image.jpg)
+^D
+<p>Test.<a href="#fn1" class="footnote-ref" id="fnref1" role="doc-noteref"><sup>1</sup></a></p>
+<section class="footnotes" role="doc-endnotes">
+<hr />
+<ol>
+<li id="fn1" role="doc-endnote"><p>Foo:</p>
+<figure>
+<img src="/image.jpg" alt="Caption." /><figcaption aria-hidden="true">Caption.</figcaption>
+</figure>
+<a href="#fnref1" class="footnote-back" role="doc-backlink">↩︎</a></li>
+</ol>
+</section>
+```
-- 
cgit v1.2.3


From c0d8b186d142eb5e9f845de0a4ebcadd04c32dce Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 7 Jan 2021 11:06:25 -0800
Subject: T.P.Parsing: modify gridTableWith' for headerless tables.

If the table lacks a header, the header row should be an empty
list. Previously we got a list of empty cells, which caused
an empty header to be emitted instead of no header.  In LaTeX/PDF
output that meant we got a double top line with space between.

@tarleb @despres - please let me know if this is problematic
for some reason I'm not grasping.
---
 src/Text/Pandoc/Parsing.hs       | 22 +++++++++++-----------
 test/command/3516.md             |  6 +-----
 test/command/5708.md             |  6 +-----
 test/markdown-reader-more.native | 38 +++++---------------------------------
 test/rst-reader.native           | 24 +++---------------------
 test/tables-rstsubset.native     | 10 +---------
 6 files changed, 22 insertions(+), 84 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 979344f63..fd14341ad 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1069,24 +1069,24 @@ gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st
                 => Bool -- ^ Headerless table
                 -> ParserT s st m (mf Blocks)
                 -> ParserT s st m (mf [Blocks], [Alignment], [Int])
-gridTableHeader headless blocks = try $ do
+gridTableHeader True _ = do
   optional blanklines
   dashes <- gridDashedLines '-'
-  rawContent  <- if headless
-                    then return $ repeat ""
-                    else many1
-                         (notFollowedBy (gridTableSep '=') >> char '|' >>
+  let aligns = map snd dashes
+  let lines'   = map (snd . fst) dashes
+  let indices  = scanl (+) 0 lines'
+  return (return [], aligns, indices)
+gridTableHeader False blocks = try $ do
+  optional blanklines
+  dashes <- gridDashedLines '-'
+  rawContent  <- many1 (notFollowedBy (gridTableSep '=') >> char '|' >>
                            T.pack <$> many1Till anyChar newline)
-  underDashes <- if headless
-                    then return dashes
-                    else gridDashedLines '='
+  underDashes <- gridDashedLines '='
   guard $ length dashes == length underDashes
   let lines'   = map (snd . fst) underDashes
   let indices  = scanl (+) 0 lines'
   let aligns   = map snd underDashes
-  let rawHeads = if headless
-                    then replicate (length underDashes) ""
-                    else map (T.unlines . map trim) $ transpose
+  let rawHeads = map (T.unlines . map trim) $ transpose
                        $ map (gridTableSplitLine indices) rawContent
   heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
   return (heads, aligns, indices)
diff --git a/test/command/3516.md b/test/command/3516.md
index 1cb805afe..dc8770bad 100644
--- a/test/command/3516.md
+++ b/test/command/3516.md
@@ -29,11 +29,7 @@ on Windows builds.
  [(AlignDefault,ColWidth 5.555555555555555e-2)
  ,(AlignDefault,ColWidth 5.555555555555555e-2)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
diff --git a/test/command/5708.md b/test/command/5708.md
index e4b99e4db..e5ba72c69 100644
--- a/test/command/5708.md
+++ b/test/command/5708.md
@@ -9,11 +9,7 @@
  [(AlignDefault,ColWidth 0.125)
  ,(AlignDefault,ColWidth 0.2361111111111111)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
diff --git a/test/markdown-reader-more.native b/test/markdown-reader-more.native
index 5ac4facb8..b4a16f7cf 100644
--- a/test/markdown-reader-more.native
+++ b/test/markdown-reader-more.native
@@ -134,13 +134,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
  ,(AlignDefault,ColWidth 0.16666666666666666)
  ,(AlignDefault,ColWidth 0.18055555555555555)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
@@ -198,13 +192,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
  ,(AlignLeft,ColWidth 0.16666666666666666)
  ,(AlignCenter,ColWidth 0.18055555555555555)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
@@ -230,13 +218,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
  ,(AlignDefault,ColWidth 0.16666666666666666)
  ,(AlignDefault,ColWidth 0.18055555555555555)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
@@ -262,13 +244,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
  ,(AlignDefault,ColWidth 0.16666666666666666)
  ,(AlignDefault,ColWidth 0.18055555555555555)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
@@ -300,11 +276,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
  [(AlignDefault,ColWidth 5.555555555555555e-2)
  ,(AlignDefault,ColWidth 5.555555555555555e-2)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
diff --git a/test/rst-reader.native b/test/rst-reader.native
index 93855cc00..9f8d5ab3e 100644
--- a/test/rst-reader.native
+++ b/test/rst-reader.native
@@ -346,13 +346,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
  ,(AlignDefault,ColWidth 0.15)
  ,(AlignDefault,ColWidth 0.1625)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
@@ -378,13 +372,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
  ,(AlignDefault,ColWidth 0.15)
  ,(AlignDefault,ColWidth 0.1625)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
@@ -410,13 +398,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
  ,(AlignDefault,ColWidth 0.15)
  ,(AlignDefault,ColWidth 0.1625)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
diff --git a/test/tables-rstsubset.native b/test/tables-rstsubset.native
index b584bb7ea..2536ff954 100644
--- a/test/tables-rstsubset.native
+++ b/test/tables-rstsubset.native
@@ -276,15 +276,7 @@
  ,(AlignDefault,ColWidth 0.15)
  ,(AlignDefault,ColWidth 0.3375)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
-- 
cgit v1.2.3


From 327e1428c5cdcb62beef6d2e00ab6f1d699256f0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 7 Jan 2021 16:41:25 -0800
Subject: gfm/commonmark writer: implement start number on ordered lists.

Previously they always started at 1, but according to the spec
the start number is respected. Closes #7009.
---
 src/Text/Pandoc/Writers/Markdown.hs | 5 ++++-
 test/command/7009.md                | 8 ++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7009.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index c349fd713..1b5c00468 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -674,7 +674,10 @@ blockToMarkdown' opts (BulletList items) = do
   contents <- inList $ mapM (bulletListItemToMarkdown opts) items
   return $ (if isTightList items then vcat else vsep) contents <> blankline
 blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do
-  let start' = if isEnabled Ext_startnum opts then start else 1
+  variant <- asks envVariant
+  let start' = if variant == Commonmark || isEnabled Ext_startnum opts
+                  then start
+                  else 1
   let sty'   = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle
   let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim
   let attribs = (start', sty', delim')
diff --git a/test/command/7009.md b/test/command/7009.md
new file mode 100644
index 000000000..a060c6076
--- /dev/null
+++ b/test/command/7009.md
@@ -0,0 +1,8 @@
+```
+% pandoc -t gfm
+3. a
+4. b
+^D
+3.  a
+4.  b
+```
-- 
cgit v1.2.3


From 4f3434586743afb69f00ca91fe6ec9b68b39ae7e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 8 Jan 2021 18:38:20 +0100
Subject: Update copyright notices for 2021 (#7012)

---
 COPYRIGHT                                       | 26 ++++++++++++-------------
 MANUAL.txt                                      |  2 +-
 README.md                                       |  2 +-
 README.template                                 |  2 +-
 app/pandoc.hs                                   |  2 +-
 benchmark/benchmark-pandoc.hs                   |  2 +-
 benchmark/weigh-pandoc.hs                       |  2 +-
 pandoc.cabal                                    |  2 +-
 src/Text/Pandoc.hs                              |  2 +-
 src/Text/Pandoc/App.hs                          |  2 +-
 src/Text/Pandoc/App/CommandLineOptions.hs       |  4 ++--
 src/Text/Pandoc/App/FormatHeuristics.hs         |  2 +-
 src/Text/Pandoc/App/Opt.hs                      |  2 +-
 src/Text/Pandoc/App/OutputSettings.hs           |  2 +-
 src/Text/Pandoc/Asciify.hs                      |  2 +-
 src/Text/Pandoc/BCP47.hs                        |  2 +-
 src/Text/Pandoc/CSS.hs                          |  2 +-
 src/Text/Pandoc/CSV.hs                          |  2 +-
 src/Text/Pandoc/Data.hs                         |  2 +-
 src/Text/Pandoc/Error.hs                        |  2 +-
 src/Text/Pandoc/Extensions.hs                   |  2 +-
 src/Text/Pandoc/Filter.hs                       |  2 +-
 src/Text/Pandoc/Filter/JSON.hs                  |  2 +-
 src/Text/Pandoc/Filter/Lua.hs                   |  2 +-
 src/Text/Pandoc/Filter/Path.hs                  |  2 +-
 src/Text/Pandoc/Highlighting.hs                 |  2 +-
 src/Text/Pandoc/Image.hs                        |  2 +-
 src/Text/Pandoc/ImageSize.hs                    |  2 +-
 src/Text/Pandoc/Logging.hs                      |  2 +-
 src/Text/Pandoc/Lua.hs                          |  2 +-
 src/Text/Pandoc/Lua/ErrorConversion.hs          |  2 +-
 src/Text/Pandoc/Lua/Filter.hs                   |  4 ++--
 src/Text/Pandoc/Lua/Global.hs                   |  2 +-
 src/Text/Pandoc/Lua/Init.hs                     |  2 +-
 src/Text/Pandoc/Lua/Marshaling.hs               |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/AST.hs           |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/AnyValue.hs      |  2 +-
 src/Text/Pandoc/Lua/Marshaling/CommonState.hs   |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/Context.hs       |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/List.hs          |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/MediaBag.hs      |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/PandocError.hs   |  2 +-
 src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs |  4 ++--
 src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs   |  2 +-
 src/Text/Pandoc/Lua/Marshaling/Version.hs       |  2 +-
 src/Text/Pandoc/Lua/Module/MediaBag.hs          |  2 +-
 src/Text/Pandoc/Lua/Module/Pandoc.hs            |  2 +-
 src/Text/Pandoc/Lua/Module/System.hs            |  2 +-
 src/Text/Pandoc/Lua/Module/Types.hs             |  2 +-
 src/Text/Pandoc/Lua/Module/Utils.hs             |  2 +-
 src/Text/Pandoc/Lua/Packages.hs                 |  2 +-
 src/Text/Pandoc/Lua/PandocLua.hs                |  2 +-
 src/Text/Pandoc/Lua/Util.hs                     |  4 ++--
 src/Text/Pandoc/Lua/Walk.hs                     |  4 ++--
 src/Text/Pandoc/MIME.hs                         |  2 +-
 src/Text/Pandoc/MediaBag.hs                     |  2 +-
 src/Text/Pandoc/Options.hs                      |  2 +-
 src/Text/Pandoc/PDF.hs                          |  2 +-
 src/Text/Pandoc/Parsing.hs                      |  2 +-
 src/Text/Pandoc/Process.hs                      |  2 +-
 src/Text/Pandoc/Readers.hs                      |  2 +-
 src/Text/Pandoc/Readers/BibTeX.hs               |  2 +-
 src/Text/Pandoc/Readers/CSV.hs                  |  2 +-
 src/Text/Pandoc/Readers/CommonMark.hs           |  2 +-
 src/Text/Pandoc/Readers/CslJson.hs              |  2 +-
 src/Text/Pandoc/Readers/DocBook.hs              |  2 +-
 src/Text/Pandoc/Readers/Docx/Combine.hs         |  2 +-
 src/Text/Pandoc/Readers/Docx/Util.hs            |  2 +-
 src/Text/Pandoc/Readers/HTML.hs                 |  2 +-
 src/Text/Pandoc/Readers/HTML/Parsing.hs         |  2 +-
 src/Text/Pandoc/Readers/HTML/Table.hs           |  4 ++--
 src/Text/Pandoc/Readers/HTML/TagCategories.hs   |  2 +-
 src/Text/Pandoc/Readers/HTML/Types.hs           |  2 +-
 src/Text/Pandoc/Readers/Ipynb.hs                |  2 +-
 src/Text/Pandoc/Readers/Jira.hs                 |  2 +-
 src/Text/Pandoc/Readers/LaTeX.hs                |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs           |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Types.hs          |  2 +-
 src/Text/Pandoc/Readers/Markdown.hs             |  2 +-
 src/Text/Pandoc/Readers/MediaWiki.hs            |  2 +-
 src/Text/Pandoc/Readers/Metadata.hs             |  2 +-
 src/Text/Pandoc/Readers/Native.hs               |  2 +-
 src/Text/Pandoc/Readers/OPML.hs                 |  2 +-
 src/Text/Pandoc/Readers/Org.hs                  |  2 +-
 src/Text/Pandoc/Readers/Org/BlockStarts.hs      |  2 +-
 src/Text/Pandoc/Readers/Org/Blocks.hs           |  2 +-
 src/Text/Pandoc/Readers/Org/DocumentTree.hs     |  2 +-
 src/Text/Pandoc/Readers/Org/ExportSettings.hs   |  2 +-
 src/Text/Pandoc/Readers/Org/Inlines.hs          |  2 +-
 src/Text/Pandoc/Readers/Org/Meta.hs             |  2 +-
 src/Text/Pandoc/Readers/Org/ParserState.hs      |  2 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs          |  2 +-
 src/Text/Pandoc/Readers/Org/Shared.hs           |  2 +-
 src/Text/Pandoc/Readers/RST.hs                  |  2 +-
 src/Text/Pandoc/Readers/Textile.hs              |  2 +-
 src/Text/Pandoc/RoffChar.hs                     |  2 +-
 src/Text/Pandoc/SelfContained.hs                |  2 +-
 src/Text/Pandoc/Shared.hs                       |  2 +-
 src/Text/Pandoc/Slides.hs                       |  2 +-
 src/Text/Pandoc/Templates.hs                    |  2 +-
 src/Text/Pandoc/Translations.hs                 |  2 +-
 src/Text/Pandoc/UTF8.hs                         |  2 +-
 src/Text/Pandoc/UUID.hs                         |  2 +-
 src/Text/Pandoc/Writers.hs                      |  2 +-
 src/Text/Pandoc/Writers/AsciiDoc.hs             |  2 +-
 src/Text/Pandoc/Writers/CommonMark.hs           |  2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs              |  2 +-
 src/Text/Pandoc/Writers/CslJson.hs              |  2 +-
 src/Text/Pandoc/Writers/Custom.hs               |  2 +-
 src/Text/Pandoc/Writers/Docbook.hs              |  2 +-
 src/Text/Pandoc/Writers/Docx.hs                 |  2 +-
 src/Text/Pandoc/Writers/Docx/StyleMap.hs        |  2 +-
 src/Text/Pandoc/Writers/DokuWiki.hs             |  2 +-
 src/Text/Pandoc/Writers/EPUB.hs                 |  2 +-
 src/Text/Pandoc/Writers/FB2.hs                  |  2 +-
 src/Text/Pandoc/Writers/HTML.hs                 |  2 +-
 src/Text/Pandoc/Writers/Ipynb.hs                |  2 +-
 src/Text/Pandoc/Writers/JATS.hs                 |  2 +-
 src/Text/Pandoc/Writers/JATS/Table.hs           |  2 +-
 src/Text/Pandoc/Writers/JATS/Types.hs           |  2 +-
 src/Text/Pandoc/Writers/Jira.hs                 |  2 +-
 src/Text/Pandoc/Writers/LaTeX.hs                |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Caption.hs        |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Notes.hs          |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Table.hs          |  2 +-
 src/Text/Pandoc/Writers/Man.hs                  |  2 +-
 src/Text/Pandoc/Writers/Markdown.hs             |  2 +-
 src/Text/Pandoc/Writers/MediaWiki.hs            |  2 +-
 src/Text/Pandoc/Writers/Ms.hs                   |  2 +-
 src/Text/Pandoc/Writers/Native.hs               |  2 +-
 src/Text/Pandoc/Writers/ODT.hs                  |  2 +-
 src/Text/Pandoc/Writers/OOXML.hs                |  2 +-
 src/Text/Pandoc/Writers/OPML.hs                 |  2 +-
 src/Text/Pandoc/Writers/Org.hs                  |  4 ++--
 src/Text/Pandoc/Writers/RST.hs                  |  2 +-
 src/Text/Pandoc/Writers/RTF.hs                  |  2 +-
 src/Text/Pandoc/Writers/Roff.hs                 |  2 +-
 src/Text/Pandoc/Writers/Shared.hs               |  2 +-
 src/Text/Pandoc/Writers/TEI.hs                  |  2 +-
 src/Text/Pandoc/Writers/Texinfo.hs              |  2 +-
 src/Text/Pandoc/Writers/Textile.hs              |  2 +-
 src/Text/Pandoc/Writers/ZimWiki.hs              |  2 +-
 src/Text/Pandoc/XML.hs                          |  2 +-
 test/Tests/Command.hs                           |  2 +-
 test/Tests/Helpers.hs                           |  2 +-
 test/Tests/Lua.hs                               |  2 +-
 test/Tests/Lua/Module.hs                        |  2 +-
 test/Tests/Old.hs                               |  2 +-
 test/Tests/Readers/Creole.hs                    |  2 +-
 test/Tests/Readers/EPUB.hs                      |  2 +-
 test/Tests/Readers/HTML.hs                      |  2 +-
 test/Tests/Readers/Jira.hs                      |  2 +-
 test/Tests/Readers/LaTeX.hs                     |  2 +-
 test/Tests/Readers/Man.hs                       |  2 +-
 test/Tests/Readers/Markdown.hs                  |  2 +-
 test/Tests/Readers/Odt.hs                       |  2 +-
 test/Tests/Readers/Org.hs                       |  2 +-
 test/Tests/Readers/Org/Block.hs                 |  2 +-
 test/Tests/Readers/Org/Block/CodeBlock.hs       |  2 +-
 test/Tests/Readers/Org/Block/Figure.hs          |  2 +-
 test/Tests/Readers/Org/Block/Header.hs          |  2 +-
 test/Tests/Readers/Org/Block/List.hs            |  2 +-
 test/Tests/Readers/Org/Block/Table.hs           |  2 +-
 test/Tests/Readers/Org/Directive.hs             |  2 +-
 test/Tests/Readers/Org/Inline.hs                |  2 +-
 test/Tests/Readers/Org/Inline/Citation.hs       |  2 +-
 test/Tests/Readers/Org/Inline/Note.hs           |  2 +-
 test/Tests/Readers/Org/Inline/Smart.hs          |  2 +-
 test/Tests/Readers/Org/Meta.hs                  |  2 +-
 test/Tests/Readers/Org/Shared.hs                |  2 +-
 test/Tests/Readers/RST.hs                       |  2 +-
 test/Tests/Readers/Txt2Tags.hs                  |  2 +-
 test/Tests/Shared.hs                            |  2 +-
 trypandoc/trypandoc.hs                          |  2 +-
 175 files changed, 200 insertions(+), 200 deletions(-)

(limited to 'src')

diff --git a/COPYRIGHT b/COPYRIGHT
index eb916cc54..9992e5680 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -1,5 +1,5 @@
 Pandoc
-Copyright (C) 2006-2020 John MacFarlane <jgm at berkeley dot edu>
+Copyright (C) 2006-2021 John MacFarlane <jgm at berkeley dot edu>
 
 With the exceptions noted below, this code is released under the [GPL],
 version 2 or later:
@@ -37,7 +37,7 @@ The modules in the `pandoc-types` repository (Text.Pandoc.Definition,
 Text.Pandoc.Builder, Text.Pandoc.Generics, Text.Pandoc.JSON,
 Text.Pandoc.Walk) are licensed under the BSD 3-clause license:
 
-Copyright (c) 2006-2020, John MacFarlane
+Copyright (c) 2006-2021, John MacFarlane
 
 All rights reserved.
 
@@ -73,7 +73,7 @@ Pandoc's templates (in `data/templates`) are dual-licensed as either
 GPL (v2 or higher, same as pandoc) or (at your option) the BSD
 3-clause license.
 
-Copyright (c) 2014--2020, John MacFarlane
+Copyright (c) 2014--2021, John MacFarlane
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Muse.hs
@@ -83,19 +83,19 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Texinfo.hs
-Copyright (C) 2008-2020 John MacFarlane and Peter Wang
+Copyright (C) 2008-2021 John MacFarlane and Peter Wang
 
 Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/OpenDocument.hs
-Copyright (C) 2008-2020 Andrea Rossato and John MacFarlane
+Copyright (C) 2008-2021 Andrea Rossato and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Writers/Org.hs
-Copyright (C) 2010-2020 Puneeth Chaganti, John MacFarlane, and
+Copyright (C) 2010-2021 Puneeth Chaganti, John MacFarlane, and
                         Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
@@ -115,7 +115,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Readers/Textile.hs
-Copyright (C) 2010-2020 Paul Rivier and John MacFarlane
+Copyright (C) 2010-2021 Paul Rivier and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -133,7 +133,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 src/Text/Pandoc/Readers/EPUB.hs
-Copyright (C) 2014-2020 Matthew Pickering and John MacFarlane
+Copyright (C) 2014-2021 Matthew Pickering and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -141,7 +141,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Readers/Org.hs
 src/Text/Pandoc/Readers/Org/*
 test/Tests/Readers/Org/*
-Copyright (C) 2014-2020 Albert Krewinkel
+Copyright (C) 2014-2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -149,7 +149,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Lua.hs
 src/Text/Pandoc/Lua/*
 test/lua/*
-Copyright (C) 2017--2020 Albert Krewinkel and John MacFarlane
+Copyright (C) 2017--2021 Albert Krewinkel and John MacFarlane
 
 Released under the GNU General Public License version 2 or later.
 
@@ -157,7 +157,7 @@ Released under the GNU General Public License version 2 or later.
 src/Text/Pandoc/Readers/Jira.hs
 src/Text/Pandoc/Writers/Jira.hs
 test/Tests/Readers/Jira.hs
-Copyright (C) 2019--2020 Albert Krewinkel
+Copyright (C) 2019--2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -169,7 +169,7 @@ Released under the GNU General Public License version 2 or later.
 
 ----------------------------------------------------------------------
 data/pandoc.lua
-Copyright (C) 2017-2020 Albert Krewinkel
+Copyright (C) 2017-2021 Albert Krewinkel
 
 Released under the GNU General Public License version 2 or later.
 
@@ -183,7 +183,7 @@ Released under the Do What the Fuck You Want To Public License.
 ------------------------------------------------------------------------
 Pandoc embeds a lua interpreter (via hslua).
 
-Copyright © 1994–2019 Lua.org, PUC-Rio.
+Copyright © 1994–2020 Lua.org, PUC-Rio.
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/MANUAL.txt b/MANUAL.txt
index 831ffb0b0..aa75a32a9 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -6329,7 +6329,7 @@ application, here are some things to keep in mind:
 
 # Authors
 
-Copyright 2006--2020 John MacFarlane (jgm@berkeley.edu). Released
+Copyright 2006--2021 John MacFarlane (jgm@berkeley.edu). Released
 under the [GPL], version 2 or greater.  This software carries no
 warranty of any kind.  (See COPYRIGHT for full copyright and
 warranty notices.) For a full list of contributors, see the file
diff --git a/README.md b/README.md
index aea55b0c4..9317438fc 100644
--- a/README.md
+++ b/README.md
@@ -236,7 +236,7 @@ opening a new issue.
 
 ## License
 
-© 2006-2020 John MacFarlane (jgm@berkeley.edu). Released under the
+© 2006-2021 John MacFarlane (jgm@berkeley.edu). Released under the
 [GPL](http://www.gnu.org/copyleft/gpl.html "GNU General Public License"),
 version 2 or greater. This software carries no warranty of any kind.
 (See COPYRIGHT for full copyright and warranty notices.)
diff --git a/README.template b/README.template
index 5eeafb9e3..53d14b584 100644
--- a/README.template
+++ b/README.template
@@ -81,7 +81,7 @@ new issue.
 License
 -------
 
-© 2006-2020 John MacFarlane (jgm@berkeley.edu). Released under the
+© 2006-2021 John MacFarlane (jgm@berkeley.edu). Released under the
 [GPL], version 2 or greater.  This software carries no warranty of
 any kind.  (See COPYRIGHT for full copyright and warranty notices.)
 
diff --git a/app/pandoc.hs b/app/pandoc.hs
index 9ed3b9e9f..162570f18 100644
--- a/app/pandoc.hs
+++ b/app/pandoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Main
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs
index 3e7b663b4..b3e67bb14 100644
--- a/benchmark/benchmark-pandoc.hs
+++ b/benchmark/benchmark-pandoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-
-Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2012-2021 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
diff --git a/benchmark/weigh-pandoc.hs b/benchmark/weigh-pandoc.hs
index 4ba6feb03..ad4c83ad7 100644
--- a/benchmark/weigh-pandoc.hs
+++ b/benchmark/weigh-pandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Main
-   Copyright   : © 2016-2019 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : © 2016-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/pandoc.cabal b/pandoc.cabal
index cce3c1a58..bb68ea402 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -4,7 +4,7 @@ version:         2.11.3.2
 build-type:      Simple
 license:         GPL-2.0-or-later
 license-file:    COPYING.md
-copyright:       (c) 2006-2020 John MacFarlane
+copyright:       (c) 2006-2021 John MacFarlane
 author:          John MacFarlane <jgm@berkeley.edu>
 maintainer:      John MacFarlane <jgm@berkeley.edu>
 bug-reports:     https://github.com/jgm/pandoc/issues
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 549aeddfb..f09dfd8c7 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index e6d5c93d4..725c76424 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.App
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 21ee47b7b..307f28b5c 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE FlexibleContexts    #-}
 {- |
    Module      : Text.Pandoc.App.CommandLineOptions
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -968,7 +968,7 @@ usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]")
 
 copyrightMessage :: String
 copyrightMessage = intercalate "\n" [
- "Copyright (C) 2006-2020 John MacFarlane. Web:  https://pandoc.org",
+ "Copyright (C) 2006-2021 John MacFarlane. Web:  https://pandoc.org",
  "This is free software; see the source for copying conditions. There is no",
  "warranty, not even for merchantability or fitness for a particular purpose." ]
 
diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
index 155b7e586..17ed30fe9 100644
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ b/src/Text/Pandoc/App/FormatHeuristics.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.App.FormatHeuristics
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 6dd19758e..0d96ab67c 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -7,7 +7,7 @@
 {-# LANGUAGE FlexibleContexts    #-}
 {- |
    Module      : Text.Pandoc.App.Opt
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 139b408cb..53c7d82ef 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE TupleSections       #-}
 {- |
    Module      : Text.Pandoc.App
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 9e9cc8d9b..01a7b624a 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Asciify
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
index b41e93125..69824aa57 100644
--- a/src/Text/Pandoc/BCP47.hs
+++ b/src/Text/Pandoc/BCP47.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.BCP47
-   Copyright   : Copyright (C) 2017–2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
index d98c85147..625feadbb 100644
--- a/src/Text/Pandoc/CSS.hs
+++ b/src/Text/Pandoc/CSS.hs
@@ -1,6 +1,6 @@
 {- |
 Module      : Text.Pandoc.CSS
-Copyright   : © 2006-2020 John MacFarlane <jgm@berkeley.edu>,
+Copyright   : © 2006-2021 John MacFarlane <jgm@berkeley.edu>,
                 2015-2016 Mauro Bieg,
                 2015      Ophir Lifshitz <hangfromthefloor@gmail.com>
 License     : GNU GPL, version 2 or above
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
index 10812644f..ec212fa9a 100644
--- a/src/Text/Pandoc/CSV.hs
+++ b/src/Text/Pandoc/CSV.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.CSV
-   Copyright   : Copyright (C) 2017–2020 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Data.hs b/src/Text/Pandoc/Data.hs
index 38682b9f9..55ed3f5bf 100644
--- a/src/Text/Pandoc/Data.hs
+++ b/src/Text/Pandoc/Data.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE TemplateHaskell #-}
 {- |
 Module      : Text.Pandoc.Data
-Copyright   : Copyright (C) 2013-2020 John MacFarlane
+Copyright   : Copyright (C) 2013-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 2c311bb49..204cf15ca 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Error
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 9865f897b..39c2a0489 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Extensions
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs
index 6d4846f98..1209ceeb7 100644
--- a/src/Text/Pandoc/Filter.hs
+++ b/src/Text/Pandoc/Filter.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Filter
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs
index 83ec9a97c..d2323fac4 100644
--- a/src/Text/Pandoc/Filter/JSON.hs
+++ b/src/Text/Pandoc/Filter/JSON.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Filter
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs
index a76c8da2f..c238e53d9 100644
--- a/src/Text/Pandoc/Filter/Lua.hs
+++ b/src/Text/Pandoc/Filter/Lua.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Filter.Lua
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Filter/Path.hs b/src/Text/Pandoc/Filter/Path.hs
index 9e5e8fa77..1d16c53b9 100644
--- a/src/Text/Pandoc/Filter/Path.hs
+++ b/src/Text/Pandoc/Filter/Path.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Filter.Path
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index ce8880f84..0bb6ed319 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Highlighting
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Image.hs b/src/Text/Pandoc/Image.hs
index e37de4e00..e0c938938 100644
--- a/src/Text/Pandoc/Image.hs
+++ b/src/Text/Pandoc/Image.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-}
 {- |
 Module      : Text.Pandoc.Image
-Copyright   : Copyright (C) 2020 John MacFarlane
+Copyright   : Copyright (C) 2020-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 9ce5c668d..098c16721 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -3,7 +3,7 @@
 {-# OPTIONS_GHC -fno-warn-type-defaults #-}
 {- |
 Module      : Text.Pandoc.ImageSize
-Copyright   : Copyright (C) 2011-2020 John MacFarlane
+Copyright   : Copyright (C) 2011-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 825fdaadb..3a3acedc7 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Logging
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs
index 39db0074a..f0e9e076b 100644
--- a/src/Text/Pandoc/Lua.hs
+++ b/src/Text/Pandoc/Lua.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017–2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/ErrorConversion.hs b/src/Text/Pandoc/Lua/ErrorConversion.hs
index 59c962723..4e6880722 100644
--- a/src/Text/Pandoc/Lua/ErrorConversion.hs
+++ b/src/Text/Pandoc/Lua/ErrorConversion.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.ErrorConversion
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index 94d7adeb2..bffe01a34 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -1,8 +1,8 @@
 {-# LANGUAGE FlexibleContexts  #-}
 {- |
 Module      : Text.Pandoc.Lua.Filter
-Copyright   : © 2012–2020 John MacFarlane,
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane,
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs
index 4285be662..29b788f04 100644
--- a/src/Text/Pandoc/Lua/Global.hs
+++ b/src/Text/Pandoc/Lua/Global.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE DeriveDataTypeable #-}
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs
index e89e9d6e0..0a5ce85cb 100644
--- a/src/Text/Pandoc/Lua/Init.hs
+++ b/src/Text/Pandoc/Lua/Init.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling.hs b/src/Text/Pandoc/Lua/Marshaling.hs
index 1254402b6..f517c7c27 100644
--- a/src/Text/Pandoc/Lua/Marshaling.hs
+++ b/src/Text/Pandoc/Lua/Marshaling.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs
index c889618c4..6485da661 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AST.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AST.hs
@@ -2,8 +2,8 @@
 {-# LANGUAGE LambdaCase           #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AST
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
index c4720aedf..82e26b963 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AnyValue
-   Copyright   : © 2017-2020 Albert Krewinkel
+   Copyright   : © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
index 636650af3..147197c5d 100644
--- a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE OverloadedStrings    #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.CommonState
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/Context.hs b/src/Text/Pandoc/Lua/Marshaling/Context.hs
index c0e7aef60..606bdcfb2 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Context.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Context.hs
@@ -1,8 +1,8 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.Context
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/List.hs b/src/Text/Pandoc/Lua/Marshaling/List.hs
index e6614400d..0446302a1 100644
--- a/src/Text/Pandoc/Lua/Marshaling/List.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/List.hs
@@ -4,8 +4,8 @@
 {-# LANGUAGE UndecidableInstances #-}
 {- |
 Module      : Text.Pandoc.Lua.Marshaling.List
-Copyright   : © 2012-2020 John MacFarlane
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
index 2cf5b8893..70bd010a0 100644
--- a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.MediaBag
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    Stability   : alpha
diff --git a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
index 74537a1dd..f698704e0 100644
--- a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables  #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.PandocError
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
index 2e45affe4..dd7bf2e61 100644
--- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs
@@ -4,8 +4,8 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.ReaderOptions
-   Copyright   : © 2012-2020 John MacFarlane
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
index 98fa1efa4..6d43039fa 100644
--- a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.SimpleTable
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Marshaling/Version.hs b/src/Text/Pandoc/Lua/Marshaling/Version.hs
index 9adb1b763..4f4ffac51 100644
--- a/src/Text/Pandoc/Lua/Marshaling/Version.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/Version.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables  #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.Version
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs
index e5a10217a..715e53885 100644
--- a/src/Text/Pandoc/Lua/Module/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.MediaBag
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index 3886568b7..a9ce3866d 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.Pandoc
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/System.hs b/src/Text/Pandoc/Lua/Module/System.hs
index 04508e461..bd35babaf 100644
--- a/src/Text/Pandoc/Lua/Module/System.hs
+++ b/src/Text/Pandoc/Lua/Module/System.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Module.System
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Types.hs b/src/Text/Pandoc/Lua/Module/Types.hs
index 999f2e588..bb4f02c3c 100644
--- a/src/Text/Pandoc/Lua/Module/Types.hs
+++ b/src/Text/Pandoc/Lua/Module/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Lua.Module.Types
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs
index 7595b9c0f..1b04021a7 100644
--- a/src/Text/Pandoc/Lua/Module/Utils.hs
+++ b/src/Text/Pandoc/Lua/Module/Utils.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Module.Utils
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs
index 4c3b9d79d..d62fb725d 100644
--- a/src/Text/Pandoc/Lua/Packages.hs
+++ b/src/Text/Pandoc/Lua/Packages.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Packages
-   Copyright   : Copyright © 2017-2020 Albert Krewinkel
+   Copyright   : Copyright © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs
index 6c3b410dd..4beac22b7 100644
--- a/src/Text/Pandoc/Lua/PandocLua.hs
+++ b/src/Text/Pandoc/Lua/PandocLua.hs
@@ -9,7 +9,7 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {- |
    Module      : Text.Pandoc.Lua.PandocLua
-   Copyright   : Copyright © 2020 Albert Krewinkel
+   Copyright   : Copyright © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs
index fbd013801..70a8a6d47 100644
--- a/src/Text/Pandoc/Lua/Util.hs
+++ b/src/Text/Pandoc/Lua/Util.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Lua.Util
-   Copyright   : © 2012–2020 John MacFarlane,
-                 © 2017-2020 Albert Krewinkel
+   Copyright   : © 2012-2021 John MacFarlane,
+                 © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Lua/Walk.hs b/src/Text/Pandoc/Lua/Walk.hs
index 695c7b44e..d6d973496 100644
--- a/src/Text/Pandoc/Lua/Walk.hs
+++ b/src/Text/Pandoc/Lua/Walk.hs
@@ -4,8 +4,8 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
 Module      : Text.Pandoc.Lua.Walk
-Copyright   : © 2012–2020 John MacFarlane,
-              © 2017-2020 Albert Krewinkel
+Copyright   : © 2012-2021 John MacFarlane,
+              © 2017-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 Stability   : alpha
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 4fe25ebe1..53c5cd018 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.MIME
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index 26f44cef0..3249bcdeb 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {- |
    Module      : Text.Pandoc.MediaBag
-   Copyright   : Copyright (C) 2014-2015, 2017–2020 John MacFarlane
+   Copyright   : Copyright (C) 2014-2015, 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index ecd65a54d..92bda36b2 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE TemplateHaskell    #-}
 {- |
    Module      : Text.Pandoc.Options
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index c4080a227..7c0082c29 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.PDF
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index fd14341ad..4c4dd531d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -9,7 +9,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs
index 866972e3f..b896feb7e 100644
--- a/src/Text/Pandoc/Process.hs
+++ b/src/Text/Pandoc/Process.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Process
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 9a069f7d0..ac70f7d4c 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Readers
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 6c96ab30a..956b9f1f7 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.BibTeX
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 45f4d88d4..2958d6180 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CSV
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 9eef498e1..150a837e4 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 377186b1e..30bb19483 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index b0846e345..d3b2dd4d3 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.DocBook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 46112af19..bcf26c4a3 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -2,7 +2,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Docx.Combine
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2020 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index a573344ff..f9c9a8e26 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,7 +1,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Docx.StyleMaps
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 8e94a0812..afc7a3e25 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index e28ebe77b..4c069e2c3 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 91639fa4c..6179ea8e7 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -3,8 +3,8 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
-   Copyright   : © 2006-2020 John MacFarlane,
-                   2020 Albert Krewinkel
+   Copyright   : © 2006-2021 John MacFarlane,
+                   2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index 4f82a1831..b7bd40fee 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.TagCategories
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs
index a94eeb828..12c519ad6 100644
--- a/src/Text/Pandoc/Readers/HTML/Types.hs
+++ b/src/Text/Pandoc/Readers/HTML/Types.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE MultiParamTypeClasses #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Types
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index a866e6ec3..70296bb6b 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 9266ce10d..5a5d0ee1e 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : © 2019-2020 Albert Krewinkel
+   Copyright   : © 2019-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 14a41a911..f49323996 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 814b2fe79..5f634818e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Lang
-   Copyright   : Copyright (C) 2018-2020 John MacFarlane
+   Copyright   : Copyright (C) 2018-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index e92ed387c..12a3ba2f6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ScopedTypeVariables   #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Parsing
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index a017a2afb..f8c214318 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5888bf095..5c3a21bb7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index d712b1120..cdb746c67 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.MediaWiki
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index a64b130e5..927291776 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Readers.Metadata
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 42843381a..9c8bc0374 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Native
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 903cdf4a1..5b8996025 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 851aec103..afeb27a87 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 8f7cac6ea..14233569c 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.BlockStarts
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 17e3ff986..6bd046e04 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Blocks
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 3b363270c..2dcbecb1d 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.DocumentTree
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index 9399ebd54..401e1bd8f 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ExportSettings
-   Copyright   : © 2016–2020 Albert Krewinkel
+   Copyright   : © 2016-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index b819a34a3..68c2ba5e0 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Inlines
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 4864d9478..6621822a2 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Meta
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 1e4799e7b..abe8a9ebf 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.ParserState
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index bce71c24d..d33920d47 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Readers.Org.Parsing
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 7f72077a4..ad7c65060 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Shared
-   Copyright   : Copyright (C) 2014-2020 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index eeb3d1389..707af905f 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Readers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 6691d8381..4991c6308 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -3,7 +3,7 @@
 {- |
    Module      : Text.Pandoc.Readers.Textile
    Copyright   : Copyright (C) 2010-2012 Paul Rivier
-                               2010-2020 John MacFarlane
+                               2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Paul Rivier <paul*rivier#demotera*com>
diff --git a/src/Text/Pandoc/RoffChar.hs b/src/Text/Pandoc/RoffChar.hs
index 67e8b9cd5..d1c38204f 100644
--- a/src/Text/Pandoc/RoffChar.hs
+++ b/src/Text/Pandoc/RoffChar.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.RoffChar
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 061361aba..c9e20cad0 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE TupleSections     #-}
 {- |
    Module      : Text.Pandoc.SelfContained
-   Copyright   : Copyright (C) 2011-2020 John MacFarlane
+   Copyright   : Copyright (C) 2011-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 4853621c8..b908a0172 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE OverloadedStrings     #-}
 {- |
    Module      : Text.Pandoc.Shared
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
index 9ea0389c9..a3e550b1f 100644
--- a/src/Text/Pandoc/Slides.hs
+++ b/src/Text/Pandoc/Slides.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Slides
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 0c10b258d..e83f26329 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Templates
-   Copyright   : Copyright (C) 2009-2020 John MacFarlane
+   Copyright   : Copyright (C) 2009-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs
index 200d756f6..0c7d7ab23 100644
--- a/src/Text/Pandoc/Translations.hs
+++ b/src/Text/Pandoc/Translations.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Translations
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 4621e1765..b583dbbdb 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.UTF8
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
index ca0df2d0b..12579be90 100644
--- a/src/Text/Pandoc/UUID.hs
+++ b/src/Text/Pandoc/UUID.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.UUID
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index 0654c2d85..49531d924 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -4,7 +4,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 0a312d1d1..b4ef7c8b9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.AsciiDoc
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs
index 66ded218f..8733b7149 100644
--- a/src/Text/Pandoc/Writers/CommonMark.hs
+++ b/src/Text/Pandoc/Writers/CommonMark.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Writers.CommonMark
-   Copyright   : Copyright (C) 2015-2020 John MacFarlane
+   Copyright   : Copyright (C) 2015-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 0a6313513..4d44842e2 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.ConTeXt
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index 08310de65..13b95586b 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.CslJson
-   Copyright   : Copyright (C) 2020 John MacFarlane
+   Copyright   : Copyright (C) 2020-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs
index 8da611b61..58c4bb5be 100644
--- a/src/Text/Pandoc/Writers/Custom.hs
+++ b/src/Text/Pandoc/Writers/Custom.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings  #-}
 {- |
    Module      : Text.Pandoc.Writers.Custom
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index affa0de04..a6776608d 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 65946ec88..a99e13a85 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -7,7 +7,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.Docx
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
index c3c54c7e5..04868eaad 100644
--- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs
+++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs
@@ -2,7 +2,7 @@
 {- |
    Module : Text.Pandoc.Writers.Docx.StyleMap
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
-                   2014-2020 John MacFarlane <jgm@berkeley.edu>,
+                   2014-2021 John MacFarlane <jgm@berkeley.edu>,
                    2015-2019 Nikolay Yakimov <root@livid.pp.ru>
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 90ec6824f..7df47c912 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.DokuWiki
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Clare Macrae <clare.macrae@googlemail.com>
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index c8f5ff60d..3e3fd8fd6 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Writers.EPUB
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 701ff3d9b..25b1f28d1 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -3,7 +3,7 @@
 {- |
 Module      : Text.Pandoc.Writers.FB2
 Copyright   : Copyright (C) 2011-2012 Sergey Astanin
-                            2012-2020 John MacFarlane
+                            2012-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 
 Maintainer  : John MacFarlane
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 0b051c8f0..26df0325e 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -6,7 +6,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.HTML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs
index d01d5a7e5..2613851c5 100644
--- a/src/Text/Pandoc/Writers/Ipynb.hs
+++ b/src/Text/Pandoc/Writers/Ipynb.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Writers.Ipynb
-   Copyright   : Copyright (C) 2019-2020 John MacFarlane
+   Copyright   : Copyright (C) 2019-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index 7058a4557..e8d93b8d5 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs
index a4d42832d..465480f59 100644
--- a/src/Text/Pandoc/Writers/JATS/Table.hs
+++ b/src/Text/Pandoc/Writers/JATS/Table.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE TupleSections #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS.Table
-   Copyright   : © 2020 Albert Krewinkel
+   Copyright   : © 2020-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs
index 8162f3bc0..54ed4a8bd 100644
--- a/src/Text/Pandoc/Writers/JATS/Types.hs
+++ b/src/Text/Pandoc/Writers/JATS/Types.hs
@@ -1,6 +1,6 @@
 {- |
    Module      : Text.Pandoc.Writers.JATS.Types
-   Copyright   : Copyright (C) 2017-2020 John MacFarlane
+   Copyright   : Copyright (C) 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index 6bc048a61..c21085a4f 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE PatternGuards #-}
 {- |
    Module      : Text.Pandoc.Writers.Jira
-   Copyright   : © 2010-2020 Albert Krewinkel, John MacFarlane
+   Copyright   : © 2010-2021 Albert Krewinkel, John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 2281290c0..df922e17b 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
index 61ca41fb1..7b9ce186f 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Caption.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Caption
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
index 216a7bfc3..f225ef0c5 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Notes.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Notes
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index 8411d9f80..6f8386937 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Table
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 4eb0db042..edb70f53e 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Man
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 1b5c00468..d62727d90 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -5,7 +5,7 @@
 {-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.Markdown
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index d1912caa6..5029be69f 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.MediaWiki
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 0fc333bc2..48395c420 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Ms
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
index 4d4dfca15..9c2ce805d 100644
--- a/src/Text/Pandoc/Writers/Native.hs
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Native
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index e41fb7176..05dfad5eb 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.ODT
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index ac991b594..3ac007f4e 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.OOXML
-   Copyright   : Copyright (C) 2012-2020 John MacFarlane
+   Copyright   : Copyright (C) 2012-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs
index 810a94775..8c9229fc0 100644
--- a/src/Text/Pandoc/Writers/OPML.hs
+++ b/src/Text/Pandoc/Writers/OPML.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE CPP               #-}
 {- |
    Module      : Text.Pandoc.Writers.OPML
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 2af93017d..43ebf1807 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -3,8 +3,8 @@
 {- |
    Module      : Text.Pandoc.Writers.Org
    Copyright   : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com>
-                   2010-2020 John MacFarlane <jgm@berkeley.edu>
-                   2016-2020 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+                   2010-2021 John MacFarlane <jgm@berkeley.edu>
+                   2016-2021 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 8beeef46a..a3be1d723 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.RST
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index e3966ed07..cf27011c2 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
    Module      : Text.Pandoc.Writers.RTF
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs
index 00b027cc9..6af56242f 100644
--- a/src/Text/Pandoc/Writers/Roff.hs
+++ b/src/Text/Pandoc/Writers/Roff.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Roff
-   Copyright   : Copyright (C) 2007-2020 John MacFarlane
+   Copyright   : Copyright (C) 2007-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 129e45e9d..fc3f8ff3a 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Shared
-   Copyright   : Copyright (C) 2013-2020 John MacFarlane
+   Copyright   : Copyright (C) 2013-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index a9ee5eece..b926c48a1 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE PatternGuards     #-}
 {- |
    Module      : Text.Pandoc.Writers.Docbook
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index c6debd9ce..53da70f84 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.Texinfo
-   Copyright   : Copyright (C) 2008-2020 John MacFarlane
+   Copyright   : Copyright (C) 2008-2021 John MacFarlane
                                2012 Peter Wang
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 61ddb7497..03d030477 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.Textile
-   Copyright   : Copyright (C) 2010-2020 John MacFarlane
+   Copyright   : Copyright (C) 2010-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index 902b093d3..9e45f0417 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Writers.ZimWiki
-   Copyright   : © 2008-2020 John MacFarlane,
+   Copyright   : © 2008-2021 John MacFarlane,
                    2017-2019 Alex Ivkin
    License     : GNU GPL, version 2 or above
 
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 4b71d7b69..c4e3ed1e7 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.XML
-   Copyright   : Copyright (C) 2006-2020 John MacFarlane
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs
index 522c4b3a1..b3e2a0509 100644
--- a/test/Tests/Command.hs
+++ b/test/Tests/Command.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Command
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs
index c9ee6d206..21898d10e 100644
--- a/test/Tests/Helpers.hs
+++ b/test/Tests/Helpers.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE FlexibleInstances    #-}
 {- |
    Module      : Tests.Helpers
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs
index 853375327..1dfbbd053 100644
--- a/test/Tests/Lua.hs
+++ b/test/Tests/Lua.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Lua
-   Copyright   : © 2017-2020 Albert Krewinkel
+   Copyright   : © 2017-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Lua/Module.hs b/test/Tests/Lua/Module.hs
index e2570e87f..d88633cf8 100644
--- a/test/Tests/Lua/Module.hs
+++ b/test/Tests/Lua/Module.hs
@@ -1,6 +1,6 @@
 {- |
 Module      : Tests.Lua.Module
-Copyright   : © 2019-2020 Albert Krewinkel
+Copyright   : © 2019-2021 Albert Krewinkel
 License     : GNU GPL, version 2 or above
 
 Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index cf0396d0a..638620a36 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Old
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/test/Tests/Readers/Creole.hs b/test/Tests/Readers/Creole.hs
index 15b826460..1fc0e62d7 100644
--- a/test/Tests/Readers/Creole.hs
+++ b/test/Tests/Readers/Creole.hs
@@ -3,7 +3,7 @@
 {- |
    Module      : Tests.Readers.Creole
    Copyright   : © 2017 Sascha Wilde
-                   2017-2020 John MacFarlane
+                   2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Sascha Wilde <wilde@sha-bang.de>
diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs
index 700d6723d..3c75dd08d 100644
--- a/test/Tests/Readers/EPUB.hs
+++ b/test/Tests/Readers/EPUB.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Readers.EPUB
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.eu>
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index e4c681421..578c76860 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.HTML
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs
index 32b8ecb7c..bf78fe1fe 100644
--- a/test/Tests/Readers/Jira.hs
+++ b/test/Tests/Readers/Jira.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Readers.Jira
-   Copyright   : © 2019-2020 Albert Krewinel
+   Copyright   : © 2019-2021 Albert Krewinel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index c50c91ca1..2a52ffd18 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.LaTeX
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs
index f591aa00d..4f3ab5a28 100644
--- a/test/Tests/Readers/Man.hs
+++ b/test/Tests/Readers/Man.hs
@@ -2,7 +2,7 @@
 {- |
    Module      : Tests.Readers.Man
    Copyright   : © 2018-2019 Yan Pas <yanp.bugz@gmail.com>,
-                   2018-2020 John MacFarlane
+                   2018-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 3af5e2a94..18f909583 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Markdown
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs
index 14062c884..f5e427ba2 100644
--- a/test/Tests/Readers/Odt.hs
+++ b/test/Tests/Readers/Odt.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Odt
-   Copyright   : © 2015-2020 John MacFarlane
+   Copyright   : © 2015-2021 John MacFarlane
                    2015 Martin Linnemann
    License     : GNU GPL, version 2 or above
 
diff --git a/test/Tests/Readers/Org.hs b/test/Tests/Readers/Org.hs
index d4f7bb6dc..290bb603e 100644
--- a/test/Tests/Readers/Org.hs
+++ b/test/Tests/Readers/Org.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Shared
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block.hs b/test/Tests/Readers/Org/Block.hs
index 995bd0316..2ce07c4bb 100644
--- a/test/Tests/Readers/Org/Block.hs
+++ b/test/Tests/Readers/Org/Block.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs
index 2648a6e1f..d40c3bc1d 100644
--- a/test/Tests/Readers/Org/Block/CodeBlock.hs
+++ b/test/Tests/Readers/Org/Block/CodeBlock.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.CodeBlock
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Figure.hs b/test/Tests/Readers/Org/Block/Figure.hs
index 56ddde9d8..8822f5b03 100644
--- a/test/Tests/Readers/Org/Block/Figure.hs
+++ b/test/Tests/Readers/Org/Block/Figure.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Figure
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Header.hs b/test/Tests/Readers/Org/Block/Header.hs
index d38d26efb..887055451 100644
--- a/test/Tests/Readers/Org/Block/Header.hs
+++ b/test/Tests/Readers/Org/Block/Header.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Header
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs
index 15373b3b3..ac03c583b 100644
--- a/test/Tests/Readers/Org/Block/List.hs
+++ b/test/Tests/Readers/Org/Block/List.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Header
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Block/Table.hs b/test/Tests/Readers/Org/Block/Table.hs
index 31c994d3f..cb38fcc12 100644
--- a/test/Tests/Readers/Org/Block/Table.hs
+++ b/test/Tests/Readers/Org/Block/Table.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Table
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Directive.hs b/test/Tests/Readers/Org/Directive.hs
index 727a29658..00cb9762b 100644
--- a/test/Tests/Readers/Org/Directive.hs
+++ b/test/Tests/Readers/Org/Directive.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Directive
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs
index b5d0caa26..13e9fef21 100644
--- a/test/Tests/Readers/Org/Inline.hs
+++ b/test/Tests/Readers/Org/Inline.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs
index 792e4559c..87bb3ca75 100644
--- a/test/Tests/Readers/Org/Inline/Citation.hs
+++ b/test/Tests/Readers/Org/Inline/Citation.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Citation
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Note.hs b/test/Tests/Readers/Org/Inline/Note.hs
index 5924e69cc..20157d2ae 100644
--- a/test/Tests/Readers/Org/Inline/Note.hs
+++ b/test/Tests/Readers/Org/Inline/Note.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Note
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Inline/Smart.hs b/test/Tests/Readers/Org/Inline/Smart.hs
index e9309108e..7fde380af 100644
--- a/test/Tests/Readers/Org/Inline/Smart.hs
+++ b/test/Tests/Readers/Org/Inline/Smart.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Smart
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index bc167f2a5..b30b8949a 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Meta
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/Org/Shared.hs b/test/Tests/Readers/Org/Shared.hs
index 184adee44..4d0848575 100644
--- a/test/Tests/Readers/Org/Shared.hs
+++ b/test/Tests/Readers/Org/Shared.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Shared
-   Copyright   : © 2014-2020 Albert Krewinkel
+   Copyright   : © 2014-2021 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
    Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs
index bd9897ebc..68241b7f9 100644
--- a/test/Tests/Readers/RST.hs
+++ b/test/Tests/Readers/RST.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Tests.Readers.RST
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
diff --git a/test/Tests/Readers/Txt2Tags.hs b/test/Tests/Readers/Txt2Tags.hs
index 989b7f673..62f336690 100644
--- a/test/Tests/Readers/Txt2Tags.hs
+++ b/test/Tests/Readers/Txt2Tags.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Txt2Tags
-   Copyright   : © 2014-2020 John MacFarlane,
+   Copyright   : © 2014-2021 John MacFarlane,
                  © 2014 Matthew Pickering
    License     : GNU GPL, version 2 or above
 
diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs
index a23edf452..72a59fec0 100644
--- a/test/Tests/Shared.hs
+++ b/test/Tests/Shared.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Shared
-   Copyright   : © 2006-2020 John MacFarlane
+   Copyright   : © 2006-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs
index 9383c92ce..9a53aa18c 100644
--- a/trypandoc/trypandoc.hs
+++ b/trypandoc/trypandoc.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Main
-   Copyright   : © 2014-2020 John MacFarlane <jgm@berkeley.edu>
+   Copyright   : © 2014-2021 John MacFarlane <jgm@berkeley.edu>
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
-- 
cgit v1.2.3


From fe1378227b24fa6a8661b2e0d377b808eb270c52 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 9 Jan 2021 13:35:07 +0100
Subject: Org reader: allow multiple pipe chars in todo sequences

Additional pipe chars, used to separate "action" state from "no further
action" states, are ignored. E.g., for the following sequence, both
`DONE` and `FINISHED` are states with no further action required.

    #+TODO: UNFINISHED | DONE | FINISHED

Previously, parsing of the todo sequence failed if multiple pipe chars
were included.

Closes: #7014
---
 src/Text/Pandoc/Readers/Org/Meta.hs | 14 ++++++++++----
 test/Tests/Readers/Org/Meta.hs      | 10 ++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 6621822a2..a1b21046a 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -239,7 +239,7 @@ lineOfInlines = do
 todoSequence :: Monad m => OrgParser m TodoSequence
 todoSequence = try $ do
   todoKws <- todoKeywords
-  doneKws <- optionMaybe $ todoDoneSep *> todoKeywords
+  doneKws <- optionMaybe $ todoDoneSep *> doneKeywords
   newline
   -- There must be at least one DONE keyword. The last TODO keyword is
   -- taken if necessary.
@@ -250,11 +250,17 @@ todoSequence = try $ do
                     (x:xs) -> return $ keywordsToSequence (reverse xs) [x]
 
  where
+   todoKeyword :: Monad m => OrgParser m Text
+   todoKeyword = many1Char nonspaceChar <* skipSpaces
+
    todoKeywords :: Monad m => OrgParser m [Text]
    todoKeywords = try $
-     let keyword = many1Char nonspaceChar <* skipSpaces
-         endOfKeywords = todoDoneSep <|> void newline
-     in manyTill keyword (lookAhead endOfKeywords)
+     let endOfKeywords = todoDoneSep <|> void newline
+     in manyTill todoKeyword (lookAhead endOfKeywords)
+
+   doneKeywords :: Monad m => OrgParser m [Text]
+   doneKeywords = try $
+     manyTill (todoKeyword <* optional todoDoneSep) (lookAhead newline)
 
    todoDoneSep :: Monad m => OrgParser m ()
    todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index b30b8949a..3c50f891b 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -116,6 +116,16 @@ tests =
       "#+LANGUAGE: de-DE" =?>
       Pandoc (setMeta "lang" (MetaString "de-DE") nullMeta) mempty
 
+    , testGroup "Todo sequences"
+      [ "not included in document" =:
+        "#+todo: WAITING | FINISHED" =?>
+        Pandoc mempty mempty
+
+      , "can contain multiple pipe characters" =:
+        "#+todo: UNFINISHED | RESEARCH | NOTES | CHART\n" =?>
+        Pandoc mempty mempty
+      ]
+
     , testGroup "LaTeX"
       [ "LATEX_HEADER" =:
         "#+latex_header: \\usepackage{tikz}" =?>
-- 
cgit v1.2.3


From 15e33b33b4df67f989062a356e009ffc596bbc32 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 10 Jan 2021 10:15:30 -0800
Subject: T.P.Citeproc: refactor and export `getReferences`.

See #7016.
---
 src/Text/Pandoc/Citeproc.hs | 79 +++++++++++++++++++++++++++++----------------
 1 file changed, 51 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index e87ddcbcd..bd54ca2bf 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -5,7 +5,9 @@
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Citeproc
-  ( processCitations )
+  ( processCitations,
+    getReferences
+  )
 where
 
 import Citeproc
@@ -46,7 +48,7 @@ import Safe (lastMay, initSafe)
 -- import Debug.Trace as Trace (trace, traceShowId)
 
 
-processCitations :: PandocMonad m => Pandoc -> m Pandoc
+processCitations  :: PandocMonad m => Pandoc -> m Pandoc
 processCitations (Pandoc meta bs) = do
   let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta)
                 >>= metaValueToText
@@ -96,31 +98,9 @@ processCitations (Pandoc meta bs) = do
                ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>=
                  metaValueToText)
   let locale = Citeproc.mergeLocales mblang style
-  let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs
-      getCiteId _ = mempty
-  let metanocites = lookupMeta "nocite" meta
-  let meta' = deleteMeta "nocite" meta
-  let nocites = maybe mempty (query getCiteId) metanocites
-  let citeIds = query getCiteId (Pandoc meta bs)
-  let idpred = if "*" `Set.member` nocites
-                  then const True
-                  else (`Set.member` citeIds)
-  let inlineRefs = case lookupMeta "references" meta of
-                    Just (MetaList rs) -> mapMaybe metaValueToReference rs
-                    _                  -> []
-  externalRefs <- case lookupMeta "bibliography" meta of
-                    Just (MetaList xs) ->
-                      mconcat <$>
-                        mapM (getRefsFromBib locale idpred)
-                          (mapMaybe metaValueToText xs)
-                    Just x ->
-                      case metaValueToText x of
-                        Just fp -> getRefsFromBib locale idpred fp
-                        Nothing -> return []
-                    Nothing -> return []
-  let refs = map (linkifyVariables . legacyDateRanges)
-                 (externalRefs ++ inlineRefs)
-                 -- note that inlineRefs can override externalRefs
+
+  refs <- getReferences (Just locale) (Pandoc meta bs)
+
   let otherIdsMap = foldr (\ref m ->
                              case T.words . extractText <$>
                                   M.lookup "other-ids"
@@ -130,8 +110,10 @@ processCitations (Pandoc meta bs) = do
                                   (\id' ->
                                     M.insert id' (referenceId ref)) m ids)
                           M.empty refs
-  -- TODO: issue warning if no refs defined
+  let meta' = deleteMeta "nocite" meta
   let citations = getCitations locale otherIdsMap $ Pandoc meta' bs
+
+
   let linkCites = maybe False truish $ lookupMeta "link-citations" meta
   let opts = defaultCiteprocOptions{ linkCitations = linkCites }
   let result = Citeproc.citeproc opts style (localeLanguage locale)
@@ -161,6 +143,7 @@ processCitations (Pandoc meta bs) = do
                       B.toList . movePunctuationInsideQuotes .  B.fromList
                     _ -> id
 
+  let metanocites = lookupMeta "nocite" meta
   let Pandoc meta'' bs' =
          maybe id (setMeta "nocite") metanocites .
          walk (map capitalizeNoteCitation .
@@ -172,6 +155,46 @@ processCitations (Pandoc meta bs) = do
          $ insertRefs refkvs classes meta''
             (walk fixLinks $ B.toList bibs) bs'
 
+-- | Get references defined inline in the metadata and via an external
+-- bibliography.  Only references that are actually cited in the document
+-- (either with a genuine citation or with `nocite`) are returned.
+-- URL variables are converted to links.
+getReferences :: PandocMonad m
+              => Maybe Locale -> Pandoc -> m [Reference Inlines]
+getReferences mblocale (Pandoc meta bs) = do
+  let lang = maybe (Lang "en" (Just "US")) (parseLang . stringify) $
+             lookupMeta "lang" meta
+  let locale = case mblocale of
+                Just l  -> l
+                Nothing -> either mempty id $ getLocale lang
+
+  let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs
+      getCiteId _ = mempty
+  let metanocites = lookupMeta "nocite" meta
+  let nocites = maybe mempty (query getCiteId) metanocites
+  let citeIds = query getCiteId (Pandoc meta bs)
+  let idpred = if "*" `Set.member` nocites
+                  then const True
+                  else (`Set.member` citeIds)
+  let inlineRefs = case lookupMeta "references" meta of
+                    Just (MetaList rs) -> mapMaybe metaValueToReference rs
+                    _                  -> []
+  externalRefs <- case lookupMeta "bibliography" meta of
+                    Just (MetaList xs) ->
+                      mconcat <$>
+                        mapM (getRefsFromBib locale idpred)
+                          (mapMaybe metaValueToText xs)
+                    Just x ->
+                      case metaValueToText x of
+                        Just fp -> getRefsFromBib locale idpred fp
+                        Nothing -> return []
+                    Nothing -> return []
+  return $ map (linkifyVariables . legacyDateRanges)
+               (externalRefs ++ inlineRefs)
+            -- note that inlineRefs can override externalRefs
+
+
+
 -- If we have a span.csl-left-margin followed by span.csl-right-inline,
 -- we insert a space. This ensures that they will be separated by a space,
 -- even in formats that don't have special handling for the display spans.
-- 
cgit v1.2.3


From 402d984bc53773e1876000d0d9857b053c002904 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 10 Jan 2021 10:25:58 -0800
Subject: T.P.Citeproc: factor out getLang.

---
 src/Text/Pandoc/Citeproc.hs | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index bd54ca2bf..e81f93cdd 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -94,9 +94,8 @@ processCitations (Pandoc meta bs) = do
     case styleRes of
        Left err    -> throwError $ PandocAppError $ prettyCiteprocError err
        Right style -> return style{ styleAbbreviations = mbAbbrevs }
-  mblang <- maybe (return Nothing) bcp47LangToIETF
-               ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>=
-                 metaValueToText)
+
+  mblang <- getLang meta
   let locale = Citeproc.mergeLocales mblang style
 
   refs <- getReferences (Just locale) (Pandoc meta bs)
@@ -155,6 +154,12 @@ processCitations (Pandoc meta bs) = do
          $ insertRefs refkvs classes meta''
             (walk fixLinks $ B.toList bibs) bs'
 
+-- Retrieve citeproc lang based on metadata.
+getLang :: PandocMonad m => Meta -> m (Maybe Lang)
+getLang meta = maybe (return Nothing) bcp47LangToIETF
+                 ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>=
+                   metaValueToText)
+
 -- | Get references defined inline in the metadata and via an external
 -- bibliography.  Only references that are actually cited in the document
 -- (either with a genuine citation or with `nocite`) are returned.
@@ -162,11 +167,13 @@ processCitations (Pandoc meta bs) = do
 getReferences :: PandocMonad m
               => Maybe Locale -> Pandoc -> m [Reference Inlines]
 getReferences mblocale (Pandoc meta bs) = do
-  let lang = maybe (Lang "en" (Just "US")) (parseLang . stringify) $
-             lookupMeta "lang" meta
-  let locale = case mblocale of
-                Just l  -> l
-                Nothing -> either mempty id $ getLocale lang
+  locale <- case mblocale of
+                Just l  -> return l
+                Nothing -> do
+                  mblang <- getLang meta
+                  case mblang of
+                    Just lang -> return $ either mempty id $ getLocale lang
+                    Nothing   -> return mempty
 
   let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs
       getCiteId _ = mempty
-- 
cgit v1.2.3


From d98ec4feb80ae0a9bf192c9b14aaef033ba7fd6e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 10 Jan 2021 11:48:53 -0800
Subject: T.P.Citeproc: factor out and export `getStyle`.

---
 src/Text/Pandoc/Citeproc.hs | 100 ++++++++++++++++++++++++--------------------
 1 file changed, 55 insertions(+), 45 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index e81f93cdd..9649c6971 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -6,7 +6,8 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Citeproc
   ( processCitations,
-    getReferences
+    getReferences,
+    getStyle
   )
 where
 
@@ -50,50 +51,7 @@ import Safe (lastMay, initSafe)
 
 processCitations  :: PandocMonad m => Pandoc -> m Pandoc
 processCitations (Pandoc meta bs) = do
-  let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta)
-                >>= metaValueToText
-
-  let getFile defaultExtension fp = do
-        oldRp <- getResourcePath
-        mbUdd <- getUserDataDir
-        setResourcePath $ oldRp ++ maybe []
-                                   (\u -> [u <> "/csl",
-                                           u <> "/csl/dependent"]) mbUdd
-        let fp' = if T.any (=='.') fp || "data:" `T.isPrefixOf` fp
-                     then fp
-                     else fp <> defaultExtension
-        (result, _) <- fetchItem fp'
-        setResourcePath oldRp
-        return result
-
-  let getCslDefault = readDataFile "default.csl"
-
-  cslContents <- UTF8.toText <$> maybe getCslDefault (getFile ".csl") cslfile
-
-  let abbrevFile = lookupMeta "citation-abbreviations" meta >>= metaValueToText
-
-  mbAbbrevs <- case abbrevFile of
-                 Nothing -> return Nothing
-                 Just fp -> do
-                   rawAbbr <- getFile ".json" fp
-                   case eitherDecode (L.fromStrict rawAbbr) of
-                     Left err -> throwError $ PandocCiteprocError $
-                                 CiteprocParseError $
-                                 "Could not parse abbreviations file " <> fp
-                                 <> "\n" <> T.pack err
-                     Right abbr -> return $ Just abbr
-
-  let getParentStyle url = do
-        -- first, try to retrieve the style locally, then use HTTP.
-        let basename = T.takeWhileEnd (/='/') url
-        UTF8.toText <$>
-          catchError (getFile ".csl" basename) (\_ -> fst <$> fetchItem url)
-
-  styleRes <- Citeproc.parseStyle getParentStyle cslContents
-  style <-
-    case styleRes of
-       Left err    -> throwError $ PandocAppError $ prettyCiteprocError err
-       Right style -> return style{ styleAbbreviations = mbAbbrevs }
+  style <- getStyle (Pandoc meta bs)
 
   mblang <- getLang meta
   let locale = Citeproc.mergeLocales mblang style
@@ -154,6 +112,58 @@ processCitations (Pandoc meta bs) = do
          $ insertRefs refkvs classes meta''
             (walk fixLinks $ B.toList bibs) bs'
 
+-- | Retrieve the CSL style specified by the csl or citation-style
+-- metadata field in a pandoc document, or the default CSL style
+-- if none is specified.  Retrieve the parent style
+-- if the style is a dependent style.  Add abbreviations defined
+-- in an abbreviation file if one has been specified.
+getStyle :: PandocMonad m => Pandoc -> m (Style Inlines)
+getStyle (Pandoc meta _) = do
+  let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta)
+                >>= metaValueToText
+
+  let getFile defaultExtension fp = do
+        oldRp <- getResourcePath
+        mbUdd <- getUserDataDir
+        setResourcePath $ oldRp ++ maybe []
+                                   (\u -> [u <> "/csl",
+                                           u <> "/csl/dependent"]) mbUdd
+        let fp' = if T.any (=='.') fp || "data:" `T.isPrefixOf` fp
+                     then fp
+                     else fp <> defaultExtension
+        (result, _) <- fetchItem fp'
+        setResourcePath oldRp
+        return result
+
+  let getCslDefault = readDataFile "default.csl"
+
+  cslContents <- UTF8.toText <$> maybe getCslDefault (getFile ".csl") cslfile
+
+  let abbrevFile = lookupMeta "citation-abbreviations" meta >>= metaValueToText
+
+  mbAbbrevs <- case abbrevFile of
+                 Nothing -> return Nothing
+                 Just fp -> do
+                   rawAbbr <- getFile ".json" fp
+                   case eitherDecode (L.fromStrict rawAbbr) of
+                     Left err -> throwError $ PandocCiteprocError $
+                                 CiteprocParseError $
+                                 "Could not parse abbreviations file " <> fp
+                                 <> "\n" <> T.pack err
+                     Right abbr -> return $ Just abbr
+
+  let getParentStyle url = do
+        -- first, try to retrieve the style locally, then use HTTP.
+        let basename = T.takeWhileEnd (/='/') url
+        UTF8.toText <$>
+          catchError (getFile ".csl" basename) (\_ -> fst <$> fetchItem url)
+
+  styleRes <- Citeproc.parseStyle getParentStyle cslContents
+  case styleRes of
+     Left err    -> throwError $ PandocAppError $ prettyCiteprocError err
+     Right style -> return style{ styleAbbreviations = mbAbbrevs }
+
+
 -- Retrieve citeproc lang based on metadata.
 getLang :: PandocMonad m => Meta -> m (Maybe Lang)
 getLang meta = maybe (return Nothing) bcp47LangToIETF
-- 
cgit v1.2.3


From e741c7f5533d60971e98c6ced477d998335ef0b5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 10 Jan 2021 12:49:53 -0800
Subject: Fix infinite HTTP requests when writing epubs from URL source.

Due to a bug in code added to avoid overwriting the cover image
if it had the form `fileX.YYY`, pandoc made an endless sequence
of HTTP requests when writing epub with input from a URL.

Closes #7013.
---
 src/Text/Pandoc/Writers/EPUB.hs | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 3e3fd8fd6..783f190f5 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -465,7 +465,12 @@ pandocToEPUB version opts doc = do
                 case epubCoverImage metadata of
                      Nothing   -> return ([],[])
                      Just img  -> do
-                       let coverImage = takeFileName img
+                       let fp = takeFileName img
+                       mediaPaths <- gets (map (fst . snd) . stMediaPaths)
+                       coverImageName <-  -- see #4206
+                            if ("media/" <> fp) `elem` mediaPaths
+                               then getMediaNextNewName (takeExtension fp)
+                               else return fp
                        imgContent <- lift $ P.readFileLazy img
                        (coverImageWidth, coverImageHeight) <-
                              case imageSize opts' (B.toStrict imgContent) of
@@ -478,7 +483,7 @@ pandocToEPUB version opts doc = do
                                     ("coverpage", toVal' "true"),
                                     ("pagetitle", toVal $
                                       escapeStringForXML $ TS.pack plainTitle),
-                                    ("cover-image", toVal' coverImage),
+                                    ("cover-image", toVal' coverImageName),
                                     ("cover-image-width", toVal' $
                                        show coverImageWidth),
                                     ("cover-image-height", toVal' $
@@ -486,7 +491,7 @@ pandocToEPUB version opts doc = do
                                      cssvars True <> vars }
                             (Pandoc meta [])
                        coverEntry <- mkEntry "text/cover.xhtml" cpContent
-                       coverImageEntry <- mkEntry ("media/" ++ coverImage)
+                       coverImageEntry <- mkEntry ("media/" ++ coverImageName)
                                              imgContent
                        return ( [ coverEntry ]
                               , [ coverImageEntry ] )
@@ -1076,8 +1081,7 @@ getMediaNextNewName :: PandocMonad m => String -> E m String
 getMediaNextNewName ext = do
   nextId <- gets stMediaNextId
   modify $ \st -> st { stMediaNextId = nextId + 1 }
-  let nextName = "file" ++ show nextId ++ ext
-  (P.fetchItem (TS.pack nextName) >> getMediaNextNewName ext) `catchError` const (return nextName)
+  return $ "file" ++ show nextId ++ ext
 
 isHtmlFormat :: Format -> Bool
 isHtmlFormat (Format "html") = True
-- 
cgit v1.2.3


From 68fa43799963fff11a980d5d3959184c3d34a723 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 11 Jan 2021 00:35:48 +0100
Subject: JATS writer: fix citations (#7018)

* JATS writer: keep code lines at 80 chars or below

* JATS writer: fix citations
---
 COPYRIGHT                       |  11 +--
 data/jats.csl                   | 203 ----------------------------------------
 pandoc.cabal                    |   2 -
 src/Text/Pandoc/App.hs          |  16 +---
 src/Text/Pandoc/Writers/JATS.hs |  33 ++++---
 test/command/7016.md            |  48 ++++++++++
 6 files changed, 72 insertions(+), 241 deletions(-)
 delete mode 100644 data/jats.csl
 create mode 100644 test/command/7016.md

(limited to 'src')

diff --git a/COPYRIGHT b/COPYRIGHT
index 9992e5680..a6e3a897c 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -181,7 +181,7 @@ http://github.com/paulrouget/dzslides
 Released under the Do What the Fuck You Want To Public License.
 
 ------------------------------------------------------------------------
-Pandoc embeds a lua interpreter (via hslua).
+Pandoc embeds a Lua interpreter (via hslua).
 
 Copyright © 1994–2020 Lua.org, PUC-Rio.
 
@@ -203,12 +203,3 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-------------------------------------------------------------------------
-The template pandoc.jats is Copyright 2013--2015 Martin Fenner,
-released under GPL version 2 or later.
-
-The file data/jats.csl is derived from a csl file by Martin Fenner,
-revised by Martin Paul Eve and then John MacFarlane.
-"This work is licensed under a Creative Commons Attribution-ShareAlike 3.0
-License. Originally by Martin Fenner."
diff --git a/data/jats.csl b/data/jats.csl
deleted file mode 100644
index 6972cb3f8..000000000
--- a/data/jats.csl
+++ /dev/null
@@ -1,203 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" default-locale="en-US">
-  <info>
-    <title>Journal Article Tag Suite</title>
-    <title-short>JATS</title-short>
-    <id>http://www.zotero.org/styles/journal-article-tag-suite</id>
-    <link href="https://github.com/MartinPaulEve/JATS-CSL/blob/master/jats.csl" rel="self"/>
-    <link rel="documentation" href="http://jats.nlm.nih.gov/archiving/tag-library/1.0/index.html"/>
-    <author>
-      <name>Martin Paul Eve</name>
-      <email>martin@martineve.com</email>
-    </author>
-    <category citation-format="numeric"/>
-    <category field="medicine"/>
-    <category field="biology"/>
-    <summary>Use this style to generate bibliographic data in Journal Article Tagging Suite (JATS) 1.0 XML format</summary>
-    <updated>2014-06-21T17:41:26+00:00</updated>
-    <rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License. Originally by Martin Fenner.</rights>
-  </info>
-  <locale xml:lang="en">
-    <terms>
-        <term name="et-al">{{jats}}&lt;etal/&gt;{{/jats}}</term>
-    </terms>
-  </locale>
-  <macro name="citation-number">
-    <text variable="citation-number" prefix="{{jats}}id=&quot;ref-{{/jats}}" suffix="{{jats}}&quot;&gt;{{/jats}}"/>
-  </macro>
-  <macro name="author">
-    <names variable="author" delimiter=" ">
-        <name prefix="{{jats}}&lt;name&gt;{{/jats}}" suffix="{{jats}}&lt;/name&gt;{{/jats}}" name-as-sort-order="all" sort-separator=" ">
-            <name-part name="family" text-case="capitalize-first" prefix="{{jats}}&lt;surname&gt;{{/jats}}" suffix="{{jats}}&lt;/surname&gt;{{/jats}}"/>
-            <name-part name="given" text-case="capitalize-first" prefix="{{jats}}&lt;given-names&gt;{{/jats}}" suffix="{{jats}}&lt;/given-names&gt;{{/jats}}"/>
-      </name>
-      <substitute>
-        <names variable="editor"/>
-      </substitute>
-    </names>
-  </macro>
-
-  <macro name="editor" delimiter=" ">
-    <names variable="editor" prefix="{{jats}}&lt;person-group person-group-type=&quot;editor&quot;&gt;{{/jats}}" suffix="{{jats}}&lt;/person-group&gt;{{/jats}}">
-      <name prefix="{{jats}}&lt;name&gt;{{/jats}}" suffix="{{jats}}&lt;/name&gt;{{/jats}}" name-as-sort-order="all" sort-separator=" ">
-        <name-part name="family" text-case="capitalize-first" prefix="{{jats}}&lt;surname&gt;{{/jats}}" suffix="{{jats}}&lt;/surname&gt;{{/jats}}"/>
-        <name-part name="given" text-case="capitalize-first" prefix="{{jats}}&lt;given-names&gt;{{/jats}}" suffix="{{jats}}&lt;/given-names&gt;{{/jats}}"/>
-      </name>
-      <substitute>
-        <names variable="editor"/>
-      </substitute>
-    </names>
-  </macro>
-
-  <macro name="editor">
-    <group delimiter=": ">
-      <names variable="editor">
-        <name prefix="{{jats}}&lt;name&gt;{{/jats}}" suffix="{{jats}}&lt;/name&gt;{{/jats}}" name-as-sort-order="all" sort-separator="">
-          <name-part name="family" text-case="capitalize-first" prefix="{{jats}}&lt;surname&gt;{{/jats}}" suffix="{{jats}}&lt;/surname&gt;{{/jats}}"/>
-          <name-part name="given" text-case="capitalize-first" prefix="{{jats}}&lt;given-names&gt;{{/jats}}" suffix="{{jats}}&lt;given-names&gt;{{/jats}}"/>
-        </name>
-      </names>
-    </group>
-  </macro>
-  <macro name="title">
-    <choose>
-      <if type="book" match="any">
-        <group prefix="{{jats}}&lt;source&gt;{{/jats}}" suffix="{{jats}}&lt;/source&gt;{{/jats}}">
-	        <text variable="title"/>
-				</group>
-			</if>
-      <else>
-        <group prefix="{{jats}}&lt;article-title&gt;{{/jats}}" suffix="{{jats}}&lt;/article-title&gt;{{/jats}}">
-	        <text variable="title"/>
-				</group>
-      </else>
-    </choose>
-  </macro>
-  <macro name="container-title">
-    <text variable="container-title" form="short" prefix="{{jats}}&lt;source&gt;{{/jats}}" suffix="{{jats}}&lt;/source&gt;{{/jats}}"/>
-  </macro>
-  <macro name="publisher">
-    <text variable="publisher" prefix="{{jats}}&lt;publisher-name&gt;{{/jats}}" suffix="{{jats}}&lt;/publisher-name&gt;{{/jats}}"/>
-    <text variable="publisher-place" prefix="{{jats}}&lt;publisher-loc&gt;{{/jats}}" suffix="{{jats}}&lt;/publisher-loc&gt;{{/jats}}"/>
-  </macro>
-  <macro name="link">
-    <choose>
-      <if match="any" variable="DOI">
-        <group prefix="{{jats}}&lt;pub-id pub-id-type=&quot;doi&quot;&gt;{{/jats}}" suffix="{{jats}}&lt;/pub-id&gt;{{/jats}}">
-          <text variable="DOI"/>
-        </group>
-      </if>
-    </choose>
-    <choose>
-      <if match="any" variable="PMID">
-        <group prefix="{{jats}}&lt;pub-id pub-id-type=&quot;pmid&quot;&gt;{{/jats}}" suffix="{{jats}}&lt;/pub-id&gt;{{/jats}}">
-          <text variable="PMID"/>
-        </group>
-      </if>
-    </choose>
-    <choose>
-      <if variable="URL" match="any">
-        <text variable="URL" />
-      </if>
-    </choose>
-  </macro>
-  <macro name="date">
-    <choose>
-      <if type="article-journal article-magazine article-newspaper report patent book" match="any">
-        <group prefix="{{jats}}&lt;date&gt;{{/jats}}" suffix="{{jats}}&lt;/date&gt;{{/jats}}">
-          <date variable="issued">
-            <date-part name="day" form="numeric-leading-zeros" prefix="{{jats}}&lt;day&gt;{{/jats}}" suffix="{{jats}}&lt;/day&gt;{{/jats}}"/>
-            <date-part name="month" form="numeric-leading-zeros" prefix="{{jats}}&lt;month&gt;{{/jats}}" suffix="{{jats}}&lt;/month&gt;{{/jats}}"/>
-            <date-part name="year" prefix="{{jats}}&lt;year&gt;{{/jats}}" suffix="{{jats}}&lt;/year&gt;{{/jats}}"/>
-          </date>
-        </group>
-      </if>
-      <else>
-        <group prefix="{{jats}}&lt;date-in-citation content-type=&quot;access-date&quot;{{/jats}}" suffix="{{jats}}&lt;/date-in-citation&gt;{{/jats}}">
-          <date variable="accessed" prefix="{{jats}} iso-8601-date=&quot;{{/jats}}" suffix="{{jats}}&quot;&gt;{{/jats}}">
-            <date-part name="year"/>
-            <date-part name="month" form="numeric-leading-zeros" prefix="{{jats}}-{{/jats}}"/>
-            <date-part name="day" form="numeric-leading-zeros" prefix="{{jats}}-{{/jats}}"/>
-          </date>
-          <date variable="accessed">
-            <date-part name="day" prefix="{{jats}}&lt;day&gt;{{/jats}}" suffix="{{jats}}&lt;/day&gt;{{/jats}}"/>
-            <date-part name="month" form="numeric-leading-zeros" prefix="{{jats}}&lt;month&gt;{{/jats}}" suffix="{{jats}}&lt;/month&gt;{{/jats}}"/>
-            <date-part name="year" prefix="{{jats}}&lt;year&gt;{{/jats}}" suffix="{{jats}}&lt;/year&gt;{{/jats}}"/>
-          </date>
-        </group>
-      </else>
-    </choose>
-  </macro>
-  <macro name="location">
-    <choose>
-      <if type="article-journal article-magazine" match="any">
-        <text variable="volume" prefix="{{jats}}&lt;volume&gt;{{/jats}}" suffix="{{jats}}&lt;/volume&gt;{{/jats}}"/>
-        <text variable="issue" prefix="{{jats}}&lt;issue&gt;{{/jats}}" suffix="{{jats}}&lt;/issue&gt;{{/jats}}"/>
-      </if>
-    </choose>
-    <choose>
-      <if type="article-journal article-magazine article-newspaper chapter" match="any">
-        <text variable="page-first" prefix="{{jats}}&lt;fpage&gt;{{/jats}}" suffix="{{jats}}&lt;/fpage&gt;{{/jats}}"/>
-      </if>
-    </choose>
-  </macro>
-  <macro name="publication-type">
-    <group prefix="{{jats}} publication-type=&quot;{{/jats}}" suffix="{{jats}}&quot;&gt;{{/jats}}">
-      <choose>
-        <if type="article-journal article-magazine article-newspaper" match="any">
-          <text value="journal"/>
-        </if>
-        <else-if type="book" match="any">
-          <text value="book"/>
-        </else-if>
-        <else-if type="chapter" match="any">
-          <text value="bookchapter"/>
-        </else-if>
-        <else-if type="dataset" match="any">
-          <text value="dataset"/>
-        </else-if>
-        <else-if type="patent" match="any">
-          <text value="patent"/>
-        </else-if>
-        <else-if type="report" match="any">
-          <text value="report"/>
-        </else-if>
-        <else-if type="review" match="any">
-          <text value="review"/>
-        </else-if>
-        <else>
-          <text value="standard"/>
-        </else>
-      </choose>
-    </group>
-  </macro>
-  <citation collapse="citation-number">
-    <sort>
-      <key variable="citation-number"/>
-    </sort>
-    <layout delimiter=",">
-      <group prefix="{{jats}}&lt;xref ref-type=&quot;bibr&quot; rid=&quot;{{/jats}}" suffix="{{jats}}&lt;/xref&gt;{{/jats}}">
-        <text variable="citation-number" prefix="{{jats}}ref-{{/jats}}" suffix="{{jats}}&quot;&gt;{{/jats}}"/>
-        <text variable="citation-number"/>
-      </group>
-    </layout>
-  </citation>
-  <bibliography sort-separator="">
-    <layout>
-      <group prefix="{{jats}}&lt;ref {{/jats}}" suffix="{{jats}}&lt;/ref&gt;{{/jats}}">
-        <text macro="citation-number"/>
-        <group prefix="{{jats}}&lt;element-citation{{/jats}}" suffix="{{jats}}&lt;/element-citation&gt;{{/jats}}">
-          <text macro="publication-type"/>
-          <text macro="author" prefix="{{jats}}&lt;person-group person-group-type=&quot;author&quot;&gt;{{/jats}}" suffix="{{jats}}&lt;/person-group&gt;{{/jats}}"/>
-          <text macro="title" />
-          <text macro="container-title"/>
-          <text macro="editor"/>
-          <text macro="publisher"/>
-          <text macro="date"/>
-          <text macro="location"/>
-          <text macro="link"/>
-        </group>
-      </group>
-    </layout>
-  </bibliography>
-</style>
diff --git a/pandoc.cabal b/pandoc.cabal
index ede9af6f0..db8dab491 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -180,8 +180,6 @@ data-files:
                  data/pandoc.List.lua
                  -- bash completion template
                  data/bash_completion.tpl
-                 -- jats csl
-                 data/jats.csl
                  -- citeproc
                  data/default.csl
                  citeproc/biblatex-localization/*.lbx.strings
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 725c76424..437af3257 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -50,10 +50,9 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
 import Text.Pandoc.App.CommandLineOptions (parseOptions, options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
 import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
-import Text.Pandoc.Builder (setMeta)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
-import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
+import Text.Pandoc.SelfContained (makeSelfContained)
 import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
          headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput,
          defaultUserDataDirs, tshow, findM)
@@ -190,17 +189,6 @@ convertWithOpts opts = do
                     Nothing -> readDataFile "abbreviations"
                     Just f  -> readFileStrict f
 
-    metadata <- if format == "jats" &&
-                   isNothing (lookupMeta "csl" (optMetadata opts)) &&
-                   isNothing (lookupMeta "citation-style"
-                                               (optMetadata opts))
-                   then do
-                     jatsCSL <- readDataFile "jats.csl"
-                     let jatsEncoded = makeDataURI
-                                         ("application/xml", jatsCSL)
-                     return $ setMeta "csl" jatsEncoded $ optMetadata opts
-                   else return $ optMetadata opts
-
     case lookupMetaString "lang" (optMetadata opts) of
            ""      -> setTranslations $ Lang "en" "" "US" []
            l       -> case parseBCP47 l of
@@ -286,7 +274,7 @@ convertWithOpts opts = do
                       then fillMediaBag
                       else return)
               >=> return . adjustMetadata (metadataFromFile <>)
-              >=> return . adjustMetadata (<> metadata)
+              >=> return . adjustMetadata (<> optMetadata opts)
               >=> applyTransforms transforms
               >=> applyFilters readerOpts filters [T.unpack format]
               >=> maybe return extractMedia (optExtractMedia opts)
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index e8d93b8d5..b2266d179 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE ViewPatterns #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS
-   Copyright   : Copyright (C) 2017-2021 John MacFarlane
+   Copyright   : 2017-2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -168,13 +168,15 @@ plainToPara x         = x
 -- | Convert a list of pairs of terms and definitions into a list of
 -- JATS varlistentrys.
 deflistItemsToJATS :: PandocMonad m
-                      => WriterOptions -> [([Inline],[[Block]])] -> JATS m (Doc Text)
+                   => WriterOptions
+                   -> [([Inline],[[Block]])] -> JATS m (Doc Text)
 deflistItemsToJATS opts items =
   vcat <$> mapM (uncurry (deflistItemToJATS opts)) items
 
 -- | Convert a term and a list of blocks into a JATS varlistentry.
 deflistItemToJATS :: PandocMonad m
-                     => WriterOptions -> [Inline] -> [[Block]] -> JATS m (Doc Text)
+                  => WriterOptions
+                  -> [Inline] -> [[Block]] -> JATS m (Doc Text)
 deflistItemToJATS opts term defs = do
   term' <- inlinesToJATS opts term
   def' <- wrappedBlocksToJATS (not . isPara)
@@ -186,7 +188,8 @@ deflistItemToJATS opts term defs = do
 
 -- | Convert a list of lists of blocks to a list of JATS list items.
 listItemsToJATS :: PandocMonad m
-                => WriterOptions -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text)
+                => WriterOptions
+                -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text)
 listItemsToJATS opts markers items =
   case markers of
        Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items
@@ -194,12 +197,13 @@ listItemsToJATS opts markers items =
 
 -- | Convert a list of blocks into a JATS list item.
 listItemToJATS :: PandocMonad m
-               => WriterOptions -> Maybe Text -> [Block] -> JATS m (Doc Text)
+               => WriterOptions
+               -> Maybe Text -> [Block] -> JATS m (Doc Text)
 listItemToJATS opts mbmarker item = do
   contents <- wrappedBlocksToJATS (not . isParaOrList) opts
                  (walk demoteHeaderAndRefs item)
   return $ inTagsIndented "list-item" $
-           maybe empty (\lbl -> inTagsSimple "label" (text $ T.unpack lbl)) mbmarker
+           maybe empty (inTagsSimple "label" . text . T.unpack) mbmarker
            $$ contents
 
 imageMimeType :: Text -> [(Text, Text)] -> (Text, Text)
@@ -247,7 +251,9 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do
   return $ inTags True "sec" attribs $
       inTagsSimple "title" title' $$ contents
 -- Bibliography reference:
-blockToJATS opts (Div (T.stripPrefix "ref-" -> Just _,_,_) [Para lst]) =
+blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident =
+  inTags True "ref" [("id", ident)] .
+  inTagsSimple "mixed-citation" <$>
   inlinesToJATS opts lst
 blockToJATS opts (Div ("refs",_,_) xs) = do
   contents <- blocksToJATS opts xs
@@ -470,10 +476,13 @@ inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _))
   | escapeURI t == email =
   return $ inTagsSimple "email" $ literal (escapeStringForXML email)
 inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do
-  let attr = [("id", ident) | not (T.null ident)] ++
-             [("alt", stringify txt) | not (null txt)] ++
-             [("rid", src)] ++
-             [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]]
+  let attr = mconcat
+             [ [("id", ident) | not (T.null ident)]
+             , [("alt", stringify txt) | not (null txt)]
+             , [("rid", src)]
+             , [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]]
+             , [("ref-type", "bibr") | "ref-" `T.isPrefixOf` src]
+             ]
   if null txt
      then return $ selfClosingTag "xref" attr
      else do
@@ -529,7 +538,7 @@ demoteHeaderAndRefs (Div ("refs",cls,kvs) bs) =
 demoteHeaderAndRefs x = x
 
 parseDate :: Text -> Maybe Day
-parseDate s = msum (map (\fs -> parsetimeWith fs $ T.unpack s) formats) :: Maybe Day
+parseDate s = msum (map (`parsetimeWith` T.unpack s) formats)
   where parsetimeWith = parseTimeM True defaultTimeLocale
         formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
                     "%e %B %Y", "%b. %e, %Y", "%B %e, %Y",
diff --git a/test/command/7016.md b/test/command/7016.md
new file mode 100644
index 000000000..c2d791ce9
--- /dev/null
+++ b/test/command/7016.md
@@ -0,0 +1,48 @@
+```
+% pandoc --citeproc --to=jats_archiving --standalone
+---
+csl: command/apa.csl
+references:
+- id: doe
+  type: article
+  author:
+  - family: Doe
+    given: Jane
+  container-title: Proceedings of the Academy of Test Inputs
+  doi: 10.x/nope
+  issued: 2021
+  title: Another article
+...
+Blah [@doe].
+^D
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.2 20190208//EN"
+                  "JATS-archivearticle1.dtd">
+<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other">
+<front>
+<journal-meta>
+<journal-id></journal-id>
+<journal-title-group>
+</journal-title-group>
+<issn></issn>
+<publisher>
+<publisher-name></publisher-name>
+</publisher>
+</journal-meta>
+<article-meta>
+</article-meta>
+</front>
+<body>
+<p>Blah (Doe, 2021).</p>
+</body>
+<back>
+<ref-list>
+  <ref id="ref-doe">
+    <mixed-citation>Doe, J. (2021). Another article. <italic>Proceedings
+    of the Academy of Test Inputs</italic>.
+    doi:<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.x/nope">10.x/nope</ext-link></mixed-citation>
+  </ref>
+</ref-list>
+</back>
+</article>
+```
-- 
cgit v1.2.3


From c451207b08edc36fa5c2f1af5556a8d211e023ed Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 12 Jan 2021 09:49:10 -0800
Subject: Docx writer: handle table header using styles.

Instead of hard-coding the border and header cell vertical alignment,
we now let this be determined by the Table style, making use of
Word's "conditional formatting" for the table's first row.
For headerless tables, we use the tblLook element to tell Word
not to apply conditional first-row formatting.

Closes #7008.
---
 data/docx/word/styles.xml                          |  15 +++++++++
 src/Text/Pandoc/Writers/Docx.hs                    |  37 +++++++++++----------
 test/docx/golden/block_quotes.docx                 | Bin 10001 -> 10092 bytes
 test/docx/golden/codeblock.docx                    | Bin 9853 -> 9944 bytes
 test/docx/golden/comments.docx                     | Bin 10188 -> 10279 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 9952 -> 10042 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10578 -> 10666 bytes
 test/docx/golden/definition_list.docx              | Bin 9850 -> 9941 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9856 -> 9947 bytes
 test/docx/golden/document-properties.docx          | Bin 10332 -> 10423 bytes
 test/docx/golden/headers.docx                      | Bin 9989 -> 10080 bytes
 test/docx/golden/image.docx                        | Bin 26667 -> 26758 bytes
 test/docx/golden/inline_code.docx                  | Bin 9789 -> 9880 bytes
 test/docx/golden/inline_formatting.docx            | Bin 9969 -> 10060 bytes
 test/docx/golden/inline_images.docx                | Bin 26725 -> 26816 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10010 -> 10101 bytes
 test/docx/golden/links.docx                        | Bin 10185 -> 10276 bytes
 test/docx/golden/lists.docx                        | Bin 10261 -> 10352 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10052 -> 10143 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10141 -> 10232 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10053 -> 10144 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10148 -> 10239 bytes
 test/docx/golden/notes.docx                        | Bin 9955 -> 10046 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9888 -> 9980 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10023 -> 10115 bytes
 test/docx/golden/table_one_row.docx                | Bin 9834 -> 9932 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10199 -> 10249 bytes
 test/docx/golden/tables.docx                       | Bin 10225 -> 10266 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9833 -> 9924 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9816 -> 9907 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9850 -> 9941 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 9962 -> 10053 bytes
 test/docx/golden/unicode.docx                      | Bin 9774 -> 9865 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9822 -> 9913 bytes
 34 files changed, 35 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/data/docx/word/styles.xml b/data/docx/word/styles.xml
index 6bb5a3f52..832b1b25b 100644
--- a/data/docx/word/styles.xml
+++ b/data/docx/word/styles.xml
@@ -350,6 +350,21 @@
         <w:right w:w="108" w:type="dxa" />
       </w:tblCellMar>
     </w:tblPr>
+    <w:tblStylePr w:type="firstRow">
+      <w:tblPr>
+        <w:jc w:val="left"/>
+        <w:tblInd w:w="0" w:type="dxa"/>
+      </w:tblPr>
+      <w:trPr>
+        <w:jc w:val="left"/>
+      </w:trPr>
+      <w:tcPr>
+        <w:vAlign w:val="bottom"/>
+        <w:tcBorders>
+          <w:bottom w:val="single"/>
+        </w:tcBorders>
+      </w:tcPr>
+    </w:tblStylePr>
   </w:style>
   <w:style w:type="paragraph" w:customStyle="1" w:styleId="DefinitionTerm">
     <w:name w:val="Definition Term" />
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index a99e13a85..8f498775d 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1023,23 +1023,15 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
             _ -> es ++ [Elem $ mknode "w:p" [] ()]
   headers' <- mapM cellToOpenXML $ zip aligns headers
   rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
-  let borderProps = Elem $ mknode "w:tcPr" []
-                    [ mknode "w:tcBorders" []
-                      $ mknode "w:bottom" [("w:val","single")] ()
-                    , mknode "w:vAlign" [("w:val","bottom")] () ]
   compactStyle <- pStyleM "Compact"
   let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
-  let mkcell border contents = mknode "w:tc" []
-                            $ [ borderProps | border ] ++
-                            if null contents
-                               then emptyCell'
-                               else contents
-  let mkrow border cells =
+  let mkcell contents = mknode "w:tc" []
+                            $ if null contents
+                                 then emptyCell'
+                                 else contents
+  let mkrow cells =
          mknode "w:tr" [] $
-         [ mknode "w:trPr" []
-           [ mknode "w:cnfStyle" [("w:firstRow","1")] ()]
-         | border]
-         ++ map (mkcell border) cells
+           map mkcell cells
   let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
   let fullrow = 5000 -- 100% specified in pct
   let rowwidth = fullrow * sum widths
@@ -1047,6 +1039,15 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
                        [("w:w", show (floor (textwidth * w) :: Integer))] ()
   let hasHeader = not $ all null headers
   modify $ \s -> s { stInTable = False }
+  -- for compatibility with Word <= 2007, we include a val with a bitmask
+  -- 0×0020  Apply first row conditional formatting
+  -- 0×0040  Apply last row conditional formatting
+  -- 0×0080  Apply first column conditional formatting
+  -- 0×0100  Apply last column conditional formatting
+  -- 0×0200  Do not apply row banding conditional formatting
+  -- 0×0400  Do not apply column banding conditional formattin
+  let tblLookVal :: Int
+      tblLookVal = if hasHeader then 0x20 else 0
   return $
     caption' ++
     [Elem $
@@ -1059,15 +1060,17 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
                                ,("w:firstColumn","0")
                                ,("w:lastColumn","0")
                                ,("w:noHBand","0")
-                               ,("w:noVBand","0")] () :
+                               ,("w:noVBand","0")
+                               ,("w:val", printf "%04x" tblLookVal)
+                               ] () :
           [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] ()
           | not (null caption) ] )
       : mknode "w:tblGrid" []
         (if all (==0) widths
             then []
             else map mkgridcol widths)
-      : [ mkrow True headers' | hasHeader ] ++
-      map (mkrow False) rows'
+      : [ mkrow headers' | hasHeader ] ++
+      map mkrow rows'
       )]
 blockToOpenXML' opts el
   | BulletList lst <- el = addOpenXMLList BulletMarker lst
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index bbc8d8de9..3e1bf16e7 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index e20efcab4..66f055063 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index f1185da98..fb3a02a0a 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index 83243ab8c..bc6c2702a 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 17804bb81..8c555a5bd 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 21629e208..c21b3a5b3 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 5cf8db0b0..92ce144e9 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 14bfab6d4..d21b67309 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index 416743aa1..3558a47bf 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index ef2940f89..606df92a3 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 479ea65ec..759269cac 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index e12e3b38d..c37777080 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 8bd57bb8c..9450b1a73 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 2c6a638fc..6f0b830e6 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index 11e52c4b1..e53889cfb 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 7667990c4..5dbe298b7 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 3e8c6d2b2..194181288 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 05a7cf060..6e0b634f7 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index f5ae4a384..477178e77 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index d02c77271..51110356e 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index f7fdcbe11..b6206cdf5 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index ae7f8f1f0..07b576080 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index 5e433b736..d46095eb7 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index d404878c6..7caba4e93 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 79c395262..6aaa6da61 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index df9680773..5746c5ad0 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index bb73b82f6..5f22dccc6 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index 7df484aaa..ab5c4f56d 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index d717b93ab..085f33162 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 791182db2..1ac86d5c8 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index b64a7b58e..c2c443b19 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index b5116d1a8..5ea18d32e 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From 387d3e76ee81138588195ebe8dad7720f2623b02 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 12 Jan 2021 10:20:32 -0800
Subject: Markdown writer: cleaned up raw formats.

We now react appropriately to gfm, commonmark, and commonmark_x
as raw formats.
---
 src/Text/Pandoc/Writers/Markdown.hs | 69 +++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 34 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index d62727d90..de9075ac4 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -494,25 +494,24 @@ blockToMarkdown' opts b@(RawBlock f str) = do
   let renderEmpty = mempty <$ report (BlockNotRendered b)
   case variant of
     PlainText -> renderEmpty
-    _ | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
-                  "markdown_mmd", "markdown_strict"] ->
-            return $ literal str <> literal "\n"
-      | isEnabled Ext_raw_attribute opts -> rawAttribBlock
-      | f `elem` ["html", "html5", "html4"] ->
-            case () of
-              _ | isEnabled Ext_markdown_attribute opts -> return $
-                    literal (addMarkdownAttribute str) <> literal "\n"
-                | isEnabled Ext_raw_html opts -> return $
-                    literal str <> literal "\n"
-                | isEnabled Ext_raw_attribute opts -> rawAttribBlock
-                | otherwise -> renderEmpty
-      | f `elem` ["latex", "tex"] ->
-            case () of
-              _ | isEnabled Ext_raw_tex opts -> return $
-                    literal str <> literal "\n"
-                | isEnabled Ext_raw_attribute opts -> rawAttribBlock
-                | otherwise -> renderEmpty
-      | otherwise -> renderEmpty
+    Commonmark
+      | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"]
+         -> return $ literal str <> literal "\n"
+    Markdown
+      | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+                  "markdown_mmd", "markdown_strict"]
+         -> return $ literal str <> literal "\n"
+    _ | isEnabled Ext_raw_attribute opts -> rawAttribBlock
+      | f `elem` ["html", "html5", "html4"]
+      , isEnabled Ext_markdown_attribute opts
+         -> return $ literal (addMarkdownAttribute str) <> literal "\n"
+      | f `elem` ["html", "html5", "html4"]
+      , isEnabled Ext_raw_html opts
+         -> return $ literal str <> literal "\n"
+      | f `elem` ["latex", "tex"]
+      , isEnabled Ext_raw_tex opts
+         -> return $ literal str <> literal "\n"
+    _ -> renderEmpty
 blockToMarkdown' opts HorizontalRule =
   return $ blankline <> literal (T.replicate (writerColumns opts) "-") <> blankline
 blockToMarkdown' opts (Header level attr inlines) = do
@@ -1250,21 +1249,23 @@ inlineToMarkdown opts il@(RawInline f str) = do
   let renderEmpty = mempty <$ report (InlineNotRendered il)
   case variant of
     PlainText -> renderEmpty
-    _ | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
-                  "markdown_mmd", "markdown_strict"] ->
-            return $ literal str
-      | isEnabled Ext_raw_attribute opts -> rawAttribInline
-      | f `elem` ["html", "html5", "html4"] ->
-            case () of
-              _ | isEnabled Ext_raw_html opts -> return $ literal str
-                | isEnabled Ext_raw_attribute opts -> rawAttribInline
-                | otherwise -> renderEmpty
-      | f `elem` ["latex", "tex"] ->
-            case () of
-              _ | isEnabled Ext_raw_tex opts -> return $ literal str
-                | isEnabled Ext_raw_attribute opts -> rawAttribInline
-                | otherwise -> renderEmpty
-      | otherwise -> renderEmpty
+    Commonmark
+      | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"]
+         -> return $ literal str
+    Markdown
+      | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+                  "markdown_mmd", "markdown_strict"]
+         -> return $ literal str
+    _ | isEnabled Ext_raw_attribute opts -> rawAttribInline
+      | f `elem` ["html", "html5", "html4"]
+      , isEnabled Ext_raw_html opts
+         -> return $ literal str
+      | f `elem` ["latex", "tex"]
+      , isEnabled Ext_raw_tex opts
+         -> return $ literal str
+    _ -> renderEmpty
+
+
 inlineToMarkdown opts LineBreak = do
   variant <- asks envVariant
   if variant == PlainText || isEnabled Ext_hard_line_breaks opts
-- 
cgit v1.2.3


From 83336a45a78b288dcc0104d2432135bdb3f5e8f1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 16 Jan 2021 11:15:35 -0800
Subject: Recognize more extensions as markdown by default.

`mkdn`, `mkd`, `mdwn`, `mdown`, `Rmd`.
Closes #7034.
---
 src/Text/Pandoc/App/FormatHeuristics.hs | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
index 17ed30fe9..65a1a7b82 100644
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ b/src/Text/Pandoc/App/FormatHeuristics.hs
@@ -48,6 +48,11 @@ formatFromFilePath x =
     ".lhs"      -> Just "markdown+lhs"
     ".ltx"      -> Just "latex"
     ".markdown" -> Just "markdown"
+    ".mkdn"     -> Just "markdown"
+    ".mkd"      -> Just "markdown"
+    ".mdwn"     -> Just "markdown"
+    ".mdown"    -> Just "markdown"
+    ".Rmd"      -> Just "markdown"
     ".md"       -> Just "markdown"
     ".ms"       -> Just "ms"
     ".muse"     -> Just "muse"
-- 
cgit v1.2.3


From 6efd3460a776620fdb93812daa4f6831e6c332ce Mon Sep 17 00:00:00 2001
From: Gautier DI FOLCO <gautier.difolco@gmail.com>
Date: Sun, 17 Jan 2021 01:15:33 +0100
Subject: Markdown reader: support GitHub wiki's internal links (#2923) (#6458)

Canges overview:

 * Add a `Ext_markdown_github_wikilink` constructor to `Extension` [API change].
 * Add the parser `githubWikiLink` in `Text.Pandoc.Readers.Markdown`
 * Add tests.
---
 src/Text/Pandoc/Extensions.hs       |  3 +++
 src/Text/Pandoc/Readers/Markdown.hs | 25 +++++++++++++++++++++++++
 test/Tests/Readers/Markdown.hs      | 30 ++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 39c2a0489..69257ecc8 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -118,6 +118,7 @@ data Extension =
     | Ext_literate_haskell    -- ^ Enable literate Haskell conventions
     | Ext_markdown_attribute      -- ^ Interpret text inside HTML as markdown iff
                                   --   container has attribute 'markdown'
+    | Ext_wikilinks -- ^ Interpret a markdown wiki link
     | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
     | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
     | Ext_mmd_link_attributes     -- ^ MMD style reference link attributes
@@ -258,6 +259,7 @@ githubMarkdownExtensions = extensionsFromList
   , Ext_emoji
   , Ext_fenced_code_blocks
   , Ext_backtick_code_blocks
+  , Ext_wikilinks
   ]
 
 -- | Extensions to be used with multimarkdown.
@@ -444,6 +446,7 @@ getAllExtensions f = universalExtensions <> getAll f
        , Ext_tex_math_single_backslash
        , Ext_tex_math_double_backslash
        , Ext_markdown_attribute
+       , Ext_wikilinks
        , Ext_mmd_title_block
        , Ext_abbreviations
        , Ext_autolink_bare_uris
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5c3a21bb7..8fd0b68e2 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
+import Data.Functor (($>))
 import Data.List (transpose, elemIndex, sortOn)
 import qualified Data.Map as M
 import Data.Maybe
@@ -1485,6 +1486,7 @@ inline = choice [ whitespace
                 , note
                 , cite
                 , bracketedSpan
+                , githubWikiLink
                 , link
                 , image
                 , math
@@ -1780,6 +1782,29 @@ source = do
 linkTitle :: PandocMonad m => MarkdownParser m Text
 linkTitle = quotedTitle '"' <|> quotedTitle '\''
 
+-- Github wiki style link, with optional title
+-- syntax documented under https://help.github.com/en/github/building-a-strong-community/editing-wiki-content
+githubWikiLink :: PandocMonad m => MarkdownParser m (F Inlines)
+githubWikiLink = try $ guardEnabled Ext_wikilinks >> wikilink
+  where
+    wikilink = try $ do
+      string "[["
+      firstPart <- fmap mconcat . sequence <$> wikiText
+      (char '|' *> complexWikilink firstPart)
+        <|> (string "]]" $> (B.link
+                               <$> (stringify <$> firstPart)
+                               <*> return "wikilink"
+                               <*> firstPart))
+
+    complexWikilink firstPart = do
+      url <- fmap stringify . sequence <$> wikiText
+      string "]]"
+      return $ B.link <$> url
+                      <*> return "wikilink"
+                      <*> firstPart
+
+    wikiText = many (whitespace <|> bareURL <|> str <|> endline <|> escapedChar)
+
 link :: PandocMonad m => MarkdownParser m (F Inlines)
 link = try $ do
   st <- getState
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 18f909583..a2abcb143 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -307,6 +307,36 @@ tests = [ testGroup "inline code"
             "[https://example.org(](url)" =?>
             para (link "url" "" (text "https://example.org("))
           ]
+        , testGroup "Github wiki links"
+          [ test markdownGH "autolink" $
+            "[[https://example.org]]" =?>
+            para (link "https://example.org" "wikilink" (text "https://example.org"))
+          , test markdownGH "link with title" $
+            "[[title|https://example.org]]" =?>
+            para (link "https://example.org" "wikilink" (text "title"))
+          , test markdownGH "bad link with title" $
+            "[[title|random string]]" =?>
+            para (link "random-string" "wikilink" (text "title"))
+          , test markdownGH "autolink not being a link" $
+            "[[Name of page]]" =?>
+            para (link "Name-of-page" "wikilink" (text "Name of page"))
+          , test markdownGH "autolink not being a link with a square bracket" $
+            "[[Name of ]page]]" =?>
+            para (link "Name-of-]page" "wikilink" (text "Name of ]page"))
+          , test markdownGH "formatting (strong and emphasis) should not be interpreted" $
+             "[[***a**b **c**d*|https://example.org]]" =?>
+             para (text "[[" <> emph (strong (str "a") <> str "b" <> space
+                   <> strong (str "c") <> str "d") <> text "|https://example.org]]")
+          , test markdownGH "inlined code should not make a link" $
+            "[[ti`|`le|https://example.org]]" =?>
+            para (text "[[ti" <> code "|" <> text "le|https://example.org]]")
+          , test markdownGH "link with title and a cut should take the middle part as link" $
+            "[[tit|le|https://example.org]]" =?>
+            para (link "le" "wikilink" (text "tit"))
+          , test markdownGH "link with inline start should be a link" $
+            "[[t`i*t_le|https://example.org]]" =?>
+            para (link "https://example.org" "wikilink" (text "t`i*t_le"))
+          ]
         , testGroup "Headers"
           [ "blank line before header" =:
             "\n# Header\n"
-- 
cgit v1.2.3


From c841bcf3b01548b2f9b462b39d8edda4c10be534 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 16 Jan 2021 16:22:04 -0800
Subject: Revert "Markdown reader: support GitHub wiki's internal links (#2923)
 (#6458)"

This reverts commit 6efd3460a776620fdb93812daa4f6831e6c332ce.

Since this extension is designed to be used with
GitHub markdown (gfm), we need to implement the parser
as a commonmark extension (commonmark-extensions),
rather than in pandoc's markdown reader.  When that is
done, we can add it here.
---
 src/Text/Pandoc/Extensions.hs       |  3 ---
 src/Text/Pandoc/Readers/Markdown.hs | 25 -------------------------
 test/Tests/Readers/Markdown.hs      | 30 ------------------------------
 3 files changed, 58 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 69257ecc8..39c2a0489 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -118,7 +118,6 @@ data Extension =
     | Ext_literate_haskell    -- ^ Enable literate Haskell conventions
     | Ext_markdown_attribute      -- ^ Interpret text inside HTML as markdown iff
                                   --   container has attribute 'markdown'
-    | Ext_wikilinks -- ^ Interpret a markdown wiki link
     | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
     | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
     | Ext_mmd_link_attributes     -- ^ MMD style reference link attributes
@@ -259,7 +258,6 @@ githubMarkdownExtensions = extensionsFromList
   , Ext_emoji
   , Ext_fenced_code_blocks
   , Ext_backtick_code_blocks
-  , Ext_wikilinks
   ]
 
 -- | Extensions to be used with multimarkdown.
@@ -446,7 +444,6 @@ getAllExtensions f = universalExtensions <> getAll f
        , Ext_tex_math_single_backslash
        , Ext_tex_math_double_backslash
        , Ext_markdown_attribute
-       , Ext_wikilinks
        , Ext_mmd_title_block
        , Ext_abbreviations
        , Ext_autolink_bare_uris
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8fd0b68e2..5c3a21bb7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -21,7 +21,6 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
-import Data.Functor (($>))
 import Data.List (transpose, elemIndex, sortOn)
 import qualified Data.Map as M
 import Data.Maybe
@@ -1486,7 +1485,6 @@ inline = choice [ whitespace
                 , note
                 , cite
                 , bracketedSpan
-                , githubWikiLink
                 , link
                 , image
                 , math
@@ -1782,29 +1780,6 @@ source = do
 linkTitle :: PandocMonad m => MarkdownParser m Text
 linkTitle = quotedTitle '"' <|> quotedTitle '\''
 
--- Github wiki style link, with optional title
--- syntax documented under https://help.github.com/en/github/building-a-strong-community/editing-wiki-content
-githubWikiLink :: PandocMonad m => MarkdownParser m (F Inlines)
-githubWikiLink = try $ guardEnabled Ext_wikilinks >> wikilink
-  where
-    wikilink = try $ do
-      string "[["
-      firstPart <- fmap mconcat . sequence <$> wikiText
-      (char '|' *> complexWikilink firstPart)
-        <|> (string "]]" $> (B.link
-                               <$> (stringify <$> firstPart)
-                               <*> return "wikilink"
-                               <*> firstPart))
-
-    complexWikilink firstPart = do
-      url <- fmap stringify . sequence <$> wikiText
-      string "]]"
-      return $ B.link <$> url
-                      <*> return "wikilink"
-                      <*> firstPart
-
-    wikiText = many (whitespace <|> bareURL <|> str <|> endline <|> escapedChar)
-
 link :: PandocMonad m => MarkdownParser m (F Inlines)
 link = try $ do
   st <- getState
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index a2abcb143..18f909583 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -307,36 +307,6 @@ tests = [ testGroup "inline code"
             "[https://example.org(](url)" =?>
             para (link "url" "" (text "https://example.org("))
           ]
-        , testGroup "Github wiki links"
-          [ test markdownGH "autolink" $
-            "[[https://example.org]]" =?>
-            para (link "https://example.org" "wikilink" (text "https://example.org"))
-          , test markdownGH "link with title" $
-            "[[title|https://example.org]]" =?>
-            para (link "https://example.org" "wikilink" (text "title"))
-          , test markdownGH "bad link with title" $
-            "[[title|random string]]" =?>
-            para (link "random-string" "wikilink" (text "title"))
-          , test markdownGH "autolink not being a link" $
-            "[[Name of page]]" =?>
-            para (link "Name-of-page" "wikilink" (text "Name of page"))
-          , test markdownGH "autolink not being a link with a square bracket" $
-            "[[Name of ]page]]" =?>
-            para (link "Name-of-]page" "wikilink" (text "Name of ]page"))
-          , test markdownGH "formatting (strong and emphasis) should not be interpreted" $
-             "[[***a**b **c**d*|https://example.org]]" =?>
-             para (text "[[" <> emph (strong (str "a") <> str "b" <> space
-                   <> strong (str "c") <> str "d") <> text "|https://example.org]]")
-          , test markdownGH "inlined code should not make a link" $
-            "[[ti`|`le|https://example.org]]" =?>
-            para (text "[[ti" <> code "|" <> text "le|https://example.org]]")
-          , test markdownGH "link with title and a cut should take the middle part as link" $
-            "[[tit|le|https://example.org]]" =?>
-            para (link "le" "wikilink" (text "tit"))
-          , test markdownGH "link with inline start should be a link" $
-            "[[t`i*t_le|https://example.org]]" =?>
-            para (link "https://example.org" "wikilink" (text "t`i*t_le"))
-          ]
         , testGroup "Headers"
           [ "blank line before header" =:
             "\n# Header\n"
-- 
cgit v1.2.3


From 1c4d14cdcc60c1bfcc579a5b5ab154e8e62e54c7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 18 Jan 2021 11:32:02 -0800
Subject: RST writer: fix #7039.

We were losing content from inside spans with a class,
due to logic that is meant to avoid nested inline
structures that can't be represented in RST.

The logic was a bit stricter than necessary.  This
commit fixes the issue.
---
 src/Text/Pandoc/Writers/RST.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index a3be1d723..d01e13db4 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -519,8 +519,8 @@ flatten outer
           (Quoted _ _, _)          -> keep f i
           (_, Quoted _ _)          -> keep f i
           -- spans are not rendered using RST inlines, so we can keep them
-          (Span ("",[],[]) _, _)   -> keep f i
-          (_, Span ("",[],[]) _)   -> keep f i
+          (Span (_,_,[]) _, _)   -> keep f i
+          (_, Span (_,_,[]) _)   -> keep f i
           -- inlineToRST handles this case properly so it's safe to keep
           ( Link{}, Image{})       -> keep f i
           -- parent inlines would prevent links from being correctly
-- 
cgit v1.2.3


From 5f98ac62e3f91a3301c13f17d9081b6ecb39e5a9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 19 Jan 2021 20:39:58 -0800
Subject: JATS writer: Ensure that disp-quote is always wrapped in p.

Closes #7041.
---
 src/Text/Pandoc/Writers/JATS.hs   |  4 +-
 test/writer.jats_archiving        | 68 +++++++++++++++++-------------
 test/writer.jats_articleauthoring | 88 +++++++++++++++++++++------------------
 test/writer.jats_publishing       | 68 +++++++++++++++++-------------
 4 files changed, 131 insertions(+), 97 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index b2266d179..c75d40745 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -156,9 +156,11 @@ wrappedBlocksToJATS needsWrap opts =
     wrappedBlockToJATS b = do
       inner <- blockToJATS opts b
       return $
-        if needsWrap b
+        if needsWrap b || isBlockQuote b -- see #7041
            then inTags True "p" [("specific-use","wrapper")] inner
            else inner
+    isBlockQuote (BlockQuote _) = True
+    isBlockQuote _ = False
 
 -- | Auxiliary function to convert Plain block to Para.
 plainToPara :: Block -> Block
diff --git a/test/writer.jats_archiving b/test/writer.jats_archiving
index a708e0367..2d06c5cab 100644
--- a/test/writer.jats_archiving
+++ b/test/writer.jats_archiving
@@ -78,31 +78,39 @@ Gruber’s markdown test suite.</p>
 <sec id="block-quotes">
   <title>Block Quotes</title>
   <p>E-mail style:</p>
-  <disp-quote>
-    <p>This is a block quote. It is pretty short.</p>
-  </disp-quote>
-  <disp-quote>
-    <p>Code in a block quote:</p>
-    <preformat>sub status {
-    print &quot;working&quot;;
-}</preformat>
-    <p>A list:</p>
-    <list list-type="order">
-      <list-item>
-        <p>item one</p>
-      </list-item>
-      <list-item>
-        <p>item two</p>
-      </list-item>
-    </list>
-    <p>Nested block quotes:</p>
+  <p specific-use="wrapper">
     <disp-quote>
-      <p>nested</p>
+      <p>This is a block quote. It is pretty short.</p>
     </disp-quote>
+  </p>
+  <p specific-use="wrapper">
     <disp-quote>
-      <p>nested</p>
+      <p>Code in a block quote:</p>
+      <preformat>sub status {
+    print &quot;working&quot;;
+}</preformat>
+      <p>A list:</p>
+      <list list-type="order">
+        <list-item>
+          <p>item one</p>
+        </list-item>
+        <list-item>
+          <p>item two</p>
+        </list-item>
+      </list>
+      <p>Nested block quotes:</p>
+      <p specific-use="wrapper">
+        <disp-quote>
+          <p>nested</p>
+        </disp-quote>
+      </p>
+      <p specific-use="wrapper">
+        <disp-quote>
+          <p>nested</p>
+        </disp-quote>
+      </p>
     </disp-quote>
-  </disp-quote>
+  </p>
   <p>This should not be a block quote: 2 &gt; 1.</p>
   <p>And a following paragraph.</p>
 </sec>
@@ -829,10 +837,12 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
       </list-item>
     </list>
     <p>An e-mail address: <email>nobody@nowhere.net</email></p>
-    <disp-quote>
-      <p>Blockquoted:
-      <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
-    </disp-quote>
+    <p specific-use="wrapper">
+      <disp-quote>
+        <p>Blockquoted:
+        <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
+      </disp-quote>
+    </p>
     <p>Auto-links should not occur here:
     <monospace>&lt;http://example.com/&gt;</monospace></p>
     <preformat>or here: &lt;http://example.com/&gt;</preformat>
@@ -856,9 +866,11 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
   <italic>not</italic> be a footnote reference, because it contains a
   space.[^my note] Here is an inline
   note.<xref ref-type="fn" rid="fn3">3</xref></p>
-  <disp-quote>
-    <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
-  </disp-quote>
+  <p specific-use="wrapper">
+    <disp-quote>
+      <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
+    </disp-quote>
+  </p>
   <list list-type="order">
     <list-item>
       <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>
diff --git a/test/writer.jats_articleauthoring b/test/writer.jats_articleauthoring
index 90437992e..65ad5e37c 100644
--- a/test/writer.jats_articleauthoring
+++ b/test/writer.jats_articleauthoring
@@ -67,39 +67,43 @@ Gruber’s markdown test suite.</p>
 <sec id="block-quotes">
   <title>Block Quotes</title>
   <p>E-mail style:</p>
-  <disp-quote>
-    <p>This is a block quote. It is pretty short.</p>
-  </disp-quote>
-  <disp-quote>
-    <p>Code in a block quote:</p>
-    <p specific-use="wrapper">
-      <preformat>sub status {
+  <p specific-use="wrapper">
+    <disp-quote>
+      <p>This is a block quote. It is pretty short.</p>
+    </disp-quote>
+  </p>
+  <p specific-use="wrapper">
+    <disp-quote>
+      <p>Code in a block quote:</p>
+      <p specific-use="wrapper">
+        <preformat>sub status {
     print &quot;working&quot;;
 }</preformat>
-    </p>
-    <p>A list:</p>
-    <p specific-use="wrapper">
-      <list list-type="order">
-        <list-item>
-          <p>item one</p>
-        </list-item>
-        <list-item>
-          <p>item two</p>
-        </list-item>
-      </list>
-    </p>
-    <p>Nested block quotes:</p>
-    <p specific-use="wrapper">
-      <disp-quote>
-        <p>nested</p>
-      </disp-quote>
-    </p>
-    <p specific-use="wrapper">
-      <disp-quote>
-        <p>nested</p>
-      </disp-quote>
-    </p>
-  </disp-quote>
+      </p>
+      <p>A list:</p>
+      <p specific-use="wrapper">
+        <list list-type="order">
+          <list-item>
+            <p>item one</p>
+          </list-item>
+          <list-item>
+            <p>item two</p>
+          </list-item>
+        </list>
+      </p>
+      <p>Nested block quotes:</p>
+      <p specific-use="wrapper">
+        <disp-quote>
+          <p>nested</p>
+        </disp-quote>
+      </p>
+      <p specific-use="wrapper">
+        <disp-quote>
+          <p>nested</p>
+        </disp-quote>
+      </p>
+    </disp-quote>
+  </p>
   <p>This should not be a block quote: 2 &gt; 1.</p>
   <p>And a following paragraph.</p>
 </sec>
@@ -813,10 +817,12 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
       </list-item>
     </list>
     <p>An e-mail address: <email>nobody@nowhere.net</email></p>
-    <disp-quote>
-      <p>Blockquoted:
-      <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
-    </disp-quote>
+    <p specific-use="wrapper">
+      <disp-quote>
+        <p>Blockquoted:
+        <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
+      </disp-quote>
+    </p>
     <p>Auto-links should not occur here:
     <monospace>&lt;http://example.com/&gt;</monospace></p>
     <preformat>or here: &lt;http://example.com/&gt;</preformat>
@@ -854,11 +860,13 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
     and <monospace>]</monospace> verbatim characters, as well as [bracketed
     text].</p>
   </fn></p>
-  <disp-quote>
-    <p>Notes can go in quotes.<fn>
-      <p>In quote.</p>
-    </fn></p>
-  </disp-quote>
+  <p specific-use="wrapper">
+    <disp-quote>
+      <p>Notes can go in quotes.<fn>
+        <p>In quote.</p>
+      </fn></p>
+    </disp-quote>
+  </p>
   <list list-type="order">
     <list-item>
       <p>And in list items.<fn>
diff --git a/test/writer.jats_publishing b/test/writer.jats_publishing
index 6d5a04057..915471576 100644
--- a/test/writer.jats_publishing
+++ b/test/writer.jats_publishing
@@ -78,31 +78,39 @@ Gruber’s markdown test suite.</p>
 <sec id="block-quotes">
   <title>Block Quotes</title>
   <p>E-mail style:</p>
-  <disp-quote>
-    <p>This is a block quote. It is pretty short.</p>
-  </disp-quote>
-  <disp-quote>
-    <p>Code in a block quote:</p>
-    <preformat>sub status {
-    print &quot;working&quot;;
-}</preformat>
-    <p>A list:</p>
-    <list list-type="order">
-      <list-item>
-        <p>item one</p>
-      </list-item>
-      <list-item>
-        <p>item two</p>
-      </list-item>
-    </list>
-    <p>Nested block quotes:</p>
+  <p specific-use="wrapper">
     <disp-quote>
-      <p>nested</p>
+      <p>This is a block quote. It is pretty short.</p>
     </disp-quote>
+  </p>
+  <p specific-use="wrapper">
     <disp-quote>
-      <p>nested</p>
+      <p>Code in a block quote:</p>
+      <preformat>sub status {
+    print &quot;working&quot;;
+}</preformat>
+      <p>A list:</p>
+      <list list-type="order">
+        <list-item>
+          <p>item one</p>
+        </list-item>
+        <list-item>
+          <p>item two</p>
+        </list-item>
+      </list>
+      <p>Nested block quotes:</p>
+      <p specific-use="wrapper">
+        <disp-quote>
+          <p>nested</p>
+        </disp-quote>
+      </p>
+      <p specific-use="wrapper">
+        <disp-quote>
+          <p>nested</p>
+        </disp-quote>
+      </p>
     </disp-quote>
-  </disp-quote>
+  </p>
   <p>This should not be a block quote: 2 &gt; 1.</p>
   <p>And a following paragraph.</p>
 </sec>
@@ -829,10 +837,12 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
       </list-item>
     </list>
     <p>An e-mail address: <email>nobody@nowhere.net</email></p>
-    <disp-quote>
-      <p>Blockquoted:
-      <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
-    </disp-quote>
+    <p specific-use="wrapper">
+      <disp-quote>
+        <p>Blockquoted:
+        <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
+      </disp-quote>
+    </p>
     <p>Auto-links should not occur here:
     <monospace>&lt;http://example.com/&gt;</monospace></p>
     <preformat>or here: &lt;http://example.com/&gt;</preformat>
@@ -856,9 +866,11 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
   <italic>not</italic> be a footnote reference, because it contains a
   space.[^my note] Here is an inline
   note.<xref ref-type="fn" rid="fn3">3</xref></p>
-  <disp-quote>
-    <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
-  </disp-quote>
+  <p specific-use="wrapper">
+    <disp-quote>
+      <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
+    </disp-quote>
+  </p>
   <list list-type="order">
     <list-item>
       <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>
-- 
cgit v1.2.3


From 87083bd1d6d715e349313dc6af9cbac82f1bb4ea Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Wed, 20 Jan 2021 19:08:02 +0100
Subject: Text.Pandoc.Citeproc: use finer grained imports

This allows to import the module in writers without causing a circular
dependency.
---
 src/Text/Pandoc/Citeproc.hs | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 9649c6971..6658c8c0c 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -18,33 +18,36 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
 import Text.Pandoc.Readers.Markdown (yamlToRefs)
-import Text.Pandoc.Class (setResourcePath, getResourcePath, getUserDataDir)
 import qualified Text.Pandoc.BCP47 as BCP47
-import Data.ByteString (ByteString)
-import qualified Data.ByteString.Lazy as L
+import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta)
+import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Definition as Pandoc
-import Text.Pandoc.Walk
-import Text.Pandoc.Builder as B
-import Text.Pandoc (PandocMonad(..), PandocError(..),
-                    readDataFile, ReaderOptions(..), pandocExtensions,
-                    report, LogMessage(..), fetchItem)
+import Text.Pandoc.Class (PandocMonad(..), getResourcePath, getUserDataDir,
+                          fetchItem, readDataFile, report, setResourcePath)
+import Text.Pandoc.Error (PandocError(..))
+import Text.Pandoc.Extensions (pandocExtensions)
+import Text.Pandoc.Logging (LogMessage(..))
+import Text.Pandoc.Options (ReaderOptions(..))
 import Text.Pandoc.Shared (stringify, ordNub, blocksToInlines, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Walk (query, walk, walkM)
+import Control.Applicative ((<|>))
+import Control.Monad.Except (catchError, throwError)
+import Control.Monad.State (State, evalState, get, put, runState)
 import Data.Aeson (eitherDecode)
-import Data.Default
-import Data.Ord ()
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Lazy as L
+import Data.Char (isPunctuation, isUpper)
+import Data.Default (Default(def))
+import qualified Data.Foldable as Foldable
 import qualified Data.Map as M
+import Data.Maybe (mapMaybe, fromMaybe)
+import Data.Ord ()
+import qualified Data.Sequence as Seq
 import qualified Data.Set as Set
-import Data.Char (isPunctuation, isUpper)
 import Data.Text (Text)
 import qualified Data.Text as T
-import Control.Monad.State
-import qualified Data.Sequence as Seq
-import qualified Data.Foldable as Foldable
-import System.FilePath
-import Control.Applicative
-import Control.Monad.Except
-import Data.Maybe (mapMaybe, fromMaybe)
+import System.FilePath (takeExtension)
 import Safe (lastMay, initSafe)
 -- import Debug.Trace as Trace (trace, traceShowId)
 
-- 
cgit v1.2.3


From fa952c8dbea9ad47ea684729f862a3c6bdd0fecc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 20 Jan 2021 21:08:01 -0800
Subject: Add biblatex, bibtex as output formats (closes #7040).

* `biblatex` and `bibtex` are now supported as output
  as well as input formats.

* New module Text.Pandoc.Writers.BibTeX, exporting
  writeBibTeX and writeBibLaTeX. [API change]

* New unexported function `writeBibtexString` in
  Text.Pandoc.Citeproc.BibTeX.
---
 MANUAL.txt                         |   8 +-
 pandoc.cabal                       |   1 +
 src/Text/Pandoc/Citeproc/BibTeX.hs | 254 ++++++++++++++++++++++++++++++++++++-
 src/Text/Pandoc/Templates.hs       |   2 +
 src/Text/Pandoc/Writers.hs         |   5 +
 src/Text/Pandoc/Writers/BibTeX.hs  |  48 +++++++
 6 files changed, 312 insertions(+), 6 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/BibTeX.hs

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index b9c4ef637..436e9d36c 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -281,6 +281,8 @@ header when requesting a document from a URL:
     ::: {#output-formats}
     - `asciidoc` ([AsciiDoc]) or `asciidoctor` ([AsciiDoctor])
     - `beamer` ([LaTeX beamer][`beamer`] slide show)
+    - `bibtex` ([BibTeX] bibliography)
+    - `biblatex` ([BibLaTeX] bibliography)
     - `commonmark` ([CommonMark] Markdown)
     - `commonmark_x` ([CommonMark] Markdown with extensions)
     - `context` ([ConTeXt])
@@ -5258,11 +5260,11 @@ section from a BibTeX, BibLaTeX, or CSL JSON bibliography:
     pandoc chem.bib -s -f biblatex -t markdown
     pandoc chem.json -s -f csljson -t markdown
 
-`pandoc` can also be used to produce CSL JSON bibliography
-from BibTeX, BibLaTeX, or markdown YAML:
+Indeed, `pandoc` can convert between any of these
+citation formats:
 
     pandoc chem.bib -s -f biblatex -t csljson
-    pandoc chem.yaml -s -f markdown -t csljson
+    pandoc chem.yaml -s -f markdown -t biblatex
 
 Running pandoc on a bibliography file with the `--citeproc`
 option will create a formatted bibliography in the format
diff --git a/pandoc.cabal b/pandoc.cabal
index 0d63cbe35..27c25069b 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -573,6 +573,7 @@ library
                    Text.Pandoc.Writers.Shared,
                    Text.Pandoc.Writers.OOXML,
                    Text.Pandoc.Writers.AnnotatedTable,
+                   Text.Pandoc.Writers.BibTeX,
                    Text.Pandoc.Lua,
                    Text.Pandoc.PDF,
                    Text.Pandoc.UTF8,
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index ed723a11c..10730a1e9 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE ViewPatterns      #-}
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE FlexibleContexts #-}
@@ -17,6 +18,7 @@
 module Text.Pandoc.Citeproc.BibTeX
     ( Variant(..)
     , readBibtexString
+    , writeBibtexString
     )
     where
 
@@ -24,10 +26,11 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Readers.LaTeX (readLaTeX)
 import Text.Pandoc.Extensions (Extension(..), extensionsFromList)
-import Text.Pandoc.Options (ReaderOptions(..))
-import Text.Pandoc.Class (runPure)
+import Text.Pandoc.Options (ReaderOptions(..), WriterOptions)
 import Text.Pandoc.Error (PandocError)
 import Text.Pandoc.Shared (stringify)
+import Text.Pandoc.Writers.LaTeX (writeLaTeX)
+import Text.Pandoc.Class (runPure)
 import qualified Text.Pandoc.Walk       as Walk
 import Citeproc.Types
 import Citeproc.Pandoc ()
@@ -46,8 +49,9 @@ import qualified Data.Sequence          as Seq
 import           Data.Char              (isAlphaNum, isDigit, isLetter,
                                          isUpper, toLower, toUpper,
                                          isLower, isPunctuation)
-import           Data.List              (foldl', intercalate)
+import           Data.List              (foldl', intercalate, intersperse)
 import           Safe                   (readMay)
+import           Text.Printf            (printf)
 
 data Variant = Bibtex | Biblatex
   deriving (Show, Eq, Ord)
@@ -68,6 +72,250 @@ readBibtexString variant locale idpred contents = do
           Left err -> Left err
           Right xs -> return xs
 
+-- | Write BibTeX or BibLaTeX given given a 'Reference'.
+writeBibtexString :: WriterOptions       -- ^ options (for writing LaTex)
+                  -> Variant             -- ^ bibtex or biblatex
+                  -> Maybe Lang          -- ^ Language
+                  -> Reference Inlines   -- ^ Reference to write
+                  -> Text
+writeBibtexString opts variant mblang ref =
+  "@" <> bibtexType <> "{" <> unItemId (referenceId ref) <> ",\n  " <>
+  renderFields fs <> "\n}\n"
+
+ where
+  bibtexType =
+    case referenceType ref of
+      "article-magazine"  -> "article"
+      "article-newspaper" -> "article"
+      "article-journal"   -> "article"
+      "book"              -> "book"
+      "pamphlet"          -> "booklet"
+      "dataset" | variant == Biblatex -> "dataset"
+      "webpage" | variant == Biblatex -> "online"
+      "chapter"           -> case getVariable "editor" of
+                                Just _  -> "incollection"
+                                Nothing -> "inbook"
+      "entry-encyclopedia" | variant == Biblatex -> "inreference"
+                           | otherwise -> "inbook"
+      "paper-conference"  -> "inproceedings"
+      "thesis" -> case getVariableAsText "genre" of
+                    Just "mathesis" -> "mastersthesis"
+                    _               -> "phdthesis"
+      "patent"            | variant == Biblatex -> "patent"
+      "report"            | variant == Biblatex -> "report"
+                          | otherwise -> "techreport"
+      "speech"            -> "unpublished"
+      "manuscript"        -> "unpublished"
+      "graphic"           | variant == Biblatex -> "artwork"
+      "song"              | variant == Biblatex -> "music"
+      "legal_case"        | variant == Biblatex -> "jurisdictionN"
+      "legislation"       | variant == Biblatex -> "legislation"
+      "treaty"            | variant == Biblatex -> "legal"
+      "personal_communication" | variant == Biblatex -> "letter"
+      "motion_picture"    | variant == Biblatex -> "movie"
+      "review"             | variant == Biblatex -> "review"
+      _                   -> "misc"
+  
+  mbSubtype =
+    case referenceType ref of
+      "article-magazine"  -> Just "magazine"
+      "article-newspaper" -> Just "newspaper"
+      _ -> Nothing
+
+  fs =
+    case variant of
+      Biblatex ->
+           [ "author"
+           , "editor"
+           , "translator"
+           , "publisher"
+           , "title"
+           , "booktitle"
+           , "journal"
+           , "series"
+           , "edition"
+           , "volume"
+           , "volumes"
+           , "number"
+           , "pages"
+           , "date"
+           , "eventdate"
+           , "urldate"
+           , "address"
+           , "url"
+           , "doi"
+           , "isbn"
+           , "issn"
+           , "type"
+           , "entrysubtype"
+           , "note"
+           , "language"
+           , "abstract"
+           , "keywords"
+           ]
+      Bibtex ->
+           [ "author"
+           , "editor"
+           , "translator"
+           , "publisher"
+           , "title"
+           , "booktitle"
+           , "journal"
+           , "series"
+           , "edition"
+           , "volume"
+           , "number"
+           , "pages"
+           , "year"
+           , "month"
+           , "address"
+           , "type"
+           , "note"
+           ]
+
+  valToInlines (TextVal t) = B.text t
+  valToInlines (FancyVal ils) = ils
+  valToInlines (NumVal n) = B.text (T.pack $ show n)
+  valToInlines (NamesVal names) =
+    mconcat $ intersperse (B.space <> B.text "and" <> B.space)
+            $ map renderName names
+  valToInlines (DateVal date) = B.text $
+    case dateLiteral date of
+      Just t  -> t
+      Nothing -> T.intercalate "/" (map renderDatePart (dateParts date)) <>
+                    (if dateCirca date then "~" else mempty)
+
+  renderDatePart (DateParts xs) = T.intercalate "-" $
+                                    map (T.pack . printf "%02d") xs
+
+  renderName name =
+    case nameLiteral name of
+      Just t  -> B.text t
+      Nothing -> spacedMaybes
+                  [ nameNonDroppingParticle name
+                  , nameFamily name
+                  , if nameCommaSuffix name
+                        then (", " <>) <$> nameSuffix name
+                        else nameSuffix name ]
+                  <>
+                  spacedMaybes
+                   [ (", " <>) <$> nameGiven name,
+                     nameDroppingParticle name ]
+
+  titlecase = case mblang of
+                Just (Lang "en" _) -> titlecase'
+                Nothing            -> titlecase'
+                _                  -> id
+
+  titlecase' = addTextCase mblang TitleCase .
+    (\ils -> B.fromList
+               (case B.toList ils of
+                  Str t : xs -> Str t : Walk.walk spanAroundCapitalizedWords xs
+                  xs         -> Walk.walk spanAroundCapitalizedWords xs))
+
+  -- protect capitalized words when we titlecase
+  spanAroundCapitalizedWords (Str t)
+    | not (T.all (\c -> isLower c || not (isLetter c)) t) =
+       Span ("",["nocase"],[]) [Str t]
+  spanAroundCapitalizedWords x = x
+
+  spacedMaybes = mconcat . intersperse B.space . mapMaybe (fmap B.text)
+
+  toLaTeX x =
+    case runPure (writeLaTeX opts $ doc (B.plain x)) of
+           Left _  -> Nothing
+           Right t -> Just t
+
+  renderField name = (\contents -> name <> " = {" <> contents <> "}")
+                      <$> getContentsFor name
+
+  getVariable v = lookupVariable (toVariable v) ref
+
+  getVariableAsText v = (stringify . valToInlines) <$> getVariable v
+
+  getYear val =
+    case val of
+       DateVal date ->
+         case dateLiteral date of
+           Just t -> toLaTeX (B.text t)
+           Nothing ->
+             case dateParts date of
+               [DateParts (y1:_), DateParts (y2:_)] ->
+                 Just (T.pack (printf "%04d" y1) <> "--" <>
+                        T.pack (printf "%04d" y2))
+               [DateParts (y1:_)] ->
+                 Just (T.pack (printf "%04d" y1))
+               _ -> Nothing
+       _ -> Nothing
+
+  toMonth 1 = "jan"
+  toMonth 2 = "feb"
+  toMonth 3 = "mar"
+  toMonth 4 = "apr"
+  toMonth 5 = "may"
+  toMonth 6 = "jun"
+  toMonth 7 = "jul"
+  toMonth 8 = "aug"
+  toMonth 9 = "sep"
+  toMonth 10 = "oct"
+  toMonth 11 = "nov"
+  toMonth 12 = "dec"
+  toMonth x  = T.pack $ show x
+
+  getMonth val =
+    case val of
+       DateVal date ->
+         case dateParts date of
+           [DateParts (_:m1:_), DateParts (_:m2:_)] ->
+             Just (toMonth m1 <> "--" <> toMonth m2)
+           [DateParts (_:m1:_)] -> Just (toMonth m1)
+           _ -> Nothing
+       _ -> Nothing
+
+  getContentsFor :: Text -> Maybe Text
+  getContentsFor "type" =
+    getVariableAsText "genre" >>=
+       \case
+          "mathesis"  -> Just "mastersthesis"
+          "phdthesis" -> Just "phdthesis"
+          _           -> Nothing
+  getContentsFor "entrysubtype" = mbSubtype
+  getContentsFor "journal"
+    | bibtexType `elem` ["article", "periodical", "suppperiodical", "review"]
+      = getVariable "container-title" >>= toLaTeX . valToInlines
+    | otherwise = Nothing
+  getContentsFor "booktitle"
+    | bibtexType `elem`
+       ["inbook","incollection","inproceedings","inreference","bookinbook"]
+    = (getVariable "volume-title" <|> getVariable "container-title")
+                               >>= toLaTeX . valToInlines
+    | otherwise = Nothing
+  getContentsFor "series" = getVariable "collection-title"
+                               >>= toLaTeX . valToInlines
+  getContentsFor "address" = getVariable "publisher-place"
+                               >>= toLaTeX . valToInlines
+  getContentsFor "date"  = getVariable "issued" >>= toLaTeX . valToInlines
+  getContentsFor "eventdate" = getVariable "event-date" >>= toLaTeX . valToInlines
+  getContentsFor "urldate"  = getVariable "accessed" >>= toLaTeX . valToInlines
+  getContentsFor "year"  = getVariable "issued" >>= getYear
+  getContentsFor "month"  = getVariable "issued" >>= getMonth
+  getContentsFor "number" = (getVariable "number"
+                         <|> getVariable "collection-number"
+                         <|> getVariable "issue") >>= toLaTeX . valToInlines
+
+  getContentsFor x = getVariable x >>=
+    if isURL x
+       then Just . stringify . valToInlines
+       else toLaTeX .
+            (if x == "title"
+                then titlecase
+                else id) .
+            valToInlines
+
+  isURL x = x `elem` ["url","doi","issn","isbn"]
+
+  renderFields = T.intercalate ",\n  " . mapMaybe renderField
+
 defaultLang :: Lang
 defaultLang = Lang "en" (Just "US")
 
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index e83f26329..3e539bff7 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -81,6 +81,8 @@ getDefaultTemplate writer = do
   case format of
        "native"  -> return ""
        "csljson" -> return ""
+       "bibtex"  -> return ""
+       "biblatex" -> return ""
        "json"    -> return ""
        "docx"    -> return ""
        "fb2"     -> return ""
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index 49531d924..95d6270b5 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -21,6 +21,8 @@ module Text.Pandoc.Writers
     , writeAsciiDoc
     , writeAsciiDoctor
     , writeBeamer
+    , writeBibTeX
+    , writeBibLaTeX
     , writeCommonMark
     , writeConTeXt
     , writeCustom
@@ -85,6 +87,7 @@ import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Error
 import Text.Pandoc.Writers.AsciiDoc
+import Text.Pandoc.Writers.BibTeX
 import Text.Pandoc.Writers.CommonMark
 import Text.Pandoc.Writers.ConTeXt
 import Text.Pandoc.Writers.CslJson
@@ -185,6 +188,8 @@ writers = [
   ,("tei"          , TextWriter writeTEI)
   ,("muse"         , TextWriter writeMuse)
   ,("csljson"      , TextWriter writeCslJson)
+  ,("bibtex"       , TextWriter writeBibTeX)
+  ,("biblatex"     , TextWriter writeBibLaTeX)
   ]
 
 -- | Retrieve writer, extensions based on formatSpec (format+extensions).
diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs
new file mode 100644
index 000000000..e1cb47ca1
--- /dev/null
+++ b/src/Text/Pandoc/Writers/BibTeX.hs
@@ -0,0 +1,48 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.BibTeX
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Writes a BibTeX or BibLaTeX bibliographies based on the
+'references' metadata in a Pandoc document.
+-}
+module Text.Pandoc.Writers.BibTeX
+  ( writeBibTeX
+  , writeBibLaTeX
+  )
+where
+
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Data.Text (Text)
+import Data.Maybe (mapMaybe)
+import Citeproc (parseLang)
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Citeproc.BibTeX as BibTeX
+import Text.Pandoc.Citeproc.MetaValue (metaValueToReference)
+import Text.Pandoc.Writers.Shared (lookupMetaString)
+
+-- | Write BibTeX based on the references metadata from a Pandoc document.
+writeBibTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeBibTeX = writeBibTeX' BibTeX.Bibtex
+
+-- | Write BibLaTeX based on the references metadata from a Pandoc document.
+writeBibLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeBibLaTeX = writeBibTeX' BibTeX.Biblatex
+
+writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text
+writeBibTeX' variant opts (Pandoc meta _) = do
+  let mblang = case lookupMetaString "lang" meta of
+                 "" -> Nothing
+                 t  -> Just $ parseLang t
+  let refs = case lookupMeta "references" meta of
+               Just (MetaList xs) -> mapMaybe metaValueToReference xs
+               _ -> []
+  return $ mconcat $
+    map (BibTeX.writeBibtexString opts variant mblang) refs
+
-- 
cgit v1.2.3


From b4b3560191b3699dd4db9d069244925a3c6074db Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Wed, 20 Jan 2021 19:09:36 +0100
Subject: JATS writer: allow to use element-citation

---
 MANUAL.txt                                 |   7 ++
 pandoc.cabal                               |   1 +
 src/Text/Pandoc/Extensions.hs              |  14 +++
 src/Text/Pandoc/Writers/JATS.hs            |  21 ++--
 src/Text/Pandoc/Writers/JATS/References.hs | 160 +++++++++++++++++++++++++++++
 src/Text/Pandoc/Writers/JATS/Types.hs      |   4 +-
 test/command/7042.md                       | 146 ++++++++++++++++++++++++++
 7 files changed, 346 insertions(+), 7 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/JATS/References.hs
 create mode 100644 test/command/7042.md

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index b9c4ef637..7bf74a8f9 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3093,6 +3093,13 @@ output format.
 Some aspects of [Pandoc's Markdown citation syntax](#citations)
 are also accepted in `org` input.
 
+#### Extension: `element_citations` ####
+
+In the `jats` output formats, this causes reference items to
+be replaced with `<element-citation>` elements. These
+elements are not influenced by CSL styles, but all information
+on the item is included in tags.
+
 #### Extension: `ntb` ####
 
 In the `context` output format this enables the use of [Natural Tables
diff --git a/pandoc.cabal b/pandoc.cabal
index 0d63cbe35..07feb10dd 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -638,6 +638,7 @@ library
                    Text.Pandoc.Readers.Metadata,
                    Text.Pandoc.Readers.Roff,
                    Text.Pandoc.Writers.Docx.StyleMap,
+                   Text.Pandoc.Writers.JATS.References,
                    Text.Pandoc.Writers.JATS.Table,
                    Text.Pandoc.Writers.JATS.Types,
                    Text.Pandoc.Writers.LaTeX.Caption,
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 39c2a0489..7aa32c52c 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -88,6 +88,7 @@ data Extension =
                                   --   does not affect readers/writers directly; it causes
                                   --   the eastAsianLineBreakFilter to be applied after
                                   --   parsing, in Text.Pandoc.App.convertWithOpts.
+    | Ext_element_citations   -- ^ Use element-citation elements for JATS citations
     | Ext_emoji               -- ^ Support emoji like :smile:
     | Ext_empty_paragraphs -- ^ Allow empty paragraphs
     | Ext_epub_html_exts      -- ^ Recognise the EPUB extended version of HTML
@@ -412,6 +413,11 @@ getDefaultExtensions "textile"         = extensionsFromList
                                            Ext_smart,
                                            Ext_raw_html,
                                            Ext_auto_identifiers]
+getDefaultExtensions "jats"            = extensionsFromList
+                                          [Ext_auto_identifiers]
+getDefaultExtensions "jats_archiving"  = getDefaultExtensions "jats"
+getDefaultExtensions "jats_publishing" = getDefaultExtensions "jats"
+getDefaultExtensions "jats_articleauthoring" = getDefaultExtensions "jats"
 getDefaultExtensions "opml"            = pandocExtensions -- affects notes
 getDefaultExtensions _                 = extensionsFromList
                                           [Ext_auto_identifiers]
@@ -554,6 +560,14 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_smart
     , Ext_raw_tex
     ]
+  getAll "jats"            =
+    extensionsFromList
+    [ Ext_auto_identifiers
+    , Ext_element_citations
+    ]
+  getAll "jats_archiving"  = getAll "jats"
+  getAll "jats_publishing" = getAll "jats"
+  getAll "jats_articleauthoring" = getAll "jats"
   getAll "opml"            = allMarkdownExtensions -- affects notes
   getAll "twiki"           = autoIdExtensions <>
     extensionsFromList
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index c75d40745..a9369db7a 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -29,6 +29,7 @@ import Data.Maybe (fromMaybe)
 import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime)
 import qualified Data.Text as T
 import Data.Text (Text)
+import Text.Pandoc.Citeproc (getReferences)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (languages, languagesByExtension)
@@ -40,6 +41,7 @@ import Text.DocLayout
 import Text.Pandoc.Shared
 import Text.Pandoc.Templates (renderTemplate)
 import Text.DocTemplates (Context(..), Val(..))
+import Text.Pandoc.Writers.JATS.References (referencesToJATS)
 import Text.Pandoc.Writers.JATS.Table (tableToJATS)
 import Text.Pandoc.Writers.JATS.Types
 import Text.Pandoc.Writers.Math
@@ -71,15 +73,19 @@ writeJATS = writeJatsArchiving
 
 -- | Convert a @'Pandoc'@ document to JATS.
 writeJats :: PandocMonad m => JATSTagSet -> WriterOptions -> Pandoc -> m Text
-writeJats tagSet opts d =
-  runReaderT (evalStateT (docToJATS opts d) initialState)
-             environment
-  where initialState = JATSState { jatsNotes = [] }
-        environment = JATSEnv
+writeJats tagSet opts d = do
+  refs <- if extensionEnabled Ext_element_citations $ writerExtensions opts
+          then getReferences Nothing d
+          else pure []
+  let environment = JATSEnv
           { jatsTagSet = tagSet
           , jatsInlinesWriter = inlinesToJATS
           , jatsBlockWriter = blockToJATS
+          , jatsReferences = refs
           }
+  let initialState = JATSState { jatsNotes = [] }
+  runReaderT (evalStateT (docToJATS opts d) initialState)
+             environment
 
 -- | Convert Pandoc document to string in JATS format.
 docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text
@@ -258,7 +264,10 @@ blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident =
   inTagsSimple "mixed-citation" <$>
   inlinesToJATS opts lst
 blockToJATS opts (Div ("refs",_,_) xs) = do
-  contents <- blocksToJATS opts xs
+  refs <- asks jatsReferences
+  contents <- if null refs
+              then blocksToJATS opts xs
+              else referencesToJATS opts refs
   return $ inTagsIndented "ref-list" contents
 blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do
   contents <- blocksToJATS opts bs
diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs
new file mode 100644
index 000000000..4ee7eb9dd
--- /dev/null
+++ b/src/Text/Pandoc/Writers/JATS/References.hs
@@ -0,0 +1,160 @@
+{-# LANGUAGE LambdaCase        #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.JATS.References
+   Copyright   : © 2021 Albert Krewinkel
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
+   Stability   : alpha
+   Portability : portable
+
+Creation of a bibliography list using @<element-citation>@ elements in
+reference items.
+-}
+module Text.Pandoc.Writers.JATS.References
+  ( referencesToJATS
+  , referenceToJATS
+  ) where
+
+import Citeproc.Pandoc ()
+import Citeproc.Types
+  ( Date (..), DateParts (..), ItemId (..), Name (..), Reference (..)
+  , Val (..) , lookupVariable, valToText
+  )
+import Data.Text (Text)
+import Text.DocLayout (Doc, empty, isEmpty, literal, vcat)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Builder (Inlines)
+import Text.Pandoc.Options (WriterOptions)
+import Text.Pandoc.Shared (tshow)
+import Text.Pandoc.Writers.JATS.Types
+import Text.Pandoc.XML (inTags)
+import qualified Data.Text as T
+
+referencesToJATS :: PandocMonad m
+                 => WriterOptions
+                 -> [Reference Inlines]
+                 -> JATS m (Doc Text)
+referencesToJATS opts =
+  fmap (inTags True "ref-list" [] . vcat) . mapM (referenceToJATS opts)
+
+referenceToJATS :: PandocMonad m
+                => WriterOptions
+                -> Reference Inlines
+                -> JATS m (Doc Text)
+referenceToJATS _opts ref = do
+  let refType = referenceType ref
+  let pubType = [("publication-type", refType) | not (T.null refType)]
+  let wrap = inTags True "ref" [("id", "ref-" <> unItemId (referenceId ref))]
+           . inTags True "element-citation" pubType
+  return . wrap . vcat $
+    [ authors
+    , "title" `varInTag`
+      if refType == "book"
+      then "source"
+      else "article-title"
+    , if refType == "book"
+      then empty
+      else "container-title" `varInTag` "source"
+    , editors
+    , "publisher"       `varInTag` "publisher-name"
+    , "publisher-place" `varInTag` "publisher-loc"
+    , yearTag
+    , accessed
+    , "volume"          `varInTag` "volume"
+    , "issue"           `varInTag` "issue"
+    , "page-first"      `varInTag` "fpage"
+    , "page-last"       `varInTag` "lpage"
+    , "pages"           `varInTag` "page-range"
+    , "ISBN"            `varInTag` "isbn"
+    , "ISSN"            `varInTag` "issn"
+    , varInTagWith "doi"  "pub-id" [("pub-id-type", "doi")]
+    , varInTagWith "pmid" "pub-id" [("pub-id-type", "pmid")]
+    ]
+  where
+    varInTag var tagName = varInTagWith var tagName []
+
+    varInTagWith var tagName tagAttribs =
+      case lookupVariable var ref >>= valToText of
+        Nothing  -> mempty
+        Just val -> inTags' tagName tagAttribs $ literal val
+
+    authors = case lookupVariable "author" ref of
+      Just (NamesVal names) ->
+        inTags True "person-group" [("person-group-type", "author")] . vcat $
+        map toNameElements names
+      _                     -> empty
+
+    editors = case lookupVariable "editor" ref of
+      Just (NamesVal names) ->
+        inTags True "person-group" [("person-group-type", "editor")] . vcat $
+        map toNameElements names
+      _                     -> empty
+
+    yearTag =
+      case lookupVariable "issued" ref of
+        Just (DateVal date) -> toDateElements date
+        _ -> empty
+
+    accessed =
+      case lookupVariable "accessed" ref of
+        Just (DateVal d) -> inTags' "date-in-citation"
+                                    [("content-type", "access-date")]
+                                    (toDateElements d)
+        _ -> empty
+
+toDateElements :: Date -> Doc Text
+toDateElements date =
+  case dateParts date of
+    dp@(DateParts (y:m:d:_)):_ -> yearElement y dp <>
+                                  monthElement m <>
+                                  dayElement d
+    dp@(DateParts (y:m:_)):_   -> yearElement y dp <> monthElement m
+    dp@(DateParts (y:_)):_     -> yearElement y dp
+    _                          -> empty
+
+yearElement :: Int -> DateParts -> Doc Text
+yearElement year dp =
+  inTags' "year" [("iso-8601-date", iso8601 dp)] $ literal (fourDigits year)
+
+monthElement :: Int -> Doc Text
+monthElement month = inTags' "month" [] . literal $ twoDigits month
+
+dayElement :: Int -> Doc Text
+dayElement day = inTags' "day" [] . literal $ twoDigits day
+
+iso8601 :: DateParts -> Text
+iso8601 = T.intercalate "-" . \case
+  DateParts (y:m:d:_) -> [fourDigits y, twoDigits m, twoDigits d]
+  DateParts (y:m:_)   -> [fourDigits y, twoDigits m]
+  DateParts (y:_)     -> [fourDigits y]
+  _                   -> []
+
+twoDigits :: Int -> Text
+twoDigits n = T.takeEnd 2 $ '0' `T.cons` tshow n
+
+fourDigits :: Int -> Text
+fourDigits n = T.takeEnd 4 $ "000" <> tshow n
+
+toNameElements :: Name -> Doc Text
+toNameElements name =
+  if not (isEmpty nameTags)
+  then inTags' "name" [] nameTags
+  else nameLiteral name `inNameTag` "string-name"
+    where
+      inNameTag val tag = maybe empty (inTags' tag [] . literal) val
+      surnamePrefix = maybe mempty (`T.snoc` ' ') $
+                      nameNonDroppingParticle name
+      givenSuffix = maybe mempty (T.cons ' ') $
+                    nameDroppingParticle name
+      nameTags = mconcat
+        [ ((surnamePrefix <>) <$> nameFamily name) `inNameTag` "surname"
+        , ((<> givenSuffix) <$> nameGiven name) `inNameTag` "given-names"
+        , nameSuffix name `inNameTag` "suffix"
+        ]
+
+-- | Put the supplied contents between start and end tags of tagType,
+--   with specified attributes.
+inTags' :: Text -> [(Text, Text)] -> Doc Text -> Doc Text
+inTags' = inTags False
diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs
index 54ed4a8bd..6fdddc0b5 100644
--- a/src/Text/Pandoc/Writers/JATS/Types.hs
+++ b/src/Text/Pandoc/Writers/JATS/Types.hs
@@ -17,11 +17,12 @@ module Text.Pandoc.Writers.JATS.Types
   )
 where
 
+import Citeproc.Types (Reference)
 import Control.Monad.Reader (ReaderT)
 import Control.Monad.State (StateT)
 import Data.Text (Text)
 import Text.DocLayout (Doc)
-import Text.Pandoc.Definition (Block, Inline)
+import Text.Pandoc.Builder (Block, Inline, Inlines)
 import Text.Pandoc.Options (WriterOptions)
 
 -- | JATS tag set variant
@@ -40,6 +41,7 @@ data JATSEnv m = JATSEnv
   { jatsTagSet :: JATSTagSet
   , jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text)
   , jatsBlockWriter   :: WriterOptions -> Block    -> JATS m (Doc Text)
+  , jatsReferences    :: [Reference Inlines]
   }
 
 -- | JATS writer type
diff --git a/test/command/7042.md b/test/command/7042.md
new file mode 100644
index 000000000..de0294da3
--- /dev/null
+++ b/test/command/7042.md
@@ -0,0 +1,146 @@
+```
+% pandoc -f markdown -t jats_publishing+element_citations --citeproc -s
+---
+nocite: "[@*]"
+references:
+- author:
+  - family: Jane
+    given: Doe
+  container-title: Public Library of Tests
+  id: year-month
+  issued: 1999-08
+  title: Year and month
+  type: article-journal
+- accessed: 1999-01-22
+  author:
+  - family: Negidius
+    given: Numerius
+  container-title: Public Library of Tests
+  id: access-date
+  issued: 1911-10-03
+  title: Entry with access date
+  type: article-journal
+- author:
+  - family: Beethoven
+    given: Ludwig
+    dropping-particle: van
+  - family: Bray
+    given: Jan
+    non-dropping-particle: de
+  container-title: Public Library of Tests
+  id: name-particles
+  issued: 1820
+  title: Name particles, dropping and non-dropping
+  type: article-journal
+- author:
+  - 宮水 三葉
+  - 立花 瀧
+  title: Big Book of Tests
+  id: book-with-japanese-authors
+  issued: 2016
+  type: book
+- author:
+  - family: Watson
+    given: J. D.
+  - family: Crick
+    given: F. H. C.
+  container-title: Nature
+  doi: '10.1038/171737a0'
+  id: full-journal-article-entry
+  issue: 4356
+  issued: '1953-04-01'
+  pages: 737-738
+  pmid: 13054692
+  title: 'Molecular Structure of Nucleic Acids: A Structure for Deoxyribose Nucleic Acid'
+  type: article-journal
+  volume: 171
+...
+^D
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN"
+                  "JATS-publishing1.dtd">
+<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other">
+<front>
+<journal-meta>
+<journal-id></journal-id>
+<journal-title-group>
+</journal-title-group>
+<issn></issn>
+<publisher>
+<publisher-name></publisher-name>
+</publisher>
+</journal-meta>
+<article-meta>
+</article-meta>
+</front>
+<body>
+
+</body>
+<back>
+<ref-list>
+  <ref-list>
+    <ref id="ref-year-month">
+      <element-citation publication-type="article-journal">
+        <person-group person-group-type="author">
+          <name><surname>Jane</surname><given-names>Doe</given-names></name>
+        </person-group>
+        <article-title>Year and month</article-title>
+        <source>Public Library of Tests</source>
+        <year iso-8601-date="1999-08">1999</year><month>08</month>
+      </element-citation>
+    </ref>
+    <ref id="ref-access-date">
+      <element-citation publication-type="article-journal">
+        <person-group person-group-type="author">
+          <name><surname>Negidius</surname><given-names>Numerius</given-names></name>
+        </person-group>
+        <article-title>Entry with access date</article-title>
+        <source>Public Library of Tests</source>
+        <year iso-8601-date="1911-10-03">1911</year><month>10</month><day>03</day>
+        <date-in-citation content-type="access-date"><year iso-8601-date="1999-01-22">1999</year><month>01</month><day>22</day></date-in-citation>
+      </element-citation>
+    </ref>
+    <ref id="ref-name-particles">
+      <element-citation publication-type="article-journal">
+        <person-group person-group-type="author">
+          <name><surname>Beethoven</surname><given-names>Ludwig van</given-names></name>
+          <name><surname>de Bray</surname><given-names>Jan</given-names></name>
+        </person-group>
+        <article-title>Name particles, dropping and non-dropping</article-title>
+        <source>Public Library of Tests</source>
+        <year iso-8601-date="1820">1820</year>
+      </element-citation>
+    </ref>
+    <ref id="ref-book-with-japanese-authors">
+      <element-citation publication-type="book">
+        <person-group person-group-type="author">
+          <string-name>宮水 三葉</string-name>
+          <string-name>立花 瀧</string-name>
+        </person-group>
+        <source>Big Book of Tests</source>
+        <year iso-8601-date="2016">2016</year>
+      </element-citation>
+    </ref>
+    <ref id="ref-full-journal-article-entry">
+      <element-citation publication-type="article-journal">
+        <person-group person-group-type="author">
+          <name><surname>Watson</surname><given-names>J. D.</given-names></name>
+          <name><surname>Crick</surname><given-names>F. H. C.</given-names></name>
+        </person-group>
+        <article-title>Molecular Structure of Nucleic Acids: A Structure for Deoxyribose Nucleic Acid</article-title>
+        <source>Nature</source>
+        <year iso-8601-date="1953-04-01">1953</year><month>04</month><day>01</day>
+        <volume>171</volume>
+        <issue>4356</issue>
+        <fpage>737</fpage>
+        <page-range>737-738</page-range>
+        <pub-id pub-id-type="doi">10.1038/171737a0</pub-id>
+        <pub-id pub-id-type="pmid">13054692</pub-id>
+      </element-citation>
+    </ref>
+  </ref-list>
+</ref-list>
+</back>
+</article>
+
+```
-- 
cgit v1.2.3


From 198ce0cde99731680b62294a7dff1b218b29bd4d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 22 Jan 2021 20:49:41 -0800
Subject: ImageSize:   use viewBox for svg if no length, width.

This change allows pandoc to extract size information
from more SVGs.  Closes #7045.
---
 src/Text/Pandoc/ImageSize.hs |  8 ++++++--
 test/command/svg.md          | 30 ++++++++++++++----------------
 2 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index 098c16721..e19958f6a 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -328,12 +328,16 @@ getSize img =
 svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
 svgSize opts img = do
   doc <- Xml.parseXMLDoc $ UTF8.toString img
+  let viewboxSize = do
+        vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
+        [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
+        return (w,h)
   let dpi = fromIntegral $ writerDpi opts
   let dirToInt dir = do
         dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack
         return $ inPixel opts dim
-  w <- dirToInt "width"
-  h <- dirToInt "height"
+  w <- dirToInt "width" <|> (fst <$> viewboxSize)
+  h <- dirToInt "height" <|> (snd <$> viewboxSize)
   return ImageSize {
     pxX  = w
   , pxY  = h
diff --git a/test/command/svg.md b/test/command/svg.md
index 57506570b..4ba836b20 100644
--- a/test/command/svg.md
+++ b/test/command/svg.md
@@ -36,28 +36,27 @@
 % pandoc -f latex -t icml
 \includegraphics{command/SVG_logo.svg}
 ^D
-[WARNING] Could not determine image size for 'command/SVG_logo.svg': could not determine SVG size
 <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph">
   <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
-    <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 150 -100">
+    <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 37.5 -37.5">
       <Properties>
         <PathGeometry>
           <GeometryPathType PathOpen="false">
             <PathPointArray>
-              <PathPointType Anchor="-150 -100" LeftDirection="-150 -100" RightDirection="-150 -100" />
-              <PathPointType Anchor="-150 100" LeftDirection="-150 100" RightDirection="-150 100" />
-              <PathPointType Anchor="150 100" LeftDirection="150 100" RightDirection="150 100" />
-              <PathPointType Anchor="150 -100" LeftDirection="150 -100" RightDirection="150 -100" />
+              <PathPointType Anchor="-37.5 -37.5" LeftDirection="-37.5 -37.5" RightDirection="-37.5 -37.5" />
+              <PathPointType Anchor="-37.5 37.5" LeftDirection="-37.5 37.5" RightDirection="-37.5 37.5" />
+              <PathPointType Anchor="37.5 37.5" LeftDirection="37.5 37.5" RightDirection="37.5 37.5" />
+              <PathPointType Anchor="37.5 -37.5" LeftDirection="37.5 -37.5" RightDirection="37.5 -37.5" />
             </PathPointArray>
           </GeometryPathType>
         </PathGeometry>
       </Properties>
-      <Image Self="ue6" ItemTransform="1 0 0 1 -150 -100">
+      <Image Self="ue6" ItemTransform="1 0 0 1 -37.5 -37.5">
         <Properties>
           <Profile type="string">
             $ID/Embedded
           </Profile>
-          <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" />
+          <GraphicBounds Left="0" Top="0" Right="75" Bottom="75" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:command/SVG_logo.svg" />
       </Image>
@@ -70,28 +69,27 @@
 % pandoc -f latex -t icml
 \includegraphics{command/SVG_logo-without-xml-declaration.svg}
 ^D
-[WARNING] Could not determine image size for 'command/SVG_logo-without-xml-declaration.svg': could not determine SVG size
 <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph">
   <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
-    <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 150 -100">
+    <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 37.5 -37.5">
       <Properties>
         <PathGeometry>
           <GeometryPathType PathOpen="false">
             <PathPointArray>
-              <PathPointType Anchor="-150 -100" LeftDirection="-150 -100" RightDirection="-150 -100" />
-              <PathPointType Anchor="-150 100" LeftDirection="-150 100" RightDirection="-150 100" />
-              <PathPointType Anchor="150 100" LeftDirection="150 100" RightDirection="150 100" />
-              <PathPointType Anchor="150 -100" LeftDirection="150 -100" RightDirection="150 -100" />
+              <PathPointType Anchor="-37.5 -37.5" LeftDirection="-37.5 -37.5" RightDirection="-37.5 -37.5" />
+              <PathPointType Anchor="-37.5 37.5" LeftDirection="-37.5 37.5" RightDirection="-37.5 37.5" />
+              <PathPointType Anchor="37.5 37.5" LeftDirection="37.5 37.5" RightDirection="37.5 37.5" />
+              <PathPointType Anchor="37.5 -37.5" LeftDirection="37.5 -37.5" RightDirection="37.5 -37.5" />
             </PathPointArray>
           </GeometryPathType>
         </PathGeometry>
       </Properties>
-      <Image Self="ue6" ItemTransform="1 0 0 1 -150 -100">
+      <Image Self="ue6" ItemTransform="1 0 0 1 -37.5 -37.5">
         <Properties>
           <Profile type="string">
             $ID/Embedded
           </Profile>
-          <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" />
+          <GraphicBounds Left="0" Top="0" Right="75" Bottom="75" />
         </Properties>
         <Link Self="ueb" LinkResourceURI="file:command/SVG_logo-without-xml-declaration.svg" />
       </Image>
-- 
cgit v1.2.3


From 490065f3ed3dd9377a740ad6fcbc441a658889dd Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 26 Jan 2021 14:19:30 +0100
Subject: Lua: always load built-in Lua scripts from default data-dir

The Lua modules `pandoc` and `pandoc.List` are now always loaded from the
system's default data directory. Loading from a different directory by
overriding the default path, e.g. via `--data-dir`, is no longer supported to
avoid unexpected behavior and to address security concerns.
---
 src/Text/Pandoc/Lua/Init.hs          | 18 ++++++++++++++----
 src/Text/Pandoc/Lua/Module/Pandoc.hs |  8 ++++----
 src/Text/Pandoc/Lua/Packages.hs      | 33 +++++++--------------------------
 src/Text/Pandoc/Lua/PandocLua.hs     | 31 +++++++++++++++++++------------
 4 files changed, 44 insertions(+), 46 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs
index 0a5ce85cb..baa6f0295 100644
--- a/src/Text/Pandoc/Lua/Init.hs
+++ b/src/Text/Pandoc/Lua/Init.hs
@@ -12,17 +12,18 @@ module Text.Pandoc.Lua.Init
   ( runLua
   ) where
 
+import Control.Monad (when)
 import Control.Monad.Catch (try)
 import Control.Monad.Trans (MonadIO (..))
 import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr)
 import Foreign.Lua (Lua)
 import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8)
+import Text.Pandoc.Class.PandocMonad (readDataFile)
 import Text.Pandoc.Class.PandocIO (PandocIO)
 import Text.Pandoc.Error (PandocError)
 import Text.Pandoc.Lua.Packages (installPandocPackageSearcher)
-import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua,
-                                  loadScriptFromDataDir, runPandocLua)
-
+import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, runPandocLua)
+import Text.Pandoc.Lua.Util (throwTopMessageAsError')
 import qualified Foreign.Lua as Lua
 import qualified Text.Pandoc.Definition as Pandoc
 import qualified Text.Pandoc.Lua.Module.Pandoc as ModulePandoc
@@ -44,7 +45,7 @@ initLuaState = do
   liftPandocLua Lua.openlibs
   installPandocPackageSearcher
   initPandocModule
-  loadScriptFromDataDir "init.lua"
+  loadInitScript "init.lua"
  where
   initPandocModule :: PandocLua ()
   initPandocModule = do
@@ -61,6 +62,15 @@ initLuaState = do
     -- assign module to global variable
     liftPandocLua $ Lua.setglobal "pandoc"
 
+  loadInitScript :: FilePath -> PandocLua ()
+  loadInitScript scriptFile = do
+    script <- readDataFile scriptFile
+    status <- liftPandocLua $ Lua.dostring script
+    when (status /= Lua.OK) . liftPandocLua $
+      throwTopMessageAsError'
+      (("Couldn't load '" ++ scriptFile ++ "'.\n") ++)
+
+
 -- | AST elements are marshaled via normal constructor functions in the
 -- @pandoc@ module. However, accessing Lua globals from Haskell is
 -- expensive (due to error handling). Accessing the Lua registry is much
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index a9ce3866d..a8afecd2e 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -25,7 +25,7 @@ import Text.Pandoc.Definition (Block, Inline)
 import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter, SingletonsList (..))
 import Text.Pandoc.Lua.Marshaling ()
 import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua,
-                                  loadScriptFromDataDir)
+                                  loadDefaultModule)
 import Text.Pandoc.Walk (Walkable)
 import Text.Pandoc.Options (ReaderOptions (readerExtensions))
 import Text.Pandoc.Process (pipeProcess)
@@ -38,11 +38,11 @@ import qualified Foreign.Lua as Lua
 import qualified Text.Pandoc.Lua.Util as LuaUtil
 import Text.Pandoc.Error
 
--- | Push the "pandoc" on the lua stack. Requires the `list` module to be
--- loaded.
+-- | Push the "pandoc" package to the Lua stack. Requires the `List`
+-- module to be loadable.
 pushModule :: PandocLua NumResults
 pushModule = do
-  loadScriptFromDataDir "pandoc.lua"
+  loadDefaultModule "pandoc"
   addFunction "read" readDoc
   addFunction "pipe" pipeFn
   addFunction "walk_block" walkBlock
diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs
index d62fb725d..5949a1a7d 100644
--- a/src/Text/Pandoc/Lua/Packages.hs
+++ b/src/Text/Pandoc/Lua/Packages.hs
@@ -1,6 +1,3 @@
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE ScopedTypeVariables #-}
 {- |
    Module      : Text.Pandoc.Lua.Packages
    Copyright   : Copyright © 2017-2021 Albert Krewinkel
@@ -15,13 +12,9 @@ module Text.Pandoc.Lua.Packages
   ( installPandocPackageSearcher
   ) where
 
-import Control.Monad.Catch (try)
 import Control.Monad (forM_)
-import Data.ByteString (ByteString)
-import Foreign.Lua (Lua, NumResults)
-import Text.Pandoc.Error (PandocError)
-import Text.Pandoc.Class.PandocMonad (readDataFile)
-import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua)
+import Foreign.Lua (NumResults)
+import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, loadDefaultModule)
 
 import qualified Foreign.Lua as Lua
 import qualified Foreign.Lua.Module.Text as Text
@@ -54,24 +47,12 @@ pandocPackageSearcher pkgName =
     "pandoc.types"    -> pushWrappedHsFun Types.pushModule
     "pandoc.utils"    -> pushWrappedHsFun Utils.pushModule
     "text"            -> pushWrappedHsFun Text.pushModule
-    _                 -> searchPureLuaLoader
+    "pandoc.List"     -> pushWrappedHsFun (loadDefaultModule pkgName)
+    _                 -> reportPandocSearcherFailure
  where
   pushWrappedHsFun f = liftPandocLua $ do
     Lua.pushHaskellFunction f
     return 1
-  searchPureLuaLoader = do
-    let filename = pkgName ++ ".lua"
-    try (readDataFile filename) >>= \case
-      Right script -> pushWrappedHsFun (loadStringAsPackage pkgName script)
-      Left (_ :: PandocError) -> liftPandocLua $ do
-        Lua.push ("\n\tno file '" ++ filename ++ "' in pandoc's datadir")
-        return (1 :: NumResults)
-
-loadStringAsPackage :: String -> ByteString -> Lua NumResults
-loadStringAsPackage pkgName script = do
-  status <- Lua.dostring script
-  if status == Lua.OK
-    then return (1 :: NumResults)
-    else do
-      msg <- Lua.popValue
-      Lua.raiseError ("Error while loading `" <> pkgName <> "`.\n" <> msg)
+  reportPandocSearcherFailure = liftPandocLua $ do
+    Lua.push ("\n\t" <> pkgName <> "is not one of pandoc's default packages")
+    return (1 :: NumResults)
diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs
index 4beac22b7..750e019b6 100644
--- a/src/Text/Pandoc/Lua/PandocLua.hs
+++ b/src/Text/Pandoc/Lua/PandocLua.hs
@@ -23,24 +23,23 @@ module Text.Pandoc.Lua.PandocLua
   , runPandocLua
   , liftPandocLua
   , addFunction
-  , loadScriptFromDataDir
+  , loadDefaultModule
   ) where
 
-import Control.Monad (when)
 import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow)
 import Control.Monad.Except (MonadError (catchError, throwError))
 import Control.Monad.IO.Class (MonadIO (liftIO))
 import Foreign.Lua (Lua (..), NumResults, Pushable, ToHaskellFunction)
 import Text.Pandoc.Class.PandocIO (PandocIO)
-import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDataFile)
-import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDefaultDataFile)
+import Text.Pandoc.Error (PandocError (PandocLuaError))
 import Text.Pandoc.Lua.Global (Global (..), setGlobals)
 import Text.Pandoc.Lua.ErrorConversion (errorConversion)
 
 import qualified Control.Monad.Catch as Catch
+import qualified Data.Text as T
 import qualified Foreign.Lua as Lua
 import qualified Text.Pandoc.Class.IO as IO
-import qualified Text.Pandoc.Lua.Util as LuaUtil
 
 -- | Type providing access to both, pandoc and Lua operations.
 newtype PandocLua a = PandocLua { unPandocLua :: Lua a }
@@ -86,14 +85,22 @@ addFunction name fn = liftPandocLua $ do
   Lua.pushHaskellFunction fn
   Lua.rawset (-3)
 
--- | Load a file from pandoc's data directory.
-loadScriptFromDataDir :: FilePath -> PandocLua ()
-loadScriptFromDataDir scriptFile = do
-  script <- readDataFile scriptFile
+-- | Load a pure Lua module included with pandoc. Leaves the result on
+-- the stack and returns @NumResults 1@.
+--
+-- The script is loaded from the default data directory. We do not load
+-- from data directories supplied via command line, as this could cause
+-- scripts to be executed even though they had not been passed explicitly.
+loadDefaultModule :: String -> PandocLua NumResults
+loadDefaultModule name = do
+  script <- readDefaultDataFile (name <> ".lua")
   status <- liftPandocLua $ Lua.dostring script
-  when (status /= Lua.OK) . liftPandocLua $
-    LuaUtil.throwTopMessageAsError'
-      (("Couldn't load '" ++ scriptFile ++ "'.\n") ++)
+  if status == Lua.OK
+    then return (1 :: NumResults)
+    else do
+      msg <- liftPandocLua Lua.popValue
+      let err = "Error while loading `" <> name <> "`.\n" <> msg
+      throwError $ PandocLuaError (T.pack err)
 
 -- | Global variables which should always be set.
 defaultGlobals :: PandocIO [Global]
-- 
cgit v1.2.3


From 12bc6625352aaece955f9f0700f88e9280721ced Mon Sep 17 00:00:00 2001
From: Mauro Bieg <mb21@users.noreply.github.com>
Date: Tue, 26 Jan 2021 08:53:26 +0100
Subject: LaTeX writer: change BCP47 lang tag from jp to ja

fixes #7047
---
 src/Text/Pandoc/Writers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index df922e17b..93603a26e 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -1520,7 +1520,7 @@ commonFromBcp47 (Lang l _ _ _) = fromIso l
     fromIso "ie"  = "interlingua"
     fromIso "is"  = "icelandic"
     fromIso "it"  = "italian"
-    fromIso "jp"  = "japanese"
+    fromIso "ja"  = "japanese"
     fromIso "km"  = "khmer"
     fromIso "kmr" = "kurmanji"
     fromIso "kn"  = "kannada"
-- 
cgit v1.2.3


From 98c2a52b4ee6c833c0a2f2652386cec024e377eb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 26 Jan 2021 22:45:57 -0800
Subject: Clean up BibTeX parsing.

Previously there was a messy code path that gave strange
results in some cases, not passing through raw tex but
trying to extract a string content.  This was an artefact
of trying to handle some special bibtex-specific commands
in the BibTeX reader. Now we just handle these in the
LaTeX reader and simplify parsing in the BibTeX reader.
This does mean that more raw tex will be passed through
(and currently this is not sensitive to the `raw_tex`
extension; this should be fixed).

Closes #7049.
---
 src/Text/Pandoc/Citeproc/BibTeX.hs | 33 +--------------------------------
 src/Text/Pandoc/Readers/LaTeX.hs   | 18 ++++++++++++++++++
 test/command/biblatex-cotton.md    |  6 +++---
 test/command/biblatex-murray.md    |  5 +++--
 4 files changed, 25 insertions(+), 37 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 10730a1e9..5b9068378 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -750,41 +750,10 @@ blocksToInlines bs =
        _          -> B.fromList $ Walk.query (:[]) bs
 
 adjustSpans :: Lang -> Inline -> Inline
-adjustSpans lang (RawInline (Format "latex") s)
-  | s == "\\hyphen" || s == "\\hyphen " = Str "-"
-  | otherwise = parseRawLaTeX lang s
+adjustSpans lang (Span ("",[],[("bibstring",s)]) _) = Str $ resolveKey' lang s
 adjustSpans _ SoftBreak = Space
 adjustSpans _ x = x
 
-parseRawLaTeX :: Lang -> Text -> Inline
-parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
-  case parseLaTeX lang contents of
-          Right [Para ys]  -> f command ys
-          Right [Plain ys] -> f command ys
-          Right []         -> f command []
-          _                -> RawInline (Format "latex") t
-   where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
-         command  = T.strip command'
-         contents = T.drop 1 $ T.dropEnd 1 contents'
-         f "mkbibquote"    ils = Span nullAttr [Quoted DoubleQuote ils]
-         f "mkbibemph"     ils = Span nullAttr [Emph ils]
-         f "mkbibitalic"   ils = Span nullAttr [Emph ils]
-         f "mkbibbold"     ils = Span nullAttr [Strong ils]
-         f "mkbibparens"   ils = Span nullAttr $
-                                  [Str "("] ++ ils ++ [Str ")"]
-         f "mkbibbrackets" ils = Span nullAttr $
-                                  [Str "["] ++ ils ++ [Str "]"]
-         -- ... both should be nestable & should work in year fields
-         f "autocap"    ils    = Span nullAttr ils
-           -- TODO: should work in year fields
-         f "textnormal" ils    = Span ("",["nodecor"],[]) ils
-         f "bibstring" [Str s] = Str $ resolveKey' lang s
-         f "adddot"    []      = Str "."
-         f "adddotspace" []    = Span nullAttr [Str ".", Space]
-         f "addabbrvspace" []  = Space
-         f _            ils    = Span nullAttr ils
-parseRawLaTeX _ t = RawInline (Format "latex") t
-
 latex' :: Text -> Bib [Block]
 latex' t = do
   lang <- gets localeLang
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f49323996..91c71c000 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -940,6 +940,24 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("uline", underline <$> tok)
   -- plain tex stuff that should just be passed through as raw tex
   , ("ifdim", ifdim)
+  -- bibtex
+  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+  , ("mkbibparens",
+       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+  , ("mkbibbrackets",
+       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+  , ("autocap", spanWith nullAttr <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+  , ("bibstring",
+       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+         <$> braced)
+  , ("adddot", pure (str "."))
+  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+  , ("addabbrvspace", pure space)
+  , ("hyphen", pure (str "-"))
   ]
 
 accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
diff --git a/test/command/biblatex-cotton.md b/test/command/biblatex-cotton.md
index 51c3ee2f4..7a7a2e296 100644
--- a/test/command/biblatex-cotton.md
+++ b/test/command/biblatex-cotton.md
@@ -43,9 +43,9 @@ Cotton, F. A., Wilkinson, G., Murillio, C. A., & Bochmann, M. (1999).
 ---
 nocite: "[@*]"
 references:
-- annote: A book entry with author authors and an edition field. By
-    default, long author and editor lists are automatically truncated.
-    This is configurable
+- annote: A book entry with `\arabic{author}`{=latex} authors and an
+    edition field. By default, long author and editor lists are
+    automatically truncated. This is configurable
   author:
   - family: Cotton
     given: Frank Albert
diff --git a/test/command/biblatex-murray.md b/test/command/biblatex-murray.md
index 78081c6fe..8242e4822 100644
--- a/test/command/biblatex-murray.md
+++ b/test/command/biblatex-murray.md
@@ -55,8 +55,9 @@ properties as a function of core size. *Langmuir*, *14*(1), 17–30.
 ---
 nocite: "[@*]"
 references:
-- annote: An article entry with author authors. By default, long author
-    and editor lists are automatically truncated. This is configurable
+- annote: An article entry with `\arabic{author}`{=latex} authors. By
+    default, long author and editor lists are automatically truncated.
+    This is configurable
   author:
   - family: Hostetler
     given: Michael J.
-- 
cgit v1.2.3


From 300b9b0ea365187240115afbfed0df7fa438a7b3 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 29 Jan 2021 09:37:45 +0100
Subject: JATS writer: escape special chars in reference elements.

Prevents the generation of invalid markup if a citation element contains
an ampersand or another character with a special meaning in XML.
---
 src/Text/Pandoc/Writers/JATS/References.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs
index 4ee7eb9dd..903144128 100644
--- a/src/Text/Pandoc/Writers/JATS/References.hs
+++ b/src/Text/Pandoc/Writers/JATS/References.hs
@@ -29,7 +29,7 @@ import Text.Pandoc.Builder (Inlines)
 import Text.Pandoc.Options (WriterOptions)
 import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Writers.JATS.Types
-import Text.Pandoc.XML (inTags)
+import Text.Pandoc.XML (escapeStringForXML, inTags)
 import qualified Data.Text as T
 
 referencesToJATS :: PandocMonad m
@@ -78,7 +78,8 @@ referenceToJATS _opts ref = do
     varInTagWith var tagName tagAttribs =
       case lookupVariable var ref >>= valToText of
         Nothing  -> mempty
-        Just val -> inTags' tagName tagAttribs $ literal val
+        Just val -> inTags' tagName tagAttribs . literal $
+                    escapeStringForXML val
 
     authors = case lookupVariable "author" ref of
       Just (NamesVal names) ->
@@ -143,7 +144,9 @@ toNameElements name =
   then inTags' "name" [] nameTags
   else nameLiteral name `inNameTag` "string-name"
     where
-      inNameTag val tag = maybe empty (inTags' tag [] . literal) val
+      inNameTag mVal tag = case mVal of
+        Nothing  -> empty
+        Just val -> inTags' tag [] . literal $ escapeStringForXML val
       surnamePrefix = maybe mempty (`T.snoc` ' ') $
                       nameNonDroppingParticle name
       givenSuffix = maybe mempty (T.cons ' ') $
-- 
cgit v1.2.3


From 9223788a05fe619d567bcbdf4cb31db63de86f32 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 29 Jan 2021 18:29:17 -0800
Subject: Markdown writer: handle math right before digit.

We insert an HTML comment to avoid a `$` right before
a digit, which pandoc will not recognize as a math delimiter.
---
 src/Text/Pandoc/Writers/Markdown.hs | 6 +++++-
 test/command/7058.md                | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7058.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index de9075ac4..898905603 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -22,7 +22,7 @@ module Text.Pandoc.Writers.Markdown (
   writePlain) where
 import Control.Monad.Reader
 import Control.Monad.State.Strict
-import Data.Char (isAlphaNum)
+import Data.Char (isAlphaNum, isDigit)
 import Data.Default
 import Data.List (find, intersperse, sortOn, transpose)
 import qualified Data.Map as M
@@ -987,6 +987,10 @@ inlineListToMarkdown opts lst = do
   inlist <- asks envInList
   go (if inlist then avoidBadWrapsInList lst else lst)
   where go [] = return empty
+        go (x@Math{}:y@(Str t):zs)
+          | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058
+          = liftM2 (<>) (inlineToMarkdown opts x)
+              (go (RawInline (Format "html") "<!-- -->" : y : zs))
         go (i:is) = case i of
             Link {} -> case is of
                 -- If a link is followed by another link, or '[', '(' or ':'
diff --git a/test/command/7058.md b/test/command/7058.md
new file mode 100644
index 000000000..69e5dd445
--- /dev/null
+++ b/test/command/7058.md
@@ -0,0 +1,6 @@
+```
+% pandoc -f latex -t markdown
+5\(-\)8 \(x\)
+^D
+5$-$`<!-- -->`{=html}8 $x$
+```
-- 
cgit v1.2.3


From 66959172588409becb42993bb94ea106ac3e5606 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 30 Jan 2021 18:10:22 -0800
Subject: CslJson writer:  output `[]` if no references in input,

instead of raising a PandocAppError as before.
---
 src/Text/Pandoc/Writers/CslJson.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index 13b95586b..f0defdd38 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -46,11 +46,11 @@ writeCslJson _opts (Pandoc meta _) = do
   locale <- case getLocale lang of
                Left e  -> throwError $ PandocCiteprocError e
                Right l -> return l
-  case lookupMeta "references" meta of
-    Just (MetaList rs) -> return $ (UTF8.toText $
-         toCslJson locale (mapMaybe metaValueToReference rs))
-          <> "\n"
-    _ -> throwError $ PandocAppError "No references field"
+  let rs = case lookupMeta "references" meta of
+             Just (MetaList rs) -> rs
+             _ -> []
+  return $ UTF8.toText
+           (toCslJson locale (mapMaybe metaValueToReference rs)) <> "\n"
 
 fromInlines :: [Inline] -> CslJson Text
 fromInlines = foldMap fromInline . B.fromList
-- 
cgit v1.2.3


From 9c8ff53b54836cbe7fa9417784f3204cd041f3ac Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 31 Jan 2021 14:37:47 +0100
Subject: CslJson writer: fix compiler warning

---
 src/Text/Pandoc/Writers/CslJson.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index f0defdd38..a10def95e 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -47,7 +47,7 @@ writeCslJson _opts (Pandoc meta _) = do
                Left e  -> throwError $ PandocCiteprocError e
                Right l -> return l
   let rs = case lookupMeta "references" meta of
-             Just (MetaList rs) -> rs
+             Just (MetaList xs) -> xs
              _ -> []
   return $ UTF8.toText
            (toCslJson locale (mapMaybe metaValueToReference rs)) <> "\n"
-- 
cgit v1.2.3


From d1875b69ec136c19d9b2336ff57e360ff7b7bef6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 31 Jan 2021 12:05:46 -0800
Subject: RST reader: fix handling of header in CSV tables.

The interpretation of this line is not affected
by the delim option. Closes #7064.
---
 src/Text/Pandoc/Readers/RST.hs |  9 +++++----
 test/command/7064.md           | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 4 deletions(-)
 create mode 100644 test/command/7064.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 707af905f..0f32d993c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -877,10 +877,11 @@ csvTableDirective top fields rawcsv = do
                     (bs, _) <- fetchItem u
                     return $ UTF8.toText bs
                   Nothing -> return rawcsv
-  let res = parseCSV opts (case explicitHeader of
-                              Just h  -> h <> "\n" <> rawcsv'
-                              Nothing -> rawcsv')
-  case res of
+  let header' = case explicitHeader of
+                  Just h  -> parseCSV defaultCSVOptions h
+                  Nothing -> Right []
+  let res = parseCSV opts rawcsv'
+  case (<>) <$> header' <*> res of
        Left e  ->
          throwError $ PandocParsecError "csv table" e
        Right rawrows -> do
diff --git a/test/command/7064.md b/test/command/7064.md
new file mode 100644
index 000000000..58b72e363
--- /dev/null
+++ b/test/command/7064.md
@@ -0,0 +1,32 @@
+```
+% pandoc -f rst -t html
+.. csv-table:: Changes
+    :header: "Version", "Date", "Description"
+    :widths: 15, 15, 70
+    :delim: $
+
+    0.1.0 $ 18/02/2013 $ Initial Release
+^D
+<table>
+<caption>Changes</caption>
+<colgroup>
+<col style="width: 15%" />
+<col style="width: 15%" />
+<col style="width: 70%" />
+</colgroup>
+<thead>
+<tr class="header">
+<th>Version</th>
+<th>Date</th>
+<th>Description</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td>0.1.0</td>
+<td>18/02/2013</td>
+<td>Initial Release</td>
+</tr>
+</tbody>
+</table>
+```
-- 
cgit v1.2.3


From b239c89a82b66abc55bf7c08e37492938c817c56 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 1 Feb 2021 08:43:26 -0800
Subject: BibTeX writer fixes.  Closes #7067.

+ Require citeproc 0.3.0.7, which correctly titlecases when titles
  contain non-ASCII characters.
+ Correctly handle 'pages' (= 'page' in CSL).
+ Correctly handle BibLaTeX 'langid' (= 'language' in CSL).
+ In BibTeX output, protect foreign titles since there's no language
  field.
---
 pandoc.cabal                       |  2 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs | 21 ++++++---
 stack.yaml                         |  4 +-
 test/command/7067.md               | 90 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 108 insertions(+), 9 deletions(-)
 create mode 100644 test/command/7067.md

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 0890318e3..1bdd2a6b3 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -410,7 +410,7 @@ library
                  blaze-markup          >= 0.8      && < 0.9,
                  bytestring            >= 0.9      && < 0.12,
                  case-insensitive      >= 1.2      && < 1.3,
-                 citeproc              >= 0.3.0.6  && < 0.4,
+                 citeproc              >= 0.3.0.7  && < 0.4,
                  commonmark            >= 0.1.1.3  && < 0.2,
                  commonmark-extensions >= 0.2.0.4  && < 0.3,
                  commonmark-pandoc     >= 0.2      && < 0.3,
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 5b9068378..2b43fffb6 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -115,7 +115,7 @@ writeBibtexString opts variant mblang ref =
       "motion_picture"    | variant == Biblatex -> "movie"
       "review"             | variant == Biblatex -> "review"
       _                   -> "misc"
-  
+
   mbSubtype =
     case referenceType ref of
       "article-magazine"  -> Just "magazine"
@@ -149,7 +149,7 @@ writeBibtexString opts variant mblang ref =
            , "type"
            , "entrysubtype"
            , "note"
-           , "language"
+           , "langid"
            , "abstract"
            , "keywords"
            ]
@@ -202,12 +202,19 @@ writeBibtexString opts variant mblang ref =
                    [ (", " <>) <$> nameGiven name,
                      nameDroppingParticle name ]
 
-  titlecase = case mblang of
+  mblang' = (parseLang <$> getVariableAsText "language") <|> mblang
+
+  titlecase = case mblang' of
                 Just (Lang "en" _) -> titlecase'
                 Nothing            -> titlecase'
-                _                  -> id
-
-  titlecase' = addTextCase mblang TitleCase .
+                _                  ->
+                  case variant of
+                    Bibtex         -> B.spanWith nullAttr
+                     -- BibTex lacks a language field, so we wrap non-English
+                     -- titles in {} to protect case.
+                    Biblatex       -> id
+
+  titlecase' = addTextCase mblang' TitleCase .
     (\ils -> B.fromList
                (case B.toList ils of
                   Str t : xs -> Str t : Walk.walk spanAroundCapitalizedWords xs
@@ -299,6 +306,8 @@ writeBibtexString opts variant mblang ref =
   getContentsFor "urldate"  = getVariable "accessed" >>= toLaTeX . valToInlines
   getContentsFor "year"  = getVariable "issued" >>= getYear
   getContentsFor "month"  = getVariable "issued" >>= getMonth
+  getContentsFor "pages"  = getVariable "page" >>= toLaTeX . valToInlines
+  getContentsFor "langid"  = getVariable "language" >>= toLaTeX . valToInlines
   getContentsFor "number" = (getVariable "number"
                          <|> getVariable "collection-number"
                          <|> getVariable "issue") >>= toLaTeX . valToInlines
diff --git a/stack.yaml b/stack.yaml
index 6c8c39fa7..dc6fa6517 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -19,10 +19,10 @@ extra-deps:
 - doctemplates-0.9
 - texmath-0.12.1
 - tasty-bench-0.1
-- citeproc-0.3.0.6
+- citeproc-0.3.0.7
 #- citeproc:
 #  git: https://github.com/jgm/citeproc.git
-#  commit: da1f9702fa70d7e1bad9a796f897e920e2d578f7
+#  commit: feb3b7580c6738eec3b23921f7c1739cfba611aa
 ghc-options:
    "$locals": -fhide-source-paths -Wno-missing-home-modules
 resolver: lts-16.23
diff --git a/test/command/7067.md b/test/command/7067.md
new file mode 100644
index 000000000..a66e3aa1f
--- /dev/null
+++ b/test/command/7067.md
@@ -0,0 +1,90 @@
+```
+% pandoc -t biblatex
+---
+references:
+- id: garaud
+  author:
+    - family: Garaud
+      given: Marcel
+  container-title: Bulletin de la Societé des antiquaires de l’Ouest
+  collection-title: 4
+  issued:
+    - year: 1967
+  language: fr-FR
+  page: 11-27
+  title: Recherches sur les défrichements dans la Gâtine poitevine aux XI^e^ et XII^e^ siècles
+  type: article-journal
+  volume: 9
+...
+^D
+@article{garaud,
+  author = {Garaud, Marcel},
+  title = {Recherches sur les défrichements dans la Gâtine poitevine aux
+XI\textsuperscript{e} et XII\textsuperscript{e} siècles},
+  journal = {Bulletin de la Societé des antiquaires de l’Ouest},
+  series = {4},
+  volume = {9},
+  pages = {11-27},
+  date = {1967},
+  langid = {fr-FR}
+}
+```
+
+```
+% pandoc -t bibtex
+---
+references:
+- id: garaud
+  author:
+    - family: Garaud
+      given: Marcel
+  container-title: Bulletin de la Société des antiquaires de l’Ouest
+  collection-title: 4
+  issued:
+    - year: 1967
+  language: fr-FR
+  page: 11-27
+  title: Recherches sur les défrichements dans la Gâtine poitevine aux XI^e^ et XII^e^ siècles
+  type: article-journal
+  volume: 9
+...
+^D
+@article{garaud,
+  author = {Garaud, Marcel},
+  title = {{Recherches sur les défrichements dans la Gâtine poitevine aux
+XI\textsuperscript{e} et XII\textsuperscript{e} siècles}},
+  journal = {Bulletin de la Société des antiquaires de l’Ouest},
+  series = {4},
+  volume = {9},
+  pages = {11-27},
+  year = {1967}
+}
+```
+
+This tests the titlecasing of a word with an accented second letter:
+```
+% pandoc -t bibtex
+---
+references:
+- id: garaud
+  author:
+    - family: Garaud
+      given: Marcel
+  container-title: English Journal
+  issued:
+    - year: 1967
+  language: en-US
+  title: Research on the défrichements in the Gâtine poitevine
+  type: article-journal
+  volume: 9
+...
+^D
+@article{garaud,
+  author = {Garaud, Marcel},
+  title = {Research on the Défrichements in the {Gâtine} Poitevine},
+  journal = {English Journal},
+  volume = {9},
+  year = {1967}
+}
+```
+
-- 
cgit v1.2.3


From 02d3c71e7224853ecabaa9ac4cd947ec2ac1e579 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 1 Feb 2021 18:02:17 -0800
Subject: BibTeX writer: use doclayout and doctemplate.

This change allows bibtex/biblatex output to wrap as other
formats do, depending on the settings of `--wrap` and `--columns`.

It also introduces default templates for bibtex and biblatex,
which allow for using the variables `header-include`, `include-before`
or `include-after` (or alternatively the command line options
`--include-in-header`, `--include-before-body`, `--include-after-body`)
to insert content into the generated bibtex/biblatex.

This change requires a change in the return type of the unexported
`T.P.Citeproc.writeBibTeXString` from `Text` to `Doc Text`.

Closes #7068.
---
 data/templates/default.biblatex    | 10 ++++++++++
 pandoc.cabal                       |  2 ++
 src/Text/Pandoc/Citeproc/BibTeX.hs | 41 +++++++++++++++++++++-----------------
 src/Text/Pandoc/Templates.hs       |  2 --
 src/Text/Pandoc/Writers/BibTeX.hs  | 19 +++++++++++++++---
 test/command/7067.md               |  6 +++---
 6 files changed, 54 insertions(+), 26 deletions(-)
 create mode 100644 data/templates/default.biblatex

(limited to 'src')

diff --git a/data/templates/default.biblatex b/data/templates/default.biblatex
new file mode 100644
index 000000000..6bf2632d8
--- /dev/null
+++ b/data/templates/default.biblatex
@@ -0,0 +1,10 @@
+$for(header-includes)$
+$header-includes$
+$endfor$
+$for(include-before)$
+$include-before$
+$endfor$
+$body$
+$for(include-after)$
+$include-after$
+$endfor$
diff --git a/pandoc.cabal b/pandoc.cabal
index 1bdd2a6b3..2f8c255a8 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -56,6 +56,8 @@ data-files:
                  data/templates/default.icml
                  data/templates/default.opml
                  data/templates/default.latex
+                 data/templates/default.bibtex
+                 data/templates/default.biblatex
                  data/templates/default.context
                  data/templates/default.texinfo
                  data/templates/default.jira
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 2b43fffb6..416fe439e 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -52,6 +52,8 @@ import           Data.Char              (isAlphaNum, isDigit, isLetter,
 import           Data.List              (foldl', intercalate, intersperse)
 import           Safe                   (readMay)
 import           Text.Printf            (printf)
+import           Text.DocLayout         (literal, hsep, nest, hang, Doc(..),
+                                         braces, ($$), cr)
 
 data Variant = Bibtex | Biblatex
   deriving (Show, Eq, Ord)
@@ -77,10 +79,11 @@ writeBibtexString :: WriterOptions       -- ^ options (for writing LaTex)
                   -> Variant             -- ^ bibtex or biblatex
                   -> Maybe Lang          -- ^ Language
                   -> Reference Inlines   -- ^ Reference to write
-                  -> Text
+                  -> Doc Text
 writeBibtexString opts variant mblang ref =
-  "@" <> bibtexType <> "{" <> unItemId (referenceId ref) <> ",\n  " <>
-  renderFields fs <> "\n}\n"
+  "@" <> bibtexType <> "{" <> literal (unItemId (referenceId ref)) <> ","
+  $$ nest 2 (renderFields fs)
+  $$ "}" <> cr
 
  where
   bibtexType =
@@ -231,10 +234,12 @@ writeBibtexString opts variant mblang ref =
   toLaTeX x =
     case runPure (writeLaTeX opts $ doc (B.plain x)) of
            Left _  -> Nothing
-           Right t -> Just t
+           Right t -> Just $ hsep . map literal $ T.words t
 
-  renderField name = (\contents -> name <> " = {" <> contents <> "}")
-                      <$> getContentsFor name
+  renderField :: Text -> Maybe (Doc Text)
+  renderField name =
+    (((literal name) <>) . hang 2 " = " . braces)
+      <$> getContentsFor name
 
   getVariable v = lookupVariable (toVariable v) ref
 
@@ -248,10 +253,10 @@ writeBibtexString opts variant mblang ref =
            Nothing ->
              case dateParts date of
                [DateParts (y1:_), DateParts (y2:_)] ->
-                 Just (T.pack (printf "%04d" y1) <> "--" <>
+                 Just $ literal (T.pack (printf "%04d" y1) <> "--" <>
                         T.pack (printf "%04d" y2))
                [DateParts (y1:_)] ->
-                 Just (T.pack (printf "%04d" y1))
+                 Just $ literal (T.pack (printf "%04d" y1))
                _ -> Nothing
        _ -> Nothing
 
@@ -274,19 +279,19 @@ writeBibtexString opts variant mblang ref =
        DateVal date ->
          case dateParts date of
            [DateParts (_:m1:_), DateParts (_:m2:_)] ->
-             Just (toMonth m1 <> "--" <> toMonth m2)
-           [DateParts (_:m1:_)] -> Just (toMonth m1)
+             Just $ literal (toMonth m1 <> "--" <> toMonth m2)
+           [DateParts (_:m1:_)] -> Just $ literal (toMonth m1)
            _ -> Nothing
        _ -> Nothing
 
-  getContentsFor :: Text -> Maybe Text
+  getContentsFor :: Text -> Maybe (Doc Text)
   getContentsFor "type" =
     getVariableAsText "genre" >>=
        \case
           "mathesis"  -> Just "mastersthesis"
           "phdthesis" -> Just "phdthesis"
           _           -> Nothing
-  getContentsFor "entrysubtype" = mbSubtype
+  getContentsFor "entrysubtype" = literal <$> mbSubtype
   getContentsFor "journal"
     | bibtexType `elem` ["article", "periodical", "suppperiodical", "review"]
       = getVariable "container-title" >>= toLaTeX . valToInlines
@@ -314,7 +319,7 @@ writeBibtexString opts variant mblang ref =
 
   getContentsFor x = getVariable x >>=
     if isURL x
-       then Just . stringify . valToInlines
+       then Just . literal . stringify . valToInlines
        else toLaTeX .
             (if x == "title"
                 then titlecase
@@ -323,7 +328,7 @@ writeBibtexString opts variant mblang ref =
 
   isURL x = x `elem` ["url","doi","issn","isbn"]
 
-  renderFields = T.intercalate ",\n  " . mapMaybe renderField
+  renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField
 
 defaultLang :: Lang
 defaultLang = Lang "en" (Just "US")
@@ -1038,14 +1043,14 @@ getOldDate prefix = do
   let dateparts = filter (\x -> x /= DateParts [])
                   $ map toDateParts [(year',month',day'),
                                      (endyear',endmonth',endday')]
-  literal <- if null dateparts
-                then Just <$> getRawField (prefix <> "year")
-                else return Nothing
+  literal' <- if null dateparts
+                 then Just <$> getRawField (prefix <> "year")
+                 else return Nothing
   return $
     Date { dateParts = dateparts
          , dateCirca = False
          , dateSeason = Nothing
-         , dateLiteral = literal }
+         , dateLiteral = literal' }
 
 getRawField :: Text -> Bib Text
 getRawField f = do
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 3e539bff7..e83f26329 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -81,8 +81,6 @@ getDefaultTemplate writer = do
   case format of
        "native"  -> return ""
        "csljson" -> return ""
-       "bibtex"  -> return ""
-       "biblatex" -> return ""
        "json"    -> return ""
        "docx"    -> return ""
        "fb2"     -> return ""
diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs
index e1cb47ca1..b9ae0c13a 100644
--- a/src/Text/Pandoc/Writers/BibTeX.hs
+++ b/src/Text/Pandoc/Writers/BibTeX.hs
@@ -25,7 +25,11 @@ import Citeproc (parseLang)
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.BibTeX as BibTeX
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference)
-import Text.Pandoc.Writers.Shared (lookupMetaString)
+import Text.Pandoc.Writers.Shared (lookupMetaString, defField,
+                                    addVariablesToContext)
+import Text.DocLayout (render, vcat)
+import Text.DocTemplates (Context(..))
+import Text.Pandoc.Templates (renderTemplate)
 
 -- | Write BibTeX based on the references metadata from a Pandoc document.
 writeBibTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@@ -43,6 +47,15 @@ writeBibTeX' variant opts (Pandoc meta _) = do
   let refs = case lookupMeta "references" meta of
                Just (MetaList xs) -> mapMaybe metaValueToReference xs
                _ -> []
-  return $ mconcat $
-    map (BibTeX.writeBibtexString opts variant mblang) refs
+  let main = vcat $ map (BibTeX.writeBibtexString opts variant mblang) refs
+  let context  = defField "body" main
+                 $ addVariablesToContext opts (mempty :: Context Text)
+  let colwidth = if writerWrapText opts == WrapAuto
+                    then Just $ writerColumns opts
+                    else Nothing
+  return $ render colwidth $
+    case writerTemplate opts of
+      Nothing  -> main
+      Just tpl -> renderTemplate tpl context
+
 
diff --git a/test/command/7067.md b/test/command/7067.md
index a66e3aa1f..34b3bb8ea 100644
--- a/test/command/7067.md
+++ b/test/command/7067.md
@@ -20,7 +20,7 @@ references:
 @article{garaud,
   author = {Garaud, Marcel},
   title = {Recherches sur les défrichements dans la Gâtine poitevine aux
-XI\textsuperscript{e} et XII\textsuperscript{e} siècles},
+    XI\textsuperscript{e} et XII\textsuperscript{e} siècles},
   journal = {Bulletin de la Societé des antiquaires de l’Ouest},
   series = {4},
   volume = {9},
@@ -51,8 +51,8 @@ references:
 ^D
 @article{garaud,
   author = {Garaud, Marcel},
-  title = {{Recherches sur les défrichements dans la Gâtine poitevine aux
-XI\textsuperscript{e} et XII\textsuperscript{e} siècles}},
+  title = {{Recherches sur les défrichements dans la Gâtine poitevine
+    aux XI\textsuperscript{e} et XII\textsuperscript{e} siècles}},
   journal = {Bulletin de la Société des antiquaires de l’Ouest},
   series = {4},
   volume = {9},
-- 
cgit v1.2.3


From ec8509295a8de19462ecd352a22b2784158e9ec6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 2 Feb 2021 17:00:03 -0800
Subject: Add parseOptionsFromArgs [API change, addition].

Exported by Text.Pandoc.App.
---
 src/Text/Pandoc/App.hs                    | 4 +++-
 src/Text/Pandoc/App/CommandLineOptions.hs | 7 ++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 437af3257..6a071ad5a 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -19,6 +19,7 @@ module Text.Pandoc.App (
           , Filter(..)
           , defaultOpts
           , parseOptions
+          , parseOptionsFromArgs
           , options
           , applyFilters
           ) where
@@ -47,7 +48,8 @@ import Text.Pandoc
 import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
 import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
                             IpynbOutput (..) )
-import Text.Pandoc.App.CommandLineOptions (parseOptions, options)
+import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
+                                           options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
 import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 307f28b5c..a4c510d97 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -17,6 +17,7 @@ Does a pandoc conversion based on command-line options.
 -}
 module Text.Pandoc.App.CommandLineOptions (
             parseOptions
+          , parseOptionsFromArgs
           , options
           , engines
           , lookupHighlightStyle
@@ -73,9 +74,13 @@ parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt
 parseOptions options' defaults = do
   rawArgs <- map UTF8.decodeArg <$> getArgs
   prg <- getProgName
+  parseOptionsFromArgs options' defaults prg rawArgs
 
+parseOptionsFromArgs
+  :: [OptDescr (Opt -> IO Opt)] -> Opt -> String -> [String] -> IO Opt
+parseOptionsFromArgs options' defaults prg rawArgs = do
   let (actions, args, unrecognizedOpts, errors) =
-           getOpt' Permute options' rawArgs
+           getOpt' Permute options' (map UTF8.decodeArg rawArgs)
 
   let unknownOptionErrors =
        foldr (handleUnrecognizedOption . takeWhile (/= '=')) []
-- 
cgit v1.2.3


From 61b108d52789f20fb03c4f8a74719c1d53021c91 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Wed, 27 Jan 2021 15:17:39 +0100
Subject: Lua: add module "pandoc.path"

The module allows to work with file paths in a convenient and
platform-independent manner.

Closes: #6001
Closes: #6565
---
 doc/lua-filters.md              | 172 ++++++++++++++++++++++++++++++++++++++++
 pandoc.cabal                    |   1 +
 src/Text/Pandoc/Lua/Packages.hs |   2 +
 stack.yaml                      |   4 +-
 test/Tests/Lua/Module.hs        |   2 +
 test/lua/module/pandoc-path.lua |  17 ++++
 6 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 test/lua/module/pandoc-path.lua

(limited to 'src')

diff --git a/doc/lua-filters.md b/doc/lua-filters.md
index 787365212..33c0d27bd 100644
--- a/doc/lua-filters.md
+++ b/doc/lua-filters.md
@@ -3376,6 +3376,178 @@ methods and convenience functions.
     `comp`:
     :   Comparison function as described above.
 
+# Module pandoc.path
+
+Module for file path manipulations.
+
+## Static Fields {#pandoc.path-fields}
+
+### separator {#pandoc.path.separator}
+
+The character that separates directories.
+
+### search_path_separator {#pandoc.path.search_path_separator}
+
+The character that is used to separate the entries in the `PATH`
+environment variable.
+
+## Functions {#pandoc.path-functions}
+
+### directory (filepath) {#pandoc.path.directory}
+
+Get the directory name; move up one level.
+
+Parameters:
+
+filepath
+:   path (string)
+
+Returns:
+
+-   The filepath up to the last directory separator. (string)
+
+### filename (filepath) {#pandoc.path.filename}
+
+Get the file name.
+
+Parameters:
+
+filepath
+:   path (string)
+
+Returns:
+
+-   File name part of the input path. (string)
+
+### is_absolute (filepath) {#pandoc.path.is_absolute}
+
+Checks whether a path is absolute, i.e. not fixed to a root.
+
+Parameters:
+
+filepath
+:   path (string)
+
+Returns:
+
+-   `true` iff `filepath` is an absolute path, `false` otherwise.
+    (boolean)
+
+### is_relative (filepath) {#pandoc.path.is_relative}
+
+Checks whether a path is relative or fixed to a root.
+
+Parameters:
+
+filepath
+:   path (string)
+
+Returns:
+
+-   `true` iff `filepath` is a relative path, `false` otherwise.
+    (boolean)
+
+### join (filepaths) {#pandoc.path.join}
+
+Join path elements back together by the directory separator.
+
+Parameters:
+
+filepaths
+:   path components (list of strings)
+
+Returns:
+
+-   The joined path. (string)
+
+### make_relative (path, root[, unsafe]) {#pandoc.path.make_relative}
+
+Contract a filename, based on a relative path. Note that the
+resulting path will usually not introduce `..` paths, as the
+presence of symlinks means `../b` may not reach `a/b` if it starts
+from `a/c`. For a worked example see [this blog
+post](http://neilmitchell.blogspot.co.uk/2015/10/filepaths-are-subtle-symlinks-are-hard.html).
+
+Set `unsafe` to a truthy value to a allow `..` in paths.
+
+Parameters:
+
+path
+:   path to be made relative (string)
+
+root
+:   root path (string)
+
+unsafe
+:   whether to allow `..` in the result. (boolean)
+
+Returns:
+
+-   contracted filename (string)
+
+### normalize (filepath) {#pandoc.path.normalize}
+
+Normalizes a path.
+
+-   `//` outside of the drive can be made blank
+-   `/` becomes the `path.separator`
+-   `./` -\> ''
+-   an empty path becomes `.`
+
+Parameters:
+
+filepath
+:   path (string)
+
+Returns:
+
+-   The normalized path. (string)
+
+### split (filepath) {#pandoc.path.split}
+
+Splits a path by the directory separator.
+
+Parameters:
+
+filepath
+:   path (string)
+
+Returns:
+
+-   List of all path components. (list of strings)
+
+### split_extension (filepath) {#pandoc.path.split_extension}
+
+Splits the last extension from a file path and returns the parts. The
+extension, if present, includes the leading separator; if the path has
+no extension, then the empty string is returned as the extension.
+
+Parameters:
+
+filepath
+:   path (string)
+
+Returns:
+
+-   filepath without extension (string)
+
+-   extension or empty string (string)
+
+### split_search_path (search_path) {#pandoc.path.split_search_path}
+
+Takes a string and splits it on the `search_path_separator` character.
+Blank items are ignored on Windows, and converted to `.` on Posix. On
+Windows path elements are stripped of quotes.
+
+Parameters:
+
+search_path
+:   platform-specific search path (string)
+
+Returns:
+
+-   list of directories in search path (list of strings)
+
 # Module pandoc.system
 
 Access to system information and functionality.
diff --git a/pandoc.cabal b/pandoc.cabal
index 2f8c255a8..4b808af84 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -429,6 +429,7 @@ library
                  filepath              >= 1.1      && < 1.5,
                  haddock-library       >= 1.8      && < 1.10,
                  hslua                 >= 1.1      && < 1.4,
+                 hslua-module-path     >= 0.0.1    && < 0.1.0,
                  hslua-module-system   >= 0.2      && < 0.3,
                  hslua-module-text     >= 0.2.1    && < 0.4,
                  http-client           >= 0.4.30   && < 0.8,
diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs
index 5949a1a7d..2f1c139db 100644
--- a/src/Text/Pandoc/Lua/Packages.hs
+++ b/src/Text/Pandoc/Lua/Packages.hs
@@ -17,6 +17,7 @@ import Foreign.Lua (NumResults)
 import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, loadDefaultModule)
 
 import qualified Foreign.Lua as Lua
+import qualified Foreign.Lua.Module.Path as Path
 import qualified Foreign.Lua.Module.Text as Text
 import qualified Text.Pandoc.Lua.Module.Pandoc as Pandoc
 import qualified Text.Pandoc.Lua.Module.MediaBag as MediaBag
@@ -43,6 +44,7 @@ pandocPackageSearcher pkgName =
   case pkgName of
     "pandoc"          -> pushWrappedHsFun Pandoc.pushModule
     "pandoc.mediabag" -> pushWrappedHsFun MediaBag.pushModule
+    "pandoc.path"     -> pushWrappedHsFun Path.pushModule
     "pandoc.system"   -> pushWrappedHsFun System.pushModule
     "pandoc.types"    -> pushWrappedHsFun Types.pushModule
     "pandoc.utils"    -> pushWrappedHsFun Utils.pushModule
diff --git a/stack.yaml b/stack.yaml
index dc6fa6517..da7acd8db 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -10,7 +10,9 @@ extra-deps:
 - haddock-library-1.9.0
 - skylighting-0.10.2
 - skylighting-core-0.10.2
-- hslua-1.1.2
+- hslua-1.3.0
+- hslua-module-path-0.0.1
+- hslua-module-text-0.3.0.1
 - jira-wiki-markup-1.3.2
 - HsYAML-aeson-0.2.0.0
 - commonmark-0.1.1.3
diff --git a/test/Tests/Lua/Module.hs b/test/Tests/Lua/Module.hs
index d88633cf8..8be445f65 100644
--- a/test/Tests/Lua/Module.hs
+++ b/test/Tests/Lua/Module.hs
@@ -25,6 +25,8 @@ tests =
                   ("lua" </> "module" </> "pandoc-list.lua")
   , testPandocLua "pandoc.mediabag"
                   ("lua" </> "module" </> "pandoc-mediabag.lua")
+  , testPandocLua "pandoc.path"
+                  ("lua" </> "module" </> "pandoc-path.lua")
   , testPandocLua "pandoc.types"
                   ("lua" </> "module" </> "pandoc-types.lua")
   , testPandocLua "pandoc.util"
diff --git a/test/lua/module/pandoc-path.lua b/test/lua/module/pandoc-path.lua
new file mode 100644
index 000000000..9a5a3f6c8
--- /dev/null
+++ b/test/lua/module/pandoc-path.lua
@@ -0,0 +1,17 @@
+local tasty = require 'tasty'
+local path = require 'pandoc.path'
+
+local assert = tasty.assert
+local test = tasty.test_case
+local group = tasty.test_group
+
+return {
+  group 'path separator' {
+    test('is string', function ()
+      assert.are_same(type(path.separator), 'string')
+    end),
+    test('is slash or backslash', function ()
+      assert.is_truthy(path.separator:match '^[/\\]$')
+    end),
+  },
+}
-- 
cgit v1.2.3


From b79aba6ea1ecb6a1f619128638369ef47f64d26d Mon Sep 17 00:00:00 2001
From: Nick Berendsen <58348935+Desbeers@users.noreply.github.com>
Date: Wed, 3 Feb 2021 18:00:18 +0100
Subject: ePub writer: `belongs-to-collection` metadata (#7063)

---
 MANUAL.txt                      |  9 ++++
 src/Text/Pandoc/Writers/EPUB.hs | 99 ++++++++++++++++++++++++-----------------
 2 files changed, 67 insertions(+), 41 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 4a82703a3..e771d3da4 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5931,6 +5931,15 @@ The following fields are recognized:
 
 `rights`
   ~ A string value.
+  
+`belongs-to-collection`
+  ~ A string value.  identifies the name of a collection to which
+    the EPUB Publication belongs.
+
+`group-position`
+  ~ The `group-position` field indicates the numeric position in which
+    the EPUB Publication belongs relative to other works belonging to
+    the same `belongs-to-collection` field.
 
 `cover-image`
   ~ A string value (path to cover image).
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 783f190f5..1f16f6772 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -74,26 +74,28 @@ data EPUBState = EPUBState {
 type E m = StateT EPUBState m
 
 data EPUBMetadata = EPUBMetadata{
-    epubIdentifier    :: [Identifier]
-  , epubTitle         :: [Title]
-  , epubDate          :: [Date]
-  , epubLanguage      :: String
-  , epubCreator       :: [Creator]
-  , epubContributor   :: [Creator]
-  , epubSubject       :: [String]
-  , epubDescription   :: Maybe String
-  , epubType          :: Maybe String
-  , epubFormat        :: Maybe String
-  , epubPublisher     :: Maybe String
-  , epubSource        :: Maybe String
-  , epubRelation      :: Maybe String
-  , epubCoverage      :: Maybe String
-  , epubRights        :: Maybe String
-  , epubCoverImage    :: Maybe String
-  , epubStylesheets   :: [FilePath]
-  , epubPageDirection :: Maybe ProgressionDirection
-  , epubIbooksFields  :: [(String, String)]
-  , epubCalibreFields :: [(String, String)]
+    epubIdentifier          :: [Identifier]
+  , epubTitle               :: [Title]
+  , epubDate                :: [Date]
+  , epubLanguage            :: String
+  , epubCreator             :: [Creator]
+  , epubContributor         :: [Creator]
+  , epubSubject             :: [String]
+  , epubDescription         :: Maybe String
+  , epubType                :: Maybe String
+  , epubFormat              :: Maybe String
+  , epubPublisher           :: Maybe String
+  , epubSource              :: Maybe String
+  , epubRelation            :: Maybe String
+  , epubCoverage            :: Maybe String
+  , epubRights              :: Maybe String
+  , epubBelongsToCollection :: Maybe String
+  , epubGroupPosition       :: Maybe String
+  , epubCoverImage          :: Maybe String
+  , epubStylesheets         :: [FilePath]
+  , epubPageDirection       :: Maybe ProgressionDirection
+  , epubIbooksFields        :: [(String, String)]
+  , epubCalibreFields       :: [(String, String)]
   } deriving Show
 
 data Date = Date{
@@ -235,6 +237,8 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
   | name == "relation" = md { epubRelation = Just $ strContent e }
   | name == "coverage" = md { epubCoverage = Just $ strContent e }
   | name == "rights" = md { epubRights = Just $ strContent e }
+  | name == "belongs-to-collection" = md { epubBelongsToCollection = Just $ strContent e }
+  | name == "group-position" = md { epubGroupPosition = Just $ strContent e }  
   | otherwise = md
   where getAttr n = lookupAttr (opfName n) attrs
 addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md =
@@ -313,26 +317,28 @@ simpleList s meta =
 
 metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
 metadataFromMeta opts meta = EPUBMetadata{
-      epubIdentifier         = identifiers
-    , epubTitle              = titles
-    , epubDate               = date
-    , epubLanguage           = language
-    , epubCreator            = creators
-    , epubContributor        = contributors
-    , epubSubject            = subjects
-    , epubDescription        = description
-    , epubType               = epubtype
-    , epubFormat             = format
-    , epubPublisher          = publisher
-    , epubSource             = source
-    , epubRelation           = relation
-    , epubCoverage           = coverage
-    , epubRights             = rights
-    , epubCoverImage         = coverImage
-    , epubStylesheets        = stylesheets
-    , epubPageDirection      = pageDirection
-    , epubIbooksFields       = ibooksFields
-    , epubCalibreFields      = calibreFields
+      epubIdentifier           = identifiers
+    , epubTitle                = titles
+    , epubDate                 = date
+    , epubLanguage             = language
+    , epubCreator              = creators
+    , epubContributor          = contributors
+    , epubSubject              = subjects
+    , epubDescription          = description
+    , epubType                 = epubtype
+    , epubFormat               = format
+    , epubPublisher            = publisher
+    , epubSource               = source
+    , epubRelation             = relation
+    , epubCoverage             = coverage
+    , epubRights               = rights
+    , epubBelongsToCollection  = belongsToCollection
+    , epubGroupPosition        = groupPosition 
+    , epubCoverImage           = coverImage
+    , epubStylesheets          = stylesheets
+    , epubPageDirection        = pageDirection
+    , epubIbooksFields         = ibooksFields
+    , epubCalibreFields        = calibreFields
     }
   where identifiers = getIdentifier meta
         titles = getTitle meta
@@ -350,6 +356,8 @@ metadataFromMeta opts meta = EPUBMetadata{
         relation = metaValueToString <$> lookupMeta "relation" meta
         coverage = metaValueToString <$> lookupMeta "coverage" meta
         rights = metaValueToString <$> lookupMeta "rights" meta
+        belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta
+        groupPosition = metaValueToString <$> lookupMeta "group-position" meta       
         coverImage =
             (TS.unpack <$> lookupContext "epub-cover-image"
                               (writerVariables opts))
@@ -931,7 +939,7 @@ metadataElement version md currentTime =
                   ++ descriptionNodes ++ typeNodes ++ formatNodes
                   ++ publisherNodes ++ sourceNodes ++ relationNodes
                   ++ coverageNodes ++ rightsNodes ++ coverImageNodes
-                  ++ modifiedNodes
+                  ++ modifiedNodes ++ belongsToCollectionNodes
         withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
                          ([1..] :: [Int]))
         identifierNodes = withIds "epub-id" toIdentifierNode $
@@ -970,6 +978,15 @@ metadataElement version md currentTime =
             $ epubCoverImage md
         modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
                showDateTimeISO8601 currentTime | version == EPUB3 ]
+        belongsToCollectionNodes = 
+            maybe []
+                (\belongsToCollection -> (unode "meta" !  [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection )
+                :
+                [unode "meta" !  [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: String) ])
+                (epubBelongsToCollection md)++
+            maybe []
+                (\groupPosition -> [unode "meta" !  [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ])
+                (epubGroupPosition md)
         dcTag n s = unode ("dc:" ++ n) s
         dcTag' n s = [dcTag n s]
         toIdentifierNode id' (Identifier txt scheme)
-- 
cgit v1.2.3


From a5169f68b251e04b0a68a7d93a30bafcb3f85e78 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Thu, 4 Feb 2021 19:07:59 +0100
Subject: Lua filters: use same function names in Haskell and Lua

---
 .hlint.yaml                            |  1 +
 src/Text/Pandoc/Lua/Module/MediaBag.hs | 27 ++++++++++++++-------------
 src/Text/Pandoc/Lua/Module/Pandoc.hs   | 31 ++++++++++++++++---------------
 3 files changed, 31 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/.hlint.yaml b/.hlint.yaml
index 6b74014d4..d5ebffd34 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -60,6 +60,7 @@
       - Text.Pandoc.Citeproc
       - Text.Pandoc.Extensions
       - Text.Pandoc.Lua.Marshaling.Version
+      - Text.Pandoc.Lua.Module.Pandoc
       - Text.Pandoc.Lua.Module.Utils
       - Text.Pandoc.Readers.Odt.ContentReader
       - Text.Pandoc.Readers.Odt.Namespaces
diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs
index 715e53885..78b699176 100644
--- a/src/Text/Pandoc/Lua/Module/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs
@@ -13,6 +13,7 @@ module Text.Pandoc.Lua.Module.MediaBag
   ( pushModule
   ) where
 
+import Prelude hiding (lookup)
 import Control.Monad (zipWithM_)
 import Foreign.Lua (Lua, NumResults, Optional)
 import Text.Pandoc.Class.CommonState (CommonState (..))
@@ -36,10 +37,10 @@ pushModule = do
   liftPandocLua Lua.newtable
   addFunction "delete" delete
   addFunction "empty" empty
-  addFunction "insert" insertMediaFn
+  addFunction "insert" insert
   addFunction "items" items
-  addFunction "lookup" lookupMediaFn
-  addFunction "list" mediaDirectoryFn
+  addFunction "lookup" lookup
+  addFunction "list" list
   addFunction "fetch" fetch
   return 1
 
@@ -53,11 +54,11 @@ empty :: PandocLua NumResults
 empty = 0 <$ modifyCommonState (\st -> st { stMediaBag = mempty })
 
 -- | Insert a new item into the media bag.
-insertMediaFn :: FilePath
-              -> Optional MimeType
-              -> BL.ByteString
-              -> PandocLua NumResults
-insertMediaFn fp optionalMime contents = do
+insert :: FilePath
+       -> Optional MimeType
+       -> BL.ByteString
+       -> PandocLua NumResults
+insert fp optionalMime contents = do
   mb <- getMediaBag
   setMediaBag $ MB.insertMedia fp (Lua.fromOptional optionalMime) contents mb
   return (Lua.NumResults 0)
@@ -66,9 +67,9 @@ insertMediaFn fp optionalMime contents = do
 items :: PandocLua NumResults
 items = getMediaBag >>= liftPandocLua . pushIterator
 
-lookupMediaFn :: FilePath
-              -> PandocLua NumResults
-lookupMediaFn fp = do
+lookup :: FilePath
+       -> PandocLua NumResults
+lookup fp = do
   res <- MB.lookupMedia fp <$> getMediaBag
   liftPandocLua $ case res of
     Nothing -> 1 <$ Lua.pushnil
@@ -77,8 +78,8 @@ lookupMediaFn fp = do
       Lua.push contents
       return 2
 
-mediaDirectoryFn :: PandocLua NumResults
-mediaDirectoryFn = do
+list :: PandocLua NumResults
+list = do
   dirContents <- MB.mediaDirectory <$> getMediaBag
   liftPandocLua $ do
     Lua.newtable
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index a8afecd2e..8d30f9a0c 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -14,6 +14,7 @@ module Text.Pandoc.Lua.Module.Pandoc
   ( pushModule
   ) where
 
+import Prelude hiding (read)
 import Control.Monad (when)
 import Control.Monad.Except (throwError)
 import Data.Default (Default (..))
@@ -43,10 +44,10 @@ import Text.Pandoc.Error
 pushModule :: PandocLua NumResults
 pushModule = do
   loadDefaultModule "pandoc"
-  addFunction "read" readDoc
-  addFunction "pipe" pipeFn
-  addFunction "walk_block" walkBlock
-  addFunction "walk_inline" walkInline
+  addFunction "read" read
+  addFunction "pipe" pipe
+  addFunction "walk_block" walk_block
+  addFunction "walk_inline" walk_inline
   return 1
 
 walkElement :: (Walkable (SingletonsList Inline) a,
@@ -54,14 +55,14 @@ walkElement :: (Walkable (SingletonsList Inline) a,
             => a -> LuaFilter -> PandocLua a
 walkElement x f = liftPandocLua $ walkInlines f x >>= walkBlocks f
 
-walkInline :: Inline -> LuaFilter -> PandocLua Inline
-walkInline = walkElement
+walk_inline :: Inline -> LuaFilter -> PandocLua Inline
+walk_inline = walkElement
 
-walkBlock :: Block -> LuaFilter -> PandocLua Block
-walkBlock = walkElement
+walk_block :: Block -> LuaFilter -> PandocLua Block
+walk_block = walkElement
 
-readDoc :: T.Text -> Optional T.Text -> PandocLua NumResults
-readDoc content formatSpecOrNil = liftPandocLua $ do
+read :: T.Text -> Optional T.Text -> PandocLua NumResults
+read content formatSpecOrNil = liftPandocLua $ do
   let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil)
   res <- Lua.liftIO . runIO $
            getReader formatSpec >>= \(rdr,es) ->
@@ -79,11 +80,11 @@ readDoc content formatSpecOrNil = liftPandocLua $ do
     Left  e      -> Lua.raiseError $ show e
 
 -- | Pipes input through a command.
-pipeFn :: String
-       -> [String]
-       -> BL.ByteString
-       -> PandocLua NumResults
-pipeFn command args input = liftPandocLua $ do
+pipe :: String           -- ^ path to executable
+     -> [String]         -- ^ list of arguments
+     -> BL.ByteString    -- ^ input passed to process via stdin
+     -> PandocLua NumResults
+pipe command args input = liftPandocLua $ do
   (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input
   case ec of
     ExitSuccess -> 1 <$ Lua.push output
-- 
cgit v1.2.3


From 8e9131db4edfda6deafdf94939b907e4dce3cbaa Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 6 Feb 2021 21:52:12 -0800
Subject: Markdown reader: improved handling of mmd link attributes in
 references.

Previously they only worked for links that had titles.  Closes #7080.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 ++
 test/command/7080.md                | 8 ++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 test/command/7080.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5c3a21bb7..e46553dd8 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -334,6 +334,8 @@ referenceKey = try $ do
                     skipMany spaceChar
                     notFollowedBy' referenceTitle
                     notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
+                    notFollowedBy' $ guardEnabled Ext_mmd_link_attributes >>
+                                     try (spnl <* keyValAttr)
                     notFollowedBy' (() <$ reference)
                     many1Char $ notFollowedBy space >> litChar
   let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
diff --git a/test/command/7080.md b/test/command/7080.md
new file mode 100644
index 000000000..0727c1083
--- /dev/null
+++ b/test/command/7080.md
@@ -0,0 +1,8 @@
+```
+% pandoc -f markdown_mmd -t native
+![][image]
+
+[image]: image.png width=100px height=150px
+^D
+[Para [Image ("",[],[("width","100px"),("height","150px")]) [] ("image.png","")]]
+```
-- 
cgit v1.2.3


From d202f7eb77242bd2d9395b950b74fc9b22f9ae13 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 7 Feb 2021 19:02:35 +0100
Subject: Avoid unnecessary use of NoImplicitPrelude pragma (#7089)

---
 app/pandoc.hs                             | 2 --
 benchmark/benchmark-pandoc.hs             | 2 --
 benchmark/weigh-pandoc.hs                 | 2 --
 pandoc.cabal                              | 2 +-
 src/Text/Pandoc/UTF8.hs                   | 1 -
 test/Tests/Command.hs                     | 2 --
 test/Tests/Helpers.hs                     | 2 --
 test/Tests/Lua.hs                         | 2 --
 test/Tests/Old.hs                         | 2 --
 test/Tests/Readers/Creole.hs              | 2 --
 test/Tests/Readers/Docx.hs                | 2 --
 test/Tests/Readers/DokuWiki.hs            | 2 --
 test/Tests/Readers/EPUB.hs                | 2 --
 test/Tests/Readers/FB2.hs                 | 2 --
 test/Tests/Readers/HTML.hs                | 2 --
 test/Tests/Readers/JATS.hs                | 2 --
 test/Tests/Readers/Jira.hs                | 1 -
 test/Tests/Readers/LaTeX.hs               | 2 --
 test/Tests/Readers/Man.hs                 | 1 -
 test/Tests/Readers/Markdown.hs            | 2 --
 test/Tests/Readers/Muse.hs                | 2 --
 test/Tests/Readers/Odt.hs                 | 2 --
 test/Tests/Readers/Org/Block.hs           | 2 --
 test/Tests/Readers/Org/Block/CodeBlock.hs | 2 --
 test/Tests/Readers/Org/Block/Figure.hs    | 2 --
 test/Tests/Readers/Org/Block/Header.hs    | 2 --
 test/Tests/Readers/Org/Block/List.hs      | 2 --
 test/Tests/Readers/Org/Block/Table.hs     | 2 --
 test/Tests/Readers/Org/Directive.hs       | 2 --
 test/Tests/Readers/Org/Inline.hs          | 2 --
 test/Tests/Readers/Org/Inline/Citation.hs | 2 --
 test/Tests/Readers/Org/Inline/Note.hs     | 2 --
 test/Tests/Readers/Org/Inline/Smart.hs    | 2 --
 test/Tests/Readers/Org/Meta.hs            | 2 --
 test/Tests/Readers/Org/Shared.hs          | 2 --
 test/Tests/Readers/RST.hs                 | 2 --
 test/Tests/Readers/Txt2Tags.hs            | 2 --
 test/Tests/Shared.hs                      | 2 --
 test/Tests/Writers/AnnotatedTable.hs      | 1 -
 test/Tests/Writers/AsciiDoc.hs            | 2 --
 test/Tests/Writers/ConTeXt.hs             | 2 --
 test/Tests/Writers/Docbook.hs             | 2 --
 test/Tests/Writers/Docx.hs                | 2 --
 test/Tests/Writers/FB2.hs                 | 2 --
 test/Tests/Writers/HTML.hs                | 2 --
 test/Tests/Writers/JATS.hs                | 2 --
 test/Tests/Writers/LaTeX.hs               | 2 --
 test/Tests/Writers/Markdown.hs            | 2 --
 test/Tests/Writers/Ms.hs                  | 2 --
 test/Tests/Writers/Muse.hs                | 1 -
 test/Tests/Writers/Native.hs              | 2 --
 test/Tests/Writers/OOXML.hs               | 2 --
 test/Tests/Writers/Org.hs                 | 2 --
 test/Tests/Writers/Plain.hs               | 2 --
 test/Tests/Writers/Powerpoint.hs          | 2 --
 test/Tests/Writers/RST.hs                 | 2 --
 test/Tests/Writers/TEI.hs                 | 2 --
 test/test-pandoc.hs                       | 2 --
 trypandoc/trypandoc.hs                    | 2 --
 59 files changed, 1 insertion(+), 112 deletions(-)

(limited to 'src')

diff --git a/app/pandoc.hs b/app/pandoc.hs
index 162570f18..0e30f45d1 100644
--- a/app/pandoc.hs
+++ b/app/pandoc.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Main
    Copyright   : Copyright (C) 2006-2021 John MacFarlane
@@ -12,7 +11,6 @@ Parses command-line options and calls the appropriate readers and
 writers.
 -}
 module Main where
-import Prelude
 import qualified Control.Exception as E
 import Text.Pandoc.App (convertWithOpts, defaultOpts, options, parseOptions)
 import Text.Pandoc.Error (handleError)
diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs
index 045573273..496732693 100644
--- a/benchmark/benchmark-pandoc.hs
+++ b/benchmark/benchmark-pandoc.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-
 Copyright (C) 2012-2021 John MacFarlane <jgm@berkeley.edu>
@@ -17,7 +16,6 @@ You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 -}
-import Prelude
 import Text.Pandoc
 import Text.Pandoc.MIME
 import Control.Monad.Except (throwError, liftIO)
diff --git a/benchmark/weigh-pandoc.hs b/benchmark/weigh-pandoc.hs
index ad4c83ad7..b77fa9ee9 100644
--- a/benchmark/weigh-pandoc.hs
+++ b/benchmark/weigh-pandoc.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Main
@@ -11,7 +10,6 @@
 
 Benchmarks to determine resource use of readers and writers.
 -}
-import Prelude
 import Weigh
 import Text.Pandoc
 import Data.Text (Text, unpack)
diff --git a/pandoc.cabal b/pandoc.cabal
index 7696e22b3..72e7c2da5 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -400,7 +400,6 @@ common common-options
   default-language: Haskell2010
   build-depends:    base         >= 4.9 && < 5,
                     text         >= 1.1.1.0 && < 1.3
-  other-extensions: NoImplicitPrelude
   ghc-options:      -Wall -fno-warn-unused-do-bind
                     -Wincomplete-record-updates
                     -Wnoncanonical-monad-instances
@@ -409,6 +408,7 @@ common common-options
     hs-source-dirs:   prelude
     other-modules:    Prelude
     build-depends:    base-compat >= 0.9
+    other-extensions: NoImplicitPrelude
 
   if os(windows)
     cpp-options:      -D_WINDOWS
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index b583dbbdb..567f5abe5 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.UTF8
diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs
index 8118e9759..07d825f73 100644
--- a/test/Tests/Command.hs
+++ b/test/Tests/Command.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE TupleSections #-}
 {- |
    Module      : Tests.Command
@@ -14,7 +13,6 @@ Run commands, and test results, defined in markdown files.
 module Tests.Command (runTest, tests)
 where
 
-import Prelude
 import Data.Algorithm.Diff
 import System.Environment.Executable (getExecutablePath)
 import qualified Data.ByteString as BS
diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs
index 813c80adb..31e727a66 100644
--- a/test/Tests/Helpers.hs
+++ b/test/Tests/Helpers.hs
@@ -1,5 +1,4 @@
 {-# LANGUAGE CPP #-}
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE FlexibleInstances    #-}
 {- |
    Module      : Tests.Helpers
@@ -22,7 +21,6 @@ module Tests.Helpers ( test
                      )
                      where
 
-import Prelude
 import Data.Algorithm.Diff
 import qualified Data.Map as M
 import Data.Text (Text, unpack)
diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs
index 1dfbbd053..31c011900 100644
--- a/test/Tests/Lua.hs
+++ b/test/Tests/Lua.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
@@ -14,7 +13,6 @@ Unit and integration tests for pandoc's Lua subsystem.
 -}
 module Tests.Lua ( runLuaTest, tests ) where
 
-import Prelude
 import Control.Monad (when)
 import System.FilePath ((</>))
 import Test.Tasty (TestTree, localOption)
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index a031700a6..17ece49fd 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE TupleSections #-}
 {- |
    Module      : Tests.Old
@@ -13,7 +12,6 @@
 -}
 module Tests.Old (tests) where
 
-import Prelude
 import Data.Algorithm.Diff
 import System.Exit
 import System.FilePath ((<.>), (</>))
diff --git a/test/Tests/Readers/Creole.hs b/test/Tests/Readers/Creole.hs
index 1fc0e62d7..3320b78e8 100644
--- a/test/Tests/Readers/Creole.hs
+++ b/test/Tests/Readers/Creole.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Creole
@@ -14,7 +13,6 @@ Tests for the creole reader.
 -}
 module Tests.Readers.Creole (tests) where
 
-import Prelude
 import Data.Text (Text)
 import qualified Data.Text as T
 import Test.Tasty
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
index 12007f502..263e04173 100644
--- a/test/Tests/Readers/Docx.hs
+++ b/test/Tests/Readers/Docx.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Docx
@@ -13,7 +12,6 @@ Tests for the word docx reader.
 -}
 module Tests.Readers.Docx (tests) where
 
-import Prelude
 import Codec.Archive.Zip
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as B
diff --git a/test/Tests/Readers/DokuWiki.hs b/test/Tests/Readers/DokuWiki.hs
index d5f0c45a9..84ba86d46 100644
--- a/test/Tests/Readers/DokuWiki.hs
+++ b/test/Tests/Readers/DokuWiki.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
@@ -14,7 +13,6 @@ Tests for DokuWiki reader.
 -}
 module Tests.Readers.DokuWiki (tests) where
 
-import Prelude
 import Data.Text (Text)
 import qualified Data.Text as T
 import Test.Tasty
diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs
index 3c75dd08d..13e239911 100644
--- a/test/Tests/Readers/EPUB.hs
+++ b/test/Tests/Readers/EPUB.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Readers.EPUB
    Copyright   : © 2006-2021 John MacFarlane
@@ -12,7 +11,6 @@ Tests for the EPUB mediabag.
 -}
 module Tests.Readers.EPUB (tests) where
 
-import Prelude
 import qualified Data.ByteString.Lazy as BL
 import qualified Data.Text as T
 import Test.Tasty
diff --git a/test/Tests/Readers/FB2.hs b/test/Tests/Readers/FB2.hs
index fbb2e2150..42054a235 100644
--- a/test/Tests/Readers/FB2.hs
+++ b/test/Tests/Readers/FB2.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {- |
    Module      : Tests.Readers.FB2
    Copyright   : © 2018-2020 Alexander Krotov
@@ -12,7 +11,6 @@ Tests for the EPUB mediabag.
 -}
 module Tests.Readers.FB2 (tests) where
 
-import Prelude
 import Test.Tasty
 import Tests.Helpers
 import Test.Tasty.Golden (goldenVsString)
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index 578c76860..f23af2cb1 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.HTML
@@ -13,7 +12,6 @@ Tests for the HTML reader.
 -}
 module Tests.Readers.HTML (tests) where
 
-import Prelude
 import Data.Text (Text)
 import qualified Data.Text as T
 import Test.Tasty
diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs
index 3c61f602f..525499c86 100644
--- a/test/Tests/Readers/JATS.hs
+++ b/test/Tests/Readers/JATS.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.JATS
@@ -13,7 +12,6 @@ Tests for the JATS reader.
 -}
 module Tests.Readers.JATS (tests) where
 
-import Prelude
 import Data.Text (Text)
 import Test.Tasty
 import Tests.Helpers
diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs
index bf78fe1fe..e170d2aaa 100644
--- a/test/Tests/Readers/Jira.hs
+++ b/test/Tests/Readers/Jira.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index 2a52ffd18..77104c853 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.LaTeX
@@ -13,7 +12,6 @@ Tests for the LaTeX reader.
 -}
 module Tests.Readers.LaTeX (tests) where
 
-import Prelude
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Text.Pandoc.UTF8 as UTF8
diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs
index 4f3ab5a28..d36151d58 100644
--- a/test/Tests/Readers/Man.hs
+++ b/test/Tests/Readers/Man.hs
@@ -13,7 +13,6 @@ Tests for the Man reader.
 -}
 module Tests.Readers.Man (tests) where
 
-import Prelude
 import Data.Text (Text)
 import Test.Tasty
 import Tests.Helpers
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 18f909583..0930deae6 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Markdown
@@ -13,7 +12,6 @@ Tests for the Markdown reader.
 -}
 module Tests.Readers.Markdown (tests) where
 
-import Prelude
 import Data.Text (Text, unpack)
 import qualified Data.Text as T
 import Test.Tasty
diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs
index 4ec1631e0..68bdc87b4 100644
--- a/test/Tests/Readers/Muse.hs
+++ b/test/Tests/Readers/Muse.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Muse
@@ -13,7 +12,6 @@ Tests for the Muse reader.
 -}
 module Tests.Readers.Muse (tests) where
 
-import Prelude
 import Data.List (intersperse)
 import Data.Monoid (Any (..))
 import Data.Text (Text)
diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs
index f5e427ba2..9b5ec6b9e 100644
--- a/test/Tests/Readers/Odt.hs
+++ b/test/Tests/Readers/Odt.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Odt
@@ -14,7 +13,6 @@ Tests for the ODT reader.
 -}
 module Tests.Readers.Odt (tests) where
 
-import Prelude
 import Control.Monad (liftM)
 import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as B
diff --git a/test/Tests/Readers/Org/Block.hs b/test/Tests/Readers/Org/Block.hs
index 2ce07c4bb..779563794 100644
--- a/test/Tests/Readers/Org/Block.hs
+++ b/test/Tests/Readers/Org/Block.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block
@@ -13,7 +12,6 @@ Tests parsing of org blocks.
 -}
 module Tests.Readers.Org.Block (tests) where
 
-import Prelude
 import Test.Tasty (TestTree, testGroup)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:), spcSep)
diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs
index d40c3bc1d..6b83ec6a9 100644
--- a/test/Tests/Readers/Org/Block/CodeBlock.hs
+++ b/test/Tests/Readers/Org/Block/CodeBlock.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.CodeBlock
@@ -13,7 +12,6 @@ Test parsing of org code blocks.
 -}
 module Tests.Readers.Org.Block.CodeBlock (tests) where
 
-import Prelude
 import Test.Tasty (TestTree)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:), spcSep)
diff --git a/test/Tests/Readers/Org/Block/Figure.hs b/test/Tests/Readers/Org/Block/Figure.hs
index 8822f5b03..eb5be1c2b 100644
--- a/test/Tests/Readers/Org/Block/Figure.hs
+++ b/test/Tests/Readers/Org/Block/Figure.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Figure
@@ -13,7 +12,6 @@ Test parsing of org figures.
 -}
 module Tests.Readers.Org.Block.Figure (tests) where
 
-import Prelude
 import Test.Tasty (TestTree)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:))
diff --git a/test/Tests/Readers/Org/Block/Header.hs b/test/Tests/Readers/Org/Block/Header.hs
index 887055451..1344ad79b 100644
--- a/test/Tests/Readers/Org/Block/Header.hs
+++ b/test/Tests/Readers/Org/Block/Header.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Header
@@ -13,7 +12,6 @@ Test parsing of org header blocks.
 -}
 module Tests.Readers.Org.Block.Header (tests) where
 
-import Prelude
 import Test.Tasty (TestTree, testGroup)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:), spcSep, tagSpan)
diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs
index ac03c583b..9686b5148 100644
--- a/test/Tests/Readers/Org/Block/List.hs
+++ b/test/Tests/Readers/Org/Block/List.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Header
@@ -13,7 +12,6 @@ Test parsing of org lists.
 -}
 module Tests.Readers.Org.Block.List (tests) where
 
-import Prelude
 import Test.Tasty (TestTree)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:), spcSep)
diff --git a/test/Tests/Readers/Org/Block/Table.hs b/test/Tests/Readers/Org/Block/Table.hs
index cb38fcc12..ce18e6a5b 100644
--- a/test/Tests/Readers/Org/Block/Table.hs
+++ b/test/Tests/Readers/Org/Block/Table.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Block.Table
@@ -13,7 +12,6 @@ Test parsing of org tables.
 -}
 module Tests.Readers.Org.Block.Table (tests) where
 
-import Prelude
 import Test.Tasty (TestTree)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:), spcSep)
diff --git a/test/Tests/Readers/Org/Directive.hs b/test/Tests/Readers/Org/Directive.hs
index 00cb9762b..85d1bc088 100644
--- a/test/Tests/Readers/Org/Directive.hs
+++ b/test/Tests/Readers/Org/Directive.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Directive
@@ -13,7 +12,6 @@ Tests parsing of org directives (like @#+OPTIONS@).
 -}
 module Tests.Readers.Org.Directive (tests) where
 
-import Prelude
 import Data.Time (UTCTime (UTCTime), secondsToDiffTime)
 import Data.Time.Calendar (Day (ModifiedJulianDay))
 import Test.Tasty (TestTree, testGroup)
diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs
index 13e9fef21..111d74879 100644
--- a/test/Tests/Readers/Org/Inline.hs
+++ b/test/Tests/Readers/Org/Inline.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline
@@ -13,7 +12,6 @@ Tests parsing of org inlines.
 -}
 module Tests.Readers.Org.Inline (tests) where
 
-import Prelude
 import Data.List (intersperse)
 import Test.Tasty (TestTree, testGroup)
 import Tests.Helpers ((=?>))
diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs
index 87bb3ca75..5538f1ec8 100644
--- a/test/Tests/Readers/Org/Inline/Citation.hs
+++ b/test/Tests/Readers/Org/Inline/Citation.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Citation
@@ -13,7 +12,6 @@ Test parsing of citations in org input.
 -}
 module Tests.Readers.Org.Inline.Citation (tests) where
 
-import Prelude
 import Test.Tasty (TestTree, testGroup)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:))
diff --git a/test/Tests/Readers/Org/Inline/Note.hs b/test/Tests/Readers/Org/Inline/Note.hs
index 20157d2ae..c37133d54 100644
--- a/test/Tests/Readers/Org/Inline/Note.hs
+++ b/test/Tests/Readers/Org/Inline/Note.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Note
@@ -13,7 +12,6 @@ Test parsing of footnotes in org input.
 -}
 module Tests.Readers.Org.Inline.Note (tests) where
 
-import Prelude
 import Test.Tasty (TestTree)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:))
diff --git a/test/Tests/Readers/Org/Inline/Smart.hs b/test/Tests/Readers/Org/Inline/Smart.hs
index 7fde380af..db96eb2ca 100644
--- a/test/Tests/Readers/Org/Inline/Smart.hs
+++ b/test/Tests/Readers/Org/Inline/Smart.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Inline.Smart
@@ -13,7 +12,6 @@ Test smart parsing of quotes, apostrophe, etc.
 -}
 module Tests.Readers.Org.Inline.Smart (tests) where
 
-import Prelude
 import Data.Text (Text)
 import Test.Tasty (TestTree)
 import Tests.Helpers ((=?>), purely, test)
diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs
index 3c50f891b..6363d84b0 100644
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Meta
@@ -13,7 +12,6 @@ Tests parsing of org meta data (mostly lines starting with @#+@).
 -}
 module Tests.Readers.Org.Meta (tests) where
 
-import Prelude
 import Test.Tasty (TestTree, testGroup)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:), spcSep)
diff --git a/test/Tests/Readers/Org/Shared.hs b/test/Tests/Readers/Org/Shared.hs
index 4d0848575..c584eff19 100644
--- a/test/Tests/Readers/Org/Shared.hs
+++ b/test/Tests/Readers/Org/Shared.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Org.Shared
@@ -18,7 +17,6 @@ module Tests.Readers.Org.Shared
   , tagSpan
   ) where
 
-import Prelude
 import Data.List (intersperse)
 import Data.Text (Text)
 import Tests.Helpers (ToString, purely, test)
diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs
index 68241b7f9..a12b59fc2 100644
--- a/test/Tests/Readers/RST.hs
+++ b/test/Tests/Readers/RST.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
@@ -14,7 +13,6 @@ Tests for the RST reader.
 -}
 module Tests.Readers.RST (tests) where
 
-import Prelude
 import Data.Text (Text)
 import qualified Data.Text as T
 import Test.Tasty
diff --git a/test/Tests/Readers/Txt2Tags.hs b/test/Tests/Readers/Txt2Tags.hs
index 62f336690..013f29d68 100644
--- a/test/Tests/Readers/Txt2Tags.hs
+++ b/test/Tests/Readers/Txt2Tags.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Readers.Txt2Tags
@@ -14,7 +13,6 @@ Tests for the Txt2Tags reader.
 -}
 module Tests.Readers.Txt2Tags (tests) where
 
-import Prelude
 import Data.List (intersperse)
 import Data.Text (Text)
 import qualified Data.Text as T
diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs
index 72a59fec0..e415ea153 100644
--- a/test/Tests/Shared.hs
+++ b/test/Tests/Shared.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Shared
@@ -13,7 +12,6 @@ Tests for functions used in many parts of the library.
 -}
 module Tests.Shared (tests) where
 
-import Prelude
 import System.FilePath.Posix (joinPath)
 import Test.Tasty
 import Test.Tasty.HUnit (assertBool, testCase, (@?=))
diff --git a/test/Tests/Writers/AnnotatedTable.hs b/test/Tests/Writers/AnnotatedTable.hs
index 7e16cf8e0..53cca80a6 100644
--- a/test/Tests/Writers/AnnotatedTable.hs
+++ b/test/Tests/Writers/AnnotatedTable.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Tests.Writers.AnnotatedTable
diff --git a/test/Tests/Writers/AsciiDoc.hs b/test/Tests/Writers/AsciiDoc.hs
index 75f6e5e97..04655635f 100644
--- a/test/Tests/Writers/AsciiDoc.hs
+++ b/test/Tests/Writers/AsciiDoc.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.AsciiDoc (tests) where
 
-import Prelude
 import Data.Text (unpack)
 import Test.Tasty
 import Tests.Helpers
diff --git a/test/Tests/Writers/ConTeXt.hs b/test/Tests/Writers/ConTeXt.hs
index c747e5d2f..5c1c98d4e 100644
--- a/test/Tests/Writers/ConTeXt.hs
+++ b/test/Tests/Writers/ConTeXt.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.ConTeXt (tests) where
 
-import Prelude
 import Data.Text (unpack, pack)
 import Test.Tasty
 import Test.Tasty.QuickCheck
diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs
index 621c1280b..842aed7ae 100644
--- a/test/Tests/Writers/Docbook.hs
+++ b/test/Tests/Writers/Docbook.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.Docbook (tests) where
 
-import Prelude
 import Data.Text (unpack)
 import Test.Tasty
 import Tests.Helpers
diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs
index 66a5c3d36..2e0f1e3fb 100644
--- a/test/Tests/Writers/Docx.hs
+++ b/test/Tests/Writers/Docx.hs
@@ -1,7 +1,5 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 module Tests.Writers.Docx (tests) where
 
-import Prelude
 import Text.Pandoc
 import Test.Tasty
 import Tests.Writers.OOXML
diff --git a/test/Tests/Writers/FB2.hs b/test/Tests/Writers/FB2.hs
index 7699c58e9..2e10636fa 100644
--- a/test/Tests/Writers/FB2.hs
+++ b/test/Tests/Writers/FB2.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.FB2 (tests) where
 
-import Prelude
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs
index 6ff0a6e1d..328801e31 100644
--- a/test/Tests/Writers/HTML.hs
+++ b/test/Tests/Writers/HTML.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.HTML (tests) where
 
-import Prelude
 import Data.Text (unpack)
 import Test.Tasty
 import Tests.Helpers
diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs
index 7d98f979b..2f501c890 100644
--- a/test/Tests/Writers/JATS.hs
+++ b/test/Tests/Writers/JATS.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.JATS (tests) where
 
-import Prelude
 import Data.Text (unpack)
 import Test.Tasty
 import Tests.Helpers
diff --git a/test/Tests/Writers/LaTeX.hs b/test/Tests/Writers/LaTeX.hs
index 44e23d48e..ae5879099 100644
--- a/test/Tests/Writers/LaTeX.hs
+++ b/test/Tests/Writers/LaTeX.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.LaTeX (tests) where
 
-import Prelude
 import Data.Text (unpack)
 import Test.Tasty
 import Tests.Helpers
diff --git a/test/Tests/Writers/Markdown.hs b/test/Tests/Writers/Markdown.hs
index 4b819de24..d4f927ebe 100644
--- a/test/Tests/Writers/Markdown.hs
+++ b/test/Tests/Writers/Markdown.hs
@@ -1,9 +1,7 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
 module Tests.Writers.Markdown (tests) where
 
-import Prelude
 import Data.Text (unpack)
 import Test.Tasty
 import Tests.Helpers
diff --git a/test/Tests/Writers/Ms.hs b/test/Tests/Writers/Ms.hs
index d73603314..ad6849633 100644
--- a/test/Tests/Writers/Ms.hs
+++ b/test/Tests/Writers/Ms.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.Ms (tests) where
 
-import Prelude
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
diff --git a/test/Tests/Writers/Muse.hs b/test/Tests/Writers/Muse.hs
index d0df0799f..5bddca3af 100644
--- a/test/Tests/Writers/Muse.hs
+++ b/test/Tests/Writers/Muse.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.Muse (tests) where
 
diff --git a/test/Tests/Writers/Native.hs b/test/Tests/Writers/Native.hs
index 905e83b1e..d7771ca19 100644
--- a/test/Tests/Writers/Native.hs
+++ b/test/Tests/Writers/Native.hs
@@ -1,7 +1,5 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 module Tests.Writers.Native (tests) where
 
-import Prelude
 import Data.Text (unpack)
 import Test.Tasty
 import Test.Tasty.QuickCheck
diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs
index 628ea9409..376f02c55 100644
--- a/test/Tests/Writers/OOXML.hs
+++ b/test/Tests/Writers/OOXML.hs
@@ -1,10 +1,8 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE PatternGuards     #-}
 {-# LANGUAGE OverloadedStrings #-}
 
 module Tests.Writers.OOXML (ooxmlTest) where
 
-import Prelude
 import Text.Pandoc
 import Test.Tasty
 import Test.Tasty.Golden.Advanced
diff --git a/test/Tests/Writers/Org.hs b/test/Tests/Writers/Org.hs
index c99f7344d..9cbe360da 100644
--- a/test/Tests/Writers/Org.hs
+++ b/test/Tests/Writers/Org.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.Org (tests) where
 
-import Prelude
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
diff --git a/test/Tests/Writers/Plain.hs b/test/Tests/Writers/Plain.hs
index b8d1f6693..17edc9dbd 100644
--- a/test/Tests/Writers/Plain.hs
+++ b/test/Tests/Writers/Plain.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.Plain (tests) where
 
-import Prelude
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
diff --git a/test/Tests/Writers/Powerpoint.hs b/test/Tests/Writers/Powerpoint.hs
index be98fe0e7..87ebe990c 100644
--- a/test/Tests/Writers/Powerpoint.hs
+++ b/test/Tests/Writers/Powerpoint.hs
@@ -1,7 +1,5 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 module Tests.Writers.Powerpoint (tests) where
 
-import Prelude
 import Tests.Writers.OOXML (ooxmlTest)
 import Text.Pandoc
 import Test.Tasty
diff --git a/test/Tests/Writers/RST.hs b/test/Tests/Writers/RST.hs
index a52423fad..94745e9a2 100644
--- a/test/Tests/Writers/RST.hs
+++ b/test/Tests/Writers/RST.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.RST (tests) where
 
-import Prelude
 import Control.Monad.Identity
 import Test.Tasty
 import Test.Tasty.HUnit
diff --git a/test/Tests/Writers/TEI.hs b/test/Tests/Writers/TEI.hs
index 31e970495..fa372909f 100644
--- a/test/Tests/Writers/TEI.hs
+++ b/test/Tests/Writers/TEI.hs
@@ -1,8 +1,6 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.TEI (tests) where
 
-import Prelude
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs
index 9973dffc8..4d9da525b 100644
--- a/test/test-pandoc.hs
+++ b/test/test-pandoc.hs
@@ -1,9 +1,7 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# OPTIONS_GHC -Wall #-}
 
 module Main where
 
-import Prelude
 import System.Environment (getArgs)
 import qualified Control.Exception as E
 import Text.Pandoc.App (convertWithOpts, defaultOpts, options,
diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs
index 9a53aa18c..810752afa 100644
--- a/trypandoc/trypandoc.hs
+++ b/trypandoc/trypandoc.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Main
@@ -12,7 +11,6 @@
 Provides a webservice which allows to try pandoc in the browser.
 -}
 module Main where
-import Prelude
 import Network.Wai.Handler.CGI
 import Network.Wai
 import Control.Applicative ((<$>))
-- 
cgit v1.2.3


From 69b7401e31216ae52164950970cf88b778d8d198 Mon Sep 17 00:00:00 2001
From: Nils Carlson <nils@nilscarlson.se>
Date: Mon, 8 Feb 2021 17:36:58 +0000
Subject: DocBook reader: Support informalfigure (#7079)

Add support for informalfigure.
---
 src/Text/Pandoc/Readers/DocBook.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index d3b2dd4d3..ada3e98ec 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -206,7 +206,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] info - A wrapper for information about a component or other block. (DocBook v5)
 [x] informalequation - A displayed mathematical equation without a title
 [x] informalexample - A displayed example without a title
-[ ] informalfigure - A untitled figure
+[x] informalfigure - An untitled figure
 [ ] informaltable - A table without a title
 [ ] initializer - The initializer for a FieldSynopsis
 [x] inlineequation - A mathematical equation or expression occurring inline
@@ -669,6 +669,7 @@ blockTags =
   , "index"
   , "info"
   , "informalexample"
+  , "informalfigure"
   , "informaltable"
   , "itemizedlist"
   , "linegroup"
@@ -855,6 +856,7 @@ parseBlock (Elem e) =
         "variablelist" -> definitionList <$> deflistitems
         "procedure" -> bulletList <$> steps
         "figure" -> getFigure e
+        "informalfigure" -> getFigure e
         "mediaobject" -> para <$> getMediaobject e
         "caption" -> skip
         "info" -> addMetadataFromElement e
-- 
cgit v1.2.3


From 5cd1c1001fbfc768fdb146f14ae056a9a7ba94eb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 8 Feb 2021 09:15:43 -0800
Subject: ODT reader: give more information if zip can't be unpacked.

---
 src/Text/Pandoc/Readers/Odt.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 7b8bfd4b5..9b66b60ec 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -23,6 +23,8 @@ import System.FilePath
 
 import Control.Monad.Except (throwError)
 
+import qualified Data.Text as T
+
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
@@ -60,7 +62,8 @@ readOdt' _ bytes = bytesToOdt bytes-- of
 bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
 bytesToOdt bytes = case toArchiveOrFail bytes of
   Right archive -> archiveToOdt archive
-  Left _        -> Left $ PandocParseError "Couldn't parse odt file."
+  Left err      -> Left $ PandocParseError
+                        $ "Could not unzip ODT: " <> T.pack err
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-- 
cgit v1.2.3


From f70795dc5e431c0132acc8bb2d20bb5dec942de7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 8 Feb 2021 09:39:29 -0800
Subject: ODT reader: finer-grained errors on parse failure.

See #7091.
---
 src/Text/Pandoc/Readers/Odt.hs | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 9b66b60ec..9943d3147 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE PatternGuards     #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt
@@ -67,29 +66,27 @@ bytesToOdt bytes = case toArchiveOrFail bytes of
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive
-  | Just contentEntry      <- findEntryByPath "content.xml" archive
-  , Just stylesEntry       <- findEntryByPath "styles.xml"  archive
-  , Just contentElem       <- entryToXmlElem contentEntry
-  , Just stylesElem        <- entryToXmlElem stylesEntry
-  , Right styles           <- chooseMax (readStylesAt stylesElem )
-                                        (readStylesAt contentElem)
-  , media                  <- filteredFilesFromArchive archive filePathIsOdtMedia
-  , startState             <- readerState styles media
-  , Right pandocWithMedia  <- runConverter' read_body
-                                            startState
-                                            contentElem
-
-  = Right pandocWithMedia
-
-  | otherwise
-    -- Not very detailed, but I don't think more information would be helpful
-  = Left $ PandocParseError "Couldn't parse odt file."
-    where
-      filePathIsOdtMedia :: FilePath -> Bool
+archiveToOdt archive = either (Left. PandocParseError) Right $ do
+  let onFailure msg Nothing = Left msg
+      onFailure _   (Just x) = Right x
+  contentEntry <- onFailure "Could not find content.xml"
+                   (findEntryByPath "content.xml" archive)
+  stylesEntry <- onFailure "Could not find styles.xml"
+                   (findEntryByPath "styles.xml" archive)
+  contentElem <- onFailure "Could not find content element"
+                   (entryToXmlElem contentEntry)
+  stylesElem <- onFailure "Could not find styles element"
+                   (entryToXmlElem stylesEntry)
+  styles <- either (\_ -> Left "Could not read styles") Right
+               (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
+  let filePathIsOdtMedia :: FilePath -> Bool
       filePathIsOdtMedia fp =
         let (dir, name) = splitFileName fp
         in  (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
+  let media = filteredFilesFromArchive archive filePathIsOdtMedia
+  let startState = readerState styles media
+  either (\_ -> Left "Could not convert opendocument") Right
+    (runConverter' read_body startState contentElem)
 
 
 --
-- 
cgit v1.2.3


From 8ca191604dcd13af27c11d2da225da646ebce6fc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 8 Feb 2021 23:35:19 -0800
Subject: Add new unexported module T.P.XMLParser.

This exports functions that uses xml-conduit's parser to
produce an xml-light Element or [Content].  This allows
existing pandoc code to use a better parser without
much modification.

The new parser is used in all places where xml-light's
parser was previously used.  Benchmarks show a significant
performance improvement in parsing XML-based formats
(especially ODT and FB2).

Note that the xml-light types use String, so the
conversion from xml-conduit types involves a lot
of extra allocation.  It would be desirable to
avoid that in the future by gradually switching
to using xml-conduit directly. This can be done
module by module.

The new parser also reports errors, which we report
when possible.

A new constructor PandocXMLError has been added to
PandocError in T.P.Error [API change].

Closes #7091, which was the main stimulus.

These changes revealed the need for some changes
in the tests.  The docbook-reader.docbook test
lacked definitions for the entities it used; these
have been added. And the docx golden tests have been
updated, because the new parser does not preserve
the order of attributes.

Add entity defs to docbook-reader.docbook.

Update golden tests for docx.
---
 MANUAL.txt                                         |   1 +
 pandoc.cabal                                       |   2 +
 src/Text/Pandoc/Error.hs                           |   3 +
 src/Text/Pandoc/ImageSize.hs                       |   5 +-
 src/Text/Pandoc/Readers/DocBook.hs                 |  52 ++++++++++------
 src/Text/Pandoc/Readers/Docx/Parse.hs              |  21 ++++---
 src/Text/Pandoc/Readers/Docx/Parse/Styles.hs       |  28 +++++----
 src/Text/Pandoc/Readers/EPUB.hs                    |  17 ++++--
 src/Text/Pandoc/Readers/FB2.hs                     |  10 ++--
 src/Text/Pandoc/Readers/JATS.hs                    |   9 ++-
 src/Text/Pandoc/Readers/OPML.hs                    |  10 +++-
 src/Text/Pandoc/Readers/Odt.hs                     |  24 ++++----
 src/Text/Pandoc/Writers/EPUB.hs                    |  13 +++-
 src/Text/Pandoc/Writers/FB2.hs                     |  11 +++-
 src/Text/Pandoc/Writers/ODT.hs                     |  29 +++++----
 src/Text/Pandoc/Writers/OOXML.hs                   |   9 +--
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       |   4 +-
 src/Text/Pandoc/XMLParser.hs                       |  66 +++++++++++++++++++++
 test/Tests/Readers/JATS.hs                         |   1 +
 test/command/5321.md                               |   4 +-
 test/docbook-reader.docbook                        |   7 ++-
 test/docx/golden/block_quotes.docx                 | Bin 10092 -> 10098 bytes
 test/docx/golden/codeblock.docx                    | Bin 9944 -> 9950 bytes
 test/docx/golden/comments.docx                     | Bin 10279 -> 10285 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 10042 -> 10048 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10666 -> 10673 bytes
 test/docx/golden/custom_style_reference.docx       | Bin 12434 -> 12434 bytes
 test/docx/golden/definition_list.docx              | Bin 9941 -> 9947 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9947 -> 9953 bytes
 test/docx/golden/document-properties.docx          | Bin 10423 -> 10429 bytes
 test/docx/golden/headers.docx                      | Bin 10080 -> 10086 bytes
 test/docx/golden/image.docx                        | Bin 26758 -> 26764 bytes
 test/docx/golden/inline_code.docx                  | Bin 9880 -> 9886 bytes
 test/docx/golden/inline_formatting.docx            | Bin 10060 -> 10066 bytes
 test/docx/golden/inline_images.docx                | Bin 26816 -> 26822 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10101 -> 10107 bytes
 test/docx/golden/links.docx                        | Bin 10276 -> 10282 bytes
 test/docx/golden/lists.docx                        | Bin 10352 -> 10358 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10143 -> 10149 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10232 -> 10238 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10144 -> 10150 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10239 -> 10245 bytes
 test/docx/golden/notes.docx                        | Bin 10046 -> 10052 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9980 -> 9986 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10115 -> 10121 bytes
 test/docx/golden/table_one_row.docx                | Bin 9932 -> 9938 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10249 -> 10255 bytes
 test/docx/golden/tables.docx                       | Bin 10266 -> 10272 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9924 -> 9930 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9907 -> 9913 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9941 -> 9947 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 10053 -> 10059 bytes
 test/docx/golden/unicode.docx                      | Bin 9865 -> 9871 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9913 -> 9919 bytes
 test/jats-reader.native                            |   2 +-
 test/jats-reader.xml                               |   1 +
 test/pptx/code-custom.pptx                         | Bin 28230 -> 28221 bytes
 test/pptx/code-custom_templated.pptx               | Bin 395524 -> 395516 bytes
 test/pptx/code.pptx                                | Bin 28229 -> 28220 bytes
 test/pptx/code_templated.pptx                      | Bin 395522 -> 395514 bytes
 test/pptx/document-properties-short-desc.pptx      | Bin 27012 -> 27004 bytes
 .../document-properties-short-desc_templated.pptx  | Bin 394298 -> 394288 bytes
 test/pptx/document-properties.pptx                 | Bin 27417 -> 27408 bytes
 test/pptx/document-properties_templated.pptx       | Bin 394701 -> 394691 bytes
 test/pptx/endnotes.pptx                            | Bin 26969 -> 26962 bytes
 test/pptx/endnotes_templated.pptx                  | Bin 394262 -> 394253 bytes
 test/pptx/endnotes_toc.pptx                        | Bin 27892 -> 27789 bytes
 test/pptx/endnotes_toc_templated.pptx              | Bin 395186 -> 395083 bytes
 test/pptx/images.pptx                              | Bin 44626 -> 44619 bytes
 test/pptx/images_templated.pptx                    | Bin 411916 -> 411909 bytes
 test/pptx/inline_formatting.pptx                   | Bin 26156 -> 26148 bytes
 test/pptx/inline_formatting_templated.pptx         | Bin 393447 -> 393438 bytes
 test/pptx/lists.pptx                               | Bin 27056 -> 27049 bytes
 test/pptx/lists_templated.pptx                     | Bin 394349 -> 394340 bytes
 test/pptx/raw_ooxml.pptx                           | Bin 26948 -> 26940 bytes
 test/pptx/raw_ooxml_templated.pptx                 | Bin 394240 -> 394231 bytes
 test/pptx/remove_empty_slides.pptx                 | Bin 44073 -> 44065 bytes
 test/pptx/remove_empty_slides_templated.pptx       | Bin 411359 -> 411352 bytes
 test/pptx/slide_breaks.pptx                        | Bin 28582 -> 28575 bytes
 test/pptx/slide_breaks_slide_level_1.pptx          | Bin 27751 -> 27744 bytes
 .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395045 -> 395038 bytes
 test/pptx/slide_breaks_templated.pptx              | Bin 395875 -> 395868 bytes
 test/pptx/slide_breaks_toc.pptx                    | Bin 29539 -> 29532 bytes
 test/pptx/slide_breaks_toc_templated.pptx          | Bin 396833 -> 396826 bytes
 test/pptx/speaker_notes.pptx                       | Bin 35444 -> 35436 bytes
 test/pptx/speaker_notes_after_metadata.pptx        | Bin 31683 -> 31675 bytes
 .../speaker_notes_after_metadata_templated.pptx    | Bin 398964 -> 398955 bytes
 test/pptx/speaker_notes_afterheader.pptx           | Bin 30700 -> 30691 bytes
 test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397988 -> 397979 bytes
 test/pptx/speaker_notes_afterseps.pptx             | Bin 51612 -> 51604 bytes
 test/pptx/speaker_notes_afterseps_templated.pptx   | Bin 418903 -> 418896 bytes
 test/pptx/speaker_notes_templated.pptx             | Bin 402736 -> 402728 bytes
 test/pptx/start_numbering_at.pptx                  | Bin 27031 -> 27023 bytes
 test/pptx/start_numbering_at_templated.pptx        | Bin 394323 -> 394314 bytes
 test/pptx/tables.pptx                              | Bin 27573 -> 27566 bytes
 test/pptx/tables_templated.pptx                    | Bin 394868 -> 394859 bytes
 test/pptx/two_column.pptx                          | Bin 26075 -> 26065 bytes
 test/pptx/two_column_templated.pptx                | Bin 393366 -> 393355 bytes
 98 files changed, 238 insertions(+), 91 deletions(-)
 create mode 100644 src/Text/Pandoc/XMLParser.hs

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 5b90d039a..dc3b4ca77 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -1464,6 +1464,7 @@ Nonzero exit codes have the following meanings:
    24 PandocCiteprocError
    31 PandocEpubSubdirectoryError
    43 PandocPDFError
+   44 PandocXMLError
    47 PandocPDFProgramNotFoundError
    61 PandocHttpError
    62 PandocShouldNeverHappenError
diff --git a/pandoc.cabal b/pandoc.cabal
index 72e7c2da5..e56456c68 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -493,6 +493,7 @@ library
                  unicode-transforms    >= 0.3      && < 0.4,
                  unordered-containers  >= 0.2      && < 0.3,
                  xml                   >= 1.3.12   && < 1.4,
+                 xml-conduit           >= 1.7      && < 1.10,
                  zip-archive           >= 0.2.3.4  && < 0.5,
                  zlib                  >= 0.5      && < 0.7
   if os(windows) && arch(i386)
@@ -686,6 +687,7 @@ library
                    Text.Pandoc.Lua.PandocLua,
                    Text.Pandoc.Lua.Util,
                    Text.Pandoc.Lua.Walk,
+                   Text.Pandoc.XMLParser,
                    Text.Pandoc.CSS,
                    Text.Pandoc.CSV,
                    Text.Pandoc.RoffChar,
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 204cf15ca..831405f42 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -48,6 +48,7 @@ data PandocError = PandocIOError Text IOError
                  | PandocFailOnWarningError
                  | PandocPDFProgramNotFoundError Text
                  | PandocPDFError Text
+                 | PandocXMLError Text Text
                  | PandocFilterError Text Text
                  | PandocLuaError Text
                  | PandocCouldNotFindDataFileError Text
@@ -103,6 +104,8 @@ handleError (Left e) =
     PandocPDFProgramNotFoundError pdfprog -> err 47 $
         pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog
     PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg
+    PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <>
+        (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg
     PandocFilterError filtername msg -> err 83 $ "Error running filter " <>
         filtername <> ":\n" <> msg
     PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index e19958f6a..e0a1af8e8 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -45,7 +45,9 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Text.XML.Light as Xml
+import Text.Pandoc.XMLParser (parseXMLElement)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Control.Applicative
 import qualified Data.Attoparsec.ByteString.Char8 as A
@@ -327,7 +329,8 @@ getSize img =
 
 svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
 svgSize opts img = do
-  doc <- Xml.parseXMLDoc $ UTF8.toString img
+  doc <- either (const mzero) return $ parseXMLElement
+                                     $ TL.fromStrict $ UTF8.toText img
   let viewboxSize = do
         vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
         [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ada3e98ec..ad0108843 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
 import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper)
+import Data.Char (isSpace, toUpper, isLetter)
 import Data.Default
 import Data.Either (rights)
 import Data.Foldable (asum)
@@ -21,7 +21,10 @@ import Data.List (intersperse,elemIndex)
 import Data.Maybe (fromMaybe,mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Control.Monad.Except (throwError)
 import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
@@ -29,6 +32,7 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
 
 {-
 
@@ -537,22 +541,25 @@ instance Default DBState where
 
 readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readDocBook _ inp = do
-  let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
+  tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $
+            parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
--- We treat <?asciidoc-br?> specially (issue #1236), converting it
--- to <br/>, since xml-light doesn't parse the instruction correctly.
--- Other xml instructions are simply removed from the input stream.
+-- We treat certain processing instructions by converting them to tags
+-- beginning "pi-".
 handleInstructions :: Text -> Text
-handleInstructions = T.pack . handleInstructions' . T.unpack
-
-handleInstructions' :: String -> String
-handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
-handleInstructions' xs = case break (=='<') xs of
-                             (ys, [])     -> ys
-                             ([], '<':zs) -> '<' : handleInstructions' zs
-                             (ys, zs)     -> ys ++ handleInstructions' zs
+handleInstructions t =
+  let (x,y) = T.breakOn "<?" t
+   in if T.null y
+         then x
+         else
+           let (w,z) = T.breakOn "?>" y
+            in (if T.takeWhile (\c -> isLetter c || c == '-')
+                    (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"]
+                   then x <> "<pi-" <> T.drop 2 w <> "/>"
+                   else x <> w <> T.take 2 z) <>
+               handleInstructions (T.drop 2 z)
 
 getFigure :: PandocMonad m => Element -> DB m Blocks
 getFigure e = do
@@ -892,7 +899,11 @@ parseBlock (Elem e) =
         "subtitle" -> return mempty  -- handled in parent element
         _       -> skip >> getBlocks e
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = T.pack $ qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          codeBlockWithLang = do
@@ -964,7 +975,7 @@ parseBlock (Elem e) =
                                      cs -> map toAlignment cs
                       let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9')
                                                                    || x == '.') s)
-                      let textWidth = case filterChild (named "?dbfo") e of
+                      let textWidth = case filterChild (named "pi-dbfo") e of
                                         Just d  -> case attrValue "table-width" d of
                                                      "" -> 1.0
                                                      w  -> fromMaybe 100.0 (parseWidth w) / 100.0
@@ -1165,12 +1176,15 @@ parseInline (Elem e) =
         "title" -> return mempty
         "affiliation" -> skip
         -- Note: this isn't a real docbook tag; it's what we convert
-        -- <?asciidor-br?> to in handleInstructions, above.  A kludge to
-        -- work around xml-light's inability to parse an instruction.
-        "br" -> return linebreak
+        -- <?asciidor-br?> to in handleInstructions, above.
+        "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = T.pack $ qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          innerInlines f = extractSpaces f . mconcat <$>
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index fdcffcc3f..056dab6c2 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -74,6 +74,7 @@ import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
 import Text.XML.Light
 import qualified Text.XML.Light.Cursor as XMLC
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            , envComments      :: Comments
@@ -343,10 +344,16 @@ archiveToDocxWithWarnings archive = do
     Right doc -> Right (Docx doc, stateWarnings st)
     Left e    -> Left e
 
+parseXMLFromEntry :: Entry -> Maybe Element
+parseXMLFromEntry entry =
+  case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
+    Left _   -> Nothing
+    Right el -> Just el
+
 getDocumentXmlPath :: Archive -> Maybe FilePath
 getDocumentXmlPath zf = do
   entry <- findEntryByPath "_rels/.rels" zf
-  relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  relsElem <- parseXMLFromEntry entry
   let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem
   rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e ==
                        Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
@@ -362,7 +369,7 @@ archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
   docPath <- asks envDocXmlPath
   entry <- maybeToD $ findEntryByPath docPath zf
-  docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  docElem <- maybeToD $ parseXMLFromEntry entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
   let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
@@ -401,9 +408,9 @@ constructBogusParStyleData stName = ParStyle
 archiveToNotes :: Archive -> Notes
 archiveToNotes zf =
   let fnElem = findEntryByPath "word/footnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       enElem = findEntryByPath "word/endnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       fn_namespaces = maybe [] elemToNameSpaces fnElem
       en_namespaces = maybe [] elemToNameSpaces enElem
       ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
@@ -415,7 +422,7 @@ archiveToNotes zf =
 archiveToComments :: Archive -> Comments
 archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
       cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
   in
@@ -445,7 +452,7 @@ filePathToRelationships :: Archive -> FilePath -> FilePath ->  [Relationship]
 filePathToRelationships ar docXmlPath fp
   | Just relType <- filePathToRelType fp docXmlPath
   , Just entry <- findEntryByPath fp ar
-  , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
+  , Just relElems <- parseXMLFromEntry entry =
   mapMaybe (relElemToRelationship relType) $ elChildren relElems
 filePathToRelationships _ _ _ = []
 
@@ -527,7 +534,7 @@ archiveToNumbering' zf =
   case findEntryByPath "word/numbering.xml" zf of
     Nothing -> Just $ Numbering [] [] []
     Just entry -> do
-      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      numberingElem <- parseXMLFromEntry entry
       let namespaces = elemToNameSpaces numberingElem
           numElems = findChildrenByName namespaces "w" "num" numberingElem
           absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index 236167187..edade8654 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -53,6 +53,7 @@ import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 newtype CharStyleId   = CharStyleId T.Text
   deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -135,19 +136,22 @@ defaultRunStyle = RunStyle { isBold = Nothing
                            , rParentStyle = Nothing
                            }
 
-archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
-                    (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles'
+  :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2)
+  => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
 archiveToStyles' conv1 conv2 zf =
-  let stylesElem = findEntryByPath "word/styles.xml" zf >>=
-                   (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-  in
-   case stylesElem of
-     Nothing -> (M.empty, M.empty)
-     Just styElem ->
-       let namespaces = elemToNameSpaces styElem
-       in
-        ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
-          M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+  case findEntryByPath "word/styles.xml" zf of
+    Nothing -> (M.empty, M.empty)
+    Just entry ->
+      case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+        Left _ -> (M.empty, M.empty)
+        Right styElem ->
+          let namespaces = elemToNameSpaces styElem
+          in
+           ( M.fromList $ map (\r -> (conv1 r, r)) $
+               buildBasedOnList namespaces styElem Nothing,
+             M.fromList $ map (\p -> (conv2 p, p)) $
+               buildBasedOnList namespaces styElem Nothing)
 
 isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
 isBasedOnStyle ns element parentStyle
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 5e3326e6d..369c4f0c9 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -17,7 +17,7 @@ module Text.Pandoc.Readers.EPUB
   (readEPUB)
   where
 
-import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
+import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                           toArchiveOrFail)
 import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
@@ -41,9 +41,10 @@ import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
-import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 type Items = M.Map String (FilePath, MimeType)
 
@@ -181,7 +182,7 @@ renameMeta s         = T.pack s
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
   metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
-  docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+  docElem <- parseXMLDocE metaEntry
   let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
   ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
   as <- fmap (map attrToPair . elAttribs)
@@ -190,7 +191,7 @@ getManifest archive = do
   let rootdir = dropFileName manifestFile
   --mime <- lookup "media-type" as
   manifest <- findEntryByPathE manifestFile archive
-  (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+  (rootdir,) <$> parseXMLDocE manifest
 
 -- Fixup
 
@@ -284,8 +285,12 @@ findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
   mkE ("No entry on path: " ++ path) $ findEntryByPath path a
 
-parseXMLDocE :: PandocMonad m => String -> m Element
-parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+parseXMLDocE :: PandocMonad m => Entry -> m Element
+parseXMLDocE entry =
+  either (throwError . PandocXMLError fp) return $ parseXMLElement doc
+ where
+  doc = UTF8.toTextLazy . fromEntry $ entry
+  fp  = T.pack $ eRelativePath entry
 
 findElementE :: PandocMonad m => QName -> Element -> m Element
 findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b0d2f092b..b804eab4f 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -32,6 +32,7 @@ import Data.List (intersperse)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Data.Default
 import Data.Maybe
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -42,6 +43,7 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 type FB2 m = StateT FB2State m
 
@@ -64,10 +66,10 @@ instance HasMeta FB2State where
 
 readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readFB2 _ inp =
-  case parseXMLDoc $ crFilter inp of
-    Nothing -> throwError $ PandocParseError "Not an XML document"
-    Just e ->  do
-      (bs, st) <- runStateT (parseRootElement e) def
+  case parseXMLElement $ TL.fromStrict $ crFilter inp of
+    Left msg -> throwError $ PandocXMLError "" msg
+    Right el ->  do
+      (bs, st) <- runStateT (parseRootElement el) def
       let authors = if null $ fb2Authors st
                     then id
                     else setMeta "author" (map text $ reverse $ fb2Authors st)
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c638da519..dfd343b7a 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -14,6 +14,8 @@ Conversion of JATS XML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(..))
 import Data.Char (isDigit, isSpace, toUpper)
 import Data.Default
 import Data.Generics
@@ -22,6 +24,7 @@ import qualified Data.Map as Map
 import Data.Maybe (maybeToList, fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -29,6 +32,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
 
@@ -51,8 +55,9 @@ instance Default JATSState where
 
 readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readJATS _ inp = do
-  let tree = normalizeTree . parseXML
-               $ T.unpack $ crFilter inp
+  tree <- either (throwError . PandocXMLError "")
+                 (return . normalizeTree) $
+            parseXMLContents (TL.fromStrict $ crFilter inp)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5b8996025..bdadc4dd9 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -19,14 +19,18 @@ import Data.Generics
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.Pandoc.Shared (crFilter, blocksToInlines')
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
+import Control.Monad.Except (throwError)
 
 type OPML m = StateT OPMLState m
 
@@ -49,8 +53,10 @@ instance Default OPMLState where
 readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readOPML opts inp  = do
   (bs, st') <- runStateT
-                 (mapM parseBlock $ normalizeTree $
-                    parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
+                 (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+                     Left msg -> throwError $ PandocXMLError "" msg
+                     Right ns -> mapM parseBlock $ normalizeTree ns)
+                 def{ opmlOptions = opts }
   return $
     setTitle (opmlDocTitle st') $
     setAuthors (opmlDocAuthors st') $
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 9943d3147..85308deb1 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.Odt ( readOdt ) where
 
 import Codec.Archive.Zip
 import qualified Text.XML.Light as XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 import qualified Data.ByteString.Lazy as B
 
@@ -66,18 +67,18 @@ bytesToOdt bytes = case toArchiveOrFail bytes of
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive = either (Left. PandocParseError) Right $ do
-  let onFailure msg Nothing = Left msg
+archiveToOdt archive = do
+  let onFailure msg Nothing = Left $ PandocParseError msg
       onFailure _   (Just x) = Right x
   contentEntry <- onFailure "Could not find content.xml"
                    (findEntryByPath "content.xml" archive)
   stylesEntry <- onFailure "Could not find styles.xml"
                    (findEntryByPath "styles.xml" archive)
-  contentElem <- onFailure "Could not find content element"
-                   (entryToXmlElem contentEntry)
-  stylesElem <- onFailure "Could not find styles element"
-                   (entryToXmlElem stylesEntry)
-  styles <- either (\_ -> Left "Could not read styles") Right
+  contentElem <- entryToXmlElem contentEntry
+  stylesElem <- entryToXmlElem stylesEntry
+  styles <- either
+               (\_ -> Left $ PandocParseError "Could not read styles")
+               Right
                (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
   let filePathIsOdtMedia :: FilePath -> Bool
       filePathIsOdtMedia fp =
@@ -85,10 +86,13 @@ archiveToOdt archive = either (Left. PandocParseError) Right $ do
         in  (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
   let media = filteredFilesFromArchive archive filePathIsOdtMedia
   let startState = readerState styles media
-  either (\_ -> Left "Could not convert opendocument") Right
+  either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right
     (runConverter' read_body startState contentElem)
 
 
 --
-entryToXmlElem :: Entry -> Maybe XML.Element
-entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
+entryToXmlElem :: Entry -> Either PandocError XML.Element
+entryToXmlElem entry =
+  case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+    Right x  -> Right x
+    Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 1f16f6772..e99fa2567 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -55,8 +55,9 @@ import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
 import Text.Printf (printf)
 import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
-                       add_attrs, lookupAttr, node, onlyElems, parseXML,
+                       add_attrs, lookupAttr, node, onlyElems,
                        ppElement, showElement, strContent, unode, unqual)
+import Text.Pandoc.XMLParser (parseXMLContents)
 import Text.Pandoc.XML (escapeStringForXML)
 import Text.DocTemplates (FromContext(lookupContext), Context(..),
                           ToContext(toVal), Val(..))
@@ -160,7 +161,12 @@ mkEntry path content = do
 getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
 getEPUBMetadata opts meta = do
   let md = metadataFromMeta opts meta
-  let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
+  elts <- case writerEpubMetadata opts of
+            Nothing -> return []
+            Just t -> case parseXMLContents (TL.fromStrict t) of
+                          Left msg -> throwError $
+                            PandocXMLError "epub metadata" msg
+                          Right ns -> return (onlyElems ns)
   let md' = foldr addMetadataFromXML md elts
   let addIdentifier m =
        if null (epubIdentifier m)
@@ -836,7 +842,8 @@ pandocToEPUB version opts doc = do
                                             : case subs of
                                                  []    -> []
                                                  (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
-          where titElements = parseXML titRendered
+          where titElements = either (const []) id $
+                                parseXMLContents (TL.fromStrict titRendered)
                 titRendered = case P.runPure
                                (writeHtmlStringForEPUB version
                                  opts{ writerTemplate = Nothing
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 25b1f28d1..9334d6e9a 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -19,7 +19,7 @@ FictionBook is an XML-based e-book format. For more information see:
 module Text.Pandoc.Writers.FB2 (writeFB2)  where
 
 import Control.Monad (zipWithM)
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAscii, isControl, isSpace)
@@ -27,16 +27,18 @@ import Data.Either (lefts, rights)
 import Data.List (intercalate)
 import Data.Text (Text, pack)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Network.HTTP (urlEncode)
 import Text.XML.Light
 import qualified Text.XML.Light as X
 import qualified Text.XML.Light.Cursor as XC
-import qualified Text.XML.Light.Input as XI
+import Text.Pandoc.XMLParser (parseXMLContents)
 
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
 import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
@@ -307,7 +309,10 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
                              map (el "p" . el "code" . T.unpack) . T.lines $ s
 blockToXml (RawBlock f str) =
   if f == Format "fb2"
-    then return $ XI.parseXML str
+    then
+      case parseXMLContents (TL.fromStrict str) of
+         Left msg  -> throwError $ PandocXMLError "" msg
+         Right nds -> return nds
     else return []
 blockToXml (Div _ bs) = cMapM blockToXml bs
 blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 05dfad5eb..a32ff618c 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -13,7 +13,7 @@ Conversion of 'Pandoc' documents to ODT.
 -}
 module Text.Pandoc.Writers.ODT ( writeODT ) where
 import Codec.Archive.Zip
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
 import Data.Generics (everywhere', mkT)
@@ -27,6 +27,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
 import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
@@ -35,10 +36,11 @@ import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
                                    fixDisplayMath)
-import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
+import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 import Text.TeXMath
 import Text.XML.Light
 
@@ -172,17 +174,18 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive
 updateStyleWithLang Nothing arch = return arch
 updateStyleWithLang (Just lang) arch = do
   epochtime <- floor `fmap` lift P.getPOSIXTime
-  return arch{ zEntries = [if eRelativePath e == "styles.xml"
-                              then case parseXMLDoc
-                                      (toStringLazy (fromEntry e)) of
-                                      Nothing -> e
-                                      Just d ->
-                                        toEntry "styles.xml" epochtime
-                                        ( fromStringLazy
-                                        . ppTopElement
-                                        . addLang lang $ d )
-                              else e
-                            | e <- zEntries arch] }
+  entries <- mapM (\e -> if eRelativePath e == "styles.xml"
+                            then case parseXMLElement
+                                    (toTextLazy (fromEntry e)) of
+                                    Left msg -> throwError $
+                                        PandocXMLError "styles.xml" msg
+                                    Right d -> return $
+                                      toEntry "styles.xml" epochtime
+                                      ( fromStringLazy
+                                      . ppTopElement
+                                      . addLang lang $ d )
+                            else return e) (zEntries arch)
+  return arch{ zEntries = entries }
 
 addLang :: Lang -> Element -> Element
 addLang lang = everywhere' (mkT updateLangAttr)
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index 3ac007f4e..8f60e70d5 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -35,6 +35,7 @@ import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.XML.Light as XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 mknode :: Node t => String -> [(String,String)] -> t -> Element
 mknode s attrs =
@@ -62,10 +63,10 @@ parseXml refArchive distArchive relpath =
          findEntryByPath relpath distArchive of
             Nothing -> throwError $ PandocSomeError $
                         T.pack relpath <> " missing in reference file"
-            Just e  -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
-                       Nothing -> throwError $ PandocSomeError $
-                                   T.pack relpath <> " corrupt in reference file"
-                       Just d  -> return d
+            Just e  -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of
+                       Left msg ->
+                         throwError $ PandocXMLError (T.pack relpath) msg
+                       Right d  -> return d
 
 -- Copied from Util
 
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 8554db622..cd092969b 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -29,6 +29,7 @@ import Data.Time.Clock (UTCTime)
 import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
 import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -77,7 +78,8 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer)
 getPresentationSize refArchive distArchive = do
   entry <- findEntryByPath "ppt/presentation.xml" refArchive  `mplus`
            findEntryByPath "ppt/presentation.xml" distArchive
-  presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry
+  presElement <- either (const Nothing) return $
+                   parseXMLElement $ UTF8.toTextLazy $ fromEntry entry
   let ns = elemToNameSpaces presElement
   sldSize <- findChild (elemName ns "p" "sldSz") presElement
   cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs
new file mode 100644
index 000000000..8ad22a66a
--- /dev/null
+++ b/src/Text/Pandoc/XMLParser.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XMLParser
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Bridge to allow using xml-conduit's parser with xml-light's types.
+-}
+module Text.Pandoc.XMLParser
+  ( parseXMLElement
+  , parseXMLContents
+  , module Text.XML.Light.Types
+  ) where
+
+import qualified Control.Exception as E
+import qualified Text.XML as Conduit
+import Text.XML.Unresolved (InvalidEventStream(..))
+import qualified Text.XML.Light as Light
+import Text.XML.Light.Types
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Map as M
+import Data.Maybe (mapMaybe)
+
+-- Drop in replacement for parseXMLDoc in xml-light.
+parseXMLElement :: TL.Text -> Either T.Text Light.Element
+parseXMLElement t =
+  elementToElement .  Conduit.documentRoot <$>
+    either (Left . T.pack . E.displayException) Right
+    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
+
+parseXMLContents :: TL.Text -> Either T.Text [Light.Content]
+parseXMLContents t =
+  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
+    Left e ->
+      case E.fromException e of
+        Just (ContentAfterRoot _) ->
+          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
+        _ -> Left . T.pack . E.displayException $ e
+    Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x]
+
+elementToElement :: Conduit.Element -> Light.Element
+elementToElement (Conduit.Element name attribMap nodes) =
+  Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
+ where
+  attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $
+              M.toList attribMap
+  nameToQname (Conduit.Name localName mbns mbpref) =
+    case mbpref of
+      Nothing | "xmlns:" `T.isPrefixOf` localName ->
+           Light.QName (T.unpack $ T.drop 6 localName)  (T.unpack <$> mbns)
+                       (Just "xmlns")
+      _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns)
+                       (T.unpack <$> mbpref)
+
+nodeToContent :: Conduit.Node -> Maybe Light.Content
+nodeToContent (Conduit.NodeElement el) =
+  Just (Light.Elem (elementToElement el))
+nodeToContent (Conduit.NodeContent t) =
+  Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing))
+nodeToContent _ = Nothing
+
diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs
index 525499c86..a9c9a0586 100644
--- a/test/Tests/Readers/JATS.hs
+++ b/test/Tests/Readers/JATS.hs
@@ -88,6 +88,7 @@ tests = [ testGroup "inline code"
             "<p>\n\
             \  <inline-formula><alternatives>\n\
             \  <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
+            \  </alternatives></inline-formula>\n\
             \</p>"
             =?> para (math "\\sigma|_{\\{x\\}}")
           , test jats "math ml only" $
diff --git a/test/command/5321.md b/test/command/5321.md
index 081abe2a0..83404632a 100644
--- a/test/command/5321.md
+++ b/test/command/5321.md
@@ -4,7 +4,7 @@
   <caption>
     <p>bar</p>
   </caption>
-  <graphic xlink:href="foo.png" xlink:alt-text="baz">
+  <graphic xlink:href="foo.png" xlink:alt-text="baz" />
 </fig>
 ^D
 [Para [Image ("fig-1",[],[]) [Str "bar"] ("foo.png","fig:")]]
@@ -17,7 +17,7 @@
     <title>foo</title>
     <p>bar</p>
   </caption>
-  <graphic xlink:href="foo.png" xlink:alt-text="baz">
+  <graphic xlink:href="foo.png" xlink:alt-text="baz" />
 </fig>
 ^D
 [Para [Image ("fig-1",[],[]) [Str "foo",LineBreak,Str "bar"] ("foo.png","fig:")]]
diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook
index 02568d8de..5717d78d0 100644
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@@ -1,6 +1,11 @@
 <?xml version="1.0" encoding="utf-8" ?>
 <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
-                  "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"
+[
+<!ENTITY GHC "GHC" >
+<!ENTITY let "LET" >
+<!ENTITY case "CASE" >
+]>
 <article>
   <articleinfo>
     <title>Pandoc Test Suite</title>
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index 3e1bf16e7..d3b16d0f2 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 66f055063..6293ef493 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index fb3a02a0a..4205a1516 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index bc6c2702a..adb3f23db 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 8c555a5bd..92c8137fe 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index 5f96cc911..f53470617 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index c21b3a5b3..d6af90a72 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 92ce144e9..e18dbe853 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index d21b67309..820299043 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index 3558a47bf..ae0f41d12 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 606df92a3..94cd35dfa 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 759269cac..879f2a25b 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index c37777080..93f86478f 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 9450b1a73..967d297f2 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 6f0b830e6..c5614e2fa 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index e53889cfb..0f39a831f 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 5dbe298b7..07046f223 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 194181288..3656618e6 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 6e0b634f7..8798253d5 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 477178e77..0a24d1840 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index 51110356e..52bb7a217 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index b6206cdf5..182c06c64 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 07b576080..7b69a56a3 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index d46095eb7..3d3a35701 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 7caba4e93..5ae37b406 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 6aaa6da61..c29aa6716 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 5746c5ad0..664493246 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 5f22dccc6..b6d15340e 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index ab5c4f56d..f8e1092d2 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 085f33162..b4cda82f2 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 1ac86d5c8..ee222efa0 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index c2c443b19..c6f8d9c96 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index 5ea18d32e..ea8146690 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
diff --git a/test/jats-reader.native b/test/jats-reader.native
index ab77dd1a0..0715ea8cc 100644
--- a/test/jats-reader.native
+++ b/test/jats-reader.native
@@ -1,4 +1,4 @@
-Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
+Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
 [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
 ,Header 1 ("headers",[],[]) [Str "Headers"]
 ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")]
diff --git a/test/jats-reader.xml b/test/jats-reader.xml
index f75b3e95a..f33cb9ab3 100644
--- a/test/jats-reader.xml
+++ b/test/jats-reader.xml
@@ -20,6 +20,7 @@
       <surname>MacFarlane</surname>
       <given-names>John</given-names>
     </name>
+  </contrib>
   <contrib contrib-type="author">
     <name>
       <surname>Anonymous</surname>
diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx
index aa9b7692a..58070eb3f 100644
Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ
diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx
index 9aaef4cb5..db9b7e371 100644
Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ
diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx
index 1737ec757..c7b1ed7d5 100644
Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ
diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx
index 87fb560ef..6944d92bf 100644
Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ
diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx
index 961c31020..ae0d28429 100644
Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ
diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx
index 894738ef7..37c74c69a 100644
Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ
diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx
index 188e8d826..324e443a1 100644
Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ
diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx
index 253e8c0a7..c81b983e3 100644
Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ
diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx
index e230420d2..30ce33db6 100644
Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ
diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx
index 49384fd65..d6c604968 100644
Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ
diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx
index cdf1be4ad..000e17ecd 100644
Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ
diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx
index c4fcbad45..fdcd2e29b 100644
Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ
diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx
index 4a13b5b7f..e73126376 100644
Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ
diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx
index 7a6e9700e..e3f968e9e 100644
Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ
diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx
index 926c8ff3f..eadb9372e 100644
Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ
diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx
index 16f48e182..8ca6bab2b 100644
Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ
diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx
index f47b17a74..ae188ee68 100644
Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ
diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx
index 88109a95e..60301fa50 100644
Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ
diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx
index 84020708f..17124a50d 100644
Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ
diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx
index a2f77e945..19ae7dd4e 100644
Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ
diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx
index 48bf7bc8a..b650b7585 100644
Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ
diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx
index 23b134a5f..0ab029614 100644
Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ
diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx
index d6eebeffb..2a6e35080 100644
Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx
index a6c76a187..a7bcf6a4b 100644
Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx
index 1fbde815b..21b018c25 100644
Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ
diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx
index cb3af4aa1..4ec4772a4 100644
Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ
diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx
index dff386885..5983657b6 100644
Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ
diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx
index 43b125f5e..dd54c7082 100644
Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ
diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx
index 3314a1c65..b3e5ed5b9 100644
Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx
index 27a136838..1078854bb 100644
Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx
index 7aa3b6a87..5116c6c4e 100644
Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx
index d43709ca7..0c8e49bd9 100644
Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx
index 793ea10f6..68695939d 100644
Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx
index 2f4d3b820..7ed9b946d 100644
Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx
index 94a221398..79fc82345 100644
Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ
diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx
index 22040c88c..9f943c279 100644
Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ
diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx
index 18477380b..ac72d8ced 100644
Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ
diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx
index 4b9d0ba4d..15c7b5469 100644
Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ
diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx
index 1c5b54185..926c5e699 100644
Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ
diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx
index 1314f4de4..a37e72d2c 100644
Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ
diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx
index 9018be36e..7f86533fe 100644
Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ
diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx
index 35e93af67..89e3db0ab 100644
Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ
-- 
cgit v1.2.3


From acc9afaf6f6afa14cbe7cd06798275728e78a529 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 11 Feb 2021 09:16:25 -0800
Subject: Correctly parse "raw" date value in markdown references metadata.

See jgm/citeproc#53.
---
 src/Text/Pandoc/Citeproc/MetaValue.hs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc/MetaValue.hs b/src/Text/Pandoc/Citeproc/MetaValue.hs
index f5a49f49e..b43ca7314 100644
--- a/src/Text/Pandoc/Citeproc/MetaValue.hs
+++ b/src/Text/Pandoc/Citeproc/MetaValue.hs
@@ -135,12 +135,13 @@ metaValueToVal k v
       MetaMap  _   -> TextVal mempty
 
 metaValueToDate :: MetaValue -> Date
-metaValueToDate (MetaMap m) =
-  Date
+metaValueToDate (MetaMap m) = fromMaybe
+  (Date
    { dateParts = dateparts
    , dateCirca = circa
    , dateSeason = season
-   , dateLiteral = literal }
+   , dateLiteral = literal })
+  rawdate
  where
   dateparts = case M.lookup "date-parts" m of
                 Just (MetaList xs) ->
@@ -152,6 +153,7 @@ metaValueToDate (MetaMap m) =
             M.lookup "circa" m >>= metaValueToBool
   season = M.lookup "season" m >>= metaValueToInt
   literal = M.lookup "literal" m >>= metaValueToText
+  rawdate = M.lookup "raw" m >>= metaValueToText >>= rawDateEDTF
 metaValueToDate (MetaList xs) =
   Date{ dateParts = mapMaybe metaValueToDateParts xs
       , dateCirca = False
-- 
cgit v1.2.3


From 3c4a58bad03ef56ae9c82b7e7a6ae027514e2bd6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 10 Feb 2021 23:01:08 -0800
Subject: T.P.Class: Add getTimestamp [API change].

This attempts to read the SOURCE_DATE_EPOCH environment variable
and parse a UTC time from it (treating it as a unix date stamp,
see https://reproducible-builds.org/specs/source-date-epoch/).
If the variable is not set or can't be parsed as a unix date
stamp, then the function returns the current date.
---
 src/Text/Pandoc/Class/PandocMonad.hs | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 374da161b..86c8de79e 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -51,6 +51,7 @@ module Text.Pandoc.Class.PandocMonad
   , setTranslations
   , translateTerm
   , makeCanonical
+  , getTimestamp
   ) where
 
 import Codec.Archive.Zip
@@ -59,7 +60,8 @@ import Control.Monad.Except (MonadError (catchError, throwError),
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Data.Maybe (fromMaybe)
 import Data.Time (UTCTime)
-import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds)
+import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds,
+                             posixSecondsToUTCTime)
 import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime)
 import Network.URI ( escapeURIString, nonStrictRelativeTo,
                      unEscapeString, parseURIReference, isAllowedInURI,
@@ -74,7 +76,7 @@ import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
 import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
-import Text.Pandoc.Shared (uriPathToPath)
+import Text.Pandoc.Shared (uriPathToPath, safeRead)
 import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
                                  readTranslations)
 import Text.Pandoc.Walk (walkM)
@@ -175,6 +177,21 @@ report msg = do
   when (level <= verbosity) $ logOutput msg
   modifyCommonState $ \st -> st{ stLog = msg : stLog st }
 
+-- | Get the time from the @SOURCE_DATE_EPOCH@
+-- environment variable. The variable should contain a
+-- unix time stamp, the number of seconds since midnight Jan 01
+-- 1970 UTC.  If the variable is not set or cannot be
+-- parsed as a unix time stamp, the current time is returned.
+-- This function is designed to make possible reproducible
+-- builds in formats that include a creation timestamp.
+getTimestamp :: PandocMonad m => m UTCTime
+getTimestamp = do
+  mbSourceDateEpoch <- lookupEnv "SOURCE_DATE_EPOCH"
+  case mbSourceDateEpoch >>= safeRead of
+    Just (epoch :: Integer) ->
+      return $ posixSecondsToUTCTime $ fromIntegral epoch
+    Nothing -> getCurrentTime
+
 -- | Determine whether tracing is enabled.  This affects
 -- the behavior of 'trace'.  If tracing is not enabled,
 -- 'trace' does nothing.
-- 
cgit v1.2.3


From 390d5e65b2d66078b2f9b9db142dbe2167ea29a9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 10 Feb 2021 23:13:33 -0800
Subject: Use getTimestamp instead of getCurrentTime in writers.

Setting SOURCE_DATE_EPOCH will allow reproducible builds.

Partially addresses #7093.  This does not suffice to fully enable
reproducible in EPUB, since a unique id is being generated for each
build.
---
 src/Text/Pandoc/Readers/RST.hs               | 4 ++--
 src/Text/Pandoc/Writers/Docx.hs              | 2 +-
 src/Text/Pandoc/Writers/EPUB.hs              | 4 ++--
 src/Text/Pandoc/Writers/ODT.hs               | 2 +-
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 0f32d993c..29f81b046 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -28,7 +28,7 @@ import qualified Data.Text as T
 import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem,
-                                      readFileFromDirs, getCurrentTime)
+                                      readFileFromDirs, getTimestamp)
 import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -685,7 +685,7 @@ directive' = do
         "replace" -> B.para <$>  -- consumed by substKey
                    parseInlineFromText (trim top)
         "date" -> B.para <$> do  -- consumed by substKey
-                     t <- getCurrentTime
+                     t <- getTimestamp
                      let format = case T.unpack (T.strip top) of
                                     [] -> "%Y-%m-%d"
                                     x  -> x
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 8f498775d..da990e4d3 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -217,7 +217,7 @@ writeDocx opts doc = do
   let doc' = Pandoc meta blocks'
 
   username <- P.lookupEnv "USERNAME"
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
   oldUserDataDir <- P.getUserDataDir
   P.setUserDataDir Nothing
   res <- P.readDefaultDataFile "reference.docx"
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index e99fa2567..171ffe582 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -191,7 +191,7 @@ getEPUBMetadata opts meta = do
   let fixDate m =
        if null (epubDate m)
           then do
-            currentTime <- lift P.getCurrentTime
+            currentTime <- lift P.getTimestamp
             return $ m{ epubDate = [ Date{
                              dateText = showDateTimeISO8601 currentTime
                            , dateEvent = Nothing } ] }
@@ -709,7 +709,7 @@ pandocToEPUB version opts doc = do
   uuid <- case epubIdentifier metadata of
             (x:_) -> return $ identifierText x  -- use first identifier as UUID
             []    -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null"  -- shouldn't happen
-  currentTime <- lift P.getCurrentTime
+  currentTime <- lift P.getTimestamp
   let contentsData = UTF8.fromStringLazy $ ppTopElement $
         unode "package" !
           ([("version", case version of
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index a32ff618c..06369b4db 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -68,7 +68,7 @@ pandocToODT :: PandocMonad m
 pandocToODT opts doc@(Pandoc meta _) = do
   let title = docTitle meta
   let authors = docAuthors meta
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
   lang <- toLang (getLang opts meta)
   refArchive <-
        case writerReferenceDoc opts of
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index cd092969b..0a7060895 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -312,7 +312,7 @@ presentationToArchive opts pres = do
                      Nothing -> toArchive . BL.fromStrict <$>
                         P.readDataFile "reference.pptx"
 
-  utctime <- P.getCurrentTime
+  utctime <- P.getTimestamp
 
   presSize <- case getPresentationSize refArchive distArchive of
                 Just sz -> return sz
-- 
cgit v1.2.3


From d9322629a36ad50035912ee56df876c345039225 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 12 Feb 2021 17:45:05 -0800
Subject: LaTeX reader improvements.

* Rewrote `withRaw` so it doesn't rely on fragile assumptions
  about token positions (which break when macros are expanded).
  This requires the addition of `sEnableWithRaw` and `sRawTokens`
  in `LaTeXState`, and a new combinator `disablingWithRaw` to
  disable collecting of raw tokens in certain contexts.
* Add `parseFromToks` to T.P.Readers.LaTeX.Parsing.
* Fix parsing of single character tokens so it doesn't mess
  up the new raw token collecting.
* These changes slightly increase allocations and have a small
  performance impact, but it's minor.

Closes #7092.
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  6 +--
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 84 +++++++++++++++++++++++++-------
 2 files changed, 68 insertions(+), 22 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 91c71c000..0a66b7f39 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1054,7 +1054,7 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
       symbol '}'
       return res
     parser = do
-      Tok _ Word s <- satisfyTok isWordTok
+      s <- untokenize <$> many1 (satisfyTok isWordTok)
       let (digits, rest) = T.span isDigit s
       unless (T.null rest) $
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
@@ -2208,9 +2208,7 @@ parseTableRow envname prefsufs = do
         option [] (count 1 amp)
         return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
   rawcells <- mapM celltoks prefsufs
-  oldInput <- getInput
-  cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
-  setInput oldInput
+  cells <- mapM (parseFromToks parseTableCell) rawcells
   spaces
   return $ Row nullAttr cells
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 12a3ba2f6..313aa6c51 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -32,6 +32,8 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , totoks
   , toksToString
   , satisfyTok
+  , parseFromToks
+  , disablingWithRaw
   , doMacros
   , doMacros'
   , setpos
@@ -87,13 +89,15 @@ import Control.Monad.Trans (lift)
 import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord)
 import Data.Default
 import Data.List (intercalate)
+import qualified Data.IntMap as IntMap
 import qualified Data.Map as M
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
-import Text.Pandoc.Error (PandocError (PandocMacroLoop))
+import Text.Pandoc.Error
+         (PandocError (PandocMacroLoop,PandocShouldNeverHappenError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
@@ -153,6 +157,8 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sToggles       :: M.Map Text Bool
                             , sExpanded      :: Bool
                             , sFileContents  :: M.Map Text Text
+                            , sEnableWithRaw :: Bool
+                            , sRawTokens     :: IntMap.IntMap [Tok]
                             }
      deriving Show
 
@@ -179,6 +185,8 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sToggles       = M.empty
                               , sExpanded      = False
                               , sFileContents  = M.empty
+                              , sEnableWithRaw = True
+                              , sRawTokens     = IntMap.empty
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
@@ -404,11 +412,31 @@ untoken t = untokenAccum t mempty
 toksToString :: [Tok] -> String
 toksToString = T.unpack . untokenize
 
+parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
+parseFromToks parser toks = do
+  oldInput <- getInput
+  setInput toks
+  result <- disablingWithRaw parser
+  setInput oldInput
+  return result
+
+disablingWithRaw :: PandocMonad m => LP m a -> LP m a
+disablingWithRaw parser = do
+  oldEnableWithRaw <- sEnableWithRaw <$> getState
+  updateState $ \st -> st{ sEnableWithRaw = False }
+  result <- parser
+  updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw }
+  return result
+
 satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sExpanded = False }
+    updateState $ \st -> st{ sExpanded = False
+                           , sRawTokens =
+                              if sEnableWithRaw st
+                                 then IntMap.map (res:) $ sRawTokens st
+                                 else sRawTokens st }
     return res
   where matcher t | f t       = Just t
                   | otherwise = Nothing
@@ -594,18 +622,22 @@ isCommentTok _                 = False
 anyTok :: PandocMonad m => LP m Tok
 anyTok = satisfyTok (const True)
 
+singleCharTok :: PandocMonad m => LP m Tok
+singleCharTok =
+  satisfyTok $ \case
+     Tok _ Word  t   -> T.length t == 1
+     Tok _ Symbol t  -> not (T.any (`Set.member` specialChars) t)
+     _               -> False
+
 singleChar :: PandocMonad m => LP m Tok
-singleChar = try $ do
-  Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol])
-  guard $ not $ toktype == Symbol &&
-                T.any (`Set.member` specialChars) t
-  if T.length t > 1
-     then do
-       let (t1, t2) = (T.take 1 t, T.drop 1 t)
-       inp <- getInput
-       setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp
-       return $ Tok pos toktype t1
-     else return $ Tok pos toktype t
+singleChar = singleCharTok <|> singleCharFromWord
+ where
+  singleCharFromWord = do
+    Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok
+    let (t1, t2) = (T.take 1 t, T.drop 1 t)
+    inp <- getInput
+    setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp
+    anyTok
 
 specialChars :: Set.Set Char
 specialChars = Set.fromList "#$%&~_^\\{}"
@@ -725,11 +757,23 @@ ignore raw = do
 
 withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok])
 withRaw parser = do
-  inp <- getInput
+  rawTokensMap <- sRawTokens <$> getState
+  let key = case IntMap.lookupMax rawTokensMap of
+               Nothing     -> 0
+               Just (n,_)  -> n + 1
+  -- insert empty list at key
+  updateState $ \st -> st{ sRawTokens =
+                             IntMap.insert key [] $ sRawTokens st }
   result <- parser
-  nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok)
-  let raw = takeWhile (\(Tok pos _ _) -> maybe True
-                  (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp
+  mbRevToks <- IntMap.lookup key . sRawTokens <$> getState
+  raw <- case mbRevToks of
+           Just revtoks -> do
+             updateState $ \st -> st{ sRawTokens =
+                                        IntMap.delete key $ sRawTokens st}
+             return $ reverse revtoks
+           Nothing      ->
+             throwError $ PandocShouldNeverHappenError $
+                "sRawTokens has nothing at key " <> T.pack (show key)
   return (result, raw)
 
 keyval :: PandocMonad m => LP m (Text, Text)
@@ -794,7 +838,7 @@ getRawCommand name txt = do
   (_, rawargs) <- withRaw $
       case name of
            "write" -> do
-             void $ satisfyTok isWordTok -- digits
+             void $ many $ satisfyTok isDigitTok -- digits
              void braced
            "titleformat" -> do
              void braced
@@ -809,6 +853,10 @@ getRawCommand name txt = do
                void $ many braced
   return $ txt <> untokenize rawargs
 
+isDigitTok :: Tok -> Bool
+isDigitTok (Tok _ Word t) = T.all isDigit t
+isDigitTok _              = False
+
 skipopts :: PandocMonad m => LP m ()
 skipopts = skipMany (void overlaySpecification <|> void rawopt)
 
-- 
cgit v1.2.3


From eb0c63b00263263e3dc0f74c0bf19e93c7afeb13 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 12 Feb 2021 19:29:48 -0800
Subject: Avoid an unnecessary withRaw.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 0a66b7f39..2a949cbdc 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1869,9 +1869,12 @@ rawEnv name = do
   pos1 <- getPosition
   (bs, raw) <- withRaw $ env name blocks
   if parseRaw
-     then return $ rawBlock "latex"
+     then do
+       (_, raw) <- withRaw $ env name blocks
+       return $ rawBlock "latex"
                  $ beginCommand <> untokenize raw
      else do
+       bs <- env name blocks
        report $ SkippedContent beginCommand pos1
        pos2 <- getPosition
        report $ SkippedContent ("\\end{" <> name <> "}") pos2
-- 
cgit v1.2.3


From 25b7df7c2a1798b5861b7ddc084e74b926299630 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Feb 2021 00:18:40 -0800
Subject: Remove Ext_fenced_code_attributes from allowed commonmark attributes.

This attribute was listed as allowed, but it didn't actually
do anything. Use `attributes` for code attributes and more.

Closes #7097.
---
 src/Text/Pandoc/Extensions.hs | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 7aa32c52c..82eb0e957 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -378,7 +378,6 @@ getDefaultExtensions "commonmark_x"    = extensionsFromList
   , Ext_raw_attribute
   , Ext_implicit_header_references
   , Ext_attributes
-  , Ext_fenced_code_attributes
   ]
 getDefaultExtensions "org"             = extensionsFromList
                                           [Ext_citations,
@@ -509,7 +508,6 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_raw_attribute
     , Ext_implicit_header_references
     , Ext_attributes
-    , Ext_fenced_code_attributes
     , Ext_sourcepos
     ]
   getAll "commonmark_x"    = getAll "commonmark"
-- 
cgit v1.2.3


From 6323250bad5e0bf9ef0ec007e16c869d99828080 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Feb 2021 00:22:22 -0800
Subject: LaTeX reader: remove unnecessary line

---
 src/Text/Pandoc/Readers/LaTeX.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2a949cbdc..7d8dfab0e 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1867,7 +1867,6 @@ rawEnv name = do
   rawOptions <- mconcat <$> many rawopt
   let beginCommand = "\\begin{" <> name <> "}" <> rawOptions
   pos1 <- getPosition
-  (bs, raw) <- withRaw $ env name blocks
   if parseRaw
      then do
        (_, raw) <- withRaw $ env name blocks
-- 
cgit v1.2.3


From 2d60a5127cc28bb6b55c19309d6e8fb6e81fbe66 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 13 Feb 2021 09:36:19 +0100
Subject: T.P.Shared: export `handleTaskListItem`. [API change]

---
 src/Text/Pandoc/Shared.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index b908a0172..6d5d4c97d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -70,6 +70,7 @@ module Text.Pandoc.Shared (
                      isTightList,
                      taskListItemFromAscii,
                      taskListItemToAscii,
+                     handleTaskListItem,
                      addMetaField,
                      makeMeta,
                      eastAsianLineBreakFilter,
-- 
cgit v1.2.3


From a3beed9db874517fa57b55380658f4e019e809b2 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 13 Feb 2021 09:37:43 +0100
Subject: Org: support task_lists extension

The tasks lists extension is now supported by the org reader and writer;
the extension is turned on by default.

Closes: #6336
---
 src/Text/Pandoc/Extensions.hs         |  2 ++
 src/Text/Pandoc/Readers/Org/Blocks.hs | 41 +++++++++++++++++++++++--
 src/Text/Pandoc/Writers/Org.hs        | 16 ++++++++--
 test/Tests/Readers/Org/Block/List.hs  | 13 ++++++++
 test/Tests/Writers/Org.hs             | 57 ++++++++++++++++++++++++++++-------
 5 files changed, 113 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 82eb0e957..3b96f9e04 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -381,6 +381,7 @@ getDefaultExtensions "commonmark_x"    = extensionsFromList
   ]
 getDefaultExtensions "org"             = extensionsFromList
                                           [Ext_citations,
+                                           Ext_task_lists,
                                            Ext_auto_identifiers]
 getDefaultExtensions "html"            = extensionsFromList
                                           [Ext_auto_identifiers,
@@ -515,6 +516,7 @@ getAllExtensions f = universalExtensions <> getAll f
     extensionsFromList
     [ Ext_citations
     , Ext_smart
+    , Ext_task_lists
     ]
   getAll "html"            = autoIdExtensions <>
     extensionsFromList
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 6bd046e04..d1aff701e 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE FlexibleContexts  #-}
-{-# LANGUAGE RecordWildCards   #-}
+{-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards   #-}
 {- |
    Module      : Text.Pandoc.Readers.Org.Blocks
    Copyright   : Copyright (C) 2014-2021 Albert Krewinkel
@@ -850,16 +851,52 @@ definitionListItem parseIndentedMarker = try $ do
    definitionMarker =
      spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
 
+-- | Checkbox for tasks.
+data Checkbox
+  = UncheckedBox
+  | CheckedBox
+  | SemicheckedBox
+
+-- | Parses a checkbox in a plain list.
+checkbox :: PandocMonad m
+         => OrgParser m Checkbox
+checkbox = do
+  guardEnabled Ext_task_lists
+  try (char '[' *> status <* char ']') <?> "checkbox"
+  where
+    status = choice
+      [ UncheckedBox   <$ char ' '
+      , CheckedBox     <$ char 'X'
+      , SemicheckedBox <$ char '-'
+      ]
+
+checkboxToInlines :: Checkbox -> Inline
+checkboxToInlines = B.Str . \case
+  UncheckedBox   -> "☐"
+  SemicheckedBox -> "☐"
+  CheckedBox     -> "☒"
+
 -- | parse raw text for one list item
 listItem :: PandocMonad m
          => OrgParser m Int
          -> OrgParser m (F Blocks)
 listItem parseIndentedMarker = try . withContext ListItemState $ do
   markerLength <- try parseIndentedMarker
+  box <- optionMaybe checkbox
   firstLine <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
   rest <- T.concat <$> many (listContinuation markerLength)
-  parseFromString blocks $ firstLine <> blank <> rest
+  contents <- parseFromString blocks $ firstLine <> blank <> rest
+  return (maybe id (prependInlines . checkboxToInlines) box <$> contents)
+
+-- | Prepend inlines to blocks, adding them to the first paragraph or
+-- creating a new Plain element if necessary.
+prependInlines :: Inline -> Blocks -> Blocks
+prependInlines inlns = B.fromList . prepend . B.toList
+  where
+    prepend (Plain is : bs) = Plain (inlns : Space : is) : bs
+    prepend (Para  is : bs) = Para  (inlns : Space : is) : bs
+    prepend bs              = Plain [inlns, Space] : bs
 
 -- continuation of a list item - indented and separated by blankline or endline.
 -- Note: nested lists are parsed as continuations.
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 43ebf1807..8dfc2749c 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -213,25 +213,35 @@ blockToOrg (DefinitionList items) = do
 -- | Convert bullet list item (list of blocks) to Org.
 bulletListItemToOrg :: PandocMonad m => [Block] -> Org m (Doc Text)
 bulletListItemToOrg items = do
-  contents <- blockListToOrg items
+  exts <- gets $ writerExtensions . stOptions
+  contents <- blockListToOrg (taskListItemToOrg exts items)
   return $ hang 2 "- " contents $$
           if endsWithPlain items
              then cr
              else blankline
 
-
 -- | Convert ordered list item (a list of blocks) to Org.
 orderedListItemToOrg :: PandocMonad m
                      => Text   -- ^ marker for list item
                      -> [Block]  -- ^ list item (list of blocks)
                      -> Org m (Doc Text)
 orderedListItemToOrg marker items = do
-  contents <- blockListToOrg items
+  exts <- gets $ writerExtensions . stOptions
+  contents <- blockListToOrg (taskListItemToOrg exts items)
   return $ hang (T.length marker + 1) (literal marker <> space) contents $$
           if endsWithPlain items
              then cr
              else blankline
 
+-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@
+-- or @U+2612 BALLOT BOX WITH X@ to org checkbox syntax (e.g. @[X]@).
+taskListItemToOrg :: Extensions -> [Block] -> [Block]
+taskListItemToOrg = handleTaskListItem toOrg
+  where
+    toOrg (Str "☐" : Space : is) = Str "[ ]" : Space : is
+    toOrg (Str "☒" : Space : is) = Str "[X]" : Space : is
+    toOrg is = is
+
 -- | Convert definition list item (label, list of blocks) to Org.
 definitionListItemToOrg :: PandocMonad m
                         => ([Inline], [[Block]]) -> Org m (Doc Text)
diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs
index 9686b5148..2ee37081e 100644
--- a/test/Tests/Readers/Org/Block/List.hs
+++ b/test/Tests/Readers/Org/Block/List.hs
@@ -118,6 +118,19 @@ tests =
                 ] =?>
       bulletList [ plain "", plain "" ]
 
+  , "Task list" =:
+    T.unlines [ "- [ ] nope"
+              , "- [X] yup"
+              , "- [-] started"
+              , "  1. [X] sure"
+              , "  2. [ ] nuh-uh"
+              ] =?>
+    bulletList [ plain "☐ nope", plain "☒ yup"
+               , mconcat [ plain "☐ started"
+                         , orderedList [plain "☒ sure", plain "☐ nuh-uh"]
+                         ]
+               ]
+
   , "Simple Ordered List" =:
       ("1. Item1\n" <>
        "2. Item2\n") =?>
diff --git a/test/Tests/Writers/Org.hs b/test/Tests/Writers/Org.hs
index 9cbe360da..bd6c9b7ab 100644
--- a/test/Tests/Writers/Org.hs
+++ b/test/Tests/Writers/Org.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.Org (tests) where
 
+import Data.Text as T
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
@@ -9,17 +10,51 @@ import Text.Pandoc.Builder
 
 infix 4 =:
 (=:) :: (ToString a, ToPandoc a)
-     => String -> (a, String) -> TestTree
-(=:) = test (purely (writeOrg def . toPandoc))
+     => String -> (a, Text) -> TestTree
+(=:) = test org
+
+defopts :: WriterOptions
+defopts = def
+  { writerExtensions = getDefaultExtensions "org"
+  }
+
+org :: (ToPandoc a) => a -> Text
+org = orgWithOpts defopts
+
+orgWithOpts :: (ToPandoc a) => WriterOptions -> a -> Text
+orgWithOpts opts x = purely (writeOrg opts) $ toPandoc x
+
 
 tests :: [TestTree]
-tests = [ testGroup "links"
-          -- See http://orgmode.org/manual/Internal-links.html#Internal-links
-          [ "simple link"
-              =: link "/url" "" "foo"
-              =?> "[[/url][foo]]"
-          , "internal link to anchor"
-              =: link "#my-custom-id" "" "#my-custom-id"
-              =?> "[[#my-custom-id]]"
+tests =
+  [ testGroup "links"
+    -- See http://orgmode.org/manual/Internal-links.html#Internal-links
+    [ "simple link"
+      =: link "/url" "" "foo"
+      =?> "[[/url][foo]]"
+    , "internal link to anchor"
+      =: link "#my-custom-id" "" "#my-custom-id"
+      =?> "[[#my-custom-id]]"
+    ]
+
+  , testGroup "lists"
+    [ "bullet task list"
+      =: bulletList [plain "☐ a", plain "☒ b"]
+      =?> T.unlines
+          [ "- [ ] a"
+          , "- [X] b"
+          ]
+    , "ordered task list"
+      =: orderedList [plain ("☐" <> space <> "a"), plain "☒ b"]
+      =?> T.unlines
+          [ "1. [ ] a"
+          , "2. [X] b"
+          ]
+    , test (orgWithOpts def) "bullet without task_lists" $
+      bulletList [plain "☐ a", plain "☒ b"]
+      =?> T.unlines
+          [ "- ☐ a"
+          , "- ☒ b"
           ]
-        ]
+    ]
+  ]
-- 
cgit v1.2.3


From 6e73273916a55448c1a12ece343454ef139648a8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Feb 2021 13:05:17 -0800
Subject: T.P.Error: export `renderError`.

Refactor `handleError` to use `renderError`. This allows us
render error messages without exiting.
---
 src/Text/Pandoc/Error.hs | 105 ++++++++++++++++++++++++++++++++---------------
 test/command/7099.md     |   8 ++++
 2 files changed, 80 insertions(+), 33 deletions(-)
 create mode 100644 test/command/7099.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 831405f42..50ad3c0e3 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -16,9 +16,10 @@ encountered during parsing.
 -}
 module Text.Pandoc.Error (
   PandocError(..),
+  renderError,
   handleError) where
 
-import Control.Exception (Exception)
+import Control.Exception (Exception, displayException)
 import Data.Typeable (Typeable)
 import Data.Word (Word8)
 import Data.Text (Text)
@@ -68,19 +69,17 @@ data PandocError = PandocIOError Text IOError
 
 instance Exception PandocError
 
--- | Handle PandocError by exiting with an error message.
-handleError :: Either PandocError a -> IO a
-handleError (Right r) = return r
-handleError (Left e) =
+renderError :: PandocError -> Text
+renderError e =
   case e of
-    PandocIOError _ err' -> ioError err'
-    PandocHttpError u err' -> err 61 $
+    PandocIOError _ err' -> T.pack $ displayException err'
+    PandocHttpError u err' ->
       "Could not fetch " <> u <> "\n" <> tshow err'
-    PandocShouldNeverHappenError s -> err 62 $
+    PandocShouldNeverHappenError s ->
       "Something we thought was impossible happened!\n" <>
       "Please report this to pandoc's developers: " <> s
-    PandocSomeError s -> err 63 s
-    PandocParseError s -> err 64 s
+    PandocSomeError s -> s
+    PandocParseError s -> s
     PandocParsecError input err' ->
         let errPos = errorPos err'
             errLine = sourceLine errPos
@@ -91,41 +90,41 @@ handleError (Left e) =
                                           ,"\n", T.replicate (errColumn - 1) " "
                                           ,"^"]
                         else ""
-        in  err 65 $ "\nError at " <> tshow  err' <>
+        in  "\nError at " <> tshow  err' <>
                      -- if error comes from a chunk or included file,
                      -- then we won't get the right text this way:
                      if sourceName errPos == "source"
                         then errorInFile
                         else ""
-    PandocMakePDFError s -> err 66 s
-    PandocOptionError s -> err 6 s
-    PandocSyntaxMapError s -> err 67 s
-    PandocFailOnWarningError -> err 3 "Failing because there were warnings."
-    PandocPDFProgramNotFoundError pdfprog -> err 47 $
+    PandocMakePDFError s -> s
+    PandocOptionError s -> s
+    PandocSyntaxMapError s -> s
+    PandocFailOnWarningError -> "Failing because there were warnings."
+    PandocPDFProgramNotFoundError pdfprog ->
         pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog
-    PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg
-    PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <>
+    PandocPDFError logmsg -> "Error producing PDF.\n" <> logmsg
+    PandocXMLError fp logmsg -> "Invalid XML" <>
         (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg
-    PandocFilterError filtername msg -> err 83 $ "Error running filter " <>
+    PandocFilterError filtername msg -> "Error running filter " <>
         filtername <> ":\n" <> msg
-    PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg
-    PandocCouldNotFindDataFileError fn -> err 97 $
+    PandocLuaError msg -> "Error running Lua:\n" <> msg
+    PandocCouldNotFindDataFileError fn ->
         "Could not find data file " <> fn
-    PandocResourceNotFound fn -> err 99 $
+    PandocResourceNotFound fn ->
         "File " <> fn <> " not found in resource path"
-    PandocTemplateError s -> err 5 $ "Error compiling template " <> s
-    PandocAppError s -> err 4 s
-    PandocEpubSubdirectoryError s -> err 31 $
+    PandocTemplateError s -> "Error compiling template " <> s
+    PandocAppError s -> s
+    PandocEpubSubdirectoryError s ->
       "EPUB subdirectory name '" <> s <> "' contains illegal characters"
-    PandocMacroLoop s -> err 91 $
+    PandocMacroLoop s ->
       "Loop encountered in expanding macro " <> s
-    PandocUTF8DecodingError f offset w -> err 92 $
+    PandocUTF8DecodingError f offset w ->
       "UTF-8 decoding error in " <> f <> " at byte offset " <> tshow offset <>
       " (" <> T.pack (printf "%2x" w) <> ").\n" <>
       "The input must be a UTF-8 encoded text."
-    PandocIpynbDecodingError w -> err 93 $
+    PandocIpynbDecodingError w ->
       "ipynb decoding error: " <> w
-    PandocUnknownReaderError r -> err 21 $
+    PandocUnknownReaderError r ->
       "Unknown input format " <> r <>
       case r of
         "doc" -> "\nPandoc can convert from DOCX, but not from DOC." <>
@@ -133,7 +132,7 @@ handleError (Left e) =
                  " and convert that with pandoc."
         "pdf" -> "\nPandoc can convert to PDF, but not from PDF."
         _     -> ""
-    PandocUnknownWriterError w -> err 22 $
+    PandocUnknownWriterError w ->
        "Unknown output format " <> w <>
        case w of
          "pdf" -> "To create a pdf using pandoc, use" <>
@@ -142,14 +141,54 @@ handleError (Left e) =
                  ".pdf extension (-o filename.pdf)."
          "doc" -> "\nPandoc can convert to DOCX, but not to DOC."
          _     -> ""
-    PandocUnsupportedExtensionError ext f -> err 23 $
+    PandocUnsupportedExtensionError ext f ->
       "The extension " <> ext <> " is not supported " <>
       "for " <> f
-    PandocCiteprocError e' -> err 24 $
+    PandocCiteprocError e' ->
       prettyCiteprocError e'
-    PandocBibliographyError fp msg -> err 25 $
+    PandocBibliographyError fp msg ->
       "Error reading bibliography file " <> fp <> ":\n" <> msg
 
+
+-- | Handle PandocError by exiting with an error message.
+handleError :: Either PandocError a -> IO a
+handleError (Right r) = return r
+handleError (Left e) =
+  case e of
+    PandocIOError _ err' -> ioError err'
+    _ -> err exitCode (renderError e)
+ where
+  exitCode =
+    case e of
+      PandocIOError{} -> 1
+      PandocHttpError{} -> 61
+      PandocShouldNeverHappenError{} -> 62
+      PandocSomeError{} -> 63
+      PandocParseError{} -> 64
+      PandocParsecError{} -> 65
+      PandocMakePDFError{} -> 66
+      PandocOptionError{} -> 6
+      PandocSyntaxMapError{} -> 67
+      PandocFailOnWarningError{} -> 3
+      PandocPDFProgramNotFoundError{} -> 47
+      PandocPDFError{} -> 43
+      PandocXMLError{} -> 44
+      PandocFilterError{} -> 83
+      PandocLuaError{} -> 84
+      PandocCouldNotFindDataFileError{} -> 97
+      PandocResourceNotFound fn -> 99
+      PandocTemplateError{} -> 5
+      PandocAppError{} -> 4
+      PandocEpubSubdirectoryError{} -> 31
+      PandocMacroLoop s -> 91
+      PandocUTF8DecodingError{} -> 92
+      PandocIpynbDecodingError{} -> 93
+      PandocUnknownReaderError{} -> 21
+      PandocUnknownWriterError{} -> 22
+      PandocUnsupportedExtensionError{} -> 23
+      PandocCiteprocError{} -> 24
+      PandocBibliographyError{} -> 25
+
 err :: Int -> Text -> IO a
 err exitCode msg = do
   UTF8.hPutStrLn stderr (T.unpack msg)
diff --git a/test/command/7099.md b/test/command/7099.md
new file mode 100644
index 000000000..d9ff8e5ff
--- /dev/null
+++ b/test/command/7099.md
@@ -0,0 +1,8 @@
+```
+% pandoc -f html -t native --verbose
+<iframe src=""></iframe>
+^D
+[INFO] Fetching ...
+[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1
+[]
+```
-- 
cgit v1.2.3


From d84a6041e12547331f3a252ea4e8b4d229ba1159 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Feb 2021 13:06:22 -0800
Subject: HTML reader: fix bad handling of empty src attribute in iframe.

- If src is empty, we simply skip the iframe.
- If src is invalid or cannot be fetched, we issue a warning
  and skip instead of failing with an error.
- Closes #7099.
---
 src/Text/Pandoc/Readers/HTML.hs | 18 ++++++++++++------
 test/command/7099.md            | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index afc7a3e25..cc60b5501 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
 
 import Control.Applicative ((<|>))
 import Control.Monad (guard, msum, mzero, unless, void)
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader (ask, asks, lift, local, runReaderT)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAlphaNum, isLetter)
@@ -393,11 +393,17 @@ pIframe = try $ do
   tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src"))
   pCloses "iframe" <|> eof
   url <- canonicalizeUrl $ fromAttrib "src" tag
-  (bs, _) <- openURL url
-  let inp = UTF8.toText bs
-  opts <- readerOpts <$> getState
-  Pandoc _ contents <- readHtml opts inp
-  return $ B.divWith ("",["iframe"],[]) $ B.fromList contents
+  if T.null url
+     then ignore $ renderTags' [tag, TagClose "iframe"]
+     else catchError
+       (do (bs, _) <- openURL url
+           let inp = UTF8.toText bs
+           opts <- readerOpts <$> getState
+           Pandoc _ contents <- readHtml opts inp
+           return $ B.divWith ("",["iframe"],[]) $ B.fromList contents)
+       (\e -> do
+         logMessage $ CouldNotFetchResource url (renderError e)
+         ignore $ renderTags' [tag, TagClose "iframe"])
 
 pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
 pRawHtmlBlock = do
diff --git a/test/command/7099.md b/test/command/7099.md
index d9ff8e5ff..33ac8aea1 100644
--- a/test/command/7099.md
+++ b/test/command/7099.md
@@ -2,7 +2,17 @@
 % pandoc -f html -t native --verbose
 <iframe src=""></iframe>
 ^D
-[INFO] Fetching ...
-[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1
+[INFO] Skipped '<iframe src></iframe>' at input line 1 column 16
+[]
+```
+
+```
+% pandoc -f html -t native --verbose
+<iframe src="h:invalid@url"></iframe>
+^D
+[INFO] Fetching h:invalid@url...
+[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
+  InvalidUrlException "h:invalid@url" "Invalid scheme"
+[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
 []
 ```
-- 
cgit v1.2.3


From 8621ed600af6bdc984fe2095dac400887c3cda78 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 14 Feb 2021 15:49:12 +0100
Subject: T.P.Error: remove unused variables

---
 src/Text/Pandoc/Error.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 50ad3c0e3..94c013cdb 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -176,11 +176,11 @@ handleError (Left e) =
       PandocFilterError{} -> 83
       PandocLuaError{} -> 84
       PandocCouldNotFindDataFileError{} -> 97
-      PandocResourceNotFound fn -> 99
+      PandocResourceNotFound{} -> 99
       PandocTemplateError{} -> 5
       PandocAppError{} -> 4
       PandocEpubSubdirectoryError{} -> 31
-      PandocMacroLoop s -> 91
+      PandocMacroLoop{} -> 91
       PandocUTF8DecodingError{} -> 92
       PandocIpynbDecodingError{} -> 93
       PandocUnknownReaderError{} -> 21
-- 
cgit v1.2.3


From 967e7f5fb990b29de48b37be1db40fb149a8cf55 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 14 Feb 2021 22:29:21 -0800
Subject: Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light...

..and add new definitions isomorphic to xml-light's, but with
Text instead of String.  This allows us to keep most of the code in
existing readers that use xml-light, but avoid lots of unnecessary
allocation.

We also add versions of the functions from xml-light's
Text.XML.Light.Output and Text.XML.Light.Proc that operate
on our modified XML types, and functions that convert
xml-light types to our types (since some of our dependencies,
like texmath, use xml-light).

Update golden tests for docx and pptx.

OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`.

Docx: Do a manual traversal to unwrap sdt and smartTag.
This is faster, and needed to pass the tests.

Benchmarks:

A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8)
B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8)
C = this commit

| Reader  |  A    | B      | C     |
| ------- | ----- | ------ | ----- |
| docbook | 18 ms | 12 ms  | 10 ms |
| opml    | 65 ms | 62 ms  | 35 ms |
| jats    | 15 ms | 11 ms  |  9 ms |
| docx    | 72 ms | 69 ms  | 44 ms |
| odt     | 78 ms | 41 ms  | 28 ms |
| epub    | 64 ms | 61 ms  | 56 ms |
| fb2     | 14 ms | 5  ms  | 4 ms  |
---
 .hlint.yaml                                        |   1 +
 pandoc.cabal                                       |   2 +-
 src/Text/Pandoc/ImageSize.hs                       |   9 +-
 src/Text/Pandoc/Readers/DocBook.hs                 |  76 +--
 src/Text/Pandoc/Readers/Docx/Parse.hs              | 163 +++---
 src/Text/Pandoc/Readers/Docx/Parse/Styles.hs       |  31 +-
 src/Text/Pandoc/Readers/Docx/Util.hs               |  27 +-
 src/Text/Pandoc/Readers/EPUB.hs                    |  65 +--
 src/Text/Pandoc/Readers/FB2.hs                     |  93 ++--
 src/Text/Pandoc/Readers/JATS.hs                    |  58 +-
 src/Text/Pandoc/Readers/OPML.hs                    |  29 +-
 src/Text/Pandoc/Readers/Odt.hs                     |   5 +-
 src/Text/Pandoc/Readers/Odt/ContentReader.hs       |  13 +-
 src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs  |   3 +-
 src/Text/Pandoc/Readers/Odt/Generic/Utils.hs       |  33 +-
 .../Pandoc/Readers/Odt/Generic/XMLConverter.hs     |  23 +-
 src/Text/Pandoc/Readers/Odt/Namespaces.hs          |  11 +-
 src/Text/Pandoc/Readers/Odt/StyleReader.hs         |  23 +-
 src/Text/Pandoc/Writers/Docx.hs                    | 263 +++++----
 src/Text/Pandoc/Writers/EPUB.hs                    | 356 ++++++-------
 src/Text/Pandoc/Writers/FB2.hs                     | 127 ++---
 src/Text/Pandoc/Writers/ODT.hs                     |  16 +-
 src/Text/Pandoc/Writers/OOXML.hs                   |  35 +-
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       | 201 +++----
 src/Text/Pandoc/XML/Light.hs                       | 586 +++++++++++++++++++++
 src/Text/Pandoc/XMLParser.hs                       |  66 ---
 test/Tests/Writers/OOXML.hs                        |   3 +-
 test/docx/golden/block_quotes.docx                 | Bin 10098 -> 10071 bytes
 test/docx/golden/codeblock.docx                    | Bin 9950 -> 9920 bytes
 test/docx/golden/comments.docx                     | Bin 10285 -> 10258 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 10048 -> 10021 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10673 -> 10650 bytes
 test/docx/golden/custom_style_reference.docx       | Bin 12434 -> 12403 bytes
 test/docx/golden/definition_list.docx              | Bin 9947 -> 9920 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9953 -> 9925 bytes
 test/docx/golden/document-properties.docx          | Bin 10429 -> 10404 bytes
 test/docx/golden/headers.docx                      | Bin 10086 -> 10059 bytes
 test/docx/golden/image.docx                        | Bin 26764 -> 26736 bytes
 test/docx/golden/inline_code.docx                  | Bin 9886 -> 9859 bytes
 test/docx/golden/inline_formatting.docx            | Bin 10066 -> 10038 bytes
 test/docx/golden/inline_images.docx                | Bin 26822 -> 26793 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10107 -> 10081 bytes
 test/docx/golden/links.docx                        | Bin 10282 -> 10251 bytes
 test/docx/golden/lists.docx                        | Bin 10358 -> 10332 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10149 -> 10123 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10238 -> 10210 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10150 -> 10122 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10245 -> 10216 bytes
 test/docx/golden/notes.docx                        | Bin 10052 -> 10028 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9986 -> 9960 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10121 -> 10094 bytes
 test/docx/golden/table_one_row.docx                | Bin 9938 -> 9908 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10255 -> 10227 bytes
 test/docx/golden/tables.docx                       | Bin 10272 -> 10244 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9930 -> 9903 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9913 -> 9886 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9947 -> 9920 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 10059 -> 10032 bytes
 test/docx/golden/unicode.docx                      | Bin 9871 -> 9845 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9919 -> 9892 bytes
 test/pptx/code-custom.pptx                         | Bin 28221 -> 28184 bytes
 test/pptx/code-custom_templated.pptx               | Bin 395516 -> 395477 bytes
 test/pptx/code.pptx                                | Bin 28220 -> 28183 bytes
 test/pptx/code_templated.pptx                      | Bin 395514 -> 395477 bytes
 test/pptx/document-properties-short-desc.pptx      | Bin 27004 -> 26967 bytes
 .../document-properties-short-desc_templated.pptx  | Bin 394288 -> 394253 bytes
 test/pptx/document-properties.pptx                 | Bin 27408 -> 27375 bytes
 test/pptx/document-properties_templated.pptx       | Bin 394691 -> 394656 bytes
 test/pptx/endnotes.pptx                            | Bin 26962 -> 26928 bytes
 test/pptx/endnotes_templated.pptx                  | Bin 394253 -> 394219 bytes
 test/pptx/endnotes_toc.pptx                        | Bin 27789 -> 27747 bytes
 test/pptx/endnotes_toc_templated.pptx              | Bin 395083 -> 395041 bytes
 test/pptx/images.pptx                              | Bin 44619 -> 44579 bytes
 test/pptx/images_templated.pptx                    | Bin 411909 -> 411870 bytes
 test/pptx/inline_formatting.pptx                   | Bin 26148 -> 26121 bytes
 test/pptx/inline_formatting_templated.pptx         | Bin 393438 -> 393412 bytes
 test/pptx/lists.pptx                               | Bin 27049 -> 27015 bytes
 test/pptx/lists_templated.pptx                     | Bin 394340 -> 394307 bytes
 test/pptx/raw_ooxml.pptx                           | Bin 26940 -> 26908 bytes
 test/pptx/raw_ooxml_templated.pptx                 | Bin 394231 -> 394198 bytes
 test/pptx/remove_empty_slides.pptx                 | Bin 44065 -> 44025 bytes
 test/pptx/remove_empty_slides_templated.pptx       | Bin 411352 -> 411311 bytes
 test/pptx/slide_breaks.pptx                        | Bin 28575 -> 28531 bytes
 test/pptx/slide_breaks_slide_level_1.pptx          | Bin 27744 -> 27705 bytes
 .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395038 -> 395000 bytes
 test/pptx/slide_breaks_templated.pptx              | Bin 395868 -> 395825 bytes
 test/pptx/slide_breaks_toc.pptx                    | Bin 29532 -> 29481 bytes
 test/pptx/slide_breaks_toc_templated.pptx          | Bin 396826 -> 396776 bytes
 test/pptx/speaker_notes.pptx                       | Bin 35436 -> 35360 bytes
 test/pptx/speaker_notes_after_metadata.pptx        | Bin 31675 -> 31636 bytes
 .../speaker_notes_after_metadata_templated.pptx    | Bin 398955 -> 398915 bytes
 test/pptx/speaker_notes_afterheader.pptx           | Bin 30691 -> 30657 bytes
 test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397979 -> 397943 bytes
 test/pptx/speaker_notes_afterseps.pptx             | Bin 51604 -> 51548 bytes
 test/pptx/speaker_notes_afterseps_templated.pptx   | Bin 418896 -> 418834 bytes
 test/pptx/speaker_notes_templated.pptx             | Bin 402728 -> 402650 bytes
 test/pptx/start_numbering_at.pptx                  | Bin 27023 -> 26991 bytes
 test/pptx/start_numbering_at_templated.pptx        | Bin 394314 -> 394283 bytes
 test/pptx/tables.pptx                              | Bin 27566 -> 27532 bytes
 test/pptx/tables_templated.pptx                    | Bin 394859 -> 394827 bytes
 test/pptx/two_column.pptx                          | Bin 26065 -> 26038 bytes
 test/pptx/two_column_templated.pptx                | Bin 393355 -> 393327 bytes
 102 files changed, 1388 insertions(+), 930 deletions(-)
 create mode 100644 src/Text/Pandoc/XML/Light.hs
 delete mode 100644 src/Text/Pandoc/XMLParser.hs

(limited to 'src')

diff --git a/.hlint.yaml b/.hlint.yaml
index d5ebffd34..e482b2b37 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -14,6 +14,7 @@
 - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained
 - ignore: {name: "Use &&&"}
 - ignore: {name: "Use String"}
+- ignore: {name: "Use camelCase"}
 - ignore: {name: "Use fmap"}  # specific for GHC 7.8 compat
 - ignore: {name: "Use isDigit"}
 
diff --git a/pandoc.cabal b/pandoc.cabal
index 3c7063f6c..22aebd55e 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -687,7 +687,7 @@ library
                    Text.Pandoc.Lua.PandocLua,
                    Text.Pandoc.Lua.Util,
                    Text.Pandoc.Lua.Walk,
-                   Text.Pandoc.XMLParser,
+                   Text.Pandoc.XML.Light,
                    Text.Pandoc.CSS,
                    Text.Pandoc.CSV,
                    Text.Pandoc.RoffChar,
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index e0a1af8e8..bb1aa6351 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -44,8 +44,7 @@ import Numeric (showFFloat)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
-import qualified Text.XML.Light as Xml
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light hiding (Attr)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
@@ -332,12 +331,12 @@ svgSize opts img = do
   doc <- either (const mzero) return $ parseXMLElement
                                      $ TL.fromStrict $ UTF8.toText img
   let viewboxSize = do
-        vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
-        [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
+        vb <- findAttrBy (== QName "viewBox" Nothing Nothing) doc
+        [_,_,w,h] <- mapM safeRead (T.words vb)
         return (w,h)
   let dpi = fromIntegral $ writerDpi opts
   let dirToInt dir = do
-        dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack
+        dim <- findAttrBy (== QName dir Nothing Nothing) doc >>= lengthToDim
         return $ inPixel opts dim
   w <- dirToInt "width" <|> (fst <$> viewboxSize)
   h <- dirToInt "height" <|> (snd <$> viewboxSize)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ad0108843..e201b54fe 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -31,8 +31,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 
 {-
 
@@ -578,26 +577,27 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <>
+                                    convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e)
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- convenience function
 named :: Text -> Element -> Bool
-named s e = qName (elName e) == T.unpack s
+named s e = qName (elName e) == s
 
 --
 
@@ -634,7 +634,7 @@ isBlockElement :: Content -> Bool
 isBlockElement (Elem e) = qName (elName e) `elem` blockTags
 isBlockElement _ = False
 
-blockTags :: [String]
+blockTags :: [Text]
 blockTags =
   [ "abstract"
   , "ackno"
@@ -721,7 +721,7 @@ blockTags =
   , "variablelist"
   ] ++ admonitionTags
 
-admonitionTags :: [String]
+admonitionTags :: [Text]
 admonitionTags = ["important","caution","note","tip","warning"]
 
 -- Trim leading and trailing newline characters
@@ -779,10 +779,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> DB m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "toc"   -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -837,7 +837,7 @@ parseBlock (Elem e) =
         "refsect2" -> sect 2
         "refsect3" -> sect 3
         "refsection" -> gets dbSectionLevel >>= sect . (+1)
-        l | l `elem` admonitionTags -> parseAdmonition $ T.pack l
+        l | l `elem` admonitionTags -> parseAdmonition l
         "area" -> skip
         "areaset" -> skip
         "areaspec" -> skip
@@ -899,7 +899,7 @@ parseBlock (Elem e) =
         "subtitle" -> return mempty  -- handled in parent element
         _       -> skip >> getBlocks e
    where skip = do
-           let qn = T.pack $ qName $ elName e
+           let qn = qName $ elName e
            let name = if "pi-" `T.isPrefixOf` qn
                          then "<?" <> qn <> "?>"
                          else qn
@@ -911,7 +911,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ T.pack $ strContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -965,7 +965,7 @@ parseBlock (Elem e) =
                             w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x ->
                                                      (x >= '0' && x <= '9')
-                                                      || x == '.') (T.pack w)
+                                                      || x == '.') w
                             if n > 0 then Just n else Nothing
                       let numrows = case bodyrows of
                                          [] -> 0
@@ -1048,12 +1048,12 @@ parseMixed container conts = do
       x <- parseMixed container rs
       return $ p <> b <> x
 
-parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell]
+parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell]
 parseRow cn = do
   let isEntry x  = named "entry" x || named "td" x || named "th" x
   mapM (parseEntry cn) . filterChildren isEntry
 
-parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell
+parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell
 parseEntry cn el = do
   let colDistance sa ea = do
         let iStrt = elemIndex sa cn
@@ -1075,7 +1075,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines
 getInlines e' = trimInlines . mconcat <$>
                  mapM parseInline (elContent e')
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -1084,9 +1084,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
+parseInline (Text (CData _ s _)) = return $ text s
 parseInline (CRef ref) =
-  return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+  return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "anchor" -> do
@@ -1138,7 +1138,7 @@ parseInline (Elem e) =
         "userinput" -> codeWithLang
         "systemitem" -> codeWithLang
         "varargs" -> return $ code "(...)"
-        "keycap" -> return (str $ T.pack $ strContent e)
+        "keycap" -> return (str $ strContent e)
         "keycombo" -> keycombo <$>
                          mapM parseInline (elContent e)
         "menuchoice" -> menuchoice <$>
@@ -1150,17 +1150,17 @@ parseInline (Elem e) =
             let title = case attrValue "endterm" e of
                             ""      -> maybe "???" xrefTitleByElem
                                          (findElementById linkend content)
-                            endterm -> maybe "???" (T.pack . strContent)
+                            endterm -> maybe "???" strContent
                                          (findElementById endterm content)
             return $ link ("#" <> linkend) "" (text title)
-        "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
-                          $ str $ T.pack $ strContent e
-        "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
+        "email" -> return $ link ("mailto:" <> strContent e) ""
+                          $ str $ strContent e
+        "uri" -> return $ link (strContent e) "" $ str $ strContent e
         "ulink" -> innerInlines (link (attrValue "url" e) "")
         "link" -> do
              ils <- innerInlines id
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "linkend" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
@@ -1180,7 +1180,7 @@ parseInline (Elem e) =
         "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
    where skip = do
-           let qn = T.pack $ qName $ elName e
+           let qn = qName $ elName e
            let name = if "pi-" `T.isPrefixOf` qn
                          then "<?" <> qn <> "?>"
                          else qn
@@ -1193,7 +1193,7 @@ parseInline (Elem e) =
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
          simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines
                          (filterChildren (named "member") e)
          segmentedList = do
@@ -1234,10 +1234,10 @@ parseInline (Elem e) =
                   "sect5"        -> descendantContent "title" el
                   "cmdsynopsis"  -> descendantContent "command" el
                   "funcsynopsis" -> descendantContent "function" el
-                  _              -> T.pack $ qName (elName el) ++ "_title"
+                  _              -> qName (elName el) <> "_title"
           where
             xrefLabel = attrValue "xreflabel" el
-            descendantContent name = maybe "???" (T.pack . strContent)
+            descendantContent name = maybe "???" strContent
                                    . filterElementName (\n -> qName n == name)
 
 -- | Extract a math equation from an element
@@ -1258,7 +1258,7 @@ equation e constructor =
     mathMLEquations :: [Text]
     mathMLEquations = map writeTeX $ rights $ readMath
       (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
-      (readMathML . T.pack . showElement)
+      (readMathML . showElement)
 
     latexEquations :: [Text]
     latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
@@ -1272,8 +1272,8 @@ equation e constructor =
 -- | Get the actual text stored in a CData block. 'showContent'
 -- returns the text still surrounded by the [[CDATA]] tags.
 showVerbatimCData :: Content -> Text
-showVerbatimCData (Text (CData _ d _)) = T.pack d
-showVerbatimCData c = T.pack $ showContent c
+showVerbatimCData (Text (CData _ d _)) = d
+showVerbatimCData c = showContent c
 
 
 -- | Set the prefix of a name to 'Nothing'
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 056dab6c2..c76f3c171 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -63,6 +63,7 @@ import Data.Char (chr, ord, readLitChar)
 import Data.List
 import qualified Data.Map as M
 import qualified Data.Text as T
+import Data.Text (Text)
 import Data.Maybe
 import System.FilePath
 import Text.Pandoc.Readers.Docx.Util
@@ -72,9 +73,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import Text.TeXMath (Exp)
 import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
-import Text.XML.Light
-import qualified Text.XML.Light.Cursor as XMLC
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            , envComments      :: Comments
@@ -128,37 +127,23 @@ mapD f xs =
   in
    concatMapM handler xs
 
-unwrap :: NameSpaces -> Content -> [Content]
-unwrap ns (Elem element)
+unwrapElement :: NameSpaces -> Element -> [Element]
+unwrapElement ns element
   | isElem ns "w" "sdt" element
   , Just sdtContent <- findChildByName ns "w" "sdtContent" element
-  = concatMap (unwrap ns . Elem) (elChildren sdtContent)
+  = concatMap (unwrapElement ns) (elChildren sdtContent)
   | isElem ns "w" "smartTag" element
-  = concatMap (unwrap ns . Elem) (elChildren element)
-unwrap _ content = [content]
+  = concatMap (unwrapElement ns) (elChildren element)
+  | otherwise
+  = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }]
 
-unwrapChild :: NameSpaces -> Content -> Content
-unwrapChild ns (Elem element) =
-  Elem $ element { elContent = concatMap (unwrap ns) (elContent element) }
-unwrapChild _ content = content
+unwrapContent :: NameSpaces -> Content -> [Content]
+unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element
+unwrapContent _ content = [content]
 
-walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
-walkDocument' ns cur =
-  let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur
-  in
-    case XMLC.nextDF modifiedCur of
-      Just cur' -> walkDocument' ns cur'
-      Nothing   -> XMLC.root modifiedCur
-
-walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument :: NameSpaces -> Element -> Element
 walkDocument ns element =
-  let cur = XMLC.fromContent (Elem element)
-      cur' = walkDocument' ns cur
-  in
-    case XMLC.toTree cur' of
-      Elem element' -> Just element'
-      _             -> Nothing
-
+  element{ elContent = concatMap (unwrapContent ns) (elContent element) }
 
 newtype Docx = Docx Document
           deriving Show
@@ -361,9 +346,9 @@ getDocumentXmlPath zf = do
   fp <- findAttr (QName "Target" Nothing Nothing) rel
   -- sometimes there will be a leading slash, which windows seems to
   -- have trouble with.
-  return $ case fp of
+  return $ case T.unpack fp of
     '/' : fp' -> fp'
-    _         -> fp
+    fp'       -> fp'
 
 archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
@@ -372,7 +357,7 @@ archiveToDocument zf = do
   docElem <- maybeToD $ parseXMLFromEntry entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
-  let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+  let bodyElem' = walkDocument namespaces bodyElem
   body <- elemToBody namespaces bodyElem'
   return $ Document namespaces body
 
@@ -414,8 +399,8 @@ archiveToNotes zf =
       fn_namespaces = maybe [] elemToNameSpaces fnElem
       en_namespaces = maybe [] elemToNameSpaces enElem
       ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
-      fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote"
-      en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote"
+      fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
+      en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
   in
    Notes ns fn en
 
@@ -424,7 +409,8 @@ archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
                >>= parseXMLFromEntry
       cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
-      cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
+      cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
+               cmtsElem
   in
     case cmts of
       Just c  -> Comments cmts_namespaces c
@@ -443,8 +429,8 @@ filePathToRelType path docXmlPath =
 relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
 relElemToRelationship relType element | qName (elName element) == "Relationship" =
   do
-    relId <- findAttrText (QName "Id" Nothing Nothing) element
-    target <- findAttrText (QName "Target" Nothing Nothing) element
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
     return $ Relationship relType relId target
 relElemToRelationship _ _ = Nothing
 
@@ -485,10 +471,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
 loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
 loElemToLevelOverride ns element
   | isElem ns "w" "lvlOverride" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       let startOverride = findChildByName ns "w" "startOverride" element
                           >>= findAttrByName ns "w" "val"
-                          >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                          >>= stringToInteger
           lvl = findChildByName ns "w" "lvl" element
                 >>= levelElemToLevel ns
       return $ LevelOverride ilvl startOverride lvl
@@ -497,9 +483,9 @@ loElemToLevelOverride _ _ = Nothing
 numElemToNum :: NameSpaces -> Element -> Maybe Numb
 numElemToNum ns element
   | isElem ns "w" "num" element = do
-      numId <- findAttrTextByName ns "w" "numId" element
+      numId <- findAttrByName ns "w" "numId" element
       absNumId <- findChildByName ns "w" "abstractNumId" element
-                  >>= findAttrTextByName ns "w" "val"
+                  >>= findAttrByName ns "w" "val"
       let lvlOverrides = mapMaybe
                          (loElemToLevelOverride ns)
                          (findChildrenByName ns "w" "lvlOverride" element)
@@ -509,7 +495,7 @@ numElemToNum _ _ = Nothing
 absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
 absNumElemToAbsNum ns element
   | isElem ns "w" "abstractNum" element = do
-      absNumId <- findAttrTextByName ns "w" "abstractNumId" element
+      absNumId <- findAttrByName ns "w" "abstractNumId" element
       let levelElems = findChildrenByName ns "w" "lvl" element
           levels = mapMaybe (levelElemToLevel ns) levelElems
       return $ AbstractNumb absNumId levels
@@ -518,14 +504,14 @@ absNumElemToAbsNum _ _ = Nothing
 levelElemToLevel :: NameSpaces -> Element -> Maybe Level
 levelElemToLevel ns element
   | isElem ns "w" "lvl" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       fmt <- findChildByName ns "w" "numFmt" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       txt <- findChildByName ns "w" "lvlText" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       let start = findChildByName ns "w" "start" element
                   >>= findAttrByName ns "w" "val"
-                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                  >>= stringToInteger
       return (Level ilvl fmt txt start)
 levelElemToLevel _ _ = Nothing
 
@@ -546,11 +532,11 @@ archiveToNumbering :: Archive -> Numbering
 archiveToNumbering archive =
   fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
 
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
+elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
 elemToNotes ns notetype element
   | isElem ns "w" (notetype <> "s") element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" notetype element)
       in
@@ -562,7 +548,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
 elemToComments ns element
   | isElem ns "w" "comments" element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" "comment" element)
       in
@@ -622,12 +608,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
       stringToInteger
     , hangingParIndent =
       findAttrByName ns "w" "hanging" element >>=
-      stringToInteger}
+      stringToInteger }
 elemToParIndentation _ _ = Nothing
 
-testBitMask :: String -> Int -> Bool
+testBitMask :: Text -> Int -> Bool
 testBitMask bitMaskS n =
-  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+  case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
     []            -> False
     ((n', _) : _) -> (n' .|. n) /= 0
 
@@ -642,7 +628,7 @@ elemToBodyPart ns element
   | isElem ns "w" "p" element
   , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
       do
-        expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
+        expsLst <- eitherToD $ readOMML $ showElement c
         return $ OMathPara expsLst
 elemToBodyPart ns element
   | isElem ns "w" "p" element
@@ -666,7 +652,7 @@ elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
     let caption' = findChildByName ns "w" "tblPr" element
                    >>= findChildByName ns "w" "tblCaption"
-                   >>= findAttrTextByName ns "w" "val"
+                   >>= findAttrByName ns "w" "val"
         caption = fromMaybe "" caption'
         grid' = case findChildByName ns "w" "tblGrid" element of
           Just g  -> elemToTblGrid ns g
@@ -705,8 +691,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
 getTitleAndAlt ns element =
   let mbDocPr = findChildByName ns "wp" "inline" element >>=
                 findChildByName ns "wp" "docPr"
-      title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
-      alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
+      title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+      alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
   in (title, alt)
 
 elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -718,7 +704,7 @@ elemToParPart ns element
   = let (title, alt) = getTitleAndAlt ns drawingElem
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrTextByName ns "r" "embed"
+                  >>= findAttrByName ns "r" "embed"
     in
      case drawing of
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -728,7 +714,7 @@ elemToParPart ns element
   | isElem ns "w" "r" element
   , Just _ <- findChildByName ns "w" "pict" element =
     let drawing = findElement (elemName ns "v" "imagedata") element
-                  >>= findAttrTextByName ns "r" "id"
+                  >>= findAttrByName ns "r" "id"
     in
      case drawing of
        -- Todo: check out title and attr for deprecated format.
@@ -797,7 +783,7 @@ elemToParPart ns element
       fldCharState <- gets stateFldCharState
       case fldCharState of
         FldCharOpen -> do
-          info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
+          info <- eitherToD $ parseFieldInfo $ strContent instrText
           modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
           return NullParPart
         _ -> return NullParPart
@@ -818,48 +804,48 @@ elemToParPart ns element
       return $ ChangedRuns change runs
 elemToParPart ns element
   | isElem ns "w" "bookmarkStart" element
-  , Just bmId <- findAttrTextByName ns "w" "id" element
-  , Just bmName <- findAttrTextByName ns "w" "name" element =
+  , Just bmId <- findAttrByName ns "w" "id" element
+  , Just bmName <- findAttrByName ns "w" "name" element =
     return $ BookMark bmId bmName
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just relId <- findAttrTextByName ns "r" "id" element = do
+  , Just relId <- findAttrByName ns "r" "id" element = do
     location <- asks envLocation
     runs <- mapD (elemToRun ns) (elChildren element)
     rels <- asks envRelationships
     case lookupRelationship location relId rels of
       Just target ->
-         case findAttrTextByName ns "w" "anchor" element of
+         case findAttrByName ns "w" "anchor" element of
              Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
              Nothing -> return $ ExternalHyperLink target runs
       Nothing     -> return $ ExternalHyperLink "" runs
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
+  , Just anchor <- findAttrByName ns "w" "anchor" element = do
     runs <- mapD (elemToRun ns) (elChildren element)
     return $ InternalHyperLink anchor runs
 elemToParPart ns element
   | isElem ns "w" "commentRangeStart" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element = do
       (Comments _ commentMap) <- asks envComments
       case M.lookup cmtId commentMap of
         Just cmtElem -> elemToCommentStart ns cmtElem
         Nothing      -> throwError WrongElem
 elemToParPart ns element
   | isElem ns "w" "commentRangeEnd" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element =
+  , Just cmtId <- findAttrByName ns "w" "id" element =
     return $ CommentEnd cmtId
 elemToParPart ns element
   | isElem ns "m" "oMath" element =
-    fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
+    fmap PlainOMath (eitherToD $ readOMML $ showElement element)
 elemToParPart _ _ = throwError WrongElem
 
 elemToCommentStart :: NameSpaces -> Element -> D ParPart
 elemToCommentStart ns element
   | isElem ns "w" "comment" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element
-  , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
-  , cmtDate <- findAttrTextByName ns "w" "date" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element
+  , Just cmtAuthor <- findAttrByName ns "w" "author" element
+  , cmtDate <- findAttrByName ns "w" "date" element = do
       bps <- mapD (elemToBodyPart ns) (elChildren element)
       return $ CommentStart cmtId cmtAuthor cmtDate bps
 elemToCommentStart _ _ = throwError WrongElem
@@ -878,7 +864,7 @@ elemToExtent drawingElem =
     where
       wp_ns  = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
       getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
-                    >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
+                    >>= findAttr (QName at Nothing Nothing) >>= safeRead
 
 
 childElemToRun :: NameSpaces -> Element -> D Run
@@ -889,7 +875,7 @@ childElemToRun ns element
   = let (title, alt) = getTitleAndAlt ns element
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
+                  >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
     in
      case drawing of
        Just s -> expandDrawingId s >>=
@@ -902,7 +888,7 @@ childElemToRun ns element
   = return InlineChart
 childElemToRun ns element
   | isElem ns "w" "footnoteReference" element
-  , Just fnId <- findAttrTextByName ns "w" "id" element = do
+  , Just fnId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupFootnote fnId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -910,7 +896,7 @@ childElemToRun ns element
       Nothing  -> return $ Footnote []
 childElemToRun ns element
   | isElem ns "w" "endnoteReference" element
-  , Just enId <- findAttrTextByName ns "w" "id" element = do
+  , Just enId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupEndnote enId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -963,15 +949,15 @@ getParStyleField _ _ = Nothing
 getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
 getTrackedChange ns element
   | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange ns element
   | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange _ _ = Nothing
 
@@ -980,7 +966,7 @@ elemToParagraphStyle ns element sty
   | Just pPr <- findChildByName ns "w" "pPr" element =
     let style =
           mapMaybe
-          (fmap ParaStyleId . findAttrTextByName ns "w" "val")
+          (fmap ParaStyleId . findAttrByName ns "w" "val")
           (findChildrenByName ns "w" "pStyle" pPr)
     in ParagraphStyle
       {pStyle = mapMaybe (`M.lookup` sty) style
@@ -1012,7 +998,7 @@ elemToRunStyleD ns element
     charStyles <- asks envCharStyles
     let parentSty =
           findChildByName ns "w" "rStyle" rPr >>=
-          findAttrTextByName ns "w" "val" >>=
+          findAttrByName ns "w" "val" >>=
           flip M.lookup charStyles . CharStyleId
     return $ elemToRunStyle ns element parentSty
 elemToRunStyleD _ _ = return defaultRunStyle
@@ -1022,7 +1008,7 @@ elemToRunElem ns element
   | isElem ns "w" "t" element
     || isElem ns "w" "delText" element
     || isElem ns "m" "t" element = do
-    let str = T.pack $ strContent element
+    let str = strContent element
     font <- asks envFont
     case font of
       Nothing -> return $ TextRun str
@@ -1044,14 +1030,14 @@ getSymChar :: NameSpaces -> Element -> RunElem
 getSymChar ns element
   | Just s <- lowerFromPrivate <$> getCodepoint
   , Just font <- getFont =
-    case readLitChar ("\\x" ++ s) of
+    case readLitChar ("\\x" ++ T.unpack s) of
          [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
          _           -> TextRun ""
   where
     getCodepoint = findAttrByName ns "w" "char" element
-    getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
-    lowerFromPrivate ('F':xs) = '0':xs
-    lowerFromPrivate xs       = xs
+    getFont = textToFont =<< findAttrByName ns "w" "font" element
+    lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t
+                       | otherwise             = t
 getSymChar _ _ = TextRun ""
 
 elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1061,8 +1047,9 @@ elemToRunElems ns element
        let qualName = elemName ns "w"
        let font = do
                     fontElem <- findElement (qualName "rFonts") element
-                    textToFont . T.pack =<<
-                       foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
+                    textToFont =<<
+                       foldr ((<|>) . (flip findAttr fontElem . qualName))
+                         Nothing ["ascii", "hAnsi"]
        local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
 elemToRunElems _ _ = throwError WrongElem
 
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index edade8654..0d7271d6a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -48,12 +48,13 @@ import Data.Function (on)
 import Data.String (IsString(..))
 import qualified Data.Map as M
 import qualified Data.Text as T
+import qualified Data.Text.Read
+import Data.Text (Text)
 import Data.Maybe
 import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 newtype CharStyleId   = CharStyleId T.Text
   deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -109,7 +110,7 @@ data RunStyle = RunStyle { isBold       :: Maybe Bool
                          , isRTL        :: Maybe Bool
                          , isForceCTL   :: Maybe Bool
                          , rVertAlign   :: Maybe VertAlign
-                         , rUnderline   :: Maybe String
+                         , rUnderline   :: Maybe Text
                          , rParentStyle :: Maybe CharStyle
                          }
                 deriving Show
@@ -159,7 +160,7 @@ isBasedOnStyle ns element parentStyle
   , Just styleType <- findAttrByName ns "w" "type" element
   , styleType == cStyleType parentStyle
   , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
-                       findAttrTextByName ns "w" "val"
+                       findAttrByName ns "w" "val"
   , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
   | isElem ns "w" "style" element
   , Just styleType <- findAttrByName ns "w" "type" element
@@ -169,7 +170,7 @@ isBasedOnStyle ns element parentStyle
   | otherwise = False
 
 class HasStyleId a => ElemToStyle a where
-  cStyleType  :: Maybe a -> String
+  cStyleType  :: Maybe a -> Text
   elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
 
 class FromStyleId (StyleId a) => HasStyleId a where
@@ -226,8 +227,10 @@ buildBasedOnList ns element rootStyle =
     stys -> stys ++
             concatMap (buildBasedOnList ns element . Just) stys
 
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+stringToInteger :: Text -> Maybe Integer
+stringToInteger s = case Data.Text.Read.decimal s of
+                      Right (x,_) -> Just x
+                      Left _      -> Nothing
 
 checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
 checkOnOff ns rPr tag
@@ -247,7 +250,7 @@ checkOnOff _ _ _ = Nothing
 elemToCharStyle :: NameSpaces
                 -> Element -> Maybe CharStyle -> Maybe CharStyle
 elemToCharStyle ns element parentStyle
-  = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
+  = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
               <*> getElementStyleName ns element
               <*> Just (elemToRunStyle ns element parentStyle)
 
@@ -281,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle
 getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
 getHeaderLevel ns element
   | Just styleName <- getElementStyleName ns element
-  , Just n <- stringToInteger . T.unpack =<<
+  , Just n <- stringToInteger =<<
               (T.stripPrefix "heading " . T.toLower $
                 fromStyleName styleName)
   , n > 0 = Just (styleName, fromInteger n)
@@ -289,8 +292,8 @@ getHeaderLevel _ _ = Nothing
 
 getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
 getElementStyleName ns el = coerce <$>
-  ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
-  <|> findAttrTextByName ns "w" "styleId" el)
+  ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+  <|> findAttrByName ns "w" "styleId" el)
 
 getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
 getNumInfo ns element = do
@@ -298,15 +301,15 @@ getNumInfo ns element = do
               findChildByName ns "w" "numPr"
       lvl = fromMaybe "0" (numPr >>=
                            findChildByName ns "w" "ilvl" >>=
-                           findAttrTextByName ns "w" "val")
+                           findAttrByName ns "w" "val")
   numId <- numPr >>=
            findChildByName ns "w" "numId" >>=
-           findAttrTextByName ns "w" "val"
+           findAttrByName ns "w" "val"
   return (numId, lvl)
 
 elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
 elemToParStyleData ns element parentStyle
-  | Just styleId <- findAttrTextByName ns "w" "styleId" element
+  | Just styleId <- findAttrByName ns "w" "styleId" element
   , Just styleName <- getElementStyleName ns element
   = Just $ ParStyle
       {
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index f9c9a8e26..21df03d9e 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Docx.StyleMaps
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
@@ -18,51 +19,45 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , elemToNameSpaces
                                       , findChildByName
                                       , findChildrenByName
-                                      , findAttrText
                                       , findAttrByName
-                                      , findAttrTextByName
                                       ) where
 
 import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
-import Text.XML.Light
+import Data.Text (Text)
+import Text.Pandoc.XML.Light
 
-type NameSpaces = [(String, String)]
+type NameSpaces = [(Text, Text)]
 
 elemToNameSpaces :: Element -> NameSpaces
 elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
 
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
 attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
 attrToNSPair _                                     = Nothing
 
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
   let ns' = ns ++ elemToNameSpaces element
   in qName (elName element) == name &&
      qURI (elName element) == lookup prefix ns'
 
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
 findChildByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findChild (elemName ns' pref name) el
 
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
 findChildrenByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText x = fmap T.pack . findAttr x
-
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findAttr (elemName ns' pref name) el
 
-findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
-findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 369c4f0c9..eb8d2405d 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -23,8 +23,8 @@ import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
 import Control.Monad.Except (throwError)
 import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf)
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Map as M (Map, elems, fromList, lookup)
 import Data.Maybe (mapMaybe)
 import qualified Data.Text.Lazy as TL
@@ -40,13 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
+import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
-type Items = M.Map String (FilePath, MimeType)
+type Items = M.Map Text (FilePath, MimeType)
 
 readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
 readEPUB opts bytes = case toArchiveOrFail bytes of
@@ -126,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
 imageMimes :: [MimeType]
 imageMimes = ["image/gif", "image/jpeg", "image/png"]
 
-type CoverId = String
+type CoverId = Text
 
 type CoverImage = FilePath
 
-parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
+parseManifest :: (PandocMonad m)
+              => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
 parseManifest content coverId = do
   manifest <- findElementE (dfName "manifest") content
   let items = findChildren (dfName "item") manifest
   r <- mapM parseItem items
   let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
-  return (cover `mplus` coverId, M.fromList r)
+  return (T.unpack <$> (cover `mplus` coverId), M.fromList r)
   where
-    findCover e = maybe False (isInfixOf "cover-image")
+    findCover e = maybe False (T.isInfixOf "cover-image")
                   (findAttr (emptyName "properties") e)
                || Just True == liftM2 (==) coverId (findAttr (emptyName "id") e)
     parseItem e = do
       uid <- findAttrE (emptyName "id") e
       href <- findAttrE (emptyName "href") e
       mime <- findAttrE (emptyName "media-type") e
-      return (uid, (href, T.pack mime))
+      return (uid, (T.unpack href, mime))
 
 parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
 parseSpine is e = do
@@ -173,11 +173,11 @@ parseMeta content = do
 -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
 parseMetaItem :: Element -> Meta -> Meta
 parseMetaItem e@(stripNamespace . elName -> field) meta =
-  addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
+  addMetaField (renameMeta field) (B.str $ strContent e) meta
 
-renameMeta :: String -> T.Text
+renameMeta :: Text -> Text
 renameMeta "creator" = "author"
-renameMeta s         = T.pack s
+renameMeta s         = s
 
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
@@ -187,7 +187,7 @@ getManifest archive = do
   ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
   as <- fmap (map attrToPair . elAttribs)
     (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
-  manifestFile <- mkE "Root not found" (lookup "full-path" as)
+  manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
   let rootdir = dropFileName manifestFile
   --mime <- lookup "media-type" as
   manifest <- findEntryByPathE manifestFile archive
@@ -201,7 +201,8 @@ fixInternalReferences pathToFile =
   . walk (fixBlockIRs filename)
   . walk (fixInlineIRs filename)
   where
-    (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
+    (root, T.unpack . escapeURI . T.pack -> filename) =
+      splitFileName pathToFile
 
 fixInlineIRs :: String -> Inline -> Inline
 fixInlineIRs s (Span as v) =
@@ -214,7 +215,7 @@ fixInlineIRs s (Link as is t) =
   Link (fixAttrs s as) is t
 fixInlineIRs _ v = v
 
-prependHash :: [T.Text] -> Inline -> Inline
+prependHash :: [Text] -> Inline -> Inline
 prependHash ps l@(Link attr is (url, tit))
   | or [s `T.isPrefixOf` url | s <- ps] =
     Link attr is ("#" <> url, tit)
@@ -231,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) =
 fixBlockIRs _ b = b
 
 fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) =
+  (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
 
-addHash :: String -> T.Text -> T.Text
+addHash :: FilePath -> Text -> Text
 addHash _ ""    = ""
 addHash s ident = T.pack (takeFileName s) <> "#" <> ident
 
-removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
 removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
 
-isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr :: (Text, a) -> Bool
 isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
 
 -- Library
@@ -257,33 +259,33 @@ uncurry3 f (a, b, c) = f a b c
 
 -- Utility
 
-stripNamespace :: QName -> String
+stripNamespace :: QName -> Text
 stripNamespace (QName v _ _) = v
 
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
 attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
 attrToNSPair _                              = Nothing
 
-attrToPair :: Attr -> (String, String)
+attrToPair :: Attr -> (Text, Text)
 attrToPair (Attr (QName name _ _) val) = (name, val)
 
-defaultNameSpace :: Maybe String
+defaultNameSpace :: Maybe Text
 defaultNameSpace = Just "http://www.idpf.org/2007/opf"
 
-dfName :: String -> QName
+dfName :: Text -> QName
 dfName s = QName s defaultNameSpace Nothing
 
-emptyName :: String -> QName
+emptyName :: Text -> QName
 emptyName s = QName s Nothing Nothing
 
 -- Convert Maybe interface to Either
 
-findAttrE :: PandocMonad m => QName -> Element -> m String
+findAttrE :: PandocMonad m => QName -> Element -> m Text
 findAttrE q e = mkE "findAttr" $ findAttr q e
 
 findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
-  mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+  mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a
 
 parseXMLDocE :: PandocMonad m => Entry -> m Element
 parseXMLDocE entry =
@@ -293,7 +295,8 @@ parseXMLDocE entry =
   fp  = T.pack $ eRelativePath entry
 
 findElementE :: PandocMonad m => QName -> Element -> m Element
-findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+findElementE e x =
+  mkE ("Unable to find element: " <> tshow e) $ findElement e x
 
-mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ T.pack s) return
+mkE :: PandocMonad m => Text -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b804eab4f..66e390bd7 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -25,7 +25,6 @@ TODO:
 module Text.Pandoc.Readers.FB2 ( readFB2 ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
-import Data.ByteString.Lazy.Char8 ( pack )
 import Data.ByteString.Base64.Lazy
 import Data.Functor
 import Data.List (intersperse)
@@ -42,8 +41,8 @@ import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
+import qualified Text.Pandoc.UTF8 as UTF8
 
 type FB2 m = StateT FB2State m
 
@@ -85,12 +84,12 @@ removeHash t = case T.uncons t of
   Just ('#', xs) -> xs
   _              -> t
 
-convertEntity :: String -> Text
-convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e)
 
 parseInline :: PandocMonad m => Content -> FB2 m Inlines
 parseInline (Elem e) =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -98,12 +97,12 @@ parseInline (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement name
       pure mempty
-parseInline (Text x) = pure $ text $ T.pack $ cdData x
+parseInline (Text x) = pure $ text $ cdData x
 parseInline (CRef r) = pure $ str $ convertEntity r
 
 parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -113,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
 
 parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
 parseRootElement e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "FictionBook" -> do
       -- Parse notes before parsing the rest of the content.
       case filterChild isNotesBody e of
@@ -146,7 +145,7 @@ parseNote e =
     Just sectionId -> do
       content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
       oldNotes <- gets fb2Notes
-      modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
+      modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes }
       pure ()
   where
     isTitle x = qName (elName x) == "title"
@@ -158,7 +157,7 @@ parseNote e =
 -- | Parse a child of @\<FictionBook>@ element.
 parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
 parseFictionBookChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "stylesheet" -> pure mempty -- stylesheet is ignored
     "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
     "body" -> if isNotesBody e
@@ -170,7 +169,7 @@ parseFictionBookChild e =
 -- | Parse a child of @\<description>@ element.
 parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
 parseDescriptionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title-info" -> mapM_ parseTitleInfoChild (elChildren e)
     "src-title-info" -> pure () -- ignore
     "document-info" -> pure ()
@@ -184,7 +183,7 @@ parseDescriptionChild e =
 -- | Parse a child of @\<body>@ element.
 parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
 parseBodyChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "image" -> parseImageElement e
     "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
     "epigraph" -> parseEpigraph e
@@ -198,7 +197,10 @@ parseBinaryElement e =
     (Nothing, _) -> report $ IgnoredElement "binary without id attribute"
     (Just _, Nothing) ->
       report $ IgnoredElement "binary without content-type attribute"
-    (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
+    (Just filename, contentType) ->
+      insertMedia (T.unpack filename) contentType
+                    (decodeLenient
+                      (UTF8.fromTextLazy . TL.fromStrict . strContent $ e))
 
 -- * Type parsers
 
@@ -208,13 +210,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
 
 parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
 parseAuthorChild e =
-  case T.pack $ qName $ elName e of
-    "first-name" -> pure $ Just $ T.pack $ strContent e
-    "middle-name" -> pure $ Just $ T.pack $ strContent e
-    "last-name" -> pure $ Just $ T.pack $ strContent e
-    "nickname" -> pure $ Just $ T.pack $ strContent e
-    "home-page" -> pure $ Just $ T.pack $ strContent e
-    "email" -> pure $ Just $ T.pack $ strContent e
+  case qName $ elName e of
+    "first-name" -> pure $ Just $ strContent e
+    "middle-name" -> pure $ Just $ strContent e
+    "last-name" -> pure $ Just $ strContent e
+    "nickname" -> pure $ Just $ strContent e
+    "home-page" -> pure $ Just $ strContent e
+    "email" -> pure $ Just $ strContent e
     name -> do
       report $ IgnoredElement $ name <> " in author"
       pure Nothing
@@ -238,13 +240,13 @@ parseTitleContent _ = pure Nothing
 parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
 parseImageElement e =
   case href of
-    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
+    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
     Nothing -> do
       report $ IgnoredElement " image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        title = maybe "" T.pack $ findAttr (unqual "title") e
-        imgId = maybe "" T.pack $ findAttr (unqual "id") e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        title = fromMaybe "" $ findAttr (unqual "title") e
+        imgId = fromMaybe "" $ findAttr (unqual "id") e
         href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
 
 -- | Parse @pType@
@@ -258,7 +260,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
 -- | Parse @citeType@ child
 parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
 parseCiteChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "empty-line" -> pure horizontalRule
@@ -273,13 +275,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
 
 parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
 parsePoemChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "epigraph" -> parseEpigraph e
     "stanza" -> parseStanza e
     "text-author" -> para <$> parsePType e
-    "date" -> pure $ para $ text $ T.pack $ strContent e
+    "date" -> pure $ para $ text $ strContent e
     name -> report (UnexpectedXmlElement name "poem") $> mempty
 
 parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -292,7 +294,7 @@ joinLineBlocks [] = []
 
 parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
 parseStanzaChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "v" -> lineBlock . (:[]) <$> parsePType e
@@ -302,11 +304,11 @@ parseStanzaChild e =
 parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraph e =
   divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
-  where divId = maybe "" T.pack $ findAttr (unqual "id") e
+  where divId = fromMaybe "" $ findAttr (unqual "id") e
 
 parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraphChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -320,7 +322,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
 
 parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
 parseAnnotationChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -334,14 +336,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
 parseSection e = do
   n <- gets fb2SectionLevel
   modify $ \st -> st{ fb2SectionLevel = n + 1 }
-  let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
+  let sectionId = fromMaybe "" $ findAttr (unqual "id") e
   bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
   modify $ \st -> st{ fb2SectionLevel = n }
   pure bs
 
 parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
 parseSectionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseBodyChild e
     "epigraph" -> parseEpigraph e
     "image" -> parseImageElement e
@@ -363,16 +365,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
 parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
 parseNamedStyle e = do
   content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
-  let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
+  let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
   case findAttr (unqual "name") e of
-    Just name -> pure $ spanWith ("", [T.pack name], lang) content
+    Just name -> pure $ spanWith ("", [name], lang) content
     Nothing -> do
       report $ IgnoredElement "link without required name"
       pure mempty
 
 parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
 parseNamedStyleChild (Elem e) =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -380,7 +382,7 @@ parseNamedStyleChild (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement $ name <> " in style"
@@ -392,7 +394,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
 parseLinkType e = do
   content <- mconcat <$> mapM parseStyleLinkType (elContent e)
   notes <- gets fb2Notes
-  case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+  case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
     Just href -> case findAttr (QName "type" Nothing Nothing) e of
                    Just "note" -> case M.lookup href notes of
                                     Nothing -> pure $ link href "" content
@@ -419,15 +421,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
 -- | Parse @title-infoType@
 parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
 parseTitleInfoChild e =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "genre" -> pure ()
     "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
-    "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
+    "book-title" -> modify (setMeta "title" (text $ strContent e))
     "annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
     "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
-                                                                      $ T.pack
                                                                       $ strContent e))
-    "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
+    "date" -> modify (setMeta "date" (text $ strContent e))
     "coverpage" -> parseCoverPage e
     "lang" -> pure ()
     "src-lang" -> pure ()
@@ -441,7 +442,7 @@ parseCoverPage e =
     Just img -> case href of
                   Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
                   Nothing -> pure ()
-                where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+                where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
     Nothing -> pure ()
 
 -- | Parse @inlineImageType@ element
@@ -454,5 +455,5 @@ parseInlineImageElement e =
     Nothing -> do
       report $ IgnoredElement "inline image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index dfd343b7a..5353f2001 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -16,7 +16,7 @@ module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Error (PandocError(..))
-import Data.Char (isDigit, isSpace, toUpper)
+import Data.Char (isDigit, isSpace)
 import Data.Default
 import Data.Generics
 import Data.List (foldl', intersperse)
@@ -31,8 +31,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
 
@@ -67,29 +66,29 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e)
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr =
   fromMaybe "" . maybeAttrValue attr
 
-maybeAttrValue :: String -> Element -> Maybe Text
+maybeAttrValue :: Text -> Element -> Maybe Text
 maybeAttrValue attr elt =
-  T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+  lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
 
 -- convenience function
-named :: String -> Element -> Bool
+named :: Text -> Element -> Bool
 named s e = qName (elName e) == s
 
 --
@@ -155,10 +154,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> JATS m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "p" -> parseMixed para (elContent e)
@@ -207,7 +206,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ textContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -271,7 +270,7 @@ parseBlock (Elem e) =
                                                 Just "center" -> AlignCenter
                                                 _             -> AlignDefault
                       let toWidth c = do
-                            w <- findAttrText (unqual "colwidth") c
+                            w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
                             if n > 0 then Just n else Nothing
                       let numrows = foldl' max 0 $ map length bodyrows
@@ -442,16 +441,10 @@ parseRef e = do
        Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
        -- TODO handle mixed-citation
 
-findAttrText :: QName -> Element -> Maybe Text
-findAttrText x = fmap T.pack . findAttr x
-
 textContent :: Element -> Text
-textContent = T.pack . strContent
-
-textContentRecursive :: Element -> Text
-textContentRecursive = T.pack . strContentRecursive
+textContent = strContent
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -460,9 +453,8 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
-parseInline (CRef ref) =
-  return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) = return . text . convertEntity $ ref
 parseInline (Elem e) =
   case qName (elName e) of
         "italic" -> innerInlines emph
@@ -507,9 +499,9 @@ parseInline (Elem e) =
                         else linkWith attr ("#" <> rid) "" ils
         "ext-link" -> do
              ils <- innerInlines id
-             let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+             let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "rid" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, [], [])
@@ -529,7 +521,7 @@ parseInline (Elem e) =
    where innerInlines f = extractSpaces f . mconcat <$>
                           mapM parseInline (elContent e)
          mathML x =
-            case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
+            case readMathML . showElement $ everywhere (mkT removePrefix) x of
                 Left _ -> mempty
                 Right m -> writeTeX m
          formula constructor = do
@@ -547,4 +539,4 @@ parseInline (Elem e) =
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index bdadc4dd9..184d5a63f 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -13,7 +13,6 @@ Conversion of OPML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.OPML ( readOPML ) where
 import Control.Monad.State.Strict
-import Data.Char (toUpper)
 import Data.Default
 import Data.Generics
 import Data.Maybe (fromMaybe)
@@ -28,8 +27,7 @@ import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.Pandoc.Shared (crFilter, blocksToInlines')
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import Control.Monad.Except (throwError)
 
 type OPML m = StateT OPMLState m
@@ -69,25 +67,22 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e))
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-
-textContent :: Element -> Text
-textContent = T.pack . strContent
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
 -- exceptT = either throwError return
@@ -111,11 +106,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
 parseBlock (Elem e) =
   case qName (elName e) of
         "ownerName"    -> mempty <$ modify (\st ->
-                              st{opmlDocAuthors = [text $ textContent e]})
+                              st{opmlDocAuthors = [text $ strContent e]})
         "dateModified" -> mempty <$ modify (\st ->
-                              st{opmlDocDate = text $ textContent e})
+                              st{opmlDocDate = text $ strContent e})
         "title"        -> mempty <$ modify (\st ->
-                              st{opmlDocTitle = text $ textContent e})
+                              st{opmlDocTitle = text $ strContent e})
         "outline" -> gets opmlSectionLevel >>= sect . (+1)
         "?xml"  -> return mempty
         _       -> getBlocks e
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 85308deb1..c274b6fd4 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -14,8 +14,7 @@ Entry point to the odt reader.
 module Text.Pandoc.Readers.Odt ( readOdt ) where
 
 import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 import qualified Data.ByteString.Lazy as B
 
@@ -91,7 +90,7 @@ archiveToOdt archive = do
 
 
 --
-entryToXmlElem :: Entry -> Either PandocError XML.Element
+entryToXmlElem :: Entry -> Either PandocError Element
 entryToXmlElem entry =
   case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
     Right x  -> Right x
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 43c44e7e9..df90880fa 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -29,14 +29,14 @@ import Control.Monad ((<=<))
 
 import qualified Data.ByteString.Lazy as B
 import Data.Foldable (fold)
-import Data.List (find, stripPrefix)
+import Data.List (find)
 import qualified Data.Map as M
 import qualified Data.Text as T
 import Data.Maybe
 import Data.Semigroup (First(..), Option(..))
 
 import Text.TeXMath (readMathML, writeTeX)
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import Text.Pandoc.Builder hiding (underline)
 import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -557,7 +557,7 @@ read_plain_text =  fst ^&&& read_plain_text' >>% recover
                        >>?% mappend
     --
     extractText     :: XML.Content -> Fallible T.Text
-    extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
+    extractText (XML.Text cData) = succeedWith (XML.cdData cData)
     extractText         _        = failEmpty
 
 read_text_seq :: InlineMatcher
@@ -777,14 +777,14 @@ read_frame_img =
       ""   -> returnV mempty -< ()
       src' -> do
         let exts = extensionsFromList [Ext_auto_identifiers]
-        resource   <- lookupResource                          -< src'
+        resource   <- lookupResource                          -< T.unpack src'
         _          <- updateMediaWithResource                 -< resource
         w          <- findAttrText' NsSVG "width"             -< ()
         h          <- findAttrText' NsSVG "height"            -< ()
         titleNodes <- matchChildContent' [ read_frame_title ] -< ()
         alt        <- matchChildContent [] read_plain_text    -< ()
         arr (firstMatch . uncurry4 imageWith)                 -<
-          (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
+          (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
 
 read_frame_title :: InlineMatcher
 read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
@@ -804,7 +804,8 @@ read_frame_mathml =
     case fold src of
       ""   -> returnV mempty -< ()
       src' -> do
-        let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
+        let path = T.unpack $
+                    fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml"
         (_, mathml) <- lookupResource -< path
         case readMathML (UTF8.toText $ B.toStrict mathml) of
           Left _     -> returnV mempty -< ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
 module Text.Pandoc.Readers.Odt.Generic.Namespaces where
 
 import qualified Data.Map as M
+import Data.Text (Text)
 
 --
-type NameSpaceIRI          = String
+type NameSpaceIRI          = Text
 
 --
 type NameSpaceIRIs     nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 , reverseComposition
 , tryToRead
 , Lookupable(..)
-, readLookupables
 , readLookupable
 , readPercent
 , findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 
 import Control.Category (Category, (<<<), (>>>))
 import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
 import qualified Data.Foldable as F (Foldable, foldr)
 import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
 
 -- | Equivalent to
 -- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
 -- (nobody wants that) while the latter returns "to much" for simple purposes.
 -- This function instead applies 'reads' and returns the first match (if any)
 -- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
 
 -- | A version of 'reads' that requires a '%' sign after the number
 readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i   , r ) <- reads s
 -- | Data that can be looked up.
 -- This is mostly a utility to read data with kind *.
 class Lookupable a where
-  lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- >   readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
-                                 a <- maybeToList (lookup word lookupTable)
-                    ]
+  lookupTable :: [(Text, a)]
 
 -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
-                 $ map ((`lookup` lookupTable).fst)
-                 $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+  lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
 
 uncurry3 :: (a->b->c                -> z) -> (a,b,c          ) -> z
 uncurry4 :: (a->b->c->d             -> z) -> (a,b,c,d        ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..0d921e23b 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE TupleSections   #-}
 {-# LANGUAGE GADTs           #-}
 {-# LANGUAGE LambdaCase      #-}
@@ -60,11 +61,11 @@ import           Control.Arrow
 import           Data.Bool ( bool )
 import           Data.Either ( rights )
 import qualified Data.Map             as M
-import qualified Data.Text            as T
+import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
 
-import qualified Text.XML.Light       as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -78,13 +79,13 @@ import           Text.Pandoc.Readers.Odt.Generic.Fallible
 --------------------------------------------------------------------------------
 
 --
-type ElementName           = String
-type AttributeName         = String
-type AttributeValue        = String
-type TextAttributeValue    = T.Text
+type ElementName           = Text
+type AttributeName         = Text
+type AttributeValue        = Text
+type TextAttributeValue    = Text
 
 --
-type NameSpacePrefix       = String
+type NameSpacePrefix       = Text
 
 --
 type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -461,7 +462,7 @@ lookupDefaultingAttr     :: (NameSpaceID nsID, Lookupable a, Default a)
 lookupDefaultingAttr nsID attrName
                          = lookupAttrWithDefault nsID attrName def
 
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
 findAttr'               :: (NameSpaceID nsID)
                         => nsID -> AttributeName
                         -> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
                         =         qualifyName nsID attrName
                               &&& getCurrentElement
                           >>% XML.findAttr
-                          >>^ fmap T.pack
 
 -- | Return value as string or fail
 findAttr               :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText           :: (NameSpaceID nsID)
                        -> FallibleXMLConverter nsID extraState x TextAttributeValue
 findAttrText nsID attrName
                        = findAttr' nsID attrName
-                         >>^ fmap T.pack
                          >>> maybeToChoice
 
 -- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
                         -> XMLConverter nsID extraState x TextAttributeValue
 findAttrTextWithDefault nsID attrName deflt
                        = findAttr' nsID attrName
-                         >>^ maybe deflt T.pack
+                         >>^ fromMaybe deflt
 
 -- | Read and return value or fail
 readAttr               :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
 -- Internals
 --------------------------------------------------------------------------------
 
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
 stringToBool' val | val `elem` trueValues  = Just True
                   | val `elem` falseValues = Just False
                   | otherwise              = Nothing
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index 3a24a1162..70741c28d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt.Namespaces
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -13,10 +14,10 @@ Namespaces used in odt files.
 module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
                                           ) where
 
-import Data.List (isPrefixOf)
 import qualified Data.Map as M (empty, insert)
 import Data.Maybe (fromMaybe, listToMaybe)
-
+import Data.Text (Text)
+import qualified Data.Text as T
 import Text.Pandoc.Readers.Odt.Generic.Namespaces
 
 
@@ -30,7 +31,7 @@ instance NameSpaceID Namespace where
 
 
 findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri]
 
 nsIDmap :: NameSpaceIRIs Namespace
 nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
@@ -54,12 +55,12 @@ data Namespace = -- Open Document core
                  -- Core XML (basically only for the 'id'-attribute)
                | NsXML
                  -- Fallback
-               | NsOther String
+               | NsOther Text
   deriving ( Eq, Ord, Show )
 
 -- | Not the actual iri's, but large prefixes of them - this way there are
 -- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
+nsIDs :: [(Text, Namespace)]
 nsIDs = [
   ("urn:oasis:names:tc:opendocument:xmlns:animation"        , NsAnim         ),
   ("urn:oasis:names:tc:opendocument:xmlns:chart"            , NsChart        ),
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 46a777df1..5e10f896c 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -2,6 +2,7 @@
 {-# LANGUAGE Arrows          #-}
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE TupleSections   #-}
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Odt.StyleReader
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -46,11 +47,13 @@ import qualified Data.Foldable as F
 import Data.List (unfoldr)
 import qualified Data.Map as M
 import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
 import qualified Data.Set as S
 
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, tshow)
 
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 
@@ -90,7 +93,7 @@ instance Default FontPitch where
 --
 -- Thus, we want
 
-type FontFaceName = String
+type FontFaceName = Text
 
 type FontPitches = M.Map FontFaceName FontPitch
 
@@ -151,7 +154,7 @@ findPitch =     ( lookupAttr NsStyle "font-pitch"
 -- Definitions of main data
 --------------------------------------------------------------------------------
 
-type StyleName        = String
+type StyleName        = Text
 
 -- | There are two types of styles: named styles with a style family and an
 -- optional style parent, and default styles for each style family,
@@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} =
   -- \^ simpler, but in general less efficient
 
 data ListLevelStyle = ListLevelStyle { listLevelType  :: ListLevelType
-                                     , listItemPrefix :: Maybe String
-                                     , listItemSuffix :: Maybe String
+                                     , listItemPrefix :: Maybe Text
+                                     , listItemSuffix :: Maybe Text
                                      , listItemFormat :: ListItemNumberFormat
                                      , listItemStart  :: Int
                                      }
@@ -366,9 +369,9 @@ instance Show ListLevelStyle where
   show ListLevelStyle{..} =    "<LLS|"
                             ++ show listLevelType
                             ++ "|"
-                            ++ maybeToString listItemPrefix
+                            ++ maybeToString (T.unpack <$> listItemPrefix)
                             ++ show listItemFormat
-                            ++ maybeToString listItemSuffix
+                            ++ maybeToString (T.unpack <$> listItemSuffix)
                             ++ ">"
     where maybeToString = fromMaybe ""
 
@@ -471,7 +474,7 @@ readTextProperties =
      )
   where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
         isFontBold = ("normal",False):("bold",True)
-                    :map ((,True).show) ([100,200..900]::[Int])
+                    :map ((,True) . tshow) ([100,200..900]::[Int])
 
 readUnderlineMode     :: StyleReaderSafe _x (Maybe UnderlineMode)
 readUnderlineMode     = readLineMode "text-underline-mode"
@@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
 readStrikeThroughMode = readLineMode "text-line-through-mode"
                                      "text-line-through-style"
 
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode)
 readLineMode modeAttr styleAttr = proc x -> do
   isUL <- searchAttr  NsStyle styleAttr False isLinePresent -< x
   mode <- lookupAttr' NsStyle  modeAttr                     -< x
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index da990e4d3..89c71d773 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -31,6 +31,7 @@ import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
 import qualified Data.Set as Set
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
@@ -57,19 +58,19 @@ import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
 import Text.Printf (printf)
 import Text.TeXMath
-import Text.XML.Light as XML
-import Text.XML.Light.Cursor as XMLC
 import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML
+import Data.Generics (mkT, everywhere)
 
 data ListMarker = NoMarker
                 | BulletMarker
                 | NumberMarker ListNumberStyle ListNumberDelim Int
                 deriving (Show, Read, Eq, Ord)
 
-listMarkerToId :: ListMarker -> String
+listMarkerToId :: ListMarker -> Text
 listMarkerToId NoMarker = "990"
 listMarkerToId BulletMarker = "991"
-listMarkerToId (NumberMarker sty delim n) =
+listMarkerToId (NumberMarker sty delim n) = T.pack $
   '9' : '9' : styNum : delimNum : show n
   where styNum = case sty of
                       DefaultStyle -> '2'
@@ -106,8 +107,8 @@ data WriterEnv = WriterEnv{ envTextProperties :: EnvProps
                           , envListLevel      :: Int
                           , envListNumId      :: Int
                           , envInDel          :: Bool
-                          , envChangesAuthor  :: T.Text
-                          , envChangesDate    :: T.Text
+                          , envChangesAuthor  :: Text
+                          , envChangesDate    :: Text
                           , envPrintWidth     :: Integer
                           }
 
@@ -125,9 +126,9 @@ defaultWriterEnv = WriterEnv{ envTextProperties = mempty
 
 data WriterState = WriterState{
          stFootnotes      :: [Element]
-       , stComments       :: [([(T.Text, T.Text)], [Inline])]
-       , stSectionIds     :: Set.Set T.Text
-       , stExternalLinks  :: M.Map String String
+       , stComments       :: [([(Text, Text)], [Inline])]
+       , stSectionIds     :: Set.Set Text
+       , stExternalLinks  :: M.Map Text Text
        , stImages         :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
        , stLists          :: [ListMarker]
        , stInsId          :: Int
@@ -164,18 +165,18 @@ defaultWriterState = WriterState{
 
 type WS m = ReaderT WriterEnv (StateT WriterState m)
 
-renumIdMap :: Int -> [Element] -> M.Map String String
+renumIdMap :: Int -> [Element] -> M.Map Text Text
 renumIdMap _ [] = M.empty
 renumIdMap n (e:es)
   | Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
-      M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es)
+      M.insert oldId ("rId" <> tshow n) (renumIdMap (n+1) es)
   | otherwise = renumIdMap n es
 
-replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
+replaceAttr :: (QName -> Bool) -> Text -> [XML.Attr] -> [XML.Attr]
 replaceAttr f val = map $
     \a -> if f (attrKey a) then XML.Attr (attrKey a) val else a
 
-renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element
+renumId :: (QName -> Bool) -> M.Map Text Text -> Element -> Element
 renumId f renumMap e
   | Just oldId <- findAttrBy f e
   , Just newId <- M.lookup oldId renumMap =
@@ -184,18 +185,12 @@ renumId f renumMap e
      e { elAttribs = attrs' }
   | otherwise = e
 
-renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element]
+renumIds :: (QName -> Bool) -> M.Map Text Text -> [Element] -> [Element]
 renumIds f renumMap = map (renumId f renumMap)
 
-findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text
-findAttrTextBy x = fmap T.pack . findAttrBy x
-
-lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text
-lookupAttrTextBy x = fmap T.pack . lookupAttrBy x
-
 -- | Certain characters are invalid in XML even if escaped.
 -- See #1992
-stripInvalidChars :: T.Text -> T.Text
+stripInvalidChars :: Text -> Text
 stripInvalidChars = T.filter isValidChar
 
 -- | See XML reference
@@ -234,11 +229,11 @@ writeDocx opts doc = do
 
   -- Gets the template size
   let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz"))
-  let mbAttrSzWidth = mbpgsz >>= lookupAttrTextBy ((=="w") . qName) . elAttribs
+  let mbAttrSzWidth = mbpgsz >>= lookupAttrBy ((=="w") . qName) . elAttribs
 
   let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar"))
-  let mbAttrMarLeft = mbpgmar >>= lookupAttrTextBy ((=="left") . qName) . elAttribs
-  let mbAttrMarRight = mbpgmar >>= lookupAttrTextBy ((=="right") . qName) . elAttribs
+  let mbAttrMarLeft = mbpgmar >>= lookupAttrBy ((=="left") . qName) . elAttribs
+  let mbAttrMarRight = mbpgmar >>= lookupAttrBy ((=="right") . qName) . elAttribs
 
   -- Get the available area (converting the size and the margins to int and
   -- doing the difference
@@ -250,24 +245,21 @@ writeDocx opts doc = do
 
   -- styles
   mblang <- toLang $ getLang opts meta
+  -- TODO FIXME avoid this generic traversal!
+  -- lang is in w:docDefaults /  w:rPr  /  w:lang
   let addLang :: Element -> Element
-      addLang e = case (\l -> XMLC.toTree . go (T.unpack $ renderLang l) $
-                                 XMLC.fromElement e) <$> mblang of
-                    Just (Elem e') -> e'
-                    _              -> e -- return original
-        where go :: String -> Cursor -> Cursor
-              go l cursor = case XMLC.findRec (isLangElt . current) cursor of
-                              Nothing -> cursor
-                              Just t  -> XMLC.modifyContent (setval l) t
-              setval :: String -> Content -> Content
-              setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $
-                                               elAttribs e' }
-              setval _ x         = x
-              setvalattr :: String -> XML.Attr -> XML.Attr
-              setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l
-              setvalattr _ x                                 = x
-              isLangElt (Elem e') = qName (elName e') == "lang"
-              isLangElt _         = False
+      addLang = case mblang of
+                  Nothing -> id
+                  Just l  -> everywhere (mkT (go (renderLang l)))
+        where
+          go :: Text -> Element -> Element
+          go l e'
+            | qName (elName e') == "lang"
+                = e'{ elAttribs = map (setvalattr l) $ elAttribs e' }
+            | otherwise = e'
+
+          setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l
+          setvalattr _ x                                 = x
 
   let stylepath = "word/styles.xml"
   styledoc <- addLang <$> parseXml refArchive distArchive stylepath
@@ -337,12 +329,13 @@ writeDocx opts doc = do
 
   -- [Content_Types].xml
   let mkOverrideNode (part', contentType') = mknode "Override"
-               [("PartName",part'),("ContentType",contentType')] ()
+               [("PartName", T.pack part')
+               ,("ContentType", contentType')] ()
   let mkImageOverride (_, imgpath, mbMimeType, _) =
-          mkOverrideNode ("/word/" ++ imgpath,
-                          maybe "application/octet-stream" T.unpack mbMimeType)
+          mkOverrideNode ("/word/" <> imgpath,
+                          fromMaybe "application/octet-stream" mbMimeType)
   let mkMediaOverride imgpath =
-          mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath)
+          mkOverrideNode ("/" <> imgpath, getMimeTypeDef imgpath)
   let overrides = map mkOverrideNode (
                   [("/word/webSettings.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
@@ -369,13 +362,14 @@ writeDocx opts doc = do
                   ,("/word/footnotes.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
                   ] ++
-                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                  map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x),
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
-                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                  map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x),
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
                     map mkImageOverride imgs ++
-                    [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive
-                                        , "word/media/" `isPrefixOf` eRelativePath e ]
+                    [ mkMediaOverride (eRelativePath e)
+                        | e <- zEntries refArchive
+                        , "word/media/" `isPrefixOf` eRelativePath e ]
 
   let defaultnodes = [mknode "Default"
               [("Extension","xml"),("ContentType","application/xml")] (),
@@ -421,7 +415,7 @@ writeDocx opts doc = do
   let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
   let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
   let baserels = baserels' ++ renumHeaders ++ renumFooters
-  let toImgRel (ident,path,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
+  let toImgRel (ident,path,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",T.pack ident),("Target",T.pack path)] ()
   let imgrels = map toImgRel imgs
   let toLinkRel (src,ident) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
   let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
@@ -489,10 +483,10 @@ writeDocx opts doc = do
   numbering <- parseXml refArchive distArchive numpath
   let newNumElts = mkNumbering (stLists st)
   let pandocAdded e =
-       case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of
+       case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of
          Just numid -> numid >= (990 :: Int)
          Nothing    ->
-           case findAttrTextBy ((== "numId") . qName) e >>= safeRead of
+           case findAttrBy ((== "numId") . qName) e >>= safeRead of
              Just numid -> numid >= (1000 :: Int)
              Nothing    -> False
   let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering)
@@ -514,7 +508,7 @@ writeDocx opts doc = do
   let extraCoreProps = ["subject","lang","category","description"]
   let extraCorePropsMap = M.fromList $ zip extraCoreProps
                        ["dc:subject","dc:language","cp:category","dc:description"]
-  let lookupMetaString' :: T.Text -> Meta -> T.Text
+  let lookupMetaString' :: Text -> Meta -> Text
       lookupMetaString' key' meta' =
         case key' of
              "description"    -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta')
@@ -530,21 +524,21 @@ writeDocx opts doc = do
           : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta))
           : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta)
             | k <- M.keys (unMeta meta), k `elem` extraCoreProps]
-          ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords)
+          ++ mknode "cp:keywords" [] (T.intercalate ", " keywords)
           : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
                    , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
-                   ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+                   ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime)
   let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
 
   -- docProps/custom.xml
-  let customProperties :: [(String, String)]
-      customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta)
+  let customProperties :: [(Text, Text)]
+      customProperties = [ (k, lookupMetaString k meta)
                          | k <- M.keys (unMeta meta)
                          , k `notElem` (["title", "author", "keywords"]
                                        ++ extraCoreProps)]
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
-         ,("pid", show pid)
+         ,("pid", tshow pid)
          ,("name", k)] $ mknode "vt:lpwstr" [] v
   let customPropsPath = "docProps/custom.xml"
   let customProps = mknode "Properties"
@@ -594,7 +588,8 @@ writeDocx opts doc = do
   fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
   webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
   headerFooterEntries <- mapM (entryFromArchive refArchive . ("word/" ++)) $
-                     mapMaybe extractTarget (headers ++ footers)
+                         mapMaybe (fmap T.unpack . extractTarget)
+                         (headers ++ footers)
   let miscRelEntries = [ e | e <- zEntries refArchive
                        , "word/_rels/" `isPrefixOf` eRelativePath e
                        , ".xml.rels" `isSuffixOf` eRelativePath e
@@ -620,8 +615,8 @@ newParaPropToOpenXml (fromStyleName -> s) =
   let styleId = T.filter (not . isSpace) s
   in mknode "w:style" [ ("w:type", "paragraph")
                       , ("w:customStyle", "1")
-                      , ("w:styleId", T.unpack styleId)]
-     [ mknode "w:name" [("w:val", T.unpack s)] ()
+                      , ("w:styleId", styleId)]
+     [ mknode "w:name" [("w:val", s)] ()
      , mknode "w:basedOn" [("w:val","BodyText")] ()
      , mknode "w:qFormat" [] ()
      ]
@@ -631,8 +626,8 @@ newTextPropToOpenXml (fromStyleName -> s) =
   let styleId = T.filter (not . isSpace) s
   in mknode "w:style" [ ("w:type", "character")
                       , ("w:customStyle", "1")
-                      , ("w:styleId", T.unpack styleId)]
-     [ mknode "w:name" [("w:val", T.unpack s)] ()
+                      , ("w:styleId", styleId)]
+     [ mknode "w:name" [("w:val", s)] ()
      , mknode "w:basedOn" [("w:val","BodyTextChar")] ()
      ]
 
@@ -643,13 +638,14 @@ styleToOpenXml sm style =
         toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing
                         | otherwise = Just $
                           mknode "w:style" [("w:type","character"),
-                           ("w:customStyle","1"),("w:styleId",show toktype)]
-                             [ mknode "w:name" [("w:val",show toktype)] ()
+                           ("w:customStyle","1"),("w:styleId", tshow toktype)]
+                             [ mknode "w:name" [("w:val", tshow toktype)] ()
                              , mknode "w:basedOn" [("w:val","VerbatimChar")] ()
                              , mknode "w:rPr" [] $
-                               [ mknode "w:color" [("w:val",tokCol toktype)] ()
+                               [ mknode "w:color" [("w:val", tokCol toktype)] ()
                                  | tokCol toktype /= "auto" ] ++
-                               [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
+                               [ mknode "w:shd" [("w:val","clear")
+                                                ,("w:fill",tokBg toktype)] ()
                                  | tokBg toktype /= "auto" ] ++
                                [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
                                [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
@@ -657,10 +653,10 @@ styleToOpenXml sm style =
                              ]
         tokStyles = tokenStyles style
         tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles
-        tokCol toktype = maybe "auto" (drop 1 . fromColor)
+        tokCol toktype = maybe "auto" (T.pack . drop 1 . fromColor)
                          $ (tokenColor =<< M.lookup toktype tokStyles)
                            `mplus` defaultColor style
-        tokBg toktype = maybe "auto" (drop 1 . fromColor)
+        tokBg toktype = maybe "auto" (T.pack . drop 1 . fromColor)
                          $ (tokenBackground =<< M.lookup toktype tokStyles)
                            `mplus` backgroundColor style
         parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing
@@ -673,10 +669,11 @@ styleToOpenXml sm style =
                              , mknode "w:pPr" []
                                $ mknode "w:wordWrap" [("w:val","off")] ()
                                :
-                         maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style)
+                         maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill", T.pack $ drop 1 $ fromColor col)] ()]) (backgroundColor style)
                              ]
 
-copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry
+copyChildren :: (PandocMonad m)
+             => Archive -> Archive -> String -> Integer -> [Text] -> m Entry
 copyChildren refArchive distArchive path timestamp elNames = do
   ref  <- parseXml refArchive distArchive path
   dist <- parseXml distArchive distArchive path
@@ -685,7 +682,7 @@ copyChildren refArchive distArchive path timestamp elNames = do
     }
   where
     strName QName{qName=name, qPrefix=prefix}
-      | Just p <- prefix = p++":"++name
+      | Just p <- prefix = p <> ":" <> name
       | otherwise        = name
     shouldCopy = (`elem` elNames) . strName
     cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
@@ -706,35 +703,35 @@ maxListLevel = 8
 
 mkNum :: ListMarker -> Int -> Element
 mkNum marker numid =
-  mknode "w:num" [("w:numId",show numid)]
+  mknode "w:num" [("w:numId",tshow numid)]
    $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] ()
    : case marker of
        NoMarker     -> []
        BulletMarker -> []
        NumberMarker _ _ start ->
-          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
-              $ mknode "w:startOverride" [("w:val",show start)] ())
+          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",tshow (lvl :: Int))]
+              $ mknode "w:startOverride" [("w:val",tshow start)] ())
                 [0..maxListLevel]
 
 mkAbstractNum :: ListMarker -> Integer -> Element
 mkAbstractNum marker nsid =
   mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
-    $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
+    $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] ()
     : mknode "w:multiLevelType" [("w:val","multilevel")] ()
     : map (mkLvl marker)
       [0..maxListLevel]
 
 mkLvl :: ListMarker -> Int -> Element
 mkLvl marker lvl =
-  mknode "w:lvl" [("w:ilvl",show lvl)] $
+  mknode "w:lvl" [("w:ilvl",tshow lvl)] $
     [ mknode "w:start" [("w:val",start)] ()
       | marker /= NoMarker && marker /= BulletMarker ] ++
     [ mknode "w:numFmt" [("w:val",fmt)] ()
-    , mknode "w:lvlText" [("w:val",lvltxt)] ()
+    , mknode "w:lvlText" [("w:val", lvltxt)] ()
     , mknode "w:lvlJc" [("w:val","left")] ()
     , mknode "w:pPr" []
-      [ mknode "w:ind" [ ("w:left",show $ lvl * step + step)
-                       , ("w:hanging",show (hang :: Int))
+      [ mknode "w:ind" [ ("w:left",tshow $ lvl * step + step)
+                       , ("w:hanging",tshow (hang :: Int))
                        ] ()
       ]
     ]
@@ -743,8 +740,8 @@ mkLvl marker lvl =
                  NoMarker             -> ("bullet"," ","1")
                  BulletMarker         -> ("bullet",bulletFor lvl,"1")
                  NumberMarker st de n -> (styleFor st lvl
-                                         ,patternFor de ("%" ++ show (lvl + 1))
-                                         ,show n)
+                                         ,patternFor de ("%" <> tshow (lvl + 1))
+                                         ,tshow n)
           step = 720
           hang = 480
           bulletFor 0 = "\x2022"  -- filled circle
@@ -767,9 +764,9 @@ mkLvl marker lvl =
           styleFor DefaultStyle 5 = "lowerRoman"
           styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6)
           styleFor _ _            = "decimal"
-          patternFor OneParen s  = s ++ ")"
-          patternFor TwoParens s = "(" ++ s ++ ")"
-          patternFor _ s         = s ++ "."
+          patternFor OneParen s  = s <> ")"
+          patternFor TwoParens s = "(" <> s <> ")"
+          patternFor _ s         = s <> "."
 
 getNumId :: (PandocMonad m) => WS m Int
 getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
@@ -777,8 +774,8 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
 
 makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element]
 makeTOC opts = do
-  let depth = "1-"++show (writerTOCDepth opts)
-  let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
+  let depth = "1-" <> tshow (writerTOCDepth opts)
+  let tocCmd = "TOC \\o \"" <> depth <> "\" \\h \\z \\u"
   tocTitle <- gets stTocTitle
   title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
   return
@@ -831,7 +828,7 @@ writeOpenXML opts (Pandoc meta blocks) = do
   let toComment (kvs, ils) = do
         annotation <- inlinesToOpenXML opts ils
         return $
-          mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs]
+          mknode "w:comment" [("w:" <> k, v) | (k,v) <- kvs]
             [ mknode "w:p" [] $
               map Elem
               [ mknode "w:pPr" []
@@ -867,24 +864,24 @@ pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
 pStyleM styleName = do
   pStyleMap <- gets (smParaStyle . stStyleMaps)
   let sty' = getStyleIdFromName styleName pStyleMap
-  return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] ()
+  return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
 
 rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element
 rStyleM styleName = do
   cStyleMap <- gets (smCharStyle . stStyleMaps)
   let sty' = getStyleIdFromName styleName cStyleMap
-  return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] ()
+  return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] ()
 
-getUniqueId :: (PandocMonad m) => WS m String
+getUniqueId :: (PandocMonad m) => WS m Text
 -- the + 20 is to ensure that there are no clashes with the rIds
 -- already in word/document.xml.rel
 getUniqueId = do
   n <- gets stCurId
   modify $ \st -> st{stCurId = n + 1}
-  return $ show n
+  return $ tshow n
 
 -- | Key for specifying user-defined docx styles.
-dynamicStyleKey :: T.Text
+dynamicStyleKey :: Text
 dynamicStyleKey = "custom-style"
 
 -- | Convert a Pandoc block element to OpenXML.
@@ -979,7 +976,7 @@ blockToOpenXML' opts (Para lst)
 blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
 blockToOpenXML' _ b@(RawBlock format str)
   | format == Format "openxml" = return [
-        Text (CData CDataRaw (T.unpack str) Nothing)
+        Text (CData CDataRaw str Nothing)
       ]
   | otherwise                  = do
       report $ BlockNotRendered b
@@ -1036,7 +1033,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
   let fullrow = 5000 -- 100% specified in pct
   let rowwidth = fullrow * sum widths
   let mkgridcol w = mknode "w:gridCol"
-                       [("w:w", show (floor (textwidth * w) :: Integer))] ()
+                       [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
   let hasHeader = not $ all null headers
   modify $ \s -> s { stInTable = False }
   -- for compatibility with Word <= 2007, we include a val with a bitmask
@@ -1054,16 +1051,16 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
      mknode "w:tbl" []
       ( mknode "w:tblPr" []
         (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
+            mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
                                ,("w:lastRow","0")
                                ,("w:firstColumn","0")
                                ,("w:lastColumn","0")
                                ,("w:noHBand","0")
                                ,("w:noVBand","0")
-                               ,("w:val", printf "%04x" tblLookVal)
+                               ,("w:val", T.pack $ printf "%04x" tblLookVal)
                                ] () :
-          [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] ()
+          [ mknode "w:tblCaption" [("w:val", captionStr)] ()
           | not (null caption) ] )
       : mknode "w:tblGrid" []
         (if all (==0) widths
@@ -1126,7 +1123,7 @@ listItemToOpenXML opts numid (first:rest) = do
   modify $ \st -> st{ stInList = oldInList }
   return $ first'' ++ rest''
 
-alignmentToString :: Alignment -> [Char]
+alignmentToString :: Alignment -> Text
 alignmentToString alignment = case alignment of
                                  AlignLeft    -> "left"
                                  AlignRight   -> "right"
@@ -1169,8 +1166,8 @@ getParaProps displayMathPara = do
   listLevel <- asks envListLevel
   numid <- asks envListNumId
   let listPr = [mknode "w:numPr" []
-                [ mknode "w:ilvl" [("w:val",show listLevel)] ()
-                , mknode "w:numId" [("w:val",show numid)] () ] | listLevel >= 0 && not displayMathPara]
+                [ mknode "w:ilvl" [("w:val",tshow listLevel)] ()
+                , mknode "w:numId" [("w:val",tshow numid)] () ] | listLevel >= 0 && not displayMathPara]
   return $ case listPr ++ squashProps props of
                 [] -> []
                 ps -> [mknode "w:pPr" [] ps]
@@ -1185,7 +1182,7 @@ withParaPropM md p = do
   d <- md
   withParaProp d p
 
-formattedString :: PandocMonad m => T.Text -> WS m [Element]
+formattedString :: PandocMonad m => Text -> WS m [Element]
 formattedString str =
   -- properly handle soft hyphens
   case splitTextBy (=='\173') str of
@@ -1194,7 +1191,7 @@ formattedString str =
          sh <- formattedRun [mknode "w:softHyphen" [] ()]
          intercalate sh <$> mapM formattedString' ws
 
-formattedString' :: PandocMonad m => T.Text -> WS m [Element]
+formattedString' :: PandocMonad m => Text -> WS m [Element]
 formattedString' str = do
   inDel <- asks envInDel
   formattedRun [ mktnode (if inDel then "w:delText" else "w:t")
@@ -1226,7 +1223,7 @@ inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) =
      mknode "w:r" []
      (mknode "w:t"
        [("xml:space","preserve")]
-       ("\t" :: String))] ++)
+       ("\t" :: Text))] ++)
     <$> inlinesToOpenXML opts ils
 inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) =
   inlinesToOpenXML opts ils
@@ -1236,17 +1233,17 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
   let ident' = fromMaybe ident (lookup "id" kvs)
       kvs' = filter (("id" /=) . fst) kvs
   modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st }
-  return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ]
+  return [ Elem $ mknode "w:commentRangeStart" [("w:id", ident')] () ]
 inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) =
   -- prefer the "id" in kvs, since that is the one produced by the docx
   -- reader.
   let ident' = fromMaybe ident (lookup "id" kvs)
   in return . map Elem $
-     [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] ()
+     [ mknode "w:commentRangeEnd" [("w:id", ident')] ()
      , mknode "w:r" []
        [ mknode "w:rPr" []
          [ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
-       , mknode "w:commentReference" [("w:id", T.unpack ident')] () ]
+       , mknode "w:commentReference" [("w:id", ident')] () ]
      ]
 inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
   stylemod <- case lookup dynamicStyleKey kvs of
@@ -1270,8 +1267,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
         defaultAuthor <- asks envChangesAuthor
         let author = fromMaybe defaultAuthor (lookup "author" kvs)
         let mdate = lookup "date" kvs
-        return $ ("w:author", T.unpack author) :
-                   maybe [] (\date -> [("w:date", T.unpack date)]) mdate
+        return $ ("w:author", author) :
+                   maybe [] (\date -> [("w:date", date)]) mdate
   insmod <- if "insertion" `elem` classes
                then do
                  changeAuthorDate <- getChangeAuthorDate
@@ -1281,7 +1278,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                    x <- f
                    return [Elem $
                            mknode "w:ins"
-                             (("w:id", show insId) : changeAuthorDate) x]
+                             (("w:id", tshow insId) : changeAuthorDate) x]
                else return id
   delmod <- if "deletion" `elem` classes
                then do
@@ -1291,7 +1288,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                  return $ \f -> local (\env->env{envInDel=True}) $ do
                    x <- f
                    return [Elem $ mknode "w:del"
-                             (("w:id", show delId) : changeAuthorDate) x]
+                             (("w:id", tshow delId) : changeAuthorDate) x]
                else return id
   contents <- insmod $ delmod $ dirmod $ stylemod $ pmod
                      $ inlinesToOpenXML opts ils
@@ -1322,7 +1319,7 @@ inlineToOpenXML' opts (Strikeout lst) =
 inlineToOpenXML' _ LineBreak = return [Elem br]
 inlineToOpenXML' _ il@(RawInline f str)
   | f == Format "openxml" = return
-                            [Text (CData CDataRaw (T.unpack str) Nothing)]
+                            [Text (CData CDataRaw str Nothing)]
   | otherwise             = do
       report $ InlineNotRendered il
       return []
@@ -1335,7 +1332,7 @@ inlineToOpenXML' opts (Math mathType str) = do
   when (mathType == DisplayMath) setFirstPara
   res <- (lift . lift) (convertMath writeOMML mathType str)
   case res of
-       Right r -> return [Elem r]
+       Right r -> return [Elem $ fromXLElement r]
        Left il -> inlineToOpenXML' opts il
 inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Code attrs str) = do
@@ -1348,7 +1345,7 @@ inlineToOpenXML' opts (Code attrs str) = do
         mknode "w:r" []
           [ mknode "w:rPr" [] $
             maybeToList (lookup toktype tokTypesMap)
-            , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
+            , mknode "w:t" [("xml:space","preserve")] tok ]
   withTextPropM (rStyleM "Verbatim Char")
     $ if isNothing (writerHighlightStyle opts)
           then unhighlighted
@@ -1365,7 +1362,7 @@ inlineToOpenXML' opts (Note bs) = do
   let notemarker = mknode "w:r" []
                    [ mknode "w:rPr" [] footnoteStyle
                    , mknode "w:footnoteRef" [] () ]
-  let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker
+  let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
   let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs
       insertNoteRef (Para ils  : xs) = Para  (notemarkerXml : Space : ils) : xs
       insertNoteRef xs               = Para [notemarkerXml] : xs
@@ -1384,17 +1381,17 @@ inlineToOpenXML' opts (Note bs) = do
 inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   return
-    [ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ]
+    [ Elem $ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ]
 -- external link:
 inlineToOpenXML' opts (Link _ txt (src,_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   extlinks <- gets stExternalLinks
-  id' <- case M.lookup (T.unpack src) extlinks of
+  id' <- case M.lookup src extlinks of
             Just i   -> return i
             Nothing  -> do
-              i <- ("rId"++) `fmap` getUniqueId
+              i <- ("rId" <>) <$> getUniqueId
               modify $ \st -> st{ stExternalLinks =
-                        M.insert (T.unpack src) i extlinks }
+                        M.insert src i extlinks }
               return i
   return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ]
 inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
@@ -1414,17 +1411,17 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
                                              ,("noChangeAspect","1")] ()
         nvPicPr  = mknode "pic:nvPicPr" []
                         [ mknode "pic:cNvPr"
-                            [("descr",T.unpack src),("id","0"),("name","Picture")] ()
+                            [("descr",src),("id","0"),("name","Picture")] ()
                         , cNvPicPr ]
         blipFill = mknode "pic:blipFill" []
-          [ mknode "a:blip" [("r:embed",ident)] ()
+          [ mknode "a:blip" [("r:embed",T.pack ident)] ()
           , mknode "a:stretch" [] $
               mknode "a:fillRect" [] ()
           ]
         xfrm =    mknode "a:xfrm" []
                         [ mknode "a:off" [("x","0"),("y","0")] ()
-                        , mknode "a:ext" [("cx",show xemu)
-                                         ,("cy",show yemu)] () ]
+                        , mknode "a:ext" [("cx",tshow xemu)
+                                         ,("cy",tshow yemu)] () ]
         prstGeom = mknode "a:prstGeom" [("prst","rect")] $
                          mknode "a:avLst" [] ()
         ln =      mknode "a:ln" [("w","9525")]
@@ -1445,12 +1442,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
         imgElt = mknode "w:r" [] $
           mknode "w:drawing" [] $
             mknode "wp:inline" []
-              [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
+              [ mknode "wp:extent" [("cx",tshow xemu),("cy",tshow yemu)] ()
               , mknode "wp:effectExtent"
                 [("b","0"),("l","0"),("r","0"),("t","0")] ()
               , mknode "wp:docPr"
-                [ ("descr", T.unpack $ stringify alt)
-                , ("title", T.unpack title)
+                [ ("descr", stringify alt)
+                , ("title", title)
                 , ("id","1")
                 , ("name","Picture")
                 ] ()
@@ -1463,7 +1460,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
     Just imgData -> return [Elem $ generateImgElt imgData]
     Nothing -> ( do --try
       (img, mt) <- P.fetchItem src
-      ident <- ("rId"++) `fmap` getUniqueId
+      ident <- ("rId" <>) <$> getUniqueId
 
       let
         imgext = case mt >>= extensionFromMimeType of
@@ -1477,10 +1474,10 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
             Just Svg  -> ".svg"
             Just Emf  -> ".emf"
             Nothing   -> ""
-        imgpath = "media/" <> ident <> T.unpack imgext
-        mbMimeType = mt <|> getMimeType imgpath
+        imgpath = "media/" <> ident <> imgext
+        mbMimeType = mt <|> getMimeType (T.unpack imgpath)
 
-        imgData = (ident, imgpath, mbMimeType, img)
+        imgData = (T.unpack ident, T.unpack imgpath, mbMimeType, img)
 
       if T.null imgext
          then -- without an extension there is no rule for content type
@@ -1538,20 +1535,20 @@ withDirection x = do
                                     , envTextProperties = EnvProps textStyle textProps'
                                     }
 
-wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content]
+wrapBookmark :: (PandocMonad m) => Text -> [Content] -> WS m [Content]
 wrapBookmark "" contents = return contents
 wrapBookmark ident contents = do
   id' <- getUniqueId
   let bookmarkStart = mknode "w:bookmarkStart"
                        [("w:id", id')
-                       ,("w:name", T.unpack $ toBookmarkName ident)] ()
+                       ,("w:name", toBookmarkName ident)] ()
       bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
   return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd]
 
 -- Word imposes a 40 character limit on bookmark names and requires
 -- that they begin with a letter.  So we just use a hash of the
 -- identifier when otherwise we'd have an illegal bookmark name.
-toBookmarkName :: T.Text -> T.Text
+toBookmarkName :: Text -> Text
 toBookmarkName s
   | Just (c, _) <- T.uncons s
   , isLetter c
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 171ffe582..3f10cb437 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -24,12 +24,13 @@ import Control.Monad.State.Strict (StateT, evalState, evalStateT, get,
                                    gets, lift, modify)
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as B8
-import Data.Char (isAlphaNum, isAscii, isDigit, toLower)
+import Data.Char (isAlphaNum, isAscii, isDigit)
 import Data.List (isInfixOf, isPrefixOf)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust)
 import qualified Data.Set as Set
-import qualified Data.Text as TS
+import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import Network.HTTP (urlEncode)
 import System.FilePath (takeExtension, takeFileName, makeRelative)
@@ -48,16 +49,13 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..),
                             ObfuscationMethod (NoObfuscation), WrapOption (..),
                             WriterOptions (..))
 import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags',
-                           safeRead, stringify, trim, uniqueIdent, tshow)
+                           stringify, uniqueIdent, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.UUID (getRandomUUID)
 import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
 import Text.Printf (printf)
-import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
-                       add_attrs, lookupAttr, node, onlyElems,
-                       ppElement, showElement, strContent, unode, unqual)
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import Text.Pandoc.XML (escapeStringForXML)
 import Text.DocTemplates (FromContext(lookupContext), Context(..),
                           ToContext(toVal), Val(..))
@@ -69,7 +67,7 @@ newtype Chapter = Chapter [Block]
 data EPUBState = EPUBState {
         stMediaPaths  :: [(FilePath, (FilePath, Maybe Entry))]
       , stMediaNextId :: Int
-      , stEpubSubdir  :: String
+      , stEpubSubdir  :: FilePath
       }
 
 type E m = StateT EPUBState m
@@ -78,62 +76,63 @@ data EPUBMetadata = EPUBMetadata{
     epubIdentifier          :: [Identifier]
   , epubTitle               :: [Title]
   , epubDate                :: [Date]
-  , epubLanguage            :: String
+  , epubLanguage            :: Text
   , epubCreator             :: [Creator]
   , epubContributor         :: [Creator]
-  , epubSubject             :: [String]
-  , epubDescription         :: Maybe String
-  , epubType                :: Maybe String
-  , epubFormat              :: Maybe String
-  , epubPublisher           :: Maybe String
-  , epubSource              :: Maybe String
-  , epubRelation            :: Maybe String
-  , epubCoverage            :: Maybe String
-  , epubRights              :: Maybe String
-  , epubBelongsToCollection :: Maybe String
-  , epubGroupPosition       :: Maybe String
-  , epubCoverImage          :: Maybe String
+  , epubSubject             :: [Text]
+  , epubDescription         :: Maybe Text
+  , epubType                :: Maybe Text
+  , epubFormat              :: Maybe Text
+  , epubPublisher           :: Maybe Text
+  , epubSource              :: Maybe Text
+  , epubRelation            :: Maybe Text
+  , epubCoverage            :: Maybe Text
+  , epubRights              :: Maybe Text
+  , epubBelongsToCollection :: Maybe Text
+  , epubGroupPosition       :: Maybe Text
+  , epubCoverImage          :: Maybe FilePath
   , epubStylesheets         :: [FilePath]
   , epubPageDirection       :: Maybe ProgressionDirection
-  , epubIbooksFields        :: [(String, String)]
-  , epubCalibreFields       :: [(String, String)]
+  , epubIbooksFields        :: [(Text, Text)]
+  , epubCalibreFields       :: [(Text, Text)]
   } deriving Show
 
 data Date = Date{
-    dateText  :: String
-  , dateEvent :: Maybe String
+    dateText  :: Text
+  , dateEvent :: Maybe Text
   } deriving Show
 
 data Creator = Creator{
-    creatorText   :: String
-  , creatorRole   :: Maybe String
-  , creatorFileAs :: Maybe String
+    creatorText   :: Text
+  , creatorRole   :: Maybe Text
+  , creatorFileAs :: Maybe Text
   } deriving Show
 
 data Identifier = Identifier{
-    identifierText   :: String
-  , identifierScheme :: Maybe String
+    identifierText   :: Text
+  , identifierScheme :: Maybe Text
   } deriving Show
 
 data Title = Title{
-    titleText   :: String
-  , titleFileAs :: Maybe String
-  , titleType   :: Maybe String
+    titleText   :: Text
+  , titleFileAs :: Maybe Text
+  , titleType   :: Maybe Text
   } deriving Show
 
 data ProgressionDirection = LTR | RTL deriving Show
 
-dcName :: String -> QName
+dcName :: Text -> QName
 dcName n = QName n Nothing (Just "dc")
 
-dcNode :: Node t => String -> t -> Element
+dcNode :: Node t => Text -> t -> Element
 dcNode = node . dcName
 
-opfName :: String -> QName
+opfName :: Text -> QName
 opfName n = QName n Nothing (Just "opf")
 
-toId :: FilePath -> String
-toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
+toId :: FilePath -> Text
+toId = T.pack .
+       map (\x -> if isAlphaNum x || x == '-' || x == '_'
                      then x
                      else '_') . takeFileName
 
@@ -141,8 +140,8 @@ removeNote :: Inline -> Inline
 removeNote (Note _) = Str ""
 removeNote x        = x
 
-toVal' :: String -> Val TS.Text
-toVal' = toVal . TS.pack
+toVal' :: Text -> Val T.Text
+toVal' = toVal
 
 mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry
 mkEntry path content = do
@@ -172,21 +171,21 @@ getEPUBMetadata opts meta = do
        if null (epubIdentifier m)
           then do
             randomId <- getRandomUUID
-            return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] }
+            return $ m{ epubIdentifier = [Identifier (tshow randomId) Nothing] }
           else return m
   let addLanguage m =
-       if null (epubLanguage m)
+       if T.null (epubLanguage m)
           then case lookupContext "lang" (writerVariables opts) of
-                     Just x  -> return m{ epubLanguage = TS.unpack x }
+                     Just x  -> return m{ epubLanguage = x }
                      Nothing -> do
                        mLang <- lift $ P.lookupEnv "LANG"
                        let localeLang =
                              case mLang of
                                Just lang ->
-                                 TS.map (\c -> if c == '_' then '-' else c) $
-                                 TS.takeWhile (/='.') lang
+                                 T.map (\c -> if c == '_' then '-' else c) $
+                                 T.takeWhile (/='.') lang
                                Nothing -> "en-US"
-                       return m{ epubLanguage = TS.unpack localeLang }
+                       return m{ epubLanguage = localeLang }
           else return m
   let fixDate m =
        if null (epubDate m)
@@ -201,7 +200,7 @@ getEPUBMetadata opts meta = do
           then return m
           else do
             let authors' = map stringify $ docAuthors meta
-            let toAuthor name = Creator{ creatorText = TS.unpack name
+            let toAuthor name = Creator{ creatorText = name
                                        , creatorRole = Just "aut"
                                        , creatorFileAs = Nothing }
             return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
@@ -249,31 +248,31 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
   where getAttr n = lookupAttr (opfName n) attrs
 addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md =
   case getAttr "property" of
-       Just s | "ibooks:" `isPrefixOf` s ->
-                md{ epubIbooksFields = (drop 7 s, strContent e) :
+       Just s | "ibooks:" `T.isPrefixOf` s ->
+                md{ epubIbooksFields = (T.drop 7 s, strContent e) :
                        epubIbooksFields md }
        _ -> case getAttr "name" of
-                 Just s | "calibre:" `isPrefixOf` s ->
+                 Just s | "calibre:" `T.isPrefixOf` s ->
                    md{ epubCalibreFields =
-                         (drop 8 s, fromMaybe "" $ getAttr "content") :
+                         (T.drop 8 s, fromMaybe "" $ getAttr "content") :
                           epubCalibreFields md }
                  _ -> md
   where getAttr n = lookupAttr (unqual n) attrs
 addMetadataFromXML _ md = md
 
-metaValueToString :: MetaValue -> String
-metaValueToString (MetaString s)    = TS.unpack s
-metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils
-metaValueToString (MetaBlocks bs)   = TS.unpack $ stringify bs
+metaValueToString :: MetaValue -> Text
+metaValueToString (MetaString s)    = s
+metaValueToString (MetaInlines ils) = stringify ils
+metaValueToString (MetaBlocks bs)   = stringify bs
 metaValueToString (MetaBool True)   = "true"
 metaValueToString (MetaBool False)  = "false"
 metaValueToString _                 = ""
 
 metaValueToPaths :: MetaValue -> [FilePath]
-metaValueToPaths (MetaList xs) = map metaValueToString xs
-metaValueToPaths x             = [metaValueToString x]
+metaValueToPaths (MetaList xs) = map (T.unpack . metaValueToString) xs
+metaValueToPaths x             = [T.unpack $ metaValueToString x]
 
-getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a]
+getList :: T.Text -> Meta -> (MetaValue -> a) -> [a]
 getList s meta handleMetaValue =
   case lookupMeta s meta of
        Just (MetaList xs) -> map handleMetaValue xs
@@ -297,7 +296,7 @@ getTitle meta = getList "title" meta handleMetaValue
                 , titleType = metaValueToString <$> M.lookup "type" m }
         handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
 
-getCreator :: TS.Text -> Meta -> [Creator]
+getCreator :: T.Text -> Meta -> [Creator]
 getCreator s meta = getList s meta handleMetaValue
   where handleMetaValue (MetaMap m) =
            Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
@@ -305,7 +304,7 @@ getCreator s meta = getList s meta handleMetaValue
                   , creatorRole = metaValueToString <$> M.lookup "role" m }
         handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
 
-getDate :: TS.Text -> Meta -> [Date]
+getDate :: T.Text -> Meta -> [Date]
 getDate s meta = getList s meta handleMetaValue
   where handleMetaValue (MetaMap m) =
            Date{ dateText = fromMaybe "" $
@@ -314,7 +313,7 @@ getDate s meta = getList s meta handleMetaValue
         handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv
                                   , dateEvent = Nothing }
 
-simpleList :: TS.Text -> Meta -> [String]
+simpleList :: T.Text -> Meta -> [Text]
 simpleList s meta =
   case lookupMeta s meta of
        Just (MetaList xs) -> map metaValueToString xs
@@ -339,7 +338,7 @@ metadataFromMeta opts meta = EPUBMetadata{
     , epubCoverage             = coverage
     , epubRights               = rights
     , epubBelongsToCollection  = belongsToCollection
-    , epubGroupPosition        = groupPosition 
+    , epubGroupPosition        = groupPosition
     , epubCoverImage           = coverImage
     , epubStylesheets          = stylesheets
     , epubPageDirection        = pageDirection
@@ -363,31 +362,30 @@ metadataFromMeta opts meta = EPUBMetadata{
         coverage = metaValueToString <$> lookupMeta "coverage" meta
         rights = metaValueToString <$> lookupMeta "rights" meta
         belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta
-        groupPosition = metaValueToString <$> lookupMeta "group-position" meta       
-        coverImage =
-            (TS.unpack <$> lookupContext "epub-cover-image"
-                              (writerVariables opts))
+        groupPosition = metaValueToString <$> lookupMeta "group-position" meta
+        coverImage = T.unpack <$>
+            lookupContext "epub-cover-image" (writerVariables opts)
             `mplus` (metaValueToString <$> lookupMeta "cover-image" meta)
         mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta
         stylesheets = maybe [] metaValueToPaths mCss ++
                       case lookupContext "css" (writerVariables opts) of
-                         Just xs -> map TS.unpack xs
+                         Just xs -> map T.unpack xs
                          Nothing ->
                            case lookupContext "css" (writerVariables opts) of
-                             Just x  -> [TS.unpack x]
+                             Just x  -> [T.unpack x]
                              Nothing -> []
-        pageDirection = case map toLower . metaValueToString <$>
+        pageDirection = case T.toLower . metaValueToString <$>
                              lookupMeta "page-progression-direction" meta of
                               Just "ltr" -> Just LTR
                               Just "rtl" -> Just RTL
                               _          -> Nothing
         ibooksFields = case lookupMeta "ibooks" meta of
                             Just (MetaMap mp)
-                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
+                               -> M.toList $ M.map metaValueToString mp
                             _  -> []
         calibreFields = case lookupMeta "calibre" meta of
                             Just (MetaMap mp)
-                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
+                               -> M.toList $ M.map metaValueToString mp
                             _  -> []
 
 -- | Produce an EPUB2 file from a Pandoc document.
@@ -413,9 +411,11 @@ writeEPUB :: PandocMonad m
 writeEPUB epubVersion opts doc = do
   let epubSubdir = writerEpubSubdirectory opts
   -- sanity check on epubSubdir
-  unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
+  unless (T.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
     throwError $ PandocEpubSubdirectoryError epubSubdir
-  let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir }
+  let initState = EPUBState { stMediaPaths = []
+                            , stMediaNextId = 0
+                            , stEpubSubdir = T.unpack epubSubdir }
   evalStateT (pandocToEPUB epubVersion opts doc) initState
 
 pandocToEPUB :: PandocMonad m
@@ -439,7 +439,7 @@ pandocToEPUB version opts doc = do
                         [] -> case epubTitle metadata of
                                    []    -> "UNTITLED"
                                    (x:_) -> titleText x
-                        x  -> TS.unpack $ stringify x
+                        x  -> stringify x
 
   -- stylesheet
   stylesheets <- case epubStylesheets metadata of
@@ -461,7 +461,8 @@ pandocToEPUB version opts doc = do
                            (ListVal $ map
                              (\e -> toVal' $
                                 (if useprefix then "../" else "") <>
-                                makeRelative epubSubdir (eRelativePath e))
+                                T.pack
+                                 (makeRelative epubSubdir (eRelativePath e)))
                              stylesheetEntries)
                              mempty
 
@@ -490,18 +491,19 @@ pandocToEPUB version opts doc = do
                              case imageSize opts' (B.toStrict imgContent) of
                                Right sz  -> return $ sizeInPixels sz
                                Left err' -> (0, 0) <$ report
-                                 (CouldNotDetermineImageSize (TS.pack img) err')
+                                 (CouldNotDetermineImageSize (T.pack img) err')
                        cpContent <- lift $ writeHtml
                             opts'{ writerVariables =
                                    Context (M.fromList [
                                     ("coverpage", toVal' "true"),
                                     ("pagetitle", toVal $
-                                      escapeStringForXML $ TS.pack plainTitle),
-                                    ("cover-image", toVal' coverImageName),
+                                      escapeStringForXML plainTitle),
+                                    ("cover-image",
+                                       toVal' $ T.pack coverImageName),
                                     ("cover-image-width", toVal' $
-                                       show coverImageWidth),
+                                       tshow coverImageWidth),
                                     ("cover-image-height", toVal' $
-                                       show coverImageHeight)]) <>
+                                       tshow coverImageHeight)]) <>
                                      cssvars True <> vars }
                             (Pandoc meta [])
                        coverEntry <- mkEntry "text/cover.xhtml" cpContent
@@ -517,7 +519,7 @@ pandocToEPUB version opts doc = do
                                         ("titlepage", toVal' "true"),
                                         ("body-type",  toVal' "frontmatter"),
                                         ("pagetitle", toVal $
-                                            escapeStringForXML $ TS.pack plainTitle)])
+                                            escapeStringForXML plainTitle)])
                                       <> cssvars True <> vars }
                                (Pandoc meta [])
   tpEntry <- mkEntry "text/title_page.xhtml" tpContent
@@ -526,7 +528,7 @@ pandocToEPUB version opts doc = do
   let matchingGlob f = do
         xs <- lift $ P.glob f
         when (null xs) $
-          report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files"
+          report $ CouldNotFetchResource (T.pack f) "glob did not match any font files"
         return xs
   let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<<
                         lift (P.readFileLazy f)
@@ -573,13 +575,13 @@ pandocToEPUB version opts doc = do
 
   let chapters' = secsToChapters secs
 
-  let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)]
+  let extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)]
       extractLinkURL' num (Span (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (Link (ident, _, _) _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (Image (ident, _, _) _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (RawInline fmt raw)
         | isHtmlFormat fmt
         = foldr (\tag ->
@@ -587,18 +589,18 @@ pandocToEPUB version opts doc = do
                      TagOpen{} ->
                        case fromAttrib "id" tag of
                          "" -> id
-                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
+                         x  -> ((x, showChapter num <> "#" <> x):)
                      _ -> id)
             [] (parseTags raw)
       extractLinkURL' _ _ = []
 
-  let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)]
+  let extractLinkURL :: Int -> Block -> [(T.Text, T.Text)]
       extractLinkURL num (Div (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (Header _ (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (Table (ident,_,_) _ _ _ _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (RawBlock fmt raw)
         | isHtmlFormat fmt
         = foldr (\tag ->
@@ -606,7 +608,7 @@ pandocToEPUB version opts doc = do
                      TagOpen{} ->
                        case fromAttrib "id" tag of
                          "" -> id
-                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
+                         x  -> ((x, showChapter num <> "#" <> x):)
                      _ -> id)
             [] (parseTags raw)
       extractLinkURL num b = query (extractLinkURL' num) b
@@ -617,7 +619,7 @@ pandocToEPUB version opts doc = do
 
   let fixInternalReferences :: Inline -> Inline
       fixInternalReferences (Link attr lab (src, tit))
-        | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of
+        | Just ('#', xs) <- T.uncons src = case lookup xs reftable of
              Just ys -> Link attr lab (ys, tit)
              Nothing -> Link attr lab (src, tit)
       fixInternalReferences x = x
@@ -630,7 +632,7 @@ pandocToEPUB version opts doc = do
                  chapters'
 
   let chapToEntry num (Chapter bs) =
-        mkEntry ("text/" ++ showChapter num) =<<
+        mkEntry ("text/" ++ T.unpack (showChapter num)) =<<
         writeHtml opts'{ writerVariables =
                             Context (M.fromList
                                      [("body-type", toVal' bodyType),
@@ -677,12 +679,12 @@ pandocToEPUB version opts doc = do
   let chapterNode ent = unode "item" !
                            ([("id", toId $ makeRelative epubSubdir
                                          $ eRelativePath ent),
-                             ("href", makeRelative epubSubdir
+                             ("href", T.pack $ makeRelative epubSubdir
                                       $ eRelativePath ent),
                              ("media-type", "application/xhtml+xml")]
                             ++ case props ent of
                                     [] -> []
-                                    xs -> [("properties", unwords xs)])
+                                    xs -> [("properties", T.unwords xs)])
                         $ ()
 
   let chapterRefNode ent = unode "itemref" !
@@ -691,17 +693,17 @@ pandocToEPUB version opts doc = do
   let pictureNode ent = unode "item" !
                            [("id", toId $ makeRelative epubSubdir
                                         $ eRelativePath ent),
-                            ("href", makeRelative epubSubdir
+                            ("href", T.pack $ makeRelative epubSubdir
                                      $ eRelativePath ent),
                             ("media-type",
-                               maybe "application/octet-stream" TS.unpack
+                               fromMaybe "application/octet-stream"
                                $ mediaTypeOf $ eRelativePath ent)] $ ()
   let fontNode ent = unode "item" !
                            [("id", toId $ makeRelative epubSubdir
                                         $ eRelativePath ent),
-                            ("href", makeRelative epubSubdir
+                            ("href", T.pack $ makeRelative epubSubdir
                                      $ eRelativePath ent),
-                            ("media-type", maybe "" TS.unpack $
+                            ("media-type", fromMaybe "" $
                                   getMimeType $ eRelativePath ent)] $ ()
 
   let tocTitle = maybe plainTitle
@@ -710,7 +712,7 @@ pandocToEPUB version opts doc = do
             (x:_) -> return $ identifierText x  -- use first identifier as UUID
             []    -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null"  -- shouldn't happen
   currentTime <- lift P.getTimestamp
-  let contentsData = UTF8.fromStringLazy $ ppTopElement $
+  let contentsData = UTF8.fromTextLazy $ TL.fromStrict $ ppTopElement $
         unode "package" !
           ([("version", case version of
                              EPUB2 -> "2.0"
@@ -728,7 +730,8 @@ pandocToEPUB version opts doc = do
                                ,("media-type","application/xhtml+xml")] ++
                                [("properties","nav") | epub3 ]) $ ()
              ] ++
-             [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp)
+             [ unode "item" ! [("id","stylesheet" <> tshow n)
+                              , ("href", T.pack fp)
                               ,("media-type","text/css")] $ () |
                              (n :: Int, fp) <- zip [1..] (map
                                (makeRelative epubSubdir . eRelativePath)
@@ -773,7 +776,7 @@ pandocToEPUB version opts doc = do
   let tocLevel = writerTOCDepth opts
 
   let navPointNode :: PandocMonad m
-                   => (Int -> [Inline] -> TS.Text -> [Element] -> Element)
+                   => (Int -> [Inline] -> T.Text -> [Element] -> Element)
                    -> Block -> StateT Int m [Element]
       navPointNode formatter (Div (ident,_,_)
                                 (Header lvl (_,_,kvs) ils : children)) =
@@ -783,7 +786,7 @@ pandocToEPUB version opts doc = do
              n <- get
              modify (+1)
              let num = fromMaybe "" $ lookup "number" kvs
-             let tit = if writerNumberSections opts && not (TS.null num)
+             let tit = if writerNumberSections opts && not (T.null num)
                           then Span ("", ["section-header-number"], [])
                                 [Str num] : Space : ils
                           else ils
@@ -797,21 +800,21 @@ pandocToEPUB version opts doc = do
         concat <$> mapM (navPointNode formatter) bs
       navPointNode _ _ = return []
 
-  let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
+  let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
       navMapFormatter n tit src subs = unode "navPoint" !
-               [("id", "navPoint-" ++ show n)] $
-                  [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit
-                  , unode "content" ! [("src", "text/" <> TS.unpack src)] $ ()
+               [("id", "navPoint-" <> tshow n)] $
+                  [ unode "navLabel" $ unode "text" $ stringify tit
+                  , unode "content" ! [("src", "text/" <> src)] $ ()
                   ] ++ subs
 
   let tpNode = unode "navPoint" !  [("id", "navPoint-0")] $
-                  [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta)
+                  [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
                   , unode "content" ! [("src", "text/title_page.xhtml")]
                   $ () ]
 
   navMap <- lift $ evalStateT
              (concat <$> mapM (navPointNode navMapFormatter) secs) 1
-  let tocData = UTF8.fromStringLazy $ ppTopElement $
+  let tocData = B.fromStrict $ UTF8.fromText $ ppTopElement $
         unode "ncx" ! [("version","2005-1")
                        ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
           [ unode "head" $
@@ -833,11 +836,11 @@ pandocToEPUB version opts doc = do
           ]
   tocEntry <- mkEntry "toc.ncx" tocData
 
-  let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
+  let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
       navXhtmlFormatter n tit src subs = unode "li" !
-                                       [("id", "toc-li-" ++ show n)] $
+                                       [("id", "toc-li-" <> tshow n)] $
                                             (unode "a" !
-                                                [("href", "text/" <> TS.unpack src)]
+                                                [("href", "text/" <> src)]
                                              $ titElements)
                                             : case subs of
                                                  []    -> []
@@ -850,7 +853,7 @@ pandocToEPUB version opts doc = do
                                      , writerVariables =
                                          Context (M.fromList
                                            [("pagetitle", toVal $
-                                             escapeStringForXML $ TS.pack plainTitle)])
+                                             escapeStringForXML plainTitle)])
                                        <> writerVariables opts}
                                  (Pandoc nullMeta
                                    [Plain $ walk clean tit])) of
@@ -865,7 +868,7 @@ pandocToEPUB version opts doc = do
   tocBlocks <- lift $ evalStateT
                  (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1
   let navBlocks = [RawBlock (Format "html")
-                  $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces
+                  $ showElement $ -- prettyprinting introduces bad spaces
                    unode navtag ! ([("epub:type","toc") | epub3] ++
                                    [("id","toc")]) $
                     [ unode "h1" ! [("id","toc-title")] $ tocTitle
@@ -875,21 +878,21 @@ pandocToEPUB version opts doc = do
                                 [ unode "a" ! [("href",
                                                   "text/title_page.xhtml")
                                                ,("epub:type", "titlepage")] $
-                                  ("Title Page" :: String) ] :
+                                  ("Title Page" :: Text) ] :
                               [ unode "li"
                                 [ unode "a" ! [("href", "text/cover.xhtml")
                                               ,("epub:type", "cover")] $
-                                  ("Cover" :: String)] |
+                                  ("Cover" :: Text)] |
                                   isJust (epubCoverImage metadata)
                               ] ++
                               [ unode "li"
                                 [ unode "a" ! [("href", "#toc")
                                               ,("epub:type", "toc")] $
-                                    ("Table of Contents" :: String)
+                                    ("Table of Contents" :: Text)
                                 ] | writerTableOfContents opts
                               ]
                          else []
-  let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $
+  let landmarks = [RawBlock (Format "html") $ ppElement $
                     unode "nav" ! [("epub:type","landmarks")
                                   ,("id","landmarks")
                                   ,("hidden","hidden")] $
@@ -910,22 +913,22 @@ pandocToEPUB version opts doc = do
                         UTF8.fromStringLazy "application/epub+zip"
 
   -- container.xml
-  let containerData = UTF8.fromStringLazy $ ppTopElement $
+  let containerData = B.fromStrict $ UTF8.fromText $ ppTopElement $
        unode "container" ! [("version","1.0")
               ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
          unode "rootfiles" $
            unode "rootfile" ! [("full-path",
                     (if null epubSubdir
                         then ""
-                        else epubSubdir ++ "/") ++ "content.opf")
+                        else T.pack epubSubdir <> "/") <> "content.opf")
                ,("media-type","application/oebps-package+xml")] $ ()
   containerEntry <- mkEntry "META-INF/container.xml" containerData
 
   -- com.apple.ibooks.display-options.xml
-  let apple = UTF8.fromStringLazy $ ppTopElement $
+  let apple = B.fromStrict $ UTF8.fromText $ ppTopElement $
         unode "display_options" $
           unode "platform" ! [("name","*")] $
-            unode "option" ! [("name","specified-fonts")] $ ("true" :: String)
+            unode "option" ! [("name","specified-fonts")] $ ("true" :: Text)
   appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
 
   -- construct archive
@@ -947,7 +950,8 @@ metadataElement version md currentTime =
                   ++ publisherNodes ++ sourceNodes ++ relationNodes
                   ++ coverageNodes ++ rightsNodes ++ coverImageNodes
                   ++ modifiedNodes ++ belongsToCollectionNodes
-        withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
+        withIds base f = concat . zipWith f (map (\x -> base <>
+                                                        T.cons '-' (tshow x))
                          ([1..] :: [Int]))
         identifierNodes = withIds "epub-id" toIdentifierNode $
                           epubIdentifier md
@@ -961,9 +965,9 @@ metadataElement version md currentTime =
                                  (x:_) -> [dcNode "date" ! [("id","epub-date")]
                                             $ dateText x]
         ibooksNodes = map ibooksNode (epubIbooksFields md)
-        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v
+        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" <> k)] $ v
         calibreNodes = map calibreNode (epubCalibreFields md)
-        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k),
+        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" <> k),
                                              ("content", v)] $ ()
         languageNodes = [dcTag "language" $ epubLanguage md]
         creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
@@ -989,12 +993,12 @@ metadataElement version md currentTime =
             maybe []
                 (\belongsToCollection -> (unode "meta" !  [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection )
                 :
-                [unode "meta" !  [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: String) ])
+                [unode "meta" !  [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: Text) ])
                 (epubBelongsToCollection md)++
             maybe []
                 (\groupPosition -> [unode "meta" !  [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ])
                 (epubGroupPosition md)
-        dcTag n s = unode ("dc:" ++ n) s
+        dcTag n s = unode ("dc:" <> n) s
         dcTag' n s = [dcTag n s]
         toIdentifierNode id' (Identifier txt scheme)
           | version == EPUB2 = [dcNode "identifier" !
@@ -1002,7 +1006,7 @@ metadataElement version md currentTime =
               txt]
           | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) :
               maybe [] ((\x -> [unode "meta" !
-                                [ ("refines",'#':id')
+                                [ ("refines","#" <> id')
                                 , ("property","identifier-type")
                                 , ("scheme","onix:codelist5")
                                 ]
@@ -1018,10 +1022,10 @@ metadataElement version md currentTime =
                (creatorRole creator >>= toRelator)) $ creatorText creator]
           | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","file-as")] $ x])
+                   [("refines","#" <> id'),("property","file-as")] $ x])
                    (creatorFileAs creator) ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","role"),
+                   [("refines","#" <> id'),("property","role"),
                      ("scheme","marc:relators")] $ x])
                    (creatorRole creator >>= toRelator)
         toTitleNode id' title
@@ -1033,16 +1037,16 @@ metadataElement version md currentTime =
           | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
               ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","file-as")] $ x])
+                   [("refines","#" <> id'),("property","file-as")] $ x])
                    (titleFileAs title) ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","title-type")] $ x])
+                   [("refines","#" <> id'),("property","title-type")] $ x])
                    (titleType title)
         toDateNode id' date = [dcNode "date" !
              (("id",id') :
                 maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
                  dateText date]
-        schemeToOnix :: String -> String
+        schemeToOnix :: Text -> Text
         schemeToOnix "ISBN-10"              = "02"
         schemeToOnix "GTIN-13"              = "03"
         schemeToOnix "UPC"                  = "04"
@@ -1060,48 +1064,48 @@ metadataElement version md currentTime =
         schemeToOnix "OLCC"                 = "28"
         schemeToOnix _                      = "01"
 
-showDateTimeISO8601 :: UTCTime -> String
-showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
+showDateTimeISO8601 :: UTCTime -> Text
+showDateTimeISO8601 = T.pack . formatTime defaultTimeLocale "%FT%TZ"
 
 transformTag :: PandocMonad m
-             => Tag TS.Text
-             -> E m (Tag TS.Text)
+             => Tag T.Text
+             -> E m (Tag T.Text)
 transformTag tag@(TagOpen name attr)
   | name `elem` ["video", "source", "img", "audio"] &&
     isNothing (lookup "data-external" attr) = do
   let src = fromAttrib "src" tag
   let poster = fromAttrib "poster" tag
-  newsrc <- modifyMediaRef $ TS.unpack src
-  newposter <- modifyMediaRef $ TS.unpack poster
+  newsrc <- modifyMediaRef $ T.unpack src
+  newposter <- modifyMediaRef $ T.unpack poster
   let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
-              [("src", "../" <> newsrc) | not (TS.null newsrc)] ++
-              [("poster", "../" <> newposter) | not (TS.null newposter)]
+              [("src", "../" <> newsrc) | not (T.null newsrc)] ++
+              [("poster", "../" <> newposter) | not (T.null newposter)]
   return $ TagOpen name attr'
 transformTag tag = return tag
 
 modifyMediaRef :: PandocMonad m
                => FilePath
-               -> E m TS.Text
+               -> E m T.Text
 modifyMediaRef "" = return ""
 modifyMediaRef oldsrc = do
   media <- gets stMediaPaths
   case lookup oldsrc media of
-         Just (n,_) -> return $ TS.pack n
+         Just (n,_) -> return $ T.pack n
          Nothing    -> catchError
-           (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc
-               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack
+           (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc
+               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack
                          (("." <>) <$> (mbMime >>= extensionFromMimeType))
                newName <- getMediaNextNewName ext
                let newPath = "media/" ++ newName
                entry <- mkEntry newPath (B.fromChunks . (:[]) $ img)
                modify $ \st -> st{ stMediaPaths =
                             (oldsrc, (newPath, Just entry)):media}
-               return $ TS.pack newPath)
+               return $ T.pack newPath)
            (\e -> do
-                report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e)
-                return $ TS.pack oldsrc)
+                report $ CouldNotFetchResource (T.pack oldsrc) (tshow e)
+                return $ T.pack oldsrc)
 
-getMediaNextNewName :: PandocMonad m => String -> E m String
+getMediaNextNewName :: PandocMonad m => FilePath -> E m FilePath
 getMediaNextNewName ext = do
   nextId <- gets stMediaNextId
   modify $ \st -> st { stMediaNextId = nextId + 1 }
@@ -1128,11 +1132,11 @@ transformInline  :: PandocMonad m
                  -> Inline
                  -> E m Inline
 transformInline _opts (Image attr lab (src,tit)) = do
-    newsrc <- modifyMediaRef $ TS.unpack src
+    newsrc <- modifyMediaRef $ T.unpack src
     return $ Image attr lab ("../" <> newsrc, tit)
 transformInline opts x@(Math t m)
   | WebTeX url <- writerHTMLMathMethod opts = do
-    newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m))
+    newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m))
     let mathclass = if t == DisplayMath then "display" else "inline"
     return $ Span ("",["math",mathclass],[])
                 [Image nullAttr [x] ("../" <> newsrc, "")]
@@ -1143,40 +1147,26 @@ transformInline _opts (RawInline fmt raw)
   return $ RawInline fmt (renderTags' tags')
 transformInline _ x = return x
 
-(!) :: (t -> Element) -> [(String, String)] -> t -> Element
+(!) :: (t -> Element) -> [(Text, Text)] -> t -> Element
 (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
 
--- | Version of 'ppTopElement' that specifies UTF-8 encoding.
-ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
-  -- unEntity removes numeric  entities introduced by ppElement
-  -- (kindlegen seems to choke on these).
-  where unEntity [] = ""
-        unEntity ('&':'#':xs) =
-                   let (ds,ys) = break (==';') xs
-                       rest = drop 1 ys
-                   in  case safeRead (TS.pack $ "'\\" <> ds <> "'") of
-                          Just x  -> x : unEntity rest
-                          Nothing -> '&':'#':unEntity xs
-        unEntity (x:xs) = x : unEntity xs
-
 mediaTypeOf :: FilePath -> Maybe MimeType
 mediaTypeOf x =
   let mediaPrefixes = ["image", "video", "audio"] in
   case getMimeType x of
-    Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y
+    Just y | any (`T.isPrefixOf` y) mediaPrefixes -> Just y
     _      -> Nothing
 
 -- Returns filename for chapter number.
-showChapter :: Int -> String
-showChapter = printf "ch%03d.xhtml"
+showChapter :: Int -> Text
+showChapter = T.pack . printf "ch%03d.xhtml"
 
 -- Add identifiers to any headers without them.
 addIdentifiers :: WriterOptions -> [Block] -> [Block]
 addIdentifiers opts bs = evalState (mapM go bs) Set.empty
  where go (Header n (ident,classes,kvs) ils) = do
          ids <- get
-         let ident' = if TS.null ident
+         let ident' = if T.null ident
                          then uniqueIdent (writerExtensions opts) ils ids
                          else ident
          modify $ Set.insert ident'
@@ -1184,27 +1174,27 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty
        go x = return x
 
 -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
-normalizeDate' :: String -> Maybe String
-normalizeDate' = fmap TS.unpack . go . trim . TS.pack
+normalizeDate' :: Text -> Maybe Text
+normalizeDate' = go . T.strip
   where
     go xs
-      | TS.length xs == 4            -- YYY
-      , TS.all isDigit xs = Just xs
-      | (y, s) <- TS.splitAt 4 xs    -- YYY-MM
-      , Just ('-', m) <- TS.uncons s
-      , TS.length m == 2
-      , TS.all isDigit y && TS.all isDigit m = Just xs
+      | T.length xs == 4            -- YYY
+      , T.all isDigit xs = Just xs
+      | (y, s) <- T.splitAt 4 xs    -- YYY-MM
+      , Just ('-', m) <- T.uncons s
+      , T.length m == 2
+      , T.all isDigit y && T.all isDigit m = Just xs
       | otherwise = normalizeDate xs
 
-toRelator :: String -> Maybe String
+toRelator :: Text -> Maybe Text
 toRelator x
   | x `elem` relators = Just x
-  | otherwise         = lookup (map toLower x) relatorMap
+  | otherwise         = lookup (T.toLower x) relatorMap
 
-relators :: [String]
+relators :: [Text]
 relators = map snd relatorMap
 
-relatorMap :: [(String, String)]
+relatorMap :: [(Text, Text)]
 relatorMap =
            [("abridger", "abr")
            ,("actor", "act")
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 9334d6e9a..3b5d04427 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -25,15 +25,12 @@ import Data.ByteString.Base64 (encode)
 import Data.Char (isAscii, isControl, isSpace)
 import Data.Either (lefts, rights)
 import Data.List (intercalate)
-import Data.Text (Text, pack)
+import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Network.HTTP (urlEncode)
-import Text.XML.Light
-import qualified Text.XML.Light as X
-import qualified Text.XML.Light.Cursor as XC
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light as X
 
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import qualified Text.Pandoc.Class.PandocMonad as P
@@ -44,6 +41,7 @@ import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
 import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
                            makeSections, tshow, stringify)
 import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable)
+import Data.Generics (everywhere, mkT)
 
 -- | Data to be written at the end of the document:
 -- (foot)notes, URLs, references, images.
@@ -88,7 +86,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do
      (imgs,missing) <- get >>= (lift . fetchImages . imagesToFetch)
      let body' = replaceImagesWithAlt missing body
      let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
-     return $ pack $ xml_head ++ showContent fb2_xml ++ "\n"
+     return $ xml_head <> showContent fb2_xml <> "\n"
   where
   xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
   fb2_attrs =
@@ -100,8 +98,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do
 description :: PandocMonad m => Meta -> FBM m Content
 description meta' = do
   let genre = case lookupMetaString "genre" meta' of
-                "" -> el "genre" ("unrecognised" :: String)
-                s  -> el "genre" (T.unpack s)
+                "" -> el "genre" ("unrecognised" :: Text)
+                s  -> el "genre" s
   bt <- booktitle meta'
   let as = authors meta'
   dd <- docdate meta'
@@ -112,7 +110,7 @@ description meta' = do
                Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s]
                Just (MetaString s)        -> [el "lang" $ iso639 s]
                _                          -> []
-             where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639
+             where iso639 = T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639
   let coverimage url = do
         let img = Image nullAttr mempty (url, "")
         im <- insertImage InlineImage img
@@ -124,7 +122,7 @@ description meta' = do
   return $ el "description"
     [ el "title-info" (genre :
                       (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang))
-    , el "document-info" [el "program-used" ("pandoc" :: String)]
+    , el "document-info" [el "program-used" ("pandoc" :: Text)]
     ]
 
 booktitle :: PandocMonad m => Meta -> FBM m [Content]
@@ -137,15 +135,15 @@ authors meta' = cMap author (docAuthors meta')
 
 author :: [Inline] -> [Content]
 author ss =
-  let ws = words . cMap plain $ ss
-      email = el "email" <$> take 1 (filter ('@' `elem`) ws)
-      ws' = filter ('@' `notElem`) ws
+  let ws = T.words $ mconcat $ map plain ss
+      email = el "email" <$> take 1 (filter (T.any (=='@')) ws)
+      ws' = filter (not . T.any (== '@')) ws
       names = case ws' of
                 [nickname] -> [ el "nickname" nickname ]
                 [fname, lname] -> [ el "first-name" fname
                                     , el "last-name" lname ]
                 (fname:rest) -> [ el "first-name" fname
-                                , el "middle-name" (concat . init $ rest)
+                                , el "middle-name" (T.concat . init $ rest)
                                 , el "last-name" (last rest) ]
                 [] -> []
   in  list $ el "author" (names ++ email)
@@ -206,7 +204,7 @@ renderFootnotes = do
          el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
   where
     renderFN (n, idstr, cs) =
-        let fn_texts = el "title" (el "p" (show n)) : cs
+        let fn_texts = el "title" (el "p" (tshow n)) : cs
         in  el "section" ([uattr "id" idstr], fn_texts)
 
 -- | Fetch images and encode them for the FictionBook XML.
@@ -282,7 +280,7 @@ isMimeType s =
  where
    types =  ["text","image","audio","video","application","message","multipart"]
    valid c = isAscii c && not (isControl c) && not (isSpace c) &&
-             c `notElem` ("()<>@,;:\\\"/[]?=" :: String)
+             c `notElem` ("()<>@,;:\\\"/[]?=" :: [Char])
 
 footnoteID :: Int -> Text
 footnoteID i = "n" <> tshow i
@@ -306,7 +304,7 @@ blockToXml (Para [Image atr alt (src,tgt)])
   = insertImage NormalImage (Image atr alt (src,tit))
 blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss
 blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
-                             map (el "p" . el "code" . T.unpack) . T.lines $ s
+                             map (el "p" . el "code") . T.lines $ s
 blockToXml (RawBlock f str) =
   if f == Format "fb2"
     then
@@ -346,11 +344,11 @@ blockToXml (Table _ blkCapt specs thead tbody tfoot) = do
     c <- el "emphasis" <$> cMapM toXml caption
     return [el "table" (hd <> bd), el "p" c]
     where
-      mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content
+      mkrow :: PandocMonad m => Text -> [[Block]] -> [Alignment] -> FBM m Content
       mkrow tag cells aligns' =
         el "tr" <$> mapM (mkcell tag) (zip cells aligns')
       --
-      mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content
+      mkcell :: PandocMonad m => Text -> ([Block], Alignment) -> FBM m Content
       mkcell tag (cell, align) = do
         cblocks <- cMapM blockToXml cell
         return $ el tag ([align_attr align], cblocks)
@@ -424,7 +422,7 @@ toXml (Quoted DoubleQuote ss) = do
   inner <- cMapM toXml ss
   return $ [txt "“"] ++ inner ++ [txt "”"]
 toXml (Cite _ ss) = cMapM toXml ss  -- FIXME: support citation styles
-toXml (Code _ s) = return [el "code" $ T.unpack s]
+toXml (Code _ s) = return [el "code" s]
 toXml Space = return [txt " "]
 toXml SoftBreak = return [txt "\n"]
 toXml LineBreak = return [txt "\n"]
@@ -456,7 +454,7 @@ insertMath immode formula = do
        let imgurl = url <> T.pack (urlEncode $ T.unpack formula)
        let img = Image nullAttr alt (imgurl, "")
        insertImage immode img
-    _ -> return [el "code" $ T.unpack formula]
+    _ -> return [el "code" formula]
 
 insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content]
 insertImage immode (Image _ alt (url,ttl)) = do
@@ -471,31 +469,16 @@ insertImage immode (Image _ alt (url,ttl)) = do
          el "image" $
             [ attr ("l","href") ("#" <> fname)
             , attr ("l","type") (tshow immode)
-            , uattr "alt" (T.pack $ cMap plain alt) ]
+            , uattr "alt" (mconcat $ map plain alt) ]
             ++ ttlattr
 insertImage _ _ = error "unexpected inline instead of image"
 
 replaceImagesWithAlt :: [Text] -> Content -> Content
-replaceImagesWithAlt missingHrefs body =
-  let cur = XC.fromContent body
-      cur' = replaceAll cur
-  in  XC.toTree . XC.root $ cur'
+replaceImagesWithAlt missingHrefs = everywhere (mkT go)
   where
-  --
-    replaceAll :: XC.Cursor -> XC.Cursor
-    replaceAll c =
-        let n = XC.current c
-            c' = if isImage n && isMissing n
-                 then XC.modifyContent replaceNode c
-                 else c
-        in  case XC.nextDF c' of
-              (Just cnext) -> replaceAll cnext
-              Nothing      -> c'  -- end of document
-  --
-    isImage :: Content -> Bool
-    isImage (Elem e) = elName e == uname "image"
-    isImage _        = False
-  --
+    go c = if isMissing c
+              then replaceNode c
+              else c
     isMissing (Elem img@Element{}) =
         let imgAttrs = elAttribs img
             badAttrs = map (attr ("l","href")) missingHrefs
@@ -505,18 +488,18 @@ replaceImagesWithAlt missingHrefs body =
     replaceNode :: Content -> Content
     replaceNode n@(Elem img@Element{}) =
         let attrs = elAttribs img
-            alt = getAttrVal attrs (uname "alt")
+            alt = getAttrVal attrs (unqual "alt")
             imtype = getAttrVal attrs (qname "l" "type")
         in case (alt, imtype) of
              (Just alt', Just imtype') ->
-                 if imtype' == show NormalImage
+                 if imtype' == tshow NormalImage
                  then el "p" alt'
-                 else txt $ T.pack alt'
-             (Just alt', Nothing) -> txt $ T.pack alt'  -- no type attribute
+                 else txt alt'
+             (Just alt', Nothing) -> txt alt'  -- no type attribute
              _ -> n   -- don't replace if alt text is not found
     replaceNode n = n
   --
-    getAttrVal :: [X.Attr] -> QName -> Maybe String
+    getAttrVal :: [X.Attr] -> QName -> Maybe Text
     getAttrVal attrs name =
         case filter ((name ==) . attrKey) attrs of
            (a:_) -> Just (attrVal a)
@@ -524,7 +507,7 @@ replaceImagesWithAlt missingHrefs body =
 
 
 -- | Wrap all inlines with an XML tag (given its unqualified name).
-wrap :: PandocMonad m => String -> [Inline] -> FBM m Content
+wrap :: PandocMonad m => Text -> [Inline] -> FBM m Content
 wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
 
 -- " Create a singleton list.
@@ -532,31 +515,31 @@ list :: a -> [a]
 list = (:[])
 
 -- | Convert an 'Inline' to plaintext.
-plain :: Inline -> String
-plain (Str s)               = T.unpack s
-plain (Emph ss)             = cMap plain ss
-plain (Underline ss)        = cMap plain ss
-plain (Span _ ss)           = cMap plain ss
-plain (Strong ss)           = cMap plain ss
-plain (Strikeout ss)        = cMap plain ss
-plain (Superscript ss)      = cMap plain ss
-plain (Subscript ss)        = cMap plain ss
-plain (SmallCaps ss)        = cMap plain ss
-plain (Quoted _ ss)         = cMap plain ss
-plain (Cite _ ss)           = cMap plain ss  -- FIXME
-plain (Code _ s)            = T.unpack s
+plain :: Inline -> Text
+plain (Str s)               = s
+plain (Emph ss)             = mconcat $ map plain ss
+plain (Underline ss)        = mconcat $ map plain ss
+plain (Span _ ss)           = mconcat $ map plain ss
+plain (Strong ss)           = mconcat $ map plain ss
+plain (Strikeout ss)        = mconcat $ map plain ss
+plain (Superscript ss)      = mconcat $ map plain ss
+plain (Subscript ss)        = mconcat $ map plain ss
+plain (SmallCaps ss)        = mconcat $ map plain ss
+plain (Quoted _ ss)         = mconcat $ map plain ss
+plain (Cite _ ss)           = mconcat $ map plain ss  -- FIXME
+plain (Code _ s)            = s
 plain Space                 = " "
 plain SoftBreak             = " "
 plain LineBreak             = "\n"
-plain (Math _ s)            = T.unpack s
+plain (Math _ s)            = s
 plain (RawInline _ _)       = ""
-plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"])
-plain (Image _ alt _)       = cMap plain alt
+plain (Link _ text (url,_)) = mconcat (map plain text ++ [" <", url, ">"])
+plain (Image _ alt _)       = mconcat $ map plain alt
 plain (Note _)              = ""  -- FIXME
 
 -- | Create an XML element.
 el :: (Node t)
-   => String   -- ^ unqualified element name
+   => Text     -- ^ unqualified element name
    -> t        -- ^ node contents
    -> Content  -- ^ XML content
 el name cs = Elem $ unode name cs
@@ -569,22 +552,18 @@ spaceBeforeAfter cs =
 
 -- | Create a plain-text XML content.
 txt :: Text -> Content
-txt s = Text $ CData CDataText (T.unpack s) Nothing
+txt s = Text $ CData CDataText s Nothing
 
 -- | Create an XML attribute with an unqualified name.
-uattr :: String -> Text -> Text.XML.Light.Attr
-uattr name = Attr (uname name) . T.unpack
+uattr :: Text -> Text -> X.Attr
+uattr name = Attr (unqual name)
 
 -- | Create an XML attribute with a qualified name from given namespace.
-attr :: (String, String) -> Text -> Text.XML.Light.Attr
-attr (ns, name) = Attr (qname ns name) . T.unpack
-
--- | Unqualified name
-uname :: String -> QName
-uname name = QName name Nothing Nothing
+attr :: (Text, Text) -> Text -> X.Attr
+attr (ns, name) = Attr (qname ns name)
 
 -- | Qualified name
-qname :: String -> String -> QName
+qname :: Text -> Text -> QName
 qname ns name = QName name Nothing (Just ns)
 
 -- | Abbreviation for 'concatMap'.
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 06369b4db..101b236aa 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 import Text.TeXMath
-import Text.XML.Light
+import qualified Text.XML.Light as XL
 
 newtype ODTState = ODTState { stEntries :: [Entry]
                          }
@@ -181,18 +181,20 @@ updateStyleWithLang (Just lang) arch = do
                                         PandocXMLError "styles.xml" msg
                                     Right d -> return $
                                       toEntry "styles.xml" epochtime
-                                      ( fromStringLazy
+                                      ( fromTextLazy
+                                      . TL.fromStrict
                                       . ppTopElement
                                       . addLang lang $ d )
                             else return e) (zEntries arch)
   return arch{ zEntries = entries }
 
+-- TODO FIXME avoid this generic traversal!
 addLang :: Lang -> Element -> Element
 addLang lang = everywhere' (mkT updateLangAttr)
     where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
-                           = Attr n (T.unpack $ langLanguage lang)
+                           = Attr n (langLanguage lang)
           updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
-                           = Attr n (T.unpack $ langRegion lang)
+                           = Attr n (langRegion lang)
           updateLangAttr x = x
 
 -- | transform both Image and Math elements
@@ -238,8 +240,8 @@ transformPicMath _ (Math t math) = do
   case writeMathML dt <$> readTeX math of
        Left  _ -> return $ Math t math
        Right r -> do
-         let conf = useShortEmptyTags (const False) defaultConfigPP
-         let mathml = ppcTopElement conf r
+         let conf = XL.useShortEmptyTags (const False) XL.defaultConfigPP
+         let mathml = XL.ppcTopElement conf r
          epochtime <- floor `fmap` lift P.getPOSIXTime
          let dirname = "Formula-" ++ show (length entries) ++ "/"
          let fname = dirname ++ "content.xml"
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index 8f60e70d5..0533d6c12 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -29,33 +29,32 @@ import Control.Monad.Except (throwError)
 import Text.Pandoc.Error
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Char8 as BL8
 import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
+import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light as XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
-mknode :: Node t => String -> [(String,String)] -> t -> Element
+mknode :: Node t => Text -> [(Text,Text)] -> t -> Element
 mknode s attrs =
   add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) .  node (nodename s)
 
-mktnode :: String -> [(String,String)] -> T.Text -> Element
-mktnode s attrs = mknode s attrs . T.unpack
+mktnode :: Text -> [(Text,Text)] -> T.Text -> Element
+mktnode s attrs = mknode s attrs
 
-nodename :: String -> QName
+nodename :: Text -> QName
 nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
- where (name, prefix) = case break (==':') s of
-                             (xs,[])    -> (xs, Nothing)
-                             (ys, _:zs) -> (zs, Just ys)
+ where (name, prefix) = case T.break (==':') s of
+                          (xs,ys) -> case T.uncons ys of
+                                       Nothing     -> (xs, Nothing)
+                                       Just (_,zs) -> (zs, Just xs)
 
 toLazy :: B.ByteString -> BL.ByteString
 toLazy = BL.fromChunks . (:[])
 
 renderXml :: Element -> BL.ByteString
-renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
-  UTF8.fromStringLazy (showElement elt)
+renderXml elt = BL.fromStrict (UTF8.fromText (showTopElement elt))
 
 parseXml :: PandocMonad m => Archive -> Archive -> String -> m Element
 parseXml refArchive distArchive relpath =
@@ -70,25 +69,25 @@ parseXml refArchive distArchive relpath =
 
 -- Copied from Util
 
-attrToNSPair :: XML.Attr -> Maybe (String, String)
-attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair :: Attr -> Maybe (Text, Text)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
 attrToNSPair _                                     = Nothing
 
 
 elemToNameSpaces :: Element -> NameSpaces
 elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
 
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
   let ns' = ns ++ elemToNameSpaces element
   in qName (elName element) == name &&
      qURI (elName element) == lookup prefix ns'
 
-type NameSpaces = [(String, String)]
+type NameSpaces = [(Text, Text)]
 
 -- | Scales the image to fit the page
 -- sizes are passed in emu
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 0a7060895..5caeb0753 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -20,16 +20,16 @@ import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader
 import Control.Monad.State
 import Codec.Archive.Zip
-import Data.Char (toUpper)
 import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse)
 import Data.Default
+import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Read
 import Data.Time (formatTime, defaultTimeLocale)
 import Data.Time.Clock (UTCTime)
 import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
 import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light as XML
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -48,6 +48,7 @@ import Text.DocTemplates (FromContext(lookupContext))
 import Text.TeXMath
 import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
+import Text.Pandoc.Shared (tshow)
 import Skylighting (fromColor)
 
 -- |The 'EMU' type is used to specify sizes in English Metric Units.
@@ -84,10 +85,13 @@ getPresentationSize refArchive distArchive = do
   sldSize <- findChild (elemName ns "p" "sldSz") presElement
   cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
   cyS <- findAttr (QName "cy" Nothing Nothing) sldSize
-  (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String)
-  (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String)
+  cx <- readTextAsInteger cxS
+  cy <- readTextAsInteger cyS
   return (cx `div` 12700, cy `div` 12700)
 
+readTextAsInteger :: Text -> Maybe Integer
+readTextAsInteger = either (const Nothing) (Just . fst) . Data.Text.Read.decimal
+
 data WriterEnv = WriterEnv { envRefArchive :: Archive
                            , envDistArchive :: Archive
                            , envUTCTime :: UTCTime
@@ -161,9 +165,6 @@ runP env st p = evalStateT (runReaderT p env) st
 
 --------------------------------------------------------------------
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText n = fmap T.pack . findAttr n
-
 monospaceFont :: Monad m => P m T.Text
 monospaceFont = do
   vars <- writerVariables <$> asks envOpts
@@ -171,10 +172,9 @@ monospaceFont = do
     Just s -> return s
     Nothing -> return "Courier"
 
--- Kept as string for XML.Light
-fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)]
+fontSizeAttributes :: Monad m => RunProps -> P m [(Text, Text)]
 fontSizeAttributes RunProps { rPropForceSize = Just sz } =
-  return [("sz", show $ sz * 100)]
+  return [("sz", tshow $ sz * 100)]
 fontSizeAttributes _ = return []
 
 copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive
@@ -365,7 +365,7 @@ shapeHasId :: NameSpaces -> T.Text -> Element -> Bool
 shapeHasId ns ident element
   | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element
   , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr
-  , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr =
+  , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr =
       nm == ident
   | otherwise = False
 
@@ -396,10 +396,10 @@ getShapeDimensions ns element
       ext <- findChild (elemName ns "a" "ext") xfrm
       cxS <- findAttr (QName "cx" Nothing Nothing) ext
       cyS <- findAttr (QName "cy" Nothing Nothing) ext
-      (x, _) <- listToMaybe $ reads xS
-      (y, _) <- listToMaybe $ reads yS
-      (cx, _) <- listToMaybe $ reads cxS
-      (cy, _) <- listToMaybe $ reads cyS
+      x <- readTextAsInteger xS
+      y <- readTextAsInteger yS
+      cx <- readTextAsInteger cxS
+      cy <- readTextAsInteger cyS
       return ((x `div` 12700, y `div` 12700),
               (cx `div` 12700, cy `div` 12700))
   | otherwise = Nothing
@@ -430,7 +430,7 @@ getContentShapeSize ns layout master
         Nothing -> do let mbSz =
                             findChild (elemName ns "p" "nvSpPr") sp >>=
                             findChild (elemName ns "p" "cNvPr") >>=
-                            findAttrText (QName "id" Nothing Nothing) >>=
+                            findAttr (QName "id" Nothing Nothing) >>=
                             flip getMasterShapeDimensionsById master
                       case mbSz of
                         Just sz' -> return sz'
@@ -450,8 +450,8 @@ buildSpTree ns spTreeElem newShapes =
         fn _        = True
 
 replaceNamedChildren :: NameSpaces
-                     -> String
-                     -> String
+                     -> Text
+                     -> Text
                      -> [Element]
                      -> Element
                      -> Element
@@ -654,10 +654,10 @@ createCaption contentShapeDimensions paraElements = do
                        ]
                      , mknode "p:spPr" []
                        [ mknode "a:xfrm" []
-                         [ mknode "a:off" [("x", show $ 12700 * x),
-                                           ("y", show $ 12700 * (y + cy - captionHeight))] ()
-                         , mknode "a:ext" [("cx", show $ 12700 * cx),
-                                           ("cy", show $ 12700 * captionHeight)] ()
+                         [ mknode "a:off" [("x", tshow $ 12700 * x),
+                                           ("y", tshow $ 12700 * (y + cy - captionHeight))] ()
+                         , mknode "a:ext" [("cx", tshow $ 12700 * cx),
+                                           ("cy", tshow $ 12700 * captionHeight)] ()
                          ]
                        , mknode "a:prstGeom" [("prst", "rect")]
                          [ mknode "a:avLst" [] ()
@@ -706,11 +706,13 @@ makePicElements layout picProps mInfo alt = do
                                      ,("noChangeAspect","1")] ()
   -- cNvPr will contain the link information so we do that separately,
   -- and register the link if necessary.
-  let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")]
+  let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo),
+                   ("id","0"),
+                   ("name","Picture 1")]
   cNvPr <- case picPropLink picProps of
     Just link -> do idNum <- registerLink link
                     return $ mknode "p:cNvPr" cNvPrAttr $
-                      mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] ()
+                      mknode "a:hlinkClick" [("r:id", "rId" <> tshow idNum)] ()
     Nothing   -> return $ mknode "p:cNvPr" cNvPrAttr ()
   let nvPicPr  = mknode "p:nvPicPr" []
                  [ cNvPr
@@ -718,13 +720,13 @@ makePicElements layout picProps mInfo alt = do
                  , mknode "p:nvPr" [] ()]
   let blipFill = mknode "p:blipFill" []
                  [ mknode "a:blip" [("r:embed", "rId" <>
-          show (mInfoLocalId mInfo))] ()
+                     tshow (mInfoLocalId mInfo))] ()
                  , mknode "a:stretch" [] $
                    mknode "a:fillRect" [] () ]
   let xfrm =    mknode "a:xfrm" []
-                [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] ()
-                , mknode "a:ext" [("cx",show dimX')
-                                 ,("cy",show dimY')] () ]
+                [ mknode "a:off" [("x", tshow xoff'), ("y", tshow yoff')] ()
+                , mknode "a:ext" [("cx", tshow dimX')
+                                 ,("cy", tshow dimY')] () ]
   let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
                  mknode "a:avLst" [] ()
   let ln =      mknode "a:ln" [("w","9525")]
@@ -763,7 +765,7 @@ paraElemToElements (Run rpr s) = do
             Just DoubleStrike -> [("strike", "dblStrike")]
             Nothing -> []) <>
         (case rBaseline rpr of
-            Just n -> [("baseline", show n)]
+            Just n -> [("baseline", tshow n)]
             Nothing -> []) <>
         (case rCap rpr of
             Just NoCapitals -> [("cap", "none")]
@@ -780,43 +782,44 @@ paraElemToElements (Run rpr s) = do
                    return $ case link of
                      InternalTarget _ ->
                        let linkAttrs =
-                             [ ("r:id", "rId" <> show idNum)
+                             [ ("r:id", "rId" <> tshow idNum)
                              , ("action", "ppaction://hlinksldjump")
                              ]
                        in [mknode "a:hlinkClick" linkAttrs ()]
                      -- external
                      ExternalTarget _ ->
                        let linkAttrs =
-                             [ ("r:id", "rId" <> show idNum)
+                             [ ("r:id", "rId" <> tshow idNum)
                              ]
                        in [mknode "a:hlinkClick" linkAttrs ()]
                  Nothing -> return []
   let colorContents = case rSolidFill rpr of
                         Just color ->
                           case fromColor color of
-                            '#':hx ->  [mknode "a:solidFill" []
-                                        [mknode "a:srgbClr" [("val", map toUpper hx)] ()]
-                                       ]
+                            '#':hx ->
+                              [mknode "a:solidFill" []
+                                [mknode "a:srgbClr"
+                                  [("val", T.toUpper $ T.pack hx)] ()]]
                             _ -> []
                         Nothing -> []
   codeFont <- monospaceFont
   let codeContents =
-        [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr]
+        [mknode "a:latin" [("typeface", codeFont)] () | rPropCode rpr]
   let propContents = linkProps <> colorContents <> codeContents
   return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents
-                                 , mknode "a:t" [] $ T.unpack s
+                                 , mknode "a:t" [] s
                                  ]]
 paraElemToElements (MathElem mathType texStr) = do
   isInSpkrNotes <- asks envInSpeakerNotes
   if isInSpkrNotes
     then paraElemToElements $ Run def $ unTeXString texStr
     else do res <- convertMath writeOMML mathType (unTeXString texStr)
-            case res of
+            case fromXLElement <$> res of
               Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r]
               Left (Str s) -> paraElemToElements (Run def s)
               Left _       -> throwError $ PandocShouldNeverHappenError "non-string math fallback"
 paraElemToElements (RawOOXMLParaElem str) = return
-  [Text (CData CDataRaw (T.unpack str) Nothing)]
+  [Text (CData CDataRaw str Nothing)]
 
 
 -- This is a bit of a kludge -- really requires adding an option to
@@ -824,9 +827,10 @@ paraElemToElements (RawOOXMLParaElem str) = return
 -- step at a time.
 addMathInfo :: Element -> Element
 addMathInfo element =
-  let mathspace = Attr { attrKey = QName "m" Nothing (Just "xmlns")
-                       , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
-                       }
+  let mathspace =
+        Attr { attrKey = QName "m" Nothing (Just "xmlns")
+             , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
+             }
   in add_attr mathspace element
 
 -- We look through the element to see if it contains an a14:m
@@ -849,13 +853,13 @@ surroundWithMathAlternate element =
 paragraphToElement :: PandocMonad m => Paragraph -> P m Element
 paragraphToElement par = do
   let
-    attrs = [("lvl", show $ pPropLevel $ paraProps par)] <>
+    attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <>
             (case pPropMarginLeft (paraProps par) of
-               Just px -> [("marL", show $ pixelsToEmu px)]
+               Just px -> [("marL", tshow $ pixelsToEmu px)]
                Nothing -> []
             ) <>
             (case pPropIndent (paraProps par) of
-               Just px -> [("indent", show $ pixelsToEmu px)]
+               Just px -> [("indent", tshow $ pixelsToEmu px)]
                Nothing -> []
             ) <>
             (case pPropAlign (paraProps par) of
@@ -867,7 +871,7 @@ paragraphToElement par = do
     props = [] <>
             (case pPropSpaceBefore $ paraProps par of
                Just px -> [mknode "a:spcBef" [] [
-                              mknode "a:spcPts" [("val", show $ 100 * px)] ()
+                              mknode "a:spcPts" [("val", tshow $ 100 * px)] ()
                               ]
                           ]
                Nothing -> []
@@ -910,7 +914,7 @@ shapeToElements layout (Pic picProps fp alt) = do
 shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$>
   graphicFrameToElements layout tbls cptn
 shapeToElements _ (RawOOXMLShape str) = return
-  [Text (CData CDataRaw (T.unpack str) Nothing)]
+  [Text (CData CDataRaw str Nothing)]
 shapeToElements layout shp = do
   element <- shapeToElement layout shp
   return [Elem element]
@@ -942,8 +946,10 @@ graphicFrameToElements layout tbls caption = do
             [mknode "p:ph" [("idx", "1")] ()]
           ]
         , mknode "p:xfrm" []
-          [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] ()
-          , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] ()
+          [ mknode "a:off" [("x", tshow $ 12700 * x),
+                            ("y", tshow $ 12700 * y)] ()
+          , mknode "a:ext" [("cx", tshow $ 12700 * cx),
+                            ("cy", tshow $ 12700 * cy)] ()
           ]
         ] <> elements
 
@@ -957,7 +963,7 @@ getDefaultTableStyle = do
   refArchive <- asks envRefArchive
   distArchive <- asks envDistArchive
   tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml"
-  return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst
+  return $ findAttr (QName "def" Nothing Nothing) tblStyleLst
 
 graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element
 graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
@@ -995,7 +1001,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
   let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells
 
   let mkgridcol w = mknode "a:gridCol"
-                       [("w", show ((12700 * w) :: Integer))] ()
+                       [("w", tshow ((12700 * w) :: Integer))] ()
   let hasHeader = not (all null hdrCells)
 
   mbDefTblStyle <- getDefaultTableStyle
@@ -1004,7 +1010,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
                  , ("bandRow", if tblPrBandRow tblPr then "1" else "0")
                  ] (case mbDefTblStyle of
                       Nothing -> []
-                      Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty])
+                      Just sty -> [mknode "a:tableStyleId" [] sty])
 
   return $ mknode "a:graphic" []
     [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")]
@@ -1037,7 +1043,7 @@ findPHType ns spElem phType
         -- if it's a named PHType, we want to check that the attribute
         -- value matches.
         Just phElem | (PHType tp) <- phType ->
-                        case findAttrText (QName "type" Nothing Nothing) phElem of
+                        case findAttr (QName "type" Nothing Nothing) phElem of
                           Just tp' -> tp == tp'
                           Nothing -> False
         -- if it's an ObjType, we want to check that there is NO
@@ -1204,7 +1210,7 @@ getSlideNumberFieldId notesMaster
   , Just txBody <- findChild (elemName ns "p" "txBody") sp
   , Just p <- findChild (elemName ns "a" "p") txBody
   , Just fld <- findChild (elemName ns "a" "fld") p
-  , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld =
+  , Just fldId <- findAttr (QName "id" Nothing Nothing) fld =
       return fldId
   | otherwise = throwError $
                 PandocSomeError
@@ -1283,11 +1289,11 @@ speakerNotesSlideNumber pgNum fieldId =
     [ mknode "a:bodyPr" [] ()
     , mknode "a:lstStyle" [] ()
     , mknode "a:p" []
-      [ mknode "a:fld" [ ("id", T.unpack fieldId)
+      [ mknode "a:fld" [ ("id", fieldId)
                        , ("type", "slidenum")
                        ]
         [ mknode "a:rPr" [("lang", "en-US")] ()
-        , mknode "a:t" [] (show pgNum)
+        , mknode "a:t" [] (tshow pgNum)
         ]
       , mknode "a:endParaRPr" [("lang", "en-US")] ()
       ]
@@ -1339,7 +1345,7 @@ getSlideIdNum sldId = do
     Just n -> return n
     Nothing -> throwError $
                PandocShouldNeverHappenError $
-               "Slide Id " <> T.pack (show sldId) <> " not found."
+               "Slide Id " <> tshow sldId <> " not found."
 
 slideNum :: PandocMonad m => Slide -> P m Int
 slideNum slide = getSlideIdNum $ slideId slide
@@ -1356,7 +1362,7 @@ slideToRelId :: PandocMonad m => Slide -> P m T.Text
 slideToRelId slide = do
   n <- slideNum slide
   offset <- asks envSlideIdOffset
-  return $ "rId" <> T.pack (show $ n + offset)
+  return $ "rId" <> tshow (n + offset)
 
 
 data Relationship = Relationship { relId :: Int
@@ -1368,13 +1374,11 @@ elementToRel :: Element -> Maybe Relationship
 elementToRel element
   | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing =
       do rId <- findAttr (QName "Id" Nothing Nothing) element
-         numStr <- stripPrefix "rId" rId
-         num <- case reads numStr :: [(Int, String)] of
-           (n, _) : _ -> Just n
-           []         -> Nothing
-         type' <- findAttrText (QName "Type" Nothing Nothing) element
+         numStr <- T.stripPrefix "rId" rId
+         num <- fromIntegral <$> readTextAsInteger numStr
+         type' <- findAttr (QName "Type" Nothing Nothing) element
          target <- findAttr (QName "Target" Nothing Nothing) element
-         return $ Relationship num type' target
+         return $ Relationship num type' (T.unpack target)
   | otherwise = Nothing
 
 slideToPresRel :: PandocMonad m => Slide -> P m Relationship
@@ -1463,10 +1467,9 @@ topLevelRelsEntry :: PandocMonad m => P m Entry
 topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels
 
 relToElement :: Relationship -> Element
-relToElement rel = mknode "Relationship" [ ("Id", "rId" <>
-    show (relId rel))
-                                         , ("Type", T.unpack $ relType rel)
-                                         , ("Target", relTarget rel) ] ()
+relToElement rel = mknode "Relationship" [ ("Id", "rId" <> tshow (relId rel))
+                                         , ("Type", relType rel)
+                                         , ("Target", T.pack $ relTarget rel) ] ()
 
 relsToElement :: [Relationship] -> Element
 relsToElement rels = mknode "Relationships"
@@ -1501,7 +1504,8 @@ slideToSpeakerNotesEntry slide = do
       Just element | Just notesIdNum <- mbNotesIdNum ->
                        Just <$>
                        elemToEntry
-                       ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml")
+                       ("ppt/notesSlides/notesSlide" <> show notesIdNum <>
+                        ".xml")
                        element
       _ -> return Nothing
 
@@ -1514,7 +1518,7 @@ slideToSpeakerNotesRelElement slide@Slide{} = do
     [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
     [ mknode "Relationship" [ ("Id", "rId2")
                             , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
-                            , ("Target", "../slides/slide" <> show idNum <> ".xml")
+                            , ("Target", "../slides/slide" <> tshow idNum <> ".xml")
                             ] ()
     , mknode "Relationship" [ ("Id", "rId1")
                             , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster")
@@ -1547,15 +1551,15 @@ linkRelElement :: PandocMonad m => (Int, LinkTarget) -> P m Element
 linkRelElement (rIdNum, InternalTarget targetId) = do
   targetIdNum <- getSlideIdNum targetId
   return $
-    mknode "Relationship" [ ("Id", "rId" <> show rIdNum)
+    mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum)
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
-                          , ("Target", "slide" <> show targetIdNum <> ".xml")
+                          , ("Target", "slide" <> tshow targetIdNum <> ".xml")
                           ] ()
 linkRelElement (rIdNum, ExternalTarget (url, _)) =
   return $
-    mknode "Relationship" [ ("Id", "rId" <> show rIdNum)
+    mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum)
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink")
-                          , ("Target", T.unpack url)
+                          , ("Target", url)
                           , ("TargetMode", "External")
                           ] ()
 
@@ -1567,10 +1571,10 @@ mediaRelElement mInfo =
   let ext = fromMaybe "" (mInfoExt mInfo)
   in
     mknode "Relationship" [ ("Id", "rId" <>
-      show (mInfoLocalId mInfo))
+      tshow (mInfoLocalId mInfo))
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image")
                           , ("Target", "../media/image" <>
-      show (mInfoGlobalId mInfo) <> T.unpack ext)
+      tshow (mInfoGlobalId mInfo) <> ext)
                           ] ()
 
 speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element)
@@ -1580,7 +1584,7 @@ speakerNotesSlideRelElement slide = do
   return $ case M.lookup idNum mp of
     Nothing -> Nothing
     Just n ->
-      let target = "../notesSlides/notesSlide" <> show n <> ".xml"
+      let target = "../notesSlides/notesSlide" <> tshow n <> ".xml"
       in Just $
          mknode "Relationship" [ ("Id", "rId2")
                                , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide")
@@ -1619,9 +1623,9 @@ slideToSlideRelElement slide = do
 slideToSldIdElement :: PandocMonad m => Slide -> P m Element
 slideToSldIdElement slide = do
   n <- slideNum slide
-  let id' = show $ n + 255
+  let id' = tshow $ n + 255
   rId <- slideToRelId slide
-  return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] ()
+  return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] ()
 
 presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element
 presentationToSldIdLst (Presentation _ slides) = do
@@ -1646,7 +1650,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do
       notesMasterElem =  mknode "p:notesMasterIdLst" []
                          [ mknode
                            "p:NotesMasterId"
-                           [("r:id", "rId" <> show notesMasterRId)]
+                           [("r:id", "rId" <> tshow notesMasterRId)]
                            ()
                          ]
 
@@ -1702,17 +1706,17 @@ docPropsElement docProps = do
     ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
     ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
     $
-      mknode "dc:title" [] (maybe "" T.unpack $ dcTitle docProps)
+      mknode "dc:title" [] (fromMaybe "" $ dcTitle docProps)
     :
-      mknode "dc:creator" [] (maybe "" T.unpack $ dcCreator docProps)
+      mknode "dc:creator" [] (fromMaybe "" $ dcCreator docProps)
     :
-      mknode "cp:keywords" [] (T.unpack keywords)
-    : ( [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps | isJust (dcSubject docProps)])
-    <> ( [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps | isJust (dcDescription docProps)])
-    <> ( [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps | isJust (cpCategory docProps)])
+      mknode "cp:keywords" [] keywords
+    : ( [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps | isJust (dcSubject docProps)])
+    <> ( [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps | isJust (dcDescription docProps)])
+    <> ( [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps | isJust (cpCategory docProps)])
     <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
-             , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
-             ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+              , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
+              ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime)
 
 docPropsToEntry :: PandocMonad m => DocProps -> P m Entry
 docPropsToEntry docProps = docPropsElement docProps >>=
@@ -1723,8 +1727,8 @@ docCustomPropsElement :: PandocMonad m => DocProps -> P m Element
 docCustomPropsElement docProps = do
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
-         ,("pid", show pid)
-         ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v)
+         ,("pid", tshow pid)
+         ,("name", k)] $ mknode "vt:lpwstr" [] v
   return $ mknode "Properties"
           [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties")
           ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes")
@@ -1743,7 +1747,7 @@ viewPropsElement = do
   distArchive <- asks envDistArchive
   viewPrElement <- parseXml refArchive distArchive "ppt/viewProps.xml"
   -- remove  "lastView" if it exists:
-  let notLastView :: Text.XML.Light.Attr -> Bool
+  let notLastView :: XML.Attr -> Bool
       notLastView attr =
           qName (attrKey attr) /= "lastView"
   return $
@@ -1755,15 +1759,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml"
 defaultContentTypeToElem :: DefaultContentType -> Element
 defaultContentTypeToElem dct =
   mknode "Default"
-  [("Extension", T.unpack $ defContentTypesExt dct),
-    ("ContentType", T.unpack $ defContentTypesType dct)]
+  [("Extension", defContentTypesExt dct),
+    ("ContentType", defContentTypesType dct)]
   ()
 
 overrideContentTypeToElem :: OverrideContentType -> Element
 overrideContentTypeToElem oct =
   mknode "Override"
-  [("PartName", overrideContentTypesPart oct),
-    ("ContentType", T.unpack $ overrideContentTypesType oct)]
+  [("PartName", T.pack $ overrideContentTypesPart oct),
+   ("ContentType", overrideContentTypesType oct)]
   ()
 
 contentTypesToElement :: ContentTypes -> Element
@@ -1821,7 +1825,8 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath]
 getSpeakerNotesFilePaths = do
   mp <- asks envSpeakerNotesIdMap
   let notesIdNums = M.elems mp
-  return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums
+  return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml")
+               notesIdNums
 
 presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes
 presentationToContentTypes p@(Presentation _ slides) = do
@@ -1885,11 +1890,11 @@ getContentType fp
   | otherwise = Nothing
 
 -- Kept as String for XML.Light
-autoNumAttrs :: ListAttributes -> [(String, String)]
+autoNumAttrs :: ListAttributes -> [(Text, Text)]
 autoNumAttrs (startNum, numStyle, numDelim) =
   numAttr <> typeAttr
   where
-    numAttr = [("startAt", show startNum) | startNum /= 1]
+    numAttr = [("startAt", tshow startNum) | startNum /= 1]
     typeAttr = [("type", typeString <> delimString)]
     typeString = case numStyle of
       Decimal -> "arabic"
diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs
new file mode 100644
index 000000000..38e4df218
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light.hs
@@ -0,0 +1,586 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XML.Light
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+xml-light, which we used in pandoc's the XML-based readers, has
+some limitations: in particular, it produces nodes with String
+instead of Text, and the parser falls over on processing instructions
+(see #7091).
+
+This module exports much of the API of xml-light, but using Text instead
+of String. In addition, the xml-light parsers are replaced by xml-conduit's
+well-tested parser.  (The xml-conduit types are mapped to types
+isomorphic to xml-light's, to avoid the need for massive code modifications
+elsewhere.)  Bridge functions to map xml-light types to this module's
+types are also provided (since libraries like texmath still use xml-light).
+
+Another advantage of the xml-conduit parser is that it gives us
+detailed information on xml parse errors.
+
+In the future we may want to move to using xml-conduit or another
+xml library in the code base, but this change gives us
+better performance and accuracy without much change in the
+code that used xml-light.
+-}
+module Text.Pandoc.XML.Light
+  ( -- * Basic types, duplicating those from xml-light but with Text
+    -- instead of String
+    Line
+  , Content(..)
+  , Element(..)
+  , Attr(..)
+  , CData(..)
+  , CDataKind(..)
+  , QName(..)
+  , Node(..)
+  , unode
+  , unqual
+  , add_attr
+  , add_attrs
+    -- * Conversion functions from xml-light types
+  , fromXLQName
+  , fromXLCData
+  , fromXLElement
+  , fromXLAttr
+  , fromXLContent
+    -- * Replacement for xml-light's Text.XML.Proc
+  , strContent
+  , onlyElems
+  , elChildren
+  , onlyText
+  , findChildren
+  , filterChildren
+  , filterChildrenName
+  , findChild
+  , filterChild
+  , filterChildName
+  , findElement
+  , filterElement
+  , filterElementName
+  , findElements
+  , filterElements
+  , filterElementsName
+  , findAttr
+  , lookupAttr
+  , lookupAttrBy
+  , findAttrBy
+    -- * Replacement for xml-light's Text.XML.Output
+  , ppTopElement
+  , ppElement
+  , ppContent
+  , ppcElement
+  , ppcContent
+  , showTopElement
+  , showElement
+  , showContent
+  , useShortEmptyTags
+  , defaultConfigPP
+  , ConfigPP(..)
+    -- * Replacement for xml-light's Text.XML.Input
+  , parseXMLElement
+  , parseXMLContents
+  ) where
+
+import qualified Control.Exception as E
+import qualified Text.XML as Conduit
+import Text.XML.Unresolved (InvalidEventStream(..))
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText)
+import qualified Data.Map as M
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import Data.Maybe (mapMaybe, listToMaybe)
+import Data.List(find)
+import qualified Text.XML.Light as XL
+
+-- Drop in replacement for parseXMLDoc in xml-light.
+parseXMLElement :: TL.Text -> Either T.Text Element
+parseXMLElement t =
+  elementToElement .  Conduit.documentRoot <$>
+    either (Left . T.pack . E.displayException) Right
+    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
+
+parseXMLContents :: TL.Text -> Either T.Text [Content]
+parseXMLContents t =
+  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
+    Left e ->
+      case E.fromException e of
+        Just (ContentAfterRoot _) ->
+          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
+        _ -> Left . T.pack . E.displayException $ e
+    Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x]
+
+elementToElement :: Conduit.Element -> Element
+elementToElement (Conduit.Element name attribMap nodes) =
+  Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
+ where
+  attrs = map (\(n,v) -> Attr (nameToQname n) v) $
+              M.toList attribMap
+  nameToQname (Conduit.Name localName mbns mbpref) =
+    case mbpref of
+      Nothing ->
+        case T.stripPrefix "xmlns:" localName of
+          Just rest -> QName rest mbns (Just "xmlns")
+          Nothing   -> QName localName mbns mbpref
+      _ -> QName localName mbns mbpref
+
+nodeToContent :: Conduit.Node -> Maybe Content
+nodeToContent (Conduit.NodeElement el) =
+  Just (Elem (elementToElement el))
+nodeToContent (Conduit.NodeContent t) =
+  Just (Text (CData CDataText t Nothing))
+nodeToContent _ = Nothing
+
+unqual :: Text -> QName
+unqual x = QName x Nothing Nothing
+
+-- | Add an attribute to an element.
+add_attr :: Attr -> Element -> Element
+add_attr a e = add_attrs [a] e
+
+-- | Add some attributes to an element.
+add_attrs :: [Attr] -> Element -> Element
+add_attrs as e = e { elAttribs = as ++ elAttribs e }
+
+--
+-- type definitions lightly modified from xml-light
+--
+
+-- | A line is an Integer
+type Line     = Integer
+
+-- | XML content
+data Content  = Elem Element
+              | Text CData
+              | CRef Text
+                deriving (Show, Typeable, Data)
+
+-- | XML elements
+data Element  = Element {
+                  elName      :: QName,
+                  elAttribs   :: [Attr],
+                  elContent   :: [Content],
+                  elLine      :: Maybe Line
+                } deriving (Show, Typeable, Data)
+
+-- | XML attributes
+data Attr     = Attr {
+                  attrKey :: QName,
+                  attrVal :: Text
+                } deriving (Eq, Ord, Show, Typeable, Data)
+
+-- | XML CData
+data CData    = CData {
+                  cdVerbatim  :: CDataKind,
+                  cdData      :: Text,
+                  cdLine      :: Maybe Line
+                } deriving (Show, Typeable, Data)
+
+data CDataKind
+ = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
+ | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
+ | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
+   deriving ( Eq, Show, Typeable, Data )
+
+-- | XML qualified names
+data QName    = QName {
+                  qName   :: Text,
+                  qURI    :: Maybe Text,
+                  qPrefix :: Maybe Text
+                } deriving (Show, Typeable, Data)
+
+
+instance Eq QName where
+  q1 == q2  = compare q1 q2 == EQ
+
+instance Ord QName where
+  compare q1 q2 =
+    case compare (qName q1) (qName q2) of
+      EQ  -> case (qURI q1, qURI q2) of
+               (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
+               (u1,u2)           -> compare u1 u2
+      x   -> x
+
+class Node t where
+ node :: QName -> t -> Element
+
+instance Node ([Attr],[Content]) where
+  node n (attrs,cont) = Element { elName     = n
+                                , elAttribs  = attrs
+                                , elContent  = cont
+                                , elLine     = Nothing
+                                }
+
+instance Node [Attr]             where node n as   = node n (as,[]::[Content])
+instance Node Attr               where node n a    = node n [a]
+instance Node ()                 where node n ()   = node n ([]::[Attr])
+
+instance Node [Content]          where node n cs     = node n ([]::[Attr],cs)
+instance Node Content            where node n c      = node n [c]
+instance Node ([Attr],Content)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Content)     where node n (a,c)  = node n ([a],[c])
+
+instance Node ([Attr],[Element]) where
+  node n (as,cs) = node n (as,map Elem cs)
+
+instance Node ([Attr],Element)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Element)     where node n (a,c)  = node n ([a],c)
+instance Node [Element]          where node n es     = node n ([]::[Attr],es)
+instance Node Element            where node n e      = node n [e]
+
+instance Node ([Attr],[CData])   where
+  node n (as,cs) = node n (as,map Text cs)
+
+instance Node ([Attr],CData)     where node n (as,c) = node n (as,[c])
+instance Node (Attr,CData)       where node n (a,c)  = node n ([a],c)
+instance Node [CData]            where node n es     = node n ([]::[Attr],es)
+instance Node CData              where node n e      = node n [e]
+
+instance Node ([Attr],Text)      where
+  node n (as,t) = node n (as, CData { cdVerbatim = CDataText
+                                    , cdData = t
+                                    , cdLine = Nothing })
+
+instance Node (Attr,Text )       where node n (a,t)  = node n ([a],t)
+instance Node Text               where node n t      = node n ([]::[Attr],t)
+
+-- | Create node with unqualified name
+unode :: Node t => Text -> t -> Element
+unode = node . unqual
+
+--
+-- conversion from xml-light
+--
+
+fromXLQName :: XL.QName -> QName
+fromXLQName qn = QName { qName = T.pack $ XL.qName qn
+                       , qURI = T.pack <$> XL.qURI qn
+                       , qPrefix = T.pack <$> XL.qPrefix qn }
+
+fromXLCData :: XL.CData -> CData
+fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of
+                                        XL.CDataText -> CDataText
+                                        XL.CDataVerbatim -> CDataVerbatim
+                                        XL.CDataRaw -> CDataRaw
+                       , cdData = T.pack $ XL.cdData cd
+                       , cdLine = XL.cdLine cd }
+
+fromXLElement :: XL.Element -> Element
+fromXLElement el = Element { elName = fromXLQName $ XL.elName el
+                           , elAttribs = map fromXLAttr $ XL.elAttribs el
+                           , elContent = map fromXLContent $ XL.elContent el
+                           , elLine = XL.elLine el }
+
+fromXLAttr :: XL.Attr -> Attr
+fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s)
+
+fromXLContent :: XL.Content -> Content
+fromXLContent (XL.Elem el) = Elem $ fromXLElement el
+fromXLContent (XL.Text cd) = Text $ fromXLCData cd
+fromXLContent (XL.CRef s)  = CRef (T.pack s)
+
+--
+-- copied from xml-light Text.XML.Proc
+--
+
+-- | Get the text value of an XML element.  This function
+-- ignores non-text elements, and concatenates all text elements.
+strContent         :: Element -> Text
+strContent          = mconcat . map cdData . onlyText . elContent
+
+-- | Select only the elements from a list of XML content.
+onlyElems          :: [Content] -> [Element]
+onlyElems xs        = [ x | Elem x <- xs ]
+
+-- | Select only the elements from a parent.
+elChildren         :: Element -> [Element]
+elChildren e        = [ x | Elem x <- elContent e ]
+
+-- | Select only the text from a list of XML content.
+onlyText           :: [Content] -> [CData]
+onlyText xs         = [ x | Text x <- xs ]
+
+-- | Find all immediate children with the given name.
+findChildren       :: QName -> Element -> [Element]
+findChildren q e    = filterChildren ((q ==) . elName) e
+
+-- | Filter all immediate children wrt a given predicate.
+filterChildren       :: (Element -> Bool) -> Element -> [Element]
+filterChildren p e    = filter p (onlyElems (elContent e))
+
+
+-- | Filter all immediate children wrt a given predicate over their names.
+filterChildrenName      :: (QName -> Bool) -> Element -> [Element]
+filterChildrenName p e   = filter (p.elName) (onlyElems (elContent e))
+
+
+-- | Find an immediate child with the given name.
+findChild          :: QName -> Element -> Maybe Element
+findChild q e       = listToMaybe (findChildren q e)
+
+-- | Find an immediate child with the given name.
+filterChild          :: (Element -> Bool) -> Element -> Maybe Element
+filterChild p e       = listToMaybe (filterChildren p e)
+
+-- | Find an immediate child with name matching a predicate.
+filterChildName      :: (QName -> Bool) -> Element -> Maybe Element
+filterChildName p e   = listToMaybe (filterChildrenName p e)
+
+-- | Find the left-most occurrence of an element matching given name.
+findElement        :: QName -> Element -> Maybe Element
+findElement q e     = listToMaybe (findElements q e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElement        :: (Element -> Bool) -> Element -> Maybe Element
+filterElement p e     = listToMaybe (filterElements p e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElementName     :: (QName -> Bool) -> Element -> Maybe Element
+filterElementName p e  = listToMaybe (filterElementsName p e)
+
+-- | Find all non-nested occurances of an element.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+findElements       :: QName -> Element -> [Element]
+findElements qn e = filterElementsName (qn==) e
+
+-- | Find all non-nested occurrences of an element wrt. given predicate.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElements       :: (Element -> Bool) -> Element -> [Element]
+filterElements p e
+ | p e        = [e]
+ | otherwise  = concatMap (filterElements p) $ onlyElems $ elContent e
+
+-- | Find all non-nested occurences of an element wrt a predicate over element names.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElementsName       :: (QName -> Bool) -> Element -> [Element]
+filterElementsName p e = filterElements (p.elName) e
+
+-- | Lookup the value of an attribute.
+findAttr          :: QName -> Element -> Maybe Text
+findAttr x e       = lookupAttr x (elAttribs e)
+
+-- | Lookup attribute name from list.
+lookupAttr        :: QName -> [Attr] -> Maybe Text
+lookupAttr x       = lookupAttrBy (x ==)
+
+-- | Lookup the first attribute whose name satisfies the given predicate.
+lookupAttrBy       :: (QName -> Bool) -> [Attr] -> Maybe Text
+lookupAttrBy p as   = attrVal `fmap` find (p . attrKey) as
+
+-- | Lookup the value of the first attribute whose name
+-- satisfies the given predicate.
+findAttrBy         :: (QName -> Bool) -> Element -> Maybe Text
+findAttrBy p e      = lookupAttrBy p (elAttribs e)
+
+
+--
+-- duplicates functinos from Text.XML.Output
+--
+
+-- | The XML 1.0 header
+xmlHeader :: Text
+xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+
+
+--------------------------------------------------------------------------------
+data ConfigPP = ConfigPP
+  { shortEmptyTag :: QName -> Bool
+  , prettify      :: Bool
+  }
+
+-- | Default pretty orinting configuration.
+--  * Always use abbreviate empty tags.
+defaultConfigPP :: ConfigPP
+defaultConfigPP = ConfigPP { shortEmptyTag = const True
+                           , prettify      = False
+                           }
+
+-- | The predicate specifies for which empty tags we should use XML's
+-- abbreviated notation <TAG />.  This is useful if we are working with
+-- some XML-ish standards (such as certain versions of HTML) where some
+-- empty tags should always be displayed in the <TAG></TAG> form.
+useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP
+useShortEmptyTags p c = c { shortEmptyTag = p }
+
+
+-- | Specify if we should use extra white-space to make document more readable.
+-- WARNING: This adds additional white-space to text elements,
+-- and so it may change the meaning of the document.
+useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP
+useExtraWhiteSpace p c  = c { prettify = p }
+
+-- | A configuration that tries to make things pretty
+-- (possibly at the cost of changing the semantics a bit
+-- through adding white space.)
+prettyConfigPP     :: ConfigPP
+prettyConfigPP      = useExtraWhiteSpace True defaultConfigPP
+
+
+--------------------------------------------------------------------------------
+
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppTopElement       :: Element -> Text
+ppTopElement        = ppcTopElement prettyConfigPP
+
+-- | Pretty printing elements
+ppElement          :: Element -> Text
+ppElement           = ppcElement prettyConfigPP
+
+-- | Pretty printing content
+ppContent          :: Content -> Text
+ppContent           = ppcContent prettyConfigPP
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppcTopElement      :: ConfigPP -> Element -> Text
+ppcTopElement c e   = T.unlines [xmlHeader,ppcElement c e]
+
+-- | Pretty printing elements
+ppcElement         :: ConfigPP -> Element -> Text
+ppcElement c        = TL.toStrict . toLazyText . ppElementS c mempty
+
+-- | Pretty printing content
+ppcContent         :: ConfigPP -> Content -> Text
+ppcContent c        = TL.toStrict . toLazyText . ppContentS c mempty
+
+ppcCData           :: ConfigPP -> CData -> Text
+ppcCData c         = TL.toStrict . toLazyText . ppCDataS c mempty
+
+type Indent = Builder
+
+-- | Pretty printing content using ShowT
+ppContentS         :: ConfigPP -> Indent -> Content -> Builder
+ppContentS c i x = case x of
+                     Elem e -> ppElementS c i e
+                     Text t -> ppCDataS c i t
+                     CRef r -> showCRefS r
+
+ppElementS         :: ConfigPP -> Indent -> Element -> Builder
+ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <>
+  (case elContent e of
+    [] | "?" `T.isPrefixOf` qName name -> fromText " ?>"
+       | shortEmptyTag c name  -> fromText " />"
+    [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name
+    cs -> singleton '>' <> nl <>
+          mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <>
+          i <> tagEnd name
+      where (nl,sp)  = if prettify c then ("\n","  ") else ("","")
+  )
+  where name = elName e
+
+ppCDataS           :: ConfigPP -> Indent -> CData -> Builder
+ppCDataS c i t     = i <> if cdVerbatim t /= CDataText || not (prettify c)
+                             then showCDataS t
+                             else foldr cons mempty (T.unpack (showCData t))
+  where cons         :: Char -> Builder -> Builder
+        cons '\n' ys  = singleton '\n' <> i <> ys
+        cons y ys     = singleton y <> ys
+
+
+
+--------------------------------------------------------------------------------
+
+-- | Adds the <?xml?> header.
+showTopElement     :: Element -> Text
+showTopElement c    = xmlHeader <> showElement c
+
+showContent        :: Content -> Text
+showContent         = ppcContent defaultConfigPP
+
+showElement        :: Element -> Text
+showElement         = ppcElement defaultConfigPP
+
+showCData          :: CData -> Text
+showCData           = ppcCData defaultConfigPP
+
+-- Note: crefs should not contain '&', ';', etc.
+showCRefS          :: Text -> Builder
+showCRefS r         = singleton '&' <> fromText r <> singleton ';'
+
+-- | Convert a text element to characters.
+showCDataS         :: CData -> Builder
+showCDataS cd =
+ case cdVerbatim cd of
+   CDataText     -> escStr (cdData cd)
+   CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <>
+                    fromText "]]>"
+   CDataRaw      -> fromText (cdData cd)
+
+--------------------------------------------------------------------------------
+escCData           :: Text -> Builder
+escCData t
+  | "]]>" `T.isPrefixOf` t =
+     fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t)
+escCData t
+  = case T.uncons t of
+      Nothing     -> mempty
+      Just (c,t') -> singleton c <> escCData t'
+
+escChar            :: Char -> Builder
+escChar c = case c of
+  '<'   -> fromText "&lt;"
+  '>'   -> fromText "&gt;"
+  '&'   -> fromText "&amp;"
+  '"'   -> fromText "&quot;"
+  -- we use &#39 instead of &apos; because IE apparently has difficulties
+  -- rendering &apos; in xhtml.
+  -- Reported by Rohan Drape <rohan.drape@gmail.com>.
+  '\''  -> fromText "&#39;"
+  _     -> singleton c
+
+  {- original xml-light version:
+  -- NOTE: We escape '\r' explicitly because otherwise they get lost
+  -- when parsed back in because of then end-of-line normalization rules.
+  _ | isPrint c || c == '\n' -> singleton c
+    | otherwise -> showText "&#" . showsT oc . singleton ';'
+      where oc = ord c
+  -}
+
+escStr             :: Text -> Builder
+escStr cs          = if T.any needsEscape cs
+                        then mconcat (map escChar (T.unpack cs))
+                        else fromText cs
+ where
+  needsEscape '<' = True
+  needsEscape '>' = True
+  needsEscape '&' = True
+  needsEscape '"' = True
+  needsEscape '\'' = True
+  needsEscape _ = False
+
+tagEnd             :: QName -> Builder
+tagEnd qn           = fromText "</" <> showQName qn <> singleton '>'
+
+tagStart           :: QName -> [Attr] -> Builder
+tagStart qn as      = singleton '<' <> showQName qn <> as_str
+ where as_str       = if null as
+                         then mempty
+                         else mconcat (map showAttr as)
+
+showAttr           :: Attr -> Builder
+showAttr (Attr qn v) = singleton ' ' <> showQName qn <>
+                       singleton '=' <>
+                       singleton '"' <> escStr v <> singleton '"'
+
+showQName          :: QName -> Builder
+showQName q         =
+  case qPrefix q of
+    Nothing -> fromText (qName q)
+    Just p  -> fromText p <> singleton ':' <> fromText (qName q)
diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs
deleted file mode 100644
index 8ad22a66a..000000000
--- a/src/Text/Pandoc/XMLParser.hs
+++ /dev/null
@@ -1,66 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.XMLParser
-   Copyright   : Copyright (C) 2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Bridge to allow using xml-conduit's parser with xml-light's types.
--}
-module Text.Pandoc.XMLParser
-  ( parseXMLElement
-  , parseXMLContents
-  , module Text.XML.Light.Types
-  ) where
-
-import qualified Control.Exception as E
-import qualified Text.XML as Conduit
-import Text.XML.Unresolved (InvalidEventStream(..))
-import qualified Text.XML.Light as Light
-import Text.XML.Light.Types
-import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Map as M
-import Data.Maybe (mapMaybe)
-
--- Drop in replacement for parseXMLDoc in xml-light.
-parseXMLElement :: TL.Text -> Either T.Text Light.Element
-parseXMLElement t =
-  elementToElement .  Conduit.documentRoot <$>
-    either (Left . T.pack . E.displayException) Right
-    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
-
-parseXMLContents :: TL.Text -> Either T.Text [Light.Content]
-parseXMLContents t =
-  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
-    Left e ->
-      case E.fromException e of
-        Just (ContentAfterRoot _) ->
-          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
-        _ -> Left . T.pack . E.displayException $ e
-    Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x]
-
-elementToElement :: Conduit.Element -> Light.Element
-elementToElement (Conduit.Element name attribMap nodes) =
-  Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
- where
-  attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $
-              M.toList attribMap
-  nameToQname (Conduit.Name localName mbns mbpref) =
-    case mbpref of
-      Nothing | "xmlns:" `T.isPrefixOf` localName ->
-           Light.QName (T.unpack $ T.drop 6 localName)  (T.unpack <$> mbns)
-                       (Just "xmlns")
-      _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns)
-                       (T.unpack <$> mbpref)
-
-nodeToContent :: Conduit.Node -> Maybe Light.Content
-nodeToContent (Conduit.NodeElement el) =
-  Just (Light.Elem (elementToElement el))
-nodeToContent (Conduit.NodeContent t) =
-  Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing))
-nodeToContent _ = Nothing
-
diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs
index 376f02c55..c1e47622d 100644
--- a/test/Tests/Writers/OOXML.hs
+++ b/test/Tests/Writers/OOXML.hs
@@ -43,7 +43,8 @@ compareXMLBool _ _ = False
 
 displayDiff :: Content -> Content -> String
 displayDiff elemA elemB =
-  showDiff (1,1) $ getDiff (lines $ ppContent elemA) (lines $ ppContent elemB)
+  showDiff (1,1)
+    (getDiff (lines $ showContent elemA) (lines $ showContent elemB))
 
 goldenArchive :: FilePath -> IO Archive
 goldenArchive fp = toArchive . BL.fromStrict <$> BS.readFile fp
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index d3b16d0f2..ed7d1165c 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 6293ef493..07ae75676 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index 4205a1516..e5f034378 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index adb3f23db..174942135 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 92c8137fe..b5c31a851 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index f53470617..c42ca1b05 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index d6af90a72..1cb4c1fd7 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index e18dbe853..7122456ea 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 820299043..616ba0f81 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index ae0f41d12..c30dcdee9 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 94cd35dfa..8a704b41e 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 879f2a25b..b1906c8c4 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 93f86478f..8adf1cf75 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 967d297f2..584117503 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index c5614e2fa..8859fe55c 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index 0f39a831f..b80f3b3ba 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 07046f223..35beed68a 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 3656618e6..2c29fd674 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 8798253d5..10a948886 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 0a24d1840..5b90e74a0 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index 52bb7a217..cc81b46d1 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 182c06c64..1394dc442 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 7b69a56a3..0d1688694 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index 3d3a35701..be1caef2d 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 5ae37b406..a1d2323c2 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index c29aa6716..2f3a831a7 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 664493246..af066107c 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index b6d15340e..9cc7a075f 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index f8e1092d2..f8b8dcfde 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index b4cda82f2..1c3baf0bf 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index ee222efa0..28686970d 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index c6f8d9c96..7051cefbd 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index ea8146690..9df631640 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx
index 58070eb3f..5e9c2c630 100644
Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ
diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx
index db9b7e371..15232c32d 100644
Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ
diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx
index c7b1ed7d5..aab0cc6f5 100644
Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ
diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx
index 6944d92bf..fe5b675f3 100644
Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ
diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx
index ae0d28429..de5e68151 100644
Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ
diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx
index 37c74c69a..89e6fbdf2 100644
Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ
diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx
index 324e443a1..6bcbd1b9c 100644
Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ
diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx
index c81b983e3..79d48560b 100644
Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ
diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx
index 30ce33db6..9d46036fe 100644
Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ
diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx
index d6c604968..54ec7f305 100644
Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ
diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx
index 000e17ecd..a028b346f 100644
Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ
diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx
index fdcd2e29b..1158c16fc 100644
Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ
diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx
index e73126376..670a825de 100644
Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ
diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx
index e3f968e9e..6d297ef11 100644
Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ
diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx
index eadb9372e..473b9498d 100644
Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ
diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx
index 8ca6bab2b..2cdf54474 100644
Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ
diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx
index ae188ee68..ffc2eb9f7 100644
Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ
diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx
index 60301fa50..676954cb8 100644
Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ
diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx
index 17124a50d..29164af15 100644
Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ
diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx
index 19ae7dd4e..1742b3296 100644
Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ
diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx
index b650b7585..c6df8e18e 100644
Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ
diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx
index 0ab029614..cf6e52eef 100644
Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ
diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx
index 2a6e35080..e06d9079d 100644
Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx
index a7bcf6a4b..449339778 100644
Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx
index 21b018c25..258098082 100644
Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ
diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx
index 4ec4772a4..2f0213919 100644
Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ
diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx
index 5983657b6..9dbfa41a0 100644
Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ
diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx
index dd54c7082..f288dde14 100644
Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ
diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx
index b3e5ed5b9..0ab1302da 100644
Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx
index 1078854bb..6343bffe4 100644
Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx
index 5116c6c4e..5d4465f64 100644
Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx
index 0c8e49bd9..d581681aa 100644
Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx
index 68695939d..c922df3a8 100644
Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx
index 7ed9b946d..13f564bf0 100644
Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx
index 79fc82345..f0b302738 100644
Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ
diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx
index 9f943c279..2d8d00242 100644
Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ
diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx
index ac72d8ced..4320128b3 100644
Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ
diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx
index 15c7b5469..0a3f6e56d 100644
Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ
diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx
index 926c5e699..e41219844 100644
Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ
diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx
index a37e72d2c..82b8aa13d 100644
Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ
diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx
index 7f86533fe..270a7eeac 100644
Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ
diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx
index 89e3db0ab..44985d701 100644
Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ
-- 
cgit v1.2.3


From d7a4996b1e7e201095ce792375a0776984fa8fcd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Feb 2021 18:40:06 -0800
Subject: Split up T.P.XML.Light into submodules.

---
 pandoc.cabal                        |   3 +
 src/Text/Pandoc/XML/Light.hs        | 511 +-----------------------------------
 src/Text/Pandoc/XML/Light/Output.hs | 230 ++++++++++++++++
 src/Text/Pandoc/XML/Light/Proc.hs   | 138 ++++++++++
 src/Text/Pandoc/XML/Light/Types.hs  | 190 ++++++++++++++
 5 files changed, 568 insertions(+), 504 deletions(-)
 create mode 100644 src/Text/Pandoc/XML/Light/Output.hs
 create mode 100644 src/Text/Pandoc/XML/Light/Proc.hs
 create mode 100644 src/Text/Pandoc/XML/Light/Types.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 22aebd55e..d27520ba0 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -688,6 +688,9 @@ library
                    Text.Pandoc.Lua.Util,
                    Text.Pandoc.Lua.Walk,
                    Text.Pandoc.XML.Light,
+                   Text.Pandoc.XML.Light.Types,
+                   Text.Pandoc.XML.Light.Proc,
+                   Text.Pandoc.XML.Light.Output,
                    Text.Pandoc.CSS,
                    Text.Pandoc.CSV,
                    Text.Pandoc.RoffChar,
diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs
index 38e4df218..07113ea92 100644
--- a/src/Text/Pandoc/XML/Light.hs
+++ b/src/Text/Pandoc/XML/Light.hs
@@ -1,5 +1,4 @@
 {-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE DeriveDataTypeable #-}
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.XML.Light
@@ -31,59 +30,9 @@ better performance and accuracy without much change in the
 code that used xml-light.
 -}
 module Text.Pandoc.XML.Light
-  ( -- * Basic types, duplicating those from xml-light but with Text
-    -- instead of String
-    Line
-  , Content(..)
-  , Element(..)
-  , Attr(..)
-  , CData(..)
-  , CDataKind(..)
-  , QName(..)
-  , Node(..)
-  , unode
-  , unqual
-  , add_attr
-  , add_attrs
-    -- * Conversion functions from xml-light types
-  , fromXLQName
-  , fromXLCData
-  , fromXLElement
-  , fromXLAttr
-  , fromXLContent
-    -- * Replacement for xml-light's Text.XML.Proc
-  , strContent
-  , onlyElems
-  , elChildren
-  , onlyText
-  , findChildren
-  , filterChildren
-  , filterChildrenName
-  , findChild
-  , filterChild
-  , filterChildName
-  , findElement
-  , filterElement
-  , filterElementName
-  , findElements
-  , filterElements
-  , filterElementsName
-  , findAttr
-  , lookupAttr
-  , lookupAttrBy
-  , findAttrBy
-    -- * Replacement for xml-light's Text.XML.Output
-  , ppTopElement
-  , ppElement
-  , ppContent
-  , ppcElement
-  , ppcContent
-  , showTopElement
-  , showElement
-  , showContent
-  , useShortEmptyTags
-  , defaultConfigPP
-  , ConfigPP(..)
+  ( module Text.Pandoc.XML.Light.Types
+  , module Text.Pandoc.XML.Light.Proc
+  , module Text.Pandoc.XML.Light.Output
     -- * Replacement for xml-light's Text.XML.Input
   , parseXMLElement
   , parseXMLContents
@@ -92,16 +41,13 @@ module Text.Pandoc.XML.Light
 import qualified Control.Exception as E
 import qualified Text.XML as Conduit
 import Text.XML.Unresolved (InvalidEventStream(..))
-import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
-import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText)
 import qualified Data.Map as M
-import Data.Data (Data)
-import Data.Typeable (Typeable)
-import Data.Maybe (mapMaybe, listToMaybe)
-import Data.List(find)
-import qualified Text.XML.Light as XL
+import Data.Maybe (mapMaybe)
+import Text.Pandoc.XML.Light.Types
+import Text.Pandoc.XML.Light.Proc
+import Text.Pandoc.XML.Light.Output
 
 -- Drop in replacement for parseXMLDoc in xml-light.
 parseXMLElement :: TL.Text -> Either T.Text Element
@@ -141,446 +87,3 @@ nodeToContent (Conduit.NodeContent t) =
   Just (Text (CData CDataText t Nothing))
 nodeToContent _ = Nothing
 
-unqual :: Text -> QName
-unqual x = QName x Nothing Nothing
-
--- | Add an attribute to an element.
-add_attr :: Attr -> Element -> Element
-add_attr a e = add_attrs [a] e
-
--- | Add some attributes to an element.
-add_attrs :: [Attr] -> Element -> Element
-add_attrs as e = e { elAttribs = as ++ elAttribs e }
-
---
--- type definitions lightly modified from xml-light
---
-
--- | A line is an Integer
-type Line     = Integer
-
--- | XML content
-data Content  = Elem Element
-              | Text CData
-              | CRef Text
-                deriving (Show, Typeable, Data)
-
--- | XML elements
-data Element  = Element {
-                  elName      :: QName,
-                  elAttribs   :: [Attr],
-                  elContent   :: [Content],
-                  elLine      :: Maybe Line
-                } deriving (Show, Typeable, Data)
-
--- | XML attributes
-data Attr     = Attr {
-                  attrKey :: QName,
-                  attrVal :: Text
-                } deriving (Eq, Ord, Show, Typeable, Data)
-
--- | XML CData
-data CData    = CData {
-                  cdVerbatim  :: CDataKind,
-                  cdData      :: Text,
-                  cdLine      :: Maybe Line
-                } deriving (Show, Typeable, Data)
-
-data CDataKind
- = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
- | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
- | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
-   deriving ( Eq, Show, Typeable, Data )
-
--- | XML qualified names
-data QName    = QName {
-                  qName   :: Text,
-                  qURI    :: Maybe Text,
-                  qPrefix :: Maybe Text
-                } deriving (Show, Typeable, Data)
-
-
-instance Eq QName where
-  q1 == q2  = compare q1 q2 == EQ
-
-instance Ord QName where
-  compare q1 q2 =
-    case compare (qName q1) (qName q2) of
-      EQ  -> case (qURI q1, qURI q2) of
-               (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
-               (u1,u2)           -> compare u1 u2
-      x   -> x
-
-class Node t where
- node :: QName -> t -> Element
-
-instance Node ([Attr],[Content]) where
-  node n (attrs,cont) = Element { elName     = n
-                                , elAttribs  = attrs
-                                , elContent  = cont
-                                , elLine     = Nothing
-                                }
-
-instance Node [Attr]             where node n as   = node n (as,[]::[Content])
-instance Node Attr               where node n a    = node n [a]
-instance Node ()                 where node n ()   = node n ([]::[Attr])
-
-instance Node [Content]          where node n cs     = node n ([]::[Attr],cs)
-instance Node Content            where node n c      = node n [c]
-instance Node ([Attr],Content)   where node n (as,c) = node n (as,[c])
-instance Node (Attr,Content)     where node n (a,c)  = node n ([a],[c])
-
-instance Node ([Attr],[Element]) where
-  node n (as,cs) = node n (as,map Elem cs)
-
-instance Node ([Attr],Element)   where node n (as,c) = node n (as,[c])
-instance Node (Attr,Element)     where node n (a,c)  = node n ([a],c)
-instance Node [Element]          where node n es     = node n ([]::[Attr],es)
-instance Node Element            where node n e      = node n [e]
-
-instance Node ([Attr],[CData])   where
-  node n (as,cs) = node n (as,map Text cs)
-
-instance Node ([Attr],CData)     where node n (as,c) = node n (as,[c])
-instance Node (Attr,CData)       where node n (a,c)  = node n ([a],c)
-instance Node [CData]            where node n es     = node n ([]::[Attr],es)
-instance Node CData              where node n e      = node n [e]
-
-instance Node ([Attr],Text)      where
-  node n (as,t) = node n (as, CData { cdVerbatim = CDataText
-                                    , cdData = t
-                                    , cdLine = Nothing })
-
-instance Node (Attr,Text )       where node n (a,t)  = node n ([a],t)
-instance Node Text               where node n t      = node n ([]::[Attr],t)
-
--- | Create node with unqualified name
-unode :: Node t => Text -> t -> Element
-unode = node . unqual
-
---
--- conversion from xml-light
---
-
-fromXLQName :: XL.QName -> QName
-fromXLQName qn = QName { qName = T.pack $ XL.qName qn
-                       , qURI = T.pack <$> XL.qURI qn
-                       , qPrefix = T.pack <$> XL.qPrefix qn }
-
-fromXLCData :: XL.CData -> CData
-fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of
-                                        XL.CDataText -> CDataText
-                                        XL.CDataVerbatim -> CDataVerbatim
-                                        XL.CDataRaw -> CDataRaw
-                       , cdData = T.pack $ XL.cdData cd
-                       , cdLine = XL.cdLine cd }
-
-fromXLElement :: XL.Element -> Element
-fromXLElement el = Element { elName = fromXLQName $ XL.elName el
-                           , elAttribs = map fromXLAttr $ XL.elAttribs el
-                           , elContent = map fromXLContent $ XL.elContent el
-                           , elLine = XL.elLine el }
-
-fromXLAttr :: XL.Attr -> Attr
-fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s)
-
-fromXLContent :: XL.Content -> Content
-fromXLContent (XL.Elem el) = Elem $ fromXLElement el
-fromXLContent (XL.Text cd) = Text $ fromXLCData cd
-fromXLContent (XL.CRef s)  = CRef (T.pack s)
-
---
--- copied from xml-light Text.XML.Proc
---
-
--- | Get the text value of an XML element.  This function
--- ignores non-text elements, and concatenates all text elements.
-strContent         :: Element -> Text
-strContent          = mconcat . map cdData . onlyText . elContent
-
--- | Select only the elements from a list of XML content.
-onlyElems          :: [Content] -> [Element]
-onlyElems xs        = [ x | Elem x <- xs ]
-
--- | Select only the elements from a parent.
-elChildren         :: Element -> [Element]
-elChildren e        = [ x | Elem x <- elContent e ]
-
--- | Select only the text from a list of XML content.
-onlyText           :: [Content] -> [CData]
-onlyText xs         = [ x | Text x <- xs ]
-
--- | Find all immediate children with the given name.
-findChildren       :: QName -> Element -> [Element]
-findChildren q e    = filterChildren ((q ==) . elName) e
-
--- | Filter all immediate children wrt a given predicate.
-filterChildren       :: (Element -> Bool) -> Element -> [Element]
-filterChildren p e    = filter p (onlyElems (elContent e))
-
-
--- | Filter all immediate children wrt a given predicate over their names.
-filterChildrenName      :: (QName -> Bool) -> Element -> [Element]
-filterChildrenName p e   = filter (p.elName) (onlyElems (elContent e))
-
-
--- | Find an immediate child with the given name.
-findChild          :: QName -> Element -> Maybe Element
-findChild q e       = listToMaybe (findChildren q e)
-
--- | Find an immediate child with the given name.
-filterChild          :: (Element -> Bool) -> Element -> Maybe Element
-filterChild p e       = listToMaybe (filterChildren p e)
-
--- | Find an immediate child with name matching a predicate.
-filterChildName      :: (QName -> Bool) -> Element -> Maybe Element
-filterChildName p e   = listToMaybe (filterChildrenName p e)
-
--- | Find the left-most occurrence of an element matching given name.
-findElement        :: QName -> Element -> Maybe Element
-findElement q e     = listToMaybe (findElements q e)
-
--- | Filter the left-most occurrence of an element wrt. given predicate.
-filterElement        :: (Element -> Bool) -> Element -> Maybe Element
-filterElement p e     = listToMaybe (filterElements p e)
-
--- | Filter the left-most occurrence of an element wrt. given predicate.
-filterElementName     :: (QName -> Bool) -> Element -> Maybe Element
-filterElementName p e  = listToMaybe (filterElementsName p e)
-
--- | Find all non-nested occurances of an element.
--- (i.e., once we have found an element, we do not search
--- for more occurances among the element's children).
-findElements       :: QName -> Element -> [Element]
-findElements qn e = filterElementsName (qn==) e
-
--- | Find all non-nested occurrences of an element wrt. given predicate.
--- (i.e., once we have found an element, we do not search
--- for more occurances among the element's children).
-filterElements       :: (Element -> Bool) -> Element -> [Element]
-filterElements p e
- | p e        = [e]
- | otherwise  = concatMap (filterElements p) $ onlyElems $ elContent e
-
--- | Find all non-nested occurences of an element wrt a predicate over element names.
--- (i.e., once we have found an element, we do not search
--- for more occurances among the element's children).
-filterElementsName       :: (QName -> Bool) -> Element -> [Element]
-filterElementsName p e = filterElements (p.elName) e
-
--- | Lookup the value of an attribute.
-findAttr          :: QName -> Element -> Maybe Text
-findAttr x e       = lookupAttr x (elAttribs e)
-
--- | Lookup attribute name from list.
-lookupAttr        :: QName -> [Attr] -> Maybe Text
-lookupAttr x       = lookupAttrBy (x ==)
-
--- | Lookup the first attribute whose name satisfies the given predicate.
-lookupAttrBy       :: (QName -> Bool) -> [Attr] -> Maybe Text
-lookupAttrBy p as   = attrVal `fmap` find (p . attrKey) as
-
--- | Lookup the value of the first attribute whose name
--- satisfies the given predicate.
-findAttrBy         :: (QName -> Bool) -> Element -> Maybe Text
-findAttrBy p e      = lookupAttrBy p (elAttribs e)
-
-
---
--- duplicates functinos from Text.XML.Output
---
-
--- | The XML 1.0 header
-xmlHeader :: Text
-xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
-
-
---------------------------------------------------------------------------------
-data ConfigPP = ConfigPP
-  { shortEmptyTag :: QName -> Bool
-  , prettify      :: Bool
-  }
-
--- | Default pretty orinting configuration.
---  * Always use abbreviate empty tags.
-defaultConfigPP :: ConfigPP
-defaultConfigPP = ConfigPP { shortEmptyTag = const True
-                           , prettify      = False
-                           }
-
--- | The predicate specifies for which empty tags we should use XML's
--- abbreviated notation <TAG />.  This is useful if we are working with
--- some XML-ish standards (such as certain versions of HTML) where some
--- empty tags should always be displayed in the <TAG></TAG> form.
-useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP
-useShortEmptyTags p c = c { shortEmptyTag = p }
-
-
--- | Specify if we should use extra white-space to make document more readable.
--- WARNING: This adds additional white-space to text elements,
--- and so it may change the meaning of the document.
-useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP
-useExtraWhiteSpace p c  = c { prettify = p }
-
--- | A configuration that tries to make things pretty
--- (possibly at the cost of changing the semantics a bit
--- through adding white space.)
-prettyConfigPP     :: ConfigPP
-prettyConfigPP      = useExtraWhiteSpace True defaultConfigPP
-
-
---------------------------------------------------------------------------------
-
-
--- | Pretty printing renders XML documents faithfully,
--- with the exception that whitespace may be added\/removed
--- in non-verbatim character data.
-ppTopElement       :: Element -> Text
-ppTopElement        = ppcTopElement prettyConfigPP
-
--- | Pretty printing elements
-ppElement          :: Element -> Text
-ppElement           = ppcElement prettyConfigPP
-
--- | Pretty printing content
-ppContent          :: Content -> Text
-ppContent           = ppcContent prettyConfigPP
-
--- | Pretty printing renders XML documents faithfully,
--- with the exception that whitespace may be added\/removed
--- in non-verbatim character data.
-ppcTopElement      :: ConfigPP -> Element -> Text
-ppcTopElement c e   = T.unlines [xmlHeader,ppcElement c e]
-
--- | Pretty printing elements
-ppcElement         :: ConfigPP -> Element -> Text
-ppcElement c        = TL.toStrict . toLazyText . ppElementS c mempty
-
--- | Pretty printing content
-ppcContent         :: ConfigPP -> Content -> Text
-ppcContent c        = TL.toStrict . toLazyText . ppContentS c mempty
-
-ppcCData           :: ConfigPP -> CData -> Text
-ppcCData c         = TL.toStrict . toLazyText . ppCDataS c mempty
-
-type Indent = Builder
-
--- | Pretty printing content using ShowT
-ppContentS         :: ConfigPP -> Indent -> Content -> Builder
-ppContentS c i x = case x of
-                     Elem e -> ppElementS c i e
-                     Text t -> ppCDataS c i t
-                     CRef r -> showCRefS r
-
-ppElementS         :: ConfigPP -> Indent -> Element -> Builder
-ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <>
-  (case elContent e of
-    [] | "?" `T.isPrefixOf` qName name -> fromText " ?>"
-       | shortEmptyTag c name  -> fromText " />"
-    [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name
-    cs -> singleton '>' <> nl <>
-          mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <>
-          i <> tagEnd name
-      where (nl,sp)  = if prettify c then ("\n","  ") else ("","")
-  )
-  where name = elName e
-
-ppCDataS           :: ConfigPP -> Indent -> CData -> Builder
-ppCDataS c i t     = i <> if cdVerbatim t /= CDataText || not (prettify c)
-                             then showCDataS t
-                             else foldr cons mempty (T.unpack (showCData t))
-  where cons         :: Char -> Builder -> Builder
-        cons '\n' ys  = singleton '\n' <> i <> ys
-        cons y ys     = singleton y <> ys
-
-
-
---------------------------------------------------------------------------------
-
--- | Adds the <?xml?> header.
-showTopElement     :: Element -> Text
-showTopElement c    = xmlHeader <> showElement c
-
-showContent        :: Content -> Text
-showContent         = ppcContent defaultConfigPP
-
-showElement        :: Element -> Text
-showElement         = ppcElement defaultConfigPP
-
-showCData          :: CData -> Text
-showCData           = ppcCData defaultConfigPP
-
--- Note: crefs should not contain '&', ';', etc.
-showCRefS          :: Text -> Builder
-showCRefS r         = singleton '&' <> fromText r <> singleton ';'
-
--- | Convert a text element to characters.
-showCDataS         :: CData -> Builder
-showCDataS cd =
- case cdVerbatim cd of
-   CDataText     -> escStr (cdData cd)
-   CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <>
-                    fromText "]]>"
-   CDataRaw      -> fromText (cdData cd)
-
---------------------------------------------------------------------------------
-escCData           :: Text -> Builder
-escCData t
-  | "]]>" `T.isPrefixOf` t =
-     fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t)
-escCData t
-  = case T.uncons t of
-      Nothing     -> mempty
-      Just (c,t') -> singleton c <> escCData t'
-
-escChar            :: Char -> Builder
-escChar c = case c of
-  '<'   -> fromText "&lt;"
-  '>'   -> fromText "&gt;"
-  '&'   -> fromText "&amp;"
-  '"'   -> fromText "&quot;"
-  -- we use &#39 instead of &apos; because IE apparently has difficulties
-  -- rendering &apos; in xhtml.
-  -- Reported by Rohan Drape <rohan.drape@gmail.com>.
-  '\''  -> fromText "&#39;"
-  _     -> singleton c
-
-  {- original xml-light version:
-  -- NOTE: We escape '\r' explicitly because otherwise they get lost
-  -- when parsed back in because of then end-of-line normalization rules.
-  _ | isPrint c || c == '\n' -> singleton c
-    | otherwise -> showText "&#" . showsT oc . singleton ';'
-      where oc = ord c
-  -}
-
-escStr             :: Text -> Builder
-escStr cs          = if T.any needsEscape cs
-                        then mconcat (map escChar (T.unpack cs))
-                        else fromText cs
- where
-  needsEscape '<' = True
-  needsEscape '>' = True
-  needsEscape '&' = True
-  needsEscape '"' = True
-  needsEscape '\'' = True
-  needsEscape _ = False
-
-tagEnd             :: QName -> Builder
-tagEnd qn           = fromText "</" <> showQName qn <> singleton '>'
-
-tagStart           :: QName -> [Attr] -> Builder
-tagStart qn as      = singleton '<' <> showQName qn <> as_str
- where as_str       = if null as
-                         then mempty
-                         else mconcat (map showAttr as)
-
-showAttr           :: Attr -> Builder
-showAttr (Attr qn v) = singleton ' ' <> showQName qn <>
-                       singleton '=' <>
-                       singleton '"' <> escStr v <> singleton '"'
-
-showQName          :: QName -> Builder
-showQName q         =
-  case qPrefix q of
-    Nothing -> fromText (qName q)
-    Just p  -> fromText p <> singleton ':' <> fromText (qName q)
diff --git a/src/Text/Pandoc/XML/Light/Output.hs b/src/Text/Pandoc/XML/Light/Output.hs
new file mode 100644
index 000000000..dc94ce914
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light/Output.hs
@@ -0,0 +1,230 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XML.Light.Output
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.XML.Light.Output
+  ( -- * Replacement for xml-light's Text.XML.Output
+    ppTopElement
+  , ppElement
+  , ppContent
+  , ppcElement
+  , ppcContent
+  , showTopElement
+  , showElement
+  , showContent
+  , useShortEmptyTags
+  , defaultConfigPP
+  , ConfigPP(..)
+  ) where
+
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText)
+import Text.Pandoc.XML.Light.Types
+
+--
+-- duplicates functinos from Text.XML.Output
+--
+
+-- | The XML 1.0 header
+xmlHeader :: Text
+xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+
+
+--------------------------------------------------------------------------------
+data ConfigPP = ConfigPP
+  { shortEmptyTag :: QName -> Bool
+  , prettify      :: Bool
+  }
+
+-- | Default pretty orinting configuration.
+--  * Always use abbreviate empty tags.
+defaultConfigPP :: ConfigPP
+defaultConfigPP = ConfigPP { shortEmptyTag = const True
+                           , prettify      = False
+                           }
+
+-- | The predicate specifies for which empty tags we should use XML's
+-- abbreviated notation <TAG />.  This is useful if we are working with
+-- some XML-ish standards (such as certain versions of HTML) where some
+-- empty tags should always be displayed in the <TAG></TAG> form.
+useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP
+useShortEmptyTags p c = c { shortEmptyTag = p }
+
+
+-- | Specify if we should use extra white-space to make document more readable.
+-- WARNING: This adds additional white-space to text elements,
+-- and so it may change the meaning of the document.
+useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP
+useExtraWhiteSpace p c  = c { prettify = p }
+
+-- | A configuration that tries to make things pretty
+-- (possibly at the cost of changing the semantics a bit
+-- through adding white space.)
+prettyConfigPP     :: ConfigPP
+prettyConfigPP      = useExtraWhiteSpace True defaultConfigPP
+
+
+--------------------------------------------------------------------------------
+
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppTopElement       :: Element -> Text
+ppTopElement        = ppcTopElement prettyConfigPP
+
+-- | Pretty printing elements
+ppElement          :: Element -> Text
+ppElement           = ppcElement prettyConfigPP
+
+-- | Pretty printing content
+ppContent          :: Content -> Text
+ppContent           = ppcContent prettyConfigPP
+
+-- | Pretty printing renders XML documents faithfully,
+-- with the exception that whitespace may be added\/removed
+-- in non-verbatim character data.
+ppcTopElement      :: ConfigPP -> Element -> Text
+ppcTopElement c e   = T.unlines [xmlHeader,ppcElement c e]
+
+-- | Pretty printing elements
+ppcElement         :: ConfigPP -> Element -> Text
+ppcElement c        = TL.toStrict . toLazyText . ppElementS c mempty
+
+-- | Pretty printing content
+ppcContent         :: ConfigPP -> Content -> Text
+ppcContent c        = TL.toStrict . toLazyText . ppContentS c mempty
+
+ppcCData           :: ConfigPP -> CData -> Text
+ppcCData c         = TL.toStrict . toLazyText . ppCDataS c mempty
+
+type Indent = Builder
+
+-- | Pretty printing content using ShowT
+ppContentS         :: ConfigPP -> Indent -> Content -> Builder
+ppContentS c i x = case x of
+                     Elem e -> ppElementS c i e
+                     Text t -> ppCDataS c i t
+                     CRef r -> showCRefS r
+
+ppElementS         :: ConfigPP -> Indent -> Element -> Builder
+ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <>
+  (case elContent e of
+    [] | "?" `T.isPrefixOf` qName name -> fromText " ?>"
+       | shortEmptyTag c name  -> fromText " />"
+    [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name
+    cs -> singleton '>' <> nl <>
+          mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <>
+          i <> tagEnd name
+      where (nl,sp)  = if prettify c then ("\n","  ") else ("","")
+  )
+  where name = elName e
+
+ppCDataS           :: ConfigPP -> Indent -> CData -> Builder
+ppCDataS c i t     = i <> if cdVerbatim t /= CDataText || not (prettify c)
+                             then showCDataS t
+                             else foldr cons mempty (T.unpack (showCData t))
+  where cons         :: Char -> Builder -> Builder
+        cons '\n' ys  = singleton '\n' <> i <> ys
+        cons y ys     = singleton y <> ys
+
+
+
+--------------------------------------------------------------------------------
+
+-- | Adds the <?xml?> header.
+showTopElement     :: Element -> Text
+showTopElement c    = xmlHeader <> showElement c
+
+showContent        :: Content -> Text
+showContent         = ppcContent defaultConfigPP
+
+showElement        :: Element -> Text
+showElement         = ppcElement defaultConfigPP
+
+showCData          :: CData -> Text
+showCData           = ppcCData defaultConfigPP
+
+-- Note: crefs should not contain '&', ';', etc.
+showCRefS          :: Text -> Builder
+showCRefS r         = singleton '&' <> fromText r <> singleton ';'
+
+-- | Convert a text element to characters.
+showCDataS         :: CData -> Builder
+showCDataS cd =
+ case cdVerbatim cd of
+   CDataText     -> escStr (cdData cd)
+   CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <>
+                    fromText "]]>"
+   CDataRaw      -> fromText (cdData cd)
+
+--------------------------------------------------------------------------------
+escCData           :: Text -> Builder
+escCData t
+  | "]]>" `T.isPrefixOf` t =
+     fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t)
+escCData t
+  = case T.uncons t of
+      Nothing     -> mempty
+      Just (c,t') -> singleton c <> escCData t'
+
+escChar            :: Char -> Builder
+escChar c = case c of
+  '<'   -> fromText "&lt;"
+  '>'   -> fromText "&gt;"
+  '&'   -> fromText "&amp;"
+  '"'   -> fromText "&quot;"
+  -- we use &#39 instead of &apos; because IE apparently has difficulties
+  -- rendering &apos; in xhtml.
+  -- Reported by Rohan Drape <rohan.drape@gmail.com>.
+  '\''  -> fromText "&#39;"
+  _     -> singleton c
+
+  {- original xml-light version:
+  -- NOTE: We escape '\r' explicitly because otherwise they get lost
+  -- when parsed back in because of then end-of-line normalization rules.
+  _ | isPrint c || c == '\n' -> singleton c
+    | otherwise -> showText "&#" . showsT oc . singleton ';'
+      where oc = ord c
+  -}
+
+escStr             :: Text -> Builder
+escStr cs          = if T.any needsEscape cs
+                        then mconcat (map escChar (T.unpack cs))
+                        else fromText cs
+ where
+  needsEscape '<' = True
+  needsEscape '>' = True
+  needsEscape '&' = True
+  needsEscape '"' = True
+  needsEscape '\'' = True
+  needsEscape _ = False
+
+tagEnd             :: QName -> Builder
+tagEnd qn           = fromText "</" <> showQName qn <> singleton '>'
+
+tagStart           :: QName -> [Attr] -> Builder
+tagStart qn as      = singleton '<' <> showQName qn <> as_str
+ where as_str       = if null as
+                         then mempty
+                         else mconcat (map showAttr as)
+
+showAttr           :: Attr -> Builder
+showAttr (Attr qn v) = singleton ' ' <> showQName qn <>
+                       singleton '=' <>
+                       singleton '"' <> escStr v <> singleton '"'
+
+showQName          :: QName -> Builder
+showQName q         =
+  case qPrefix q of
+    Nothing -> fromText (qName q)
+    Just p  -> fromText p <> singleton ':' <> fromText (qName q)
diff --git a/src/Text/Pandoc/XML/Light/Proc.hs b/src/Text/Pandoc/XML/Light/Proc.hs
new file mode 100644
index 000000000..838d5af74
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light/Proc.hs
@@ -0,0 +1,138 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XML.Light.Proc
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.XML.Light.Proc
+  ( 
+    -- * Replacement for xml-light's Text.XML.Proc
+    strContent
+  , onlyElems
+  , elChildren
+  , onlyText
+  , findChildren
+  , filterChildren
+  , filterChildrenName
+  , findChild
+  , filterChild
+  , filterChildName
+  , findElement
+  , filterElement
+  , filterElementName
+  , findElements
+  , filterElements
+  , filterElementsName
+  , findAttr
+  , lookupAttr
+  , lookupAttrBy
+  , findAttrBy
+  ) where
+
+import Data.Text (Text)
+import Data.Maybe (listToMaybe)
+import Data.List(find)
+import Text.Pandoc.XML.Light.Types
+
+--
+-- copied from xml-light Text.XML.Proc
+--
+
+-- | Get the text value of an XML element.  This function
+-- ignores non-text elements, and concatenates all text elements.
+strContent         :: Element -> Text
+strContent          = mconcat . map cdData . onlyText . elContent
+
+-- | Select only the elements from a list of XML content.
+onlyElems          :: [Content] -> [Element]
+onlyElems xs        = [ x | Elem x <- xs ]
+
+-- | Select only the elements from a parent.
+elChildren         :: Element -> [Element]
+elChildren e        = [ x | Elem x <- elContent e ]
+
+-- | Select only the text from a list of XML content.
+onlyText           :: [Content] -> [CData]
+onlyText xs         = [ x | Text x <- xs ]
+
+-- | Find all immediate children with the given name.
+findChildren       :: QName -> Element -> [Element]
+findChildren q e    = filterChildren ((q ==) . elName) e
+
+-- | Filter all immediate children wrt a given predicate.
+filterChildren       :: (Element -> Bool) -> Element -> [Element]
+filterChildren p e    = filter p (onlyElems (elContent e))
+
+
+-- | Filter all immediate children wrt a given predicate over their names.
+filterChildrenName      :: (QName -> Bool) -> Element -> [Element]
+filterChildrenName p e   = filter (p.elName) (onlyElems (elContent e))
+
+
+-- | Find an immediate child with the given name.
+findChild          :: QName -> Element -> Maybe Element
+findChild q e       = listToMaybe (findChildren q e)
+
+-- | Find an immediate child with the given name.
+filterChild          :: (Element -> Bool) -> Element -> Maybe Element
+filterChild p e       = listToMaybe (filterChildren p e)
+
+-- | Find an immediate child with name matching a predicate.
+filterChildName      :: (QName -> Bool) -> Element -> Maybe Element
+filterChildName p e   = listToMaybe (filterChildrenName p e)
+
+-- | Find the left-most occurrence of an element matching given name.
+findElement        :: QName -> Element -> Maybe Element
+findElement q e     = listToMaybe (findElements q e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElement        :: (Element -> Bool) -> Element -> Maybe Element
+filterElement p e     = listToMaybe (filterElements p e)
+
+-- | Filter the left-most occurrence of an element wrt. given predicate.
+filterElementName     :: (QName -> Bool) -> Element -> Maybe Element
+filterElementName p e  = listToMaybe (filterElementsName p e)
+
+-- | Find all non-nested occurances of an element.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+findElements       :: QName -> Element -> [Element]
+findElements qn e = filterElementsName (qn==) e
+
+-- | Find all non-nested occurrences of an element wrt. given predicate.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElements       :: (Element -> Bool) -> Element -> [Element]
+filterElements p e
+ | p e        = [e]
+ | otherwise  = concatMap (filterElements p) $ onlyElems $ elContent e
+
+-- | Find all non-nested occurences of an element wrt a predicate over element names.
+-- (i.e., once we have found an element, we do not search
+-- for more occurances among the element's children).
+filterElementsName       :: (QName -> Bool) -> Element -> [Element]
+filterElementsName p e = filterElements (p.elName) e
+
+-- | Lookup the value of an attribute.
+findAttr          :: QName -> Element -> Maybe Text
+findAttr x e       = lookupAttr x (elAttribs e)
+
+-- | Lookup attribute name from list.
+lookupAttr        :: QName -> [Attr] -> Maybe Text
+lookupAttr x       = lookupAttrBy (x ==)
+
+-- | Lookup the first attribute whose name satisfies the given predicate.
+lookupAttrBy       :: (QName -> Bool) -> [Attr] -> Maybe Text
+lookupAttrBy p as   = attrVal `fmap` find (p . attrKey) as
+
+-- | Lookup the value of the first attribute whose name
+-- satisfies the given predicate.
+findAttrBy         :: (QName -> Bool) -> Element -> Maybe Text
+findAttrBy p e      = lookupAttrBy p (elAttribs e)
+
+
diff --git a/src/Text/Pandoc/XML/Light/Types.hs b/src/Text/Pandoc/XML/Light/Types.hs
new file mode 100644
index 000000000..f338da6ea
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light/Types.hs
@@ -0,0 +1,190 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{- |
+   Module      : Text.Pandoc.XML.Light.Types
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+-}
+module Text.Pandoc.XML.Light.Types
+  ( -- * Basic types, duplicating those from xml-light but with Text
+    -- instead of String
+    Line
+  , Content(..)
+  , Element(..)
+  , Attr(..)
+  , CData(..)
+  , CDataKind(..)
+  , QName(..)
+  , Node(..)
+  , unode
+  , unqual
+  , add_attr
+  , add_attrs
+  -- * Conversion functions from xml-light types
+  , fromXLQName
+  , fromXLCData
+  , fromXLElement
+  , fromXLAttr
+  , fromXLContent
+  ) where
+
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import qualified Text.XML.Light as XL
+
+--
+-- type definitions lightly modified from xml-light
+--
+
+-- | A line is an Integer
+type Line     = Integer
+
+-- | XML content
+data Content  = Elem Element
+              | Text CData
+              | CRef Text
+                deriving (Show, Typeable, Data)
+
+-- | XML elements
+data Element  = Element {
+                  elName      :: QName,
+                  elAttribs   :: [Attr],
+                  elContent   :: [Content],
+                  elLine      :: Maybe Line
+                } deriving (Show, Typeable, Data)
+
+-- | XML attributes
+data Attr     = Attr {
+                  attrKey :: QName,
+                  attrVal :: Text
+                } deriving (Eq, Ord, Show, Typeable, Data)
+
+-- | XML CData
+data CData    = CData {
+                  cdVerbatim  :: CDataKind,
+                  cdData      :: Text,
+                  cdLine      :: Maybe Line
+                } deriving (Show, Typeable, Data)
+
+data CDataKind
+ = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
+ | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
+ | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
+   deriving ( Eq, Show, Typeable, Data )
+
+-- | XML qualified names
+data QName    = QName {
+                  qName   :: Text,
+                  qURI    :: Maybe Text,
+                  qPrefix :: Maybe Text
+                } deriving (Show, Typeable, Data)
+
+
+instance Eq QName where
+  q1 == q2  = compare q1 q2 == EQ
+
+instance Ord QName where
+  compare q1 q2 =
+    case compare (qName q1) (qName q2) of
+      EQ  -> case (qURI q1, qURI q2) of
+               (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
+               (u1,u2)           -> compare u1 u2
+      x   -> x
+
+class Node t where
+ node :: QName -> t -> Element
+
+instance Node ([Attr],[Content]) where
+  node n (attrs,cont) = Element { elName     = n
+                                , elAttribs  = attrs
+                                , elContent  = cont
+                                , elLine     = Nothing
+                                }
+
+instance Node [Attr]             where node n as   = node n (as,[]::[Content])
+instance Node Attr               where node n a    = node n [a]
+instance Node ()                 where node n ()   = node n ([]::[Attr])
+
+instance Node [Content]          where node n cs     = node n ([]::[Attr],cs)
+instance Node Content            where node n c      = node n [c]
+instance Node ([Attr],Content)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Content)     where node n (a,c)  = node n ([a],[c])
+
+instance Node ([Attr],[Element]) where
+  node n (as,cs) = node n (as,map Elem cs)
+
+instance Node ([Attr],Element)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Element)     where node n (a,c)  = node n ([a],c)
+instance Node [Element]          where node n es     = node n ([]::[Attr],es)
+instance Node Element            where node n e      = node n [e]
+
+instance Node ([Attr],[CData])   where
+  node n (as,cs) = node n (as,map Text cs)
+
+instance Node ([Attr],CData)     where node n (as,c) = node n (as,[c])
+instance Node (Attr,CData)       where node n (a,c)  = node n ([a],c)
+instance Node [CData]            where node n es     = node n ([]::[Attr],es)
+instance Node CData              where node n e      = node n [e]
+
+instance Node ([Attr],Text)      where
+  node n (as,t) = node n (as, CData { cdVerbatim = CDataText
+                                    , cdData = t
+                                    , cdLine = Nothing })
+
+instance Node (Attr,Text )       where node n (a,t)  = node n ([a],t)
+instance Node Text               where node n t      = node n ([]::[Attr],t)
+
+-- | Create node with unqualified name
+unode :: Node t => Text -> t -> Element
+unode = node . unqual
+
+unqual :: Text -> QName
+unqual x = QName x Nothing Nothing
+
+-- | Add an attribute to an element.
+add_attr :: Attr -> Element -> Element
+add_attr a e = add_attrs [a] e
+
+-- | Add some attributes to an element.
+add_attrs :: [Attr] -> Element -> Element
+add_attrs as e = e { elAttribs = as ++ elAttribs e }
+
+--
+-- conversion from xml-light
+--
+
+fromXLQName :: XL.QName -> QName
+fromXLQName qn = QName { qName = T.pack $ XL.qName qn
+                       , qURI = T.pack <$> XL.qURI qn
+                       , qPrefix = T.pack <$> XL.qPrefix qn }
+
+fromXLCData :: XL.CData -> CData
+fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of
+                                        XL.CDataText -> CDataText
+                                        XL.CDataVerbatim -> CDataVerbatim
+                                        XL.CDataRaw -> CDataRaw
+                       , cdData = T.pack $ XL.cdData cd
+                       , cdLine = XL.cdLine cd }
+
+fromXLElement :: XL.Element -> Element
+fromXLElement el = Element { elName = fromXLQName $ XL.elName el
+                           , elAttribs = map fromXLAttr $ XL.elAttribs el
+                           , elContent = map fromXLContent $ XL.elContent el
+                           , elLine = XL.elLine el }
+
+fromXLAttr :: XL.Attr -> Attr
+fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s)
+
+fromXLContent :: XL.Content -> Content
+fromXLContent (XL.Elem el) = Elem $ fromXLElement el
+fromXLContent (XL.Text cd) = Text $ fromXLCData cd
+fromXLContent (XL.CRef s)  = CRef (T.pack s)
+
+
-- 
cgit v1.2.3


From 4af378702ae31d4c8a11d0c827a5986f54b5e310 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Feb 2021 18:44:38 -0800
Subject: Add orig copyright/license info for code derived from xml-light.

---
 src/Text/Pandoc/XML/Light/Output.hs | 6 +++++-
 src/Text/Pandoc/XML/Light/Proc.hs   | 4 +++-
 src/Text/Pandoc/XML/Light/Types.hs  | 5 ++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/XML/Light/Output.hs b/src/Text/Pandoc/XML/Light/Output.hs
index dc94ce914..8182ef2ec 100644
--- a/src/Text/Pandoc/XML/Light/Output.hs
+++ b/src/Text/Pandoc/XML/Light/Output.hs
@@ -2,12 +2,16 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.XML.Light.Output
-   Copyright   : Copyright (C) 2021 John MacFarlane
+   Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
+
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
    Portability : portable
+
+   This code is based on code from xml-light, released under the BSD3 license.
+   We use a text Builder instead of ShowS.
 -}
 module Text.Pandoc.XML.Light.Output
   ( -- * Replacement for xml-light's Text.XML.Output
diff --git a/src/Text/Pandoc/XML/Light/Proc.hs b/src/Text/Pandoc/XML/Light/Proc.hs
index 838d5af74..b53c4b545 100644
--- a/src/Text/Pandoc/XML/Light/Proc.hs
+++ b/src/Text/Pandoc/XML/Light/Proc.hs
@@ -2,12 +2,14 @@
 {-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.XML.Light.Proc
-   Copyright   : Copyright (C) 2021 John MacFarlane
+   Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
    Portability : portable
+
+   This code is taken from xml-light, released under the BSD3 license.
 -}
 module Text.Pandoc.XML.Light.Proc
   ( 
diff --git a/src/Text/Pandoc/XML/Light/Types.hs b/src/Text/Pandoc/XML/Light/Types.hs
index f338da6ea..03fdc2e4d 100644
--- a/src/Text/Pandoc/XML/Light/Types.hs
+++ b/src/Text/Pandoc/XML/Light/Types.hs
@@ -2,13 +2,16 @@
 {-# LANGUAGE DeriveDataTypeable #-}
 {- |
    Module      : Text.Pandoc.XML.Light.Types
-   Copyright   : Copyright (C) 2021 John MacFarlane
+   Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
    Portability : portable
 
+   This code is taken from xml-light, released under the BSD3 license.
+   It has been modified to use Text instead of String, and the fromXL*
+   functions have been added.
 -}
 module Text.Pandoc.XML.Light.Types
   ( -- * Basic types, duplicating those from xml-light but with Text
-- 
cgit v1.2.3


From d8fc4971868104274881570ce9bc3d9edf0d2506 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Feb 2021 18:51:41 -0800
Subject: Add T.P.XML.Light.Cursor.

---
 pandoc.cabal                        |   1 +
 src/Text/Pandoc/XML/Light/Cursor.hs | 346 ++++++++++++++++++++++++++++++++++++
 2 files changed, 347 insertions(+)
 create mode 100644 src/Text/Pandoc/XML/Light/Cursor.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index d27520ba0..61bf0b51d 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -689,6 +689,7 @@ library
                    Text.Pandoc.Lua.Walk,
                    Text.Pandoc.XML.Light,
                    Text.Pandoc.XML.Light.Types,
+                   Text.Pandoc.XML.Light.Cursor,
                    Text.Pandoc.XML.Light.Proc,
                    Text.Pandoc.XML.Light.Output,
                    Text.Pandoc.CSS,
diff --git a/src/Text/Pandoc/XML/Light/Cursor.hs b/src/Text/Pandoc/XML/Light/Cursor.hs
new file mode 100644
index 000000000..2e6da5346
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light/Cursor.hs
@@ -0,0 +1,346 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XML.Light.Cursor
+   Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+   This code is taken from xml-light, released under the BSD3 license.
+-}
+module Text.Pandoc.XML.Light.Cursor
+  ( -- * Replacement for xml-light's Text.XML.Cursor
+    Tag(..)
+  , getTag
+  , setTag
+  , fromTag
+  , Cursor(..)
+  , Path
+
+  -- * Conversions
+  , fromContent
+  , fromElement
+  , fromForest
+  , toForest
+  , toTree
+
+  -- * Moving around
+  , parent
+  , root
+  , getChild
+  , firstChild
+  , lastChild
+  , left
+  , right
+  , nextDF
+
+  -- ** Searching
+  , findChild
+  , findLeft
+  , findRight
+  , findRec
+
+  -- * Node classification
+  , isRoot
+  , isFirst
+  , isLast
+  , isLeaf
+  , isChild
+  , hasChildren
+  , getNodeIndex
+
+  -- * Updates
+  , setContent
+  , modifyContent
+  , modifyContentM
+
+  -- ** Inserting content
+  , insertLeft
+  , insertRight
+  , insertGoLeft
+  , insertGoRight
+
+  -- ** Removing content
+  , removeLeft
+  , removeRight
+  , removeGoLeft
+  , removeGoRight
+  , removeGoUp
+
+  ) where
+
+import Text.Pandoc.XML.Light.Types
+import Data.Maybe(isNothing)
+import Control.Monad(mplus)
+
+data Tag = Tag { tagName    :: QName
+               , tagAttribs :: [Attr]
+               , tagLine    :: Maybe Line
+               } deriving (Show)
+
+getTag :: Element -> Tag
+getTag e = Tag { tagName = elName e
+               , tagAttribs = elAttribs e
+               , tagLine = elLine e
+               }
+
+setTag :: Tag -> Element -> Element
+setTag t e = fromTag t (elContent e)
+
+fromTag :: Tag -> [Content] -> Element
+fromTag t cs = Element { elName    = tagName t
+                       , elAttribs = tagAttribs t
+                       , elLine    = tagLine t
+                       , elContent = cs
+                       }
+
+type Path = [([Content],Tag,[Content])]
+
+-- | The position of a piece of content in an XML document.
+data Cursor = Cur
+  { current :: Content      -- ^ The currently selected content.
+  , lefts   :: [Content]    -- ^ Siblings on the left, closest first.
+  , rights  :: [Content]    -- ^ Siblings on the right, closest first.
+  , parents :: Path -- ^ The contexts of the parent elements of this location.
+  } deriving (Show)
+
+-- Moving around ---------------------------------------------------------------
+
+-- | The parent of the given location.
+parent :: Cursor -> Maybe Cursor
+parent loc =
+  case parents loc of
+    (pls,v,prs) : ps -> Just
+      Cur { current = Elem
+                    (fromTag v
+                    (combChildren (lefts loc) (current loc) (rights loc)))
+          , lefts = pls, rights = prs, parents = ps
+          }
+    [] -> Nothing
+
+
+-- | The top-most parent of the given location.
+root :: Cursor -> Cursor
+root loc = maybe loc root (parent loc)
+
+-- | The left sibling of the given location.
+left :: Cursor -> Maybe Cursor
+left loc =
+  case lefts loc of
+    t : ts -> Just loc { current = t, lefts = ts
+                                    , rights = current loc : rights loc }
+    []     -> Nothing
+
+-- | The right sibling of the given location.
+right :: Cursor -> Maybe Cursor
+right loc =
+  case rights loc of
+    t : ts -> Just loc { current = t, lefts = current loc : lefts loc
+                                    , rights = ts }
+    []     -> Nothing
+
+-- | The first child of the given location.
+firstChild :: Cursor -> Maybe Cursor
+firstChild loc =
+  do (t : ts, ps) <- downParents loc
+     return Cur { current = t, lefts = [], rights = ts , parents = ps }
+
+-- | The last child of the given location.
+lastChild :: Cursor -> Maybe Cursor
+lastChild loc =
+  do (ts, ps) <- downParents loc
+     case reverse ts of
+       l : ls -> return Cur { current = l, lefts = ls, rights = []
+                                                     , parents = ps }
+       [] -> Nothing
+
+-- | Find the next left sibling that satisfies a predicate.
+findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findLeft p loc = do loc1 <- left loc
+                    if p loc1 then return loc1 else findLeft p loc1
+
+-- | Find the next right sibling that satisfies a predicate.
+findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findRight p loc = do loc1 <- right loc
+                     if p loc1 then return loc1 else findRight p loc1
+
+-- | The first child that satisfies a predicate.
+findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findChild p loc =
+  do loc1 <- firstChild loc
+     if p loc1 then return loc1 else findRight p loc1
+
+-- | The next position in a left-to-right depth-first traversal of a document:
+-- either the first child, right sibling, or the right sibling of a parent that
+-- has one.
+nextDF :: Cursor -> Maybe Cursor
+nextDF c = firstChild c `mplus` up c
+  where up x = right x `mplus` (up =<< parent x)
+
+-- | Perform a depth first search for a descendant that satisfies the
+-- given predicate.
+findRec :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
+findRec p c = if p c then Just c else findRec p =<< nextDF c
+
+-- | The child with the given index (starting from 0).
+getChild :: Int -> Cursor -> Maybe Cursor
+getChild n loc =
+  do (ts,ps) <- downParents loc
+     (ls,t,rs) <- splitChildren ts n
+     return Cur { current = t, lefts = ls, rights = rs, parents = ps }
+
+
+-- | private: computes the parent for "down" operations.
+downParents :: Cursor -> Maybe ([Content], Path)
+downParents loc =
+  case current loc of
+    Elem e -> Just ( elContent e
+                   , (lefts loc, getTag e, rights loc) : parents loc
+                   )
+    _      -> Nothing
+
+-- Conversions -----------------------------------------------------------------
+
+-- | A cursor for the given content.
+fromContent :: Content -> Cursor
+fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] }
+
+-- | A cursor for the given element.
+fromElement :: Element -> Cursor
+fromElement e = fromContent (Elem e)
+
+-- | The location of the first tree in a forest.
+fromForest :: [Content] -> Maybe Cursor
+fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts
+                                                      , parents = [] }
+fromForest []     = Nothing
+
+-- | Computes the tree containing this location.
+toTree :: Cursor -> Content
+toTree loc = current (root loc)
+
+-- | Computes the forest containing this location.
+toForest :: Cursor -> [Content]
+toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r)
+
+
+-- Queries ---------------------------------------------------------------------
+
+-- | Are we at the top of the document?
+isRoot :: Cursor -> Bool
+isRoot loc = null (parents loc)
+
+-- | Are we at the left end of the the document?
+isFirst :: Cursor -> Bool
+isFirst loc = null (lefts loc)
+
+-- | Are we at the right end of the document?
+isLast :: Cursor -> Bool
+isLast loc = null (rights loc)
+
+-- | Are we at the bottom of the document?
+isLeaf :: Cursor -> Bool
+isLeaf loc = isNothing (downParents loc)
+
+-- | Do we have a parent?
+isChild :: Cursor -> Bool
+isChild loc = not (isRoot loc)
+
+-- | Get the node index inside the sequence of children
+getNodeIndex :: Cursor -> Int
+getNodeIndex loc = length (lefts loc)
+
+-- | Do we have children?
+hasChildren :: Cursor -> Bool
+hasChildren loc = not (isLeaf loc)
+
+
+
+-- Updates ---------------------------------------------------------------------
+
+-- | Change the current content.
+setContent :: Content -> Cursor -> Cursor
+setContent t loc = loc { current = t }
+
+-- | Modify the current content.
+modifyContent :: (Content -> Content) -> Cursor -> Cursor
+modifyContent f loc = setContent (f (current loc)) loc
+
+-- | Modify the current content, allowing for an effect.
+modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor
+modifyContentM f loc = do x <- f (current loc)
+                          return (setContent x loc)
+
+-- | Insert content to the left of the current position.
+insertLeft :: Content -> Cursor -> Cursor
+insertLeft t loc = loc { lefts = t : lefts loc }
+
+-- | Insert content to the right of the current position.
+insertRight :: Content -> Cursor -> Cursor
+insertRight t loc = loc { rights = t : rights loc }
+
+-- | Remove the content on the left of the current position, if any.
+removeLeft :: Cursor -> Maybe (Content,Cursor)
+removeLeft loc = case lefts loc of
+                   l : ls -> return (l,loc { lefts = ls })
+                   [] -> Nothing
+
+-- | Remove the content on the right of the current position, if any.
+removeRight :: Cursor -> Maybe (Content,Cursor)
+removeRight loc = case rights loc of
+                    l : ls -> return (l,loc { rights = ls })
+                    [] -> Nothing
+
+
+-- | Insert content to the left of the current position.
+-- The new content becomes the current position.
+insertGoLeft :: Content -> Cursor -> Cursor
+insertGoLeft t loc = loc { current = t, rights = current loc : rights loc }
+
+-- | Insert content to the right of the current position.
+-- The new content becomes the current position.
+insertGoRight :: Content -> Cursor -> Cursor
+insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc }
+
+-- | Remove the current element.
+-- The new position is the one on the left.
+removeGoLeft :: Cursor -> Maybe Cursor
+removeGoLeft loc = case lefts loc of
+                     l : ls -> Just loc { current = l, lefts = ls }
+                     []     -> Nothing
+
+-- | Remove the current element.
+-- The new position is the one on the right.
+removeGoRight :: Cursor -> Maybe Cursor
+removeGoRight loc = case rights loc of
+                     l : ls -> Just loc { current = l, rights = ls }
+                     []     -> Nothing
+
+-- | Remove the current element.
+-- The new position is the parent of the old position.
+removeGoUp :: Cursor -> Maybe Cursor
+removeGoUp loc =
+  case parents loc of
+    (pls,v,prs) : ps -> Just
+      Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc))
+          , lefts = pls, rights = prs, parents = ps
+          }
+    [] -> Nothing
+
+
+-- | private: Gets the given element of a list.
+-- Also returns the preceding elements (reversed) and the following elements.
+splitChildren :: [a] -> Int -> Maybe ([a],a,[a])
+splitChildren _ n | n < 0 = Nothing
+splitChildren cs pos = loop [] cs pos
+  where loop acc (x:xs) 0 = Just (acc,x,xs)
+        loop acc (x:xs) n = loop (x:acc) xs $! n-1
+        loop _ _ _        = Nothing
+
+-- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys
+combChildren :: [a] -> a -> [a] -> [a]
+combChildren ls t rs = foldl (flip (:)) (t:rs) ls
+
-- 
cgit v1.2.3


From 80a1d5c9b60b676ba7b7e6ed0267197c8f0ec459 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Feb 2021 19:18:01 -0800
Subject: Revert "Add T.P.XML.Light.Cursor."

This reverts commit d8fc4971868104274881570ce9bc3d9edf0d2506.
---
 pandoc.cabal                        |   1 -
 src/Text/Pandoc/XML/Light/Cursor.hs | 346 ------------------------------------
 2 files changed, 347 deletions(-)
 delete mode 100644 src/Text/Pandoc/XML/Light/Cursor.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 61bf0b51d..d27520ba0 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -689,7 +689,6 @@ library
                    Text.Pandoc.Lua.Walk,
                    Text.Pandoc.XML.Light,
                    Text.Pandoc.XML.Light.Types,
-                   Text.Pandoc.XML.Light.Cursor,
                    Text.Pandoc.XML.Light.Proc,
                    Text.Pandoc.XML.Light.Output,
                    Text.Pandoc.CSS,
diff --git a/src/Text/Pandoc/XML/Light/Cursor.hs b/src/Text/Pandoc/XML/Light/Cursor.hs
deleted file mode 100644
index 2e6da5346..000000000
--- a/src/Text/Pandoc/XML/Light/Cursor.hs
+++ /dev/null
@@ -1,346 +0,0 @@
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.XML.Light.Cursor
-   Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-   This code is taken from xml-light, released under the BSD3 license.
--}
-module Text.Pandoc.XML.Light.Cursor
-  ( -- * Replacement for xml-light's Text.XML.Cursor
-    Tag(..)
-  , getTag
-  , setTag
-  , fromTag
-  , Cursor(..)
-  , Path
-
-  -- * Conversions
-  , fromContent
-  , fromElement
-  , fromForest
-  , toForest
-  , toTree
-
-  -- * Moving around
-  , parent
-  , root
-  , getChild
-  , firstChild
-  , lastChild
-  , left
-  , right
-  , nextDF
-
-  -- ** Searching
-  , findChild
-  , findLeft
-  , findRight
-  , findRec
-
-  -- * Node classification
-  , isRoot
-  , isFirst
-  , isLast
-  , isLeaf
-  , isChild
-  , hasChildren
-  , getNodeIndex
-
-  -- * Updates
-  , setContent
-  , modifyContent
-  , modifyContentM
-
-  -- ** Inserting content
-  , insertLeft
-  , insertRight
-  , insertGoLeft
-  , insertGoRight
-
-  -- ** Removing content
-  , removeLeft
-  , removeRight
-  , removeGoLeft
-  , removeGoRight
-  , removeGoUp
-
-  ) where
-
-import Text.Pandoc.XML.Light.Types
-import Data.Maybe(isNothing)
-import Control.Monad(mplus)
-
-data Tag = Tag { tagName    :: QName
-               , tagAttribs :: [Attr]
-               , tagLine    :: Maybe Line
-               } deriving (Show)
-
-getTag :: Element -> Tag
-getTag e = Tag { tagName = elName e
-               , tagAttribs = elAttribs e
-               , tagLine = elLine e
-               }
-
-setTag :: Tag -> Element -> Element
-setTag t e = fromTag t (elContent e)
-
-fromTag :: Tag -> [Content] -> Element
-fromTag t cs = Element { elName    = tagName t
-                       , elAttribs = tagAttribs t
-                       , elLine    = tagLine t
-                       , elContent = cs
-                       }
-
-type Path = [([Content],Tag,[Content])]
-
--- | The position of a piece of content in an XML document.
-data Cursor = Cur
-  { current :: Content      -- ^ The currently selected content.
-  , lefts   :: [Content]    -- ^ Siblings on the left, closest first.
-  , rights  :: [Content]    -- ^ Siblings on the right, closest first.
-  , parents :: Path -- ^ The contexts of the parent elements of this location.
-  } deriving (Show)
-
--- Moving around ---------------------------------------------------------------
-
--- | The parent of the given location.
-parent :: Cursor -> Maybe Cursor
-parent loc =
-  case parents loc of
-    (pls,v,prs) : ps -> Just
-      Cur { current = Elem
-                    (fromTag v
-                    (combChildren (lefts loc) (current loc) (rights loc)))
-          , lefts = pls, rights = prs, parents = ps
-          }
-    [] -> Nothing
-
-
--- | The top-most parent of the given location.
-root :: Cursor -> Cursor
-root loc = maybe loc root (parent loc)
-
--- | The left sibling of the given location.
-left :: Cursor -> Maybe Cursor
-left loc =
-  case lefts loc of
-    t : ts -> Just loc { current = t, lefts = ts
-                                    , rights = current loc : rights loc }
-    []     -> Nothing
-
--- | The right sibling of the given location.
-right :: Cursor -> Maybe Cursor
-right loc =
-  case rights loc of
-    t : ts -> Just loc { current = t, lefts = current loc : lefts loc
-                                    , rights = ts }
-    []     -> Nothing
-
--- | The first child of the given location.
-firstChild :: Cursor -> Maybe Cursor
-firstChild loc =
-  do (t : ts, ps) <- downParents loc
-     return Cur { current = t, lefts = [], rights = ts , parents = ps }
-
--- | The last child of the given location.
-lastChild :: Cursor -> Maybe Cursor
-lastChild loc =
-  do (ts, ps) <- downParents loc
-     case reverse ts of
-       l : ls -> return Cur { current = l, lefts = ls, rights = []
-                                                     , parents = ps }
-       [] -> Nothing
-
--- | Find the next left sibling that satisfies a predicate.
-findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findLeft p loc = do loc1 <- left loc
-                    if p loc1 then return loc1 else findLeft p loc1
-
--- | Find the next right sibling that satisfies a predicate.
-findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findRight p loc = do loc1 <- right loc
-                     if p loc1 then return loc1 else findRight p loc1
-
--- | The first child that satisfies a predicate.
-findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findChild p loc =
-  do loc1 <- firstChild loc
-     if p loc1 then return loc1 else findRight p loc1
-
--- | The next position in a left-to-right depth-first traversal of a document:
--- either the first child, right sibling, or the right sibling of a parent that
--- has one.
-nextDF :: Cursor -> Maybe Cursor
-nextDF c = firstChild c `mplus` up c
-  where up x = right x `mplus` (up =<< parent x)
-
--- | Perform a depth first search for a descendant that satisfies the
--- given predicate.
-findRec :: (Cursor -> Bool) -> Cursor -> Maybe Cursor
-findRec p c = if p c then Just c else findRec p =<< nextDF c
-
--- | The child with the given index (starting from 0).
-getChild :: Int -> Cursor -> Maybe Cursor
-getChild n loc =
-  do (ts,ps) <- downParents loc
-     (ls,t,rs) <- splitChildren ts n
-     return Cur { current = t, lefts = ls, rights = rs, parents = ps }
-
-
--- | private: computes the parent for "down" operations.
-downParents :: Cursor -> Maybe ([Content], Path)
-downParents loc =
-  case current loc of
-    Elem e -> Just ( elContent e
-                   , (lefts loc, getTag e, rights loc) : parents loc
-                   )
-    _      -> Nothing
-
--- Conversions -----------------------------------------------------------------
-
--- | A cursor for the given content.
-fromContent :: Content -> Cursor
-fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] }
-
--- | A cursor for the given element.
-fromElement :: Element -> Cursor
-fromElement e = fromContent (Elem e)
-
--- | The location of the first tree in a forest.
-fromForest :: [Content] -> Maybe Cursor
-fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts
-                                                      , parents = [] }
-fromForest []     = Nothing
-
--- | Computes the tree containing this location.
-toTree :: Cursor -> Content
-toTree loc = current (root loc)
-
--- | Computes the forest containing this location.
-toForest :: Cursor -> [Content]
-toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r)
-
-
--- Queries ---------------------------------------------------------------------
-
--- | Are we at the top of the document?
-isRoot :: Cursor -> Bool
-isRoot loc = null (parents loc)
-
--- | Are we at the left end of the the document?
-isFirst :: Cursor -> Bool
-isFirst loc = null (lefts loc)
-
--- | Are we at the right end of the document?
-isLast :: Cursor -> Bool
-isLast loc = null (rights loc)
-
--- | Are we at the bottom of the document?
-isLeaf :: Cursor -> Bool
-isLeaf loc = isNothing (downParents loc)
-
--- | Do we have a parent?
-isChild :: Cursor -> Bool
-isChild loc = not (isRoot loc)
-
--- | Get the node index inside the sequence of children
-getNodeIndex :: Cursor -> Int
-getNodeIndex loc = length (lefts loc)
-
--- | Do we have children?
-hasChildren :: Cursor -> Bool
-hasChildren loc = not (isLeaf loc)
-
-
-
--- Updates ---------------------------------------------------------------------
-
--- | Change the current content.
-setContent :: Content -> Cursor -> Cursor
-setContent t loc = loc { current = t }
-
--- | Modify the current content.
-modifyContent :: (Content -> Content) -> Cursor -> Cursor
-modifyContent f loc = setContent (f (current loc)) loc
-
--- | Modify the current content, allowing for an effect.
-modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor
-modifyContentM f loc = do x <- f (current loc)
-                          return (setContent x loc)
-
--- | Insert content to the left of the current position.
-insertLeft :: Content -> Cursor -> Cursor
-insertLeft t loc = loc { lefts = t : lefts loc }
-
--- | Insert content to the right of the current position.
-insertRight :: Content -> Cursor -> Cursor
-insertRight t loc = loc { rights = t : rights loc }
-
--- | Remove the content on the left of the current position, if any.
-removeLeft :: Cursor -> Maybe (Content,Cursor)
-removeLeft loc = case lefts loc of
-                   l : ls -> return (l,loc { lefts = ls })
-                   [] -> Nothing
-
--- | Remove the content on the right of the current position, if any.
-removeRight :: Cursor -> Maybe (Content,Cursor)
-removeRight loc = case rights loc of
-                    l : ls -> return (l,loc { rights = ls })
-                    [] -> Nothing
-
-
--- | Insert content to the left of the current position.
--- The new content becomes the current position.
-insertGoLeft :: Content -> Cursor -> Cursor
-insertGoLeft t loc = loc { current = t, rights = current loc : rights loc }
-
--- | Insert content to the right of the current position.
--- The new content becomes the current position.
-insertGoRight :: Content -> Cursor -> Cursor
-insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc }
-
--- | Remove the current element.
--- The new position is the one on the left.
-removeGoLeft :: Cursor -> Maybe Cursor
-removeGoLeft loc = case lefts loc of
-                     l : ls -> Just loc { current = l, lefts = ls }
-                     []     -> Nothing
-
--- | Remove the current element.
--- The new position is the one on the right.
-removeGoRight :: Cursor -> Maybe Cursor
-removeGoRight loc = case rights loc of
-                     l : ls -> Just loc { current = l, rights = ls }
-                     []     -> Nothing
-
--- | Remove the current element.
--- The new position is the parent of the old position.
-removeGoUp :: Cursor -> Maybe Cursor
-removeGoUp loc =
-  case parents loc of
-    (pls,v,prs) : ps -> Just
-      Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc))
-          , lefts = pls, rights = prs, parents = ps
-          }
-    [] -> Nothing
-
-
--- | private: Gets the given element of a list.
--- Also returns the preceding elements (reversed) and the following elements.
-splitChildren :: [a] -> Int -> Maybe ([a],a,[a])
-splitChildren _ n | n < 0 = Nothing
-splitChildren cs pos = loop [] cs pos
-  where loop acc (x:xs) 0 = Just (acc,x,xs)
-        loop acc (x:xs) n = loop (x:acc) xs $! n-1
-        loop _ _ _        = Nothing
-
--- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys
-combChildren :: [a] -> a -> [a] -> [a]
-combChildren ls t rs = foldl (flip (:)) (t:rs) ls
-
-- 
cgit v1.2.3


From 73add0578989e1da6e9cd1de68e2e4142f789188 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Feb 2021 09:54:39 -0800
Subject: Docx reader: use Map instead of list for Namespaces.

This gives a speedup of about 5-10%.

The reader is now approximately twice as fast as in the last
release.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 14 +++++++-------
 src/Text/Pandoc/Readers/Docx/Util.hs  | 26 +++++++++++++-------------
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index c76f3c171..f8ed248d7 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -396,9 +396,9 @@ archiveToNotes zf =
                >>= parseXMLFromEntry
       enElem = findEntryByPath "word/endnotes.xml" zf
                >>= parseXMLFromEntry
-      fn_namespaces = maybe [] elemToNameSpaces fnElem
-      en_namespaces = maybe [] elemToNameSpaces enElem
-      ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
+      fn_namespaces = maybe mempty elemToNameSpaces fnElem
+      en_namespaces = maybe mempty elemToNameSpaces enElem
+      ns = M.union fn_namespaces en_namespaces
       fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
       en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
   in
@@ -408,7 +408,7 @@ archiveToComments :: Archive -> Comments
 archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
                >>= parseXMLFromEntry
-      cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
+      cmts_namespaces = maybe mempty elemToNameSpaces cmtsElem
       cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
                cmtsElem
   in
@@ -518,7 +518,7 @@ levelElemToLevel _ _ = Nothing
 archiveToNumbering' :: Archive -> Maybe Numbering
 archiveToNumbering' zf =
   case findEntryByPath "word/numbering.xml" zf of
-    Nothing -> Just $ Numbering [] [] []
+    Nothing -> Just $ Numbering mempty [] []
     Just entry -> do
       numberingElem <- parseXMLFromEntry entry
       let namespaces = elemToNameSpaces numberingElem
@@ -530,7 +530,7 @@ archiveToNumbering' zf =
 
 archiveToNumbering :: Archive -> Numbering
 archiveToNumbering archive =
-  fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
+  fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive)
 
 elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
 elemToNotes ns notetype element
@@ -875,7 +875,7 @@ childElemToRun ns element
   = let (title, alt) = getTitleAndAlt ns element
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+                  >>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r"))
     in
      case drawing of
        Just s -> expandDrawingId s >>=
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index 21df03d9e..ac331cba6 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -22,42 +22,42 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , findAttrByName
                                       ) where
 
-import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
 import Text.Pandoc.XML.Light
+import qualified Data.Map as M
 
-type NameSpaces = [(Text, Text)]
+type NameSpaces = M.Map Text Text
 
 elemToNameSpaces :: Element -> NameSpaces
-elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
-
-attrToNSPair :: Attr -> Maybe (Text, Text)
-attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
-attrToNSPair _                                     = Nothing
+elemToNameSpaces = foldr (\(Attr qn val) ->
+                             case qn of
+                               QName s _ (Just "xmlns") -> M.insert s val
+                               _ -> id) mempty . elAttribs
 
 elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
+  QName name (M.lookup prefix ns)
+             (if T.null prefix then Nothing else Just prefix)
 
 isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
-  let ns' = ns ++ elemToNameSpaces element
+  let ns' = ns <> elemToNameSpaces element
   in qName (elName element) == name &&
-     qURI (elName element) == lookup prefix ns'
+     qURI (elName element) == M.lookup prefix ns'
 
 findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
 findChildByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findChild (elemName ns' pref name) el
 
 findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
 findChildrenByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
 findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
-  let ns' = ns ++ elemToNameSpaces el
+  let ns' = ns <> elemToNameSpaces el
   in  findAttr (elemName ns' pref name) el
 
-- 
cgit v1.2.3


From 743f7216de98556a316301ac72b8606bafc2deee Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Thu, 18 Feb 2021 21:53:19 +0100
Subject: Org reader: fix bug in org-ref citation parsing.

The org-ref syntax allows to list multiple citations separated by comma.
This fixes a bug that accepted commas as part of the citation id, so all
citation lists were parsed as one single citation.

Fixes: #7101
---
 src/Text/Pandoc/Readers/Org/Inlines.hs    |  2 +-
 test/Tests/Readers/Org/Inline/Citation.hs | 40 +++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 68c2ba5e0..519a6ce04 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -322,7 +322,7 @@ linkLikeOrgRefCite = try $ do
 -- from the `org-ref-cite-re` variable in `org-ref.el`.
 orgRefCiteKey :: PandocMonad m => OrgParser m Text
 orgRefCiteKey =
-  let citeKeySpecialChars = "-_:\\./," :: String
+  let citeKeySpecialChars = "-_:\\./" :: String
       isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
       isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c
       endOfCitation = try $ do
diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs
index 5538f1ec8..a11804983 100644
--- a/test/Tests/Readers/Org/Inline/Citation.hs
+++ b/test/Tests/Readers/Org/Inline/Citation.hs
@@ -116,6 +116,46 @@ tests =
                      }
       in (para $ cite [citation] "citep:pandoc")
 
+    , "multiple simple citations" =:
+      "citep:picard,riker" =?>
+      let picard = Citation
+                 { citationId = "picard"
+                 , citationPrefix = mempty
+                 , citationSuffix = mempty
+                 , citationMode = NormalCitation
+                 , citationNoteNum = 0
+                 , citationHash = 0
+                 }
+          riker  = Citation
+                 { citationId = "riker"
+                 , citationPrefix = mempty
+                 , citationSuffix = mempty
+                 , citationMode = NormalCitation
+                 , citationNoteNum = 0
+                 , citationHash = 0
+                 }
+      in (para $ cite [picard,riker] "citep:picard,riker")
+
+    , "multiple simple citations succeeded by comma" =:
+      "citep:picard,riker," =?>
+      let picard = Citation
+                 { citationId = "picard"
+                 , citationPrefix = mempty
+                 , citationSuffix = mempty
+                 , citationMode = NormalCitation
+                 , citationNoteNum = 0
+                 , citationHash = 0
+                 }
+          riker  = Citation
+                 { citationId = "riker"
+                 , citationPrefix = mempty
+                 , citationSuffix = mempty
+                 , citationMode = NormalCitation
+                 , citationNoteNum = 0
+                 , citationHash = 0
+                 }
+      in (para $ cite [picard,riker] "citep:picard,riker" <> str ",")
+
     , "extended citation" =:
       "[[citep:Dominik201408][See page 20::, for example]]" =?>
       let citation = Citation
-- 
cgit v1.2.3


From 9e728b40f36d48c687372ad447670186ed415337 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Feb 2021 17:21:22 -0800
Subject: T.P.Shared: cleanup.

Cleanup up some functions and added deprecation pragmas
to funtions no longer used in the code base.
---
 src/Text/Pandoc/Shared.hs | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 6d5d4c97d..a579681b1 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -151,21 +151,22 @@ splitBy :: (a -> Bool) -> [a] -> [[a]]
 splitBy _ [] = []
 splitBy isSep lst =
   let (first, rest) = break isSep lst
-      rest'         = dropWhile isSep rest
-  in  first:splitBy isSep rest'
+  in  first:splitBy isSep (dropWhile isSep rest)
 
+-- | Split text by groups of one or more separator.
 splitTextBy :: (Char -> Bool) -> T.Text -> [T.Text]
 splitTextBy isSep t
   | T.null t = []
   | otherwise = let (first, rest) = T.break isSep t
-                    rest'         = T.dropWhile isSep rest
-                in  first : splitTextBy isSep rest'
+                in  first : splitTextBy isSep (T.dropWhile isSep rest)
 
+{-# DEPRECATED splitByIndices "This function is slated for removal" #-}
 splitByIndices :: [Int] -> [a] -> [[a]]
 splitByIndices [] lst = [lst]
 splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x)  xs) rest
   where (first, rest) = splitAt x lst
 
+{-# DEPRECATED splitStringByIndices "This function is slated for removal" #-}
 -- | Split string into chunks divided at specified indices.
 splitStringByIndices :: [Int] -> [Char] -> [[Char]]
 splitStringByIndices [] lst = [lst]
@@ -173,15 +174,22 @@ splitStringByIndices (x:xs) lst =
   let (first, rest) = splitAt' x lst in
   first : splitStringByIndices (map (\y -> y - x) xs) rest
 
-splitTextByIndices :: [Int] -> T.Text -> [T.Text]
-splitTextByIndices ns = fmap T.pack . splitStringByIndices ns . T.unpack
-
+-- DEPRECATED: can be removed when splitStringByIndices is
 splitAt' :: Int -> [Char] -> ([Char],[Char])
 splitAt' _ []          = ([],[])
 splitAt' n xs | n <= 0 = ([],xs)
 splitAt' n (x:xs)      = (x:ys,zs)
   where (ys,zs) = splitAt' (n - charWidth x) xs
 
+splitTextByIndices :: [Int] -> T.Text -> [T.Text]
+splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns))
+ where
+  splitTextByRelIndices [] t = [t]
+  splitTextByRelIndices (x:xs) t =
+    let (first, rest) = T.splitAt x t
+     in first : splitTextByRelIndices xs rest
+
+{-# DEPRECATED substitute "This function is slated for removal" #-}
 -- | Replace each occurrence of one sublist in a list with another.
 substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
 substitute _ _ [] = []
@@ -254,17 +262,24 @@ notElemText c = T.all (/= c)
 stripTrailingNewlines :: T.Text -> T.Text
 stripTrailingNewlines = T.dropWhileEnd (== '\n')
 
+isWS :: Char -> Bool
+isWS ' '  = True
+isWS '\r' = True
+isWS '\n' = True
+isWS '\t' = True
+isWS _    = False
+
 -- | Remove leading and trailing space (including newlines) from string.
 trim :: T.Text -> T.Text
-trim = T.dropAround (`elemText` " \r\n\t")
+trim = T.dropAround isWS
 
 -- | Remove leading space (including newlines) from string.
 triml :: T.Text -> T.Text
-triml = T.dropWhile (`elemText` " \r\n\t")
+triml = T.dropWhile isWS
 
 -- | Remove trailing space (including newlines) from string.
 trimr :: T.Text -> T.Text
-trimr = T.dropWhileEnd (`elemText` " \r\n\t")
+trimr = T.dropWhileEnd isWS
 
 -- | Trim leading space and trailing space unless after \.
 trimMath :: T.Text -> T.Text
@@ -275,7 +290,7 @@ trimMath = triml . T.reverse . stripBeginSpace . T.reverse -- no Text.spanEnd
       | Just ('\\', _) <- T.uncons suff = T.cons (T.last pref) suff
       | otherwise = suff
       where
-        (pref, suff) = T.span (`elemText` " \t\n\r") t
+        (pref, suff) = T.span isWS t
 
 -- | Strip leading and trailing characters from string
 stripFirstAndLast :: T.Text -> T.Text
-- 
cgit v1.2.3


From 53cf8295a4825297f78e20b6ce73d2da8fd7ef84 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Feb 2021 14:33:56 -0800
Subject: LaTeX writer: adjust hypertargets to beginnings of paragraphs.

Use `\vadjust pre` so that the hypertarget takes you to the
beginning of the paragraph rather than one line down.

Closes #7078.

This makes a particular difference for links to citations
using `--citeproc` and `link-citations: true`.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 93603a26e..e8a187599 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -500,12 +500,13 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
                else blockListToLaTeX bs
   modify $ \st -> st{ stIncremental = oldIncremental }
   linkAnchor' <- hypertarget True identifier empty
-  -- see #2704 for the motivation for adding \leavevmode:
+  -- see #2704 for the motivation for adding \leavevmode
+  -- and #7078 for \vadjust pre
   let linkAnchor =
         case bs of
           Para _ : _
             | not (isEmpty linkAnchor')
-              -> "\\leavevmode" <> linkAnchor' <> "%"
+              -> "\\leavevmode\\vadjust pre{" <> linkAnchor' <> "}%"
           _ -> linkAnchor'
       wrapNotes txt = if beamer && "notes" `elem` classes
                          then "\\note" <> braces txt -- speaker notes
-- 
cgit v1.2.3


From 0f5c56dfb171e27745f4fe8530325223ecefe52a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Feb 2021 17:11:53 -0800
Subject: T.P.PDF: disable `smart` when building PDF via LaTeX.

This is to prevent accidental creation of ligatures like
`` ?` `` and `` !` `` (especially in languages with quotations
like German), and similar ligature issues.

See jgm/citeproc#54.
---
 src/Text/Pandoc/PDF.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 7c0082c29..3f9dd8dad 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -43,6 +43,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocPDFProgramNotFoundError))
 import Text.Pandoc.MIME (getMimeType)
 import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..))
+import Text.Pandoc.Extensions (disableExtension, Extension(Ext_smart))
 import Text.Pandoc.Process (pipeProcess)
 import System.Process (readProcessWithExitCode)
 import Text.Pandoc.Shared (inDirectory, stringify, tshow)
@@ -114,7 +115,10 @@ makePDF program pdfargs writer opts doc =
         runIOorExplode $ do
           putCommonState commonState
           doc' <- handleImages opts tmpdir doc
-          source <- writer opts doc'
+          source <- writer opts{ writerExtensions = -- disable use of quote
+                                    -- ligatures to avoid bad ligatures like ?`
+                                    disableExtension Ext_smart
+                                     (writerExtensions opts) } doc'
           res <- case baseProg of
             "context" -> context2pdf verbosity program pdfargs tmpdir source
             "tectonic" -> tectonic2pdf verbosity program pdfargs tmpdir source
-- 
cgit v1.2.3


From ef642e2bbc1f46056fc27560ceba791f27f2daa6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Feb 2021 18:11:27 -0800
Subject: T.P.XML Improve fromEntities.

---
 src/Text/Pandoc/XML.hs | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index c4e3ed1e7..6dbbce1d2 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -1,6 +1,5 @@
 {-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.XML
    Copyright   : Copyright (C) 2006-2021 John MacFarlane
@@ -123,23 +122,20 @@ html5EntityMap = foldr go mempty htmlEntities
 
 -- Unescapes XML entities
 fromEntities :: Text -> Text
-fromEntities = T.pack . fromEntities'
+fromEntities t
+  = let (x, y) = T.break (== '&') t
+     in if T.null y
+           then t
+           else x <>
+             let (ent, rest) = T.break (\c -> isSpace c || c == ';') y
+                 rest' = case T.uncons rest of
+                            Just (';',ys) -> ys
+                            _ -> rest
+                 ent' = T.drop 1 ent <> ";"
+              in case T.pack <$> lookupEntity (T.unpack ent') of
+                   Just c  -> c <> fromEntities rest'
+                   Nothing  -> ent <> fromEntities rest
 
-fromEntities' :: Text -> String
-fromEntities' (T.uncons -> Just ('&', xs)) =
-  case lookupEntity $ T.unpack ent' of
-        Just c  -> c <> fromEntities' rest
-        Nothing -> "&" <> fromEntities' xs
-    where (ent, rest) = case T.break (\c -> isSpace c || c == ';') xs of
-                          (zs,T.uncons -> Just (';',ys)) -> (zs,ys)
-                          (zs, ys) -> (zs,ys)
-          ent'
-            | Just ys <- T.stripPrefix "#X" ent = "#x" <> ys  -- workaround tagsoup bug
-            | Just ('#', _) <- T.uncons ent     = ent
-            | otherwise                         = ent <> ";"
-fromEntities' t = case T.uncons t of
-  Just (x, xs) -> x : fromEntities' xs
-  Nothing      -> ""
 
 html5Attributes :: Set.Set Text
 html5Attributes = Set.fromList
-- 
cgit v1.2.3


From 98d26c234579a06446a5bef1992ed77bac48a4ac Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Feb 2021 21:24:31 -0800
Subject: DocBook, JATS, OPML readers: performance optimization.

With the new XML parser, we can avoid the expensive tree
normalization step we used to do.

This gives a significant speed boost in docbook and JATS
parsing (e.g. 9.7 to 6 ms).
---
 src/Text/Pandoc/Readers/DocBook.hs | 26 ++++----------------------
 src/Text/Pandoc/Readers/JATS.hs    | 24 +++---------------------
 src/Text/Pandoc/Readers/OPML.hs    | 22 +---------------------
 3 files changed, 8 insertions(+), 64 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index e201b54fe..d38b07864 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
 import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper, isLetter)
+import Data.Char (isSpace, isLetter)
 import Data.Default
 import Data.Either (rights)
 import Data.Foldable (asum)
@@ -540,8 +540,9 @@ instance Default DBState where
 
 readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readDocBook _ inp = do
-  tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $
-            parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp)
+  tree <- either (throwError . PandocXMLError "") return $
+            parseXMLContents
+              (TL.fromStrict . handleInstructions $ crFilter inp)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
@@ -571,25 +572,6 @@ getFigure e = do
   modify $ \st -> st{ dbFigureTitle = mempty, dbFigureId = mempty }
   return res
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 <> s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 <> convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r <> s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 <>
-                                    convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: Text -> Text
-convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e)
-
 -- convenience function to get an attribute value, defaulting to ""
 attrValue :: Text -> Element -> Text
 attrValue attr elt =
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 5353f2001..602f3b4f2 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -54,30 +54,11 @@ instance Default JATSState where
 
 readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readJATS _ inp = do
-  tree <- either (throwError . PandocXMLError "")
-                 (return . normalizeTree) $
+  tree <- either (throwError . PandocXMLError "") return $
             parseXMLContents (TL.fromStrict $ crFilter inp)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 <> s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 <> convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r <> s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: Text -> Text
-convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e)
-
 -- convenience function to get an attribute value, defaulting to ""
 attrValue :: Text -> Element -> Text
 attrValue attr =
@@ -454,7 +435,8 @@ elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> JATS m Inlines
 parseInline (Text (CData _ s _)) = return $ text s
-parseInline (CRef ref) = return . text . convertEntity $ ref
+parseInline (CRef ref) = return $ maybe (text $ T.toUpper ref) (text . T.pack)
+                                $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "italic" -> innerInlines emph
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 184d5a63f..5f2ddb876 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -14,12 +14,10 @@ Conversion of OPML to 'Pandoc' document.
 module Text.Pandoc.Readers.OPML ( readOPML ) where
 import Control.Monad.State.Strict
 import Data.Default
-import Data.Generics
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
-import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
@@ -53,7 +51,7 @@ readOPML opts inp  = do
   (bs, st') <- runStateT
                  (case parseXMLContents (TL.fromStrict (crFilter inp)) of
                      Left msg -> throwError $ PandocXMLError "" msg
-                     Right ns -> mapM parseBlock $ normalizeTree ns)
+                     Right ns -> mapM parseBlock ns)
                  def{ opmlOptions = opts }
   return $
     setTitle (opmlDocTitle st') $
@@ -61,24 +59,6 @@ readOPML opts inp  = do
     setDate (opmlDocDate st') $
     doc $ mconcat bs
 
--- normalize input, consolidating adjacent Text and CRef elements
-normalizeTree :: [Content] -> [Content]
-normalizeTree = everywhere (mkT go)
-  where go :: [Content] -> [Content]
-        go (Text (CData CDataRaw _ _):xs) = xs
-        go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 <> s2) z):xs
-        go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 <> convertEntity r) z):xs
-        go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r <> s1) z):xs
-        go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
-        go xs = xs
-
-convertEntity :: Text -> Text
-convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e))
-
 -- convenience function to get an attribute value, defaulting to ""
 attrValue :: Text -> Element -> Text
 attrValue attr elt =
-- 
cgit v1.2.3


From 13847267e969b634e9c16c15170e7f217d432e8a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 00:07:38 -0800
Subject: HTML reader: efficiency improvements.

Do a lookahead to find the right parser to use.

Benchmarks from 34ms to 23ms, with less allocation.
Also speeds up the epub reader.
---
 src/Text/Pandoc/Readers/HTML.hs | 210 ++++++++++++++++++++++++----------------
 1 file changed, 129 insertions(+), 81 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index cc60b5501..47856d2f7 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -159,29 +159,65 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
 
 block :: PandocMonad m => TagParser m Blocks
 block = do
-  res <- choice
-            [ eSection
-            , eSwitch B.para block
-            , mempty <$ eFootnote
-            , mempty <$ eTOC
-            , mempty <$ eTitlePage
-            , pPara
-            , pHeader
-            , pBlockQuote
-            , pCodeBlock
-            , pList
-            , pHrule
-            , pTable block
-            , pHtml
-            , pHead
-            , pBody
-            , pLineBlock
-            , pDiv
-            , pPlain
-            , pFigure
-            , pIframe
-            , pRawHtmlBlock
-            ]
+  exts <- getOption readerExtensions
+  tag <- lookAhead pAny
+  res <-
+   (case tag of
+    TagOpen name attr ->
+      let type' = fromMaybe "" $
+                     lookup "type" attr <|> lookup "epub:type" attr
+          epubExts = extensionEnabled Ext_epub_html_exts exts
+      in
+      case name of
+        _ | name `elem` sectioningContent
+          , epubExts
+          , "chapter" `T.isInfixOf` type'
+          -> eSection
+        _ | epubExts
+          , type' `elem` ["footnote", "rearnote"]
+          -> mempty <$ eFootnote
+        _ | epubExts
+          , type' == "toc"
+          -> mempty <$ eTOC
+        _ | "titlepage" `T.isInfixOf` type'
+          , name `elem` ("section" : groupingContent)
+          -> mempty <$ eTitlePage
+        "p" -> pPara
+        "h1" -> pHeader
+        "h2" -> pHeader
+        "h3" -> pHeader
+        "h4" -> pHeader
+        "h5" -> pHeader
+        "h6" -> pHeader
+        "blockquote" -> pBlockQuote
+        "pre" -> pCodeBlock
+        "ul" -> pBulletList
+        "ol" -> pOrderedList
+        "dl" -> pDefinitionList
+        "table" -> pTable block
+        "hr" -> pHrule
+        "html" -> pHtml
+        "head" -> pHead
+        "body" -> pBody
+        "div"
+          | extensionEnabled Ext_line_blocks exts
+          , Just "line-block" <- lookup "class" attr
+          -> pLineBlock
+          | otherwise
+          -> pDiv
+        "section" -> pDiv
+        "main" -> pDiv
+        "figure" -> pFigure
+        "iframe" -> pIframe
+        "style" -> pRawHtmlBlock
+        "textarea" -> pRawHtmlBlock
+        "switch"
+          | epubExts
+          -> eSwitch B.para block
+        _ -> mzero
+    _ -> mzero)
+   <|> pPlain
+   <|> pRawHtmlBlock
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
@@ -256,9 +292,6 @@ eTOC = try $ do
   guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc"
   void (pInTags tag block)
 
-pList :: PandocMonad m => TagParser m Blocks
-pList = pBulletList <|> pOrderedList <|> pDefinitionList
-
 pBulletList :: PandocMonad m => TagParser m Blocks
 pBulletList = try $ do
   pSatisfy (matchTagOpen "ul" [])
@@ -369,13 +402,15 @@ pLineBlock = try $ do
             B.toList ils
   return $ B.lineBlock lns
 
+isDivLike :: Text -> Bool
+isDivLike "div"     = True
+isDivLike "section" = True
+isDivLike "main"    = True
+isDivLike _         = False
+
 pDiv :: PandocMonad m => TagParser m Blocks
 pDiv = try $ do
   guardEnabled Ext_native_divs
-  let isDivLike "div"     = True
-      isDivLike "section" = True
-      isDivLike "main"    = True
-      isDivLike _         = False
   TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
   let (ident, classes, kvs) = toAttr attr'
   contents <- pInTags tag block
@@ -544,31 +579,47 @@ tagToText (TagOpen "br" _) = "\n"
 tagToText _                = ""
 
 inline :: PandocMonad m => TagParser m Inlines
-inline = choice
-           [ eNoteref
-           , eSwitch id inline
-           , pTagText
-           , pQ
-           , pEmph
-           , pStrong
-           , pSuperscript
-           , pSubscript
-           , pSpanLike
-           , pSmall
-           , pStrikeout
-           , pUnderline
-           , pLineBreak
-           , pLink
-           , pImage
-           , pSvg
-           , pBdo
-           , pCode
-           , pCodeWithClass [("samp","sample"),("var","variable")]
-           , pSpan
-           , pMath False
-           , pScriptMath
-           , pRawHtmlInline
-           ]
+inline = do
+  exts <- getOption readerExtensions
+  tag <- lookAhead pAny
+  case tag of
+    TagOpen name attr ->
+      case name of
+        "a" | extensionEnabled Ext_epub_html_exts exts
+          , Just "noteref" <- lookup "type" attr <|> lookup "epub:type" attr
+          , Just ('#',_) <- lookup "href" attr >>= T.uncons
+            -> eNoteref
+            | otherwise -> pLink
+        "switch" -> eSwitch id inline
+        "q" -> pQ
+        "em" -> pEmph
+        "i"  -> pEmph
+        "strong" -> pStrong
+        "b" -> pStrong
+        "sup" -> pSuperscript
+        "sub" -> pSubscript
+        "small" -> pSmall
+        "s" -> pStrikeout
+        "strike" -> pStrikeout
+        "del" -> pStrikeout
+        "u" -> pUnderline
+        "ins" -> pUnderline
+        "br" -> pLineBreak
+        "img" -> pImage
+        "svg" -> pSvg
+        "bdo" -> pBdo
+        "code" -> pCode
+        "samp" -> pCodeWithClass "samp" "sample"
+        "var" -> pCodeWithClass "var" "variable"
+        "span" -> pSpan
+        "math" -> pMath False
+        "script"
+          | Just x <- lookup "type" attr
+          , "math/tex" `T.isPrefixOf` x -> pScriptMath
+        _ | name `elem` htmlSpanLikeElements -> pSpanLike
+        _ -> pRawHtmlInline
+    TagText _ -> pTagText
+    _ -> pRawHtmlInline
 
 pSelfClosing :: PandocMonad m
              => (Text -> Bool) -> ([Attribute Text] -> Bool)
@@ -579,27 +630,25 @@ pSelfClosing f g = do
   return open
 
 pQ :: PandocMonad m => TagParser m Inlines
-pQ = choice $ map try [citedQuote, normalQuote]
-  where citedQuote = do
-          tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
-
-          url <- canonicalizeUrl $ fromAttrib "cite" tag
-          let uid = fromMaybe (fromAttrib "name" tag) $
-                       maybeFromAttrib "id" tag
-          let cls = T.words $ fromAttrib "class" tag
-
-          makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
-        normalQuote = do
-          pSatisfy $ tagOpenLit "q" (const True)
-          makeQuote id
-        makeQuote wrapper = do
-          ctx <- asks quoteContext
-          let (constructor, innerContext) = case ctx of
-                        InDoubleQuote -> (B.singleQuoted, InSingleQuote)
-                        _             -> (B.doubleQuoted, InDoubleQuote)
-
-          content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q"))
-          return $ extractSpaces (constructor . wrapper) content
+pQ = do
+  TagOpen _ attrs <- pSatisfy $ tagOpenLit "q" (const True)
+  case lookup "cite" attrs of
+    Just url -> do
+      let uid = fromMaybe mempty $
+                   lookup "name" attrs <> lookup "id" attrs
+      let cls = maybe [] T.words $ lookup "class" attrs
+      url' <- canonicalizeUrl url
+      makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')])
+    Nothing -> makeQuote id
+ where
+  makeQuote wrapper = do
+    ctx <- asks quoteContext
+    let (constructor, innerContext) = case ctx of
+                  InDoubleQuote -> (B.singleQuoted, InSingleQuote)
+                  _             -> (B.doubleQuoted, InDoubleQuote)
+    content <- withQuoteContext innerContext
+                  (mconcat <$> manyTill inline (pCloses "q"))
+    return $ extractSpaces (constructor . wrapper) content
 
 pEmph :: PandocMonad m => TagParser m Inlines
 pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph
@@ -690,13 +739,12 @@ pSvg = do
                    UTF8.toText (encode $ UTF8.fromText rawText)
   return $ B.imageWith (ident,cls,[]) svgData mempty mempty
 
-pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
-pCodeWithClass elemToClass = try $ do
-  let tagTest = flip elem . fmap fst $ elemToClass
-  TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
+pCodeWithClass :: PandocMonad m => Text -> Text -> TagParser m Inlines
+pCodeWithClass name class' = try $ do
+  TagOpen open attr' <- pSatisfy $ tagOpen (== name) (const True)
   result <- manyTill pAny (pCloses open)
   let (ids,cs,kvs) = toAttr attr'
-      cs'          = maybe cs (:cs) . lookup open $ elemToClass
+      cs'          = class' : cs
   return . B.codeWith (ids,cs',kvs) .
     T.unwords . T.lines . innerText $ result
 
-- 
cgit v1.2.3


From 0f955b10b455e9b3d326262d03261f17538a6943 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 18:57:21 -0800
Subject: T.P.Readers.LaTeX.Parsing: improve braced'.

Remove the parameter, have it parse the opening brace,
and make it more efficient.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 313aa6c51..dab4d334e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -680,28 +680,25 @@ grouped parser = try $ do
   -- {{a,b}} should be parsed the same as {a,b}
   try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)
 
-braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
-braced' getTok n =
-  handleEgroup <|> handleBgroup <|> handleOther
-  where handleEgroup = do
-          t <- symbol '}'
-          if n == 1
-             then return []
-             else (t:) <$> braced' getTok (n - 1)
-        handleBgroup = do
-          t <- symbol '{'
-          (t:) <$> braced' getTok (n + 1)
-        handleOther = do
-          t <- getTok
-          (t:) <$> braced' getTok n
+braced' :: PandocMonad m => LP m Tok -> LP m [Tok]
+braced' getTok = symbol '{' *> go (1 :: Int)
+ where
+  go n = do
+    t <- getTok
+    case t of
+      Tok _ Symbol "}"
+        | n > 1     -> (t:) <$> go (n - 1)
+        | otherwise -> return []
+      Tok _ Symbol "{" -> (t:) <$> go (n + 1)
+      _ -> (t:) <$> go n
 
 braced :: PandocMonad m => LP m [Tok]
-braced = symbol '{' *> braced' anyTok 1
+braced = braced' anyTok
 
 -- URLs require special handling, because they can contain %
 -- characters.  So we retonenize comments as we go...
 bracedUrl :: PandocMonad m => LP m [Tok]
-bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1
+bracedUrl = braced' (retokenizeComment >> anyTok)
 
 -- For handling URLs, which allow literal % characters...
 retokenizeComment :: PandocMonad m => LP m ()
-- 
cgit v1.2.3


From 31b8f60ea82d96b370cf4a765c46c18004ff6fa8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 22:03:29 -0800
Subject: LaTeX reader:  avoid macro resolution code if no macros defined.

---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 35 +++++++++++++++++---------------
 1 file changed, 19 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index dab4d334e..fc8542894 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -453,20 +453,24 @@ doMacros = do
       updateState $ \st -> st{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
-doMacros' n inp =
-  case inp of
-     Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
-      Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos name ts
-     Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
-      Tok _ Word name : Tok _ Symbol "}" : ts
-        -> handleMacros n spos ("end" <> name) ts
-     Tok _ (CtrlSeq "expandafter") _ : t : ts
-        -> combineTok t <$> doMacros' n ts
-     Tok spos (CtrlSeq name) _ : ts
-        -> handleMacros n spos name ts
-     _ -> return inp
-   <|> return inp
+doMacros' n inp = do
+  macros <- sMacros <$> getState
+  if M.null macros
+     then return inp
+     else
+       case inp of
+          Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+           Tok _ Word name : Tok _ Symbol "}" : ts
+             -> handleMacros macros n spos name ts
+          Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+           Tok _ Word name : Tok _ Symbol "}" : ts
+             -> handleMacros macros n spos ("end" <> name) ts
+          Tok _ (CtrlSeq "expandafter") _ : t : ts
+             -> combineTok t <$> doMacros' n ts
+          Tok spos (CtrlSeq name) _ : ts
+             -> handleMacros macros n spos name ts
+          _ -> return inp
+        <|> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -507,10 +511,9 @@ doMacros' n inp =
         Tok spos (CtrlSeq x) (txt <> " ") : acc
     addTok _ _ spos t acc = setpos spos t : acc
 
-    handleMacros n' spos name ts = do
+    handleMacros macros n' spos name ts = do
       when (n' > 20)  -- detect macro expansion loops
         $ throwError $ PandocMacroLoop name
-      macros <- sMacros <$> getState
       case M.lookup name macros of
            Nothing -> mzero
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
-- 
cgit v1.2.3


From cec541e54cd947c8032f9148db18104cd1a48783 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 22:14:31 -0800
Subject: LaTeX reader: Another small improvement to macro handling.

---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index fc8542894..953747d2f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -461,16 +461,15 @@ doMacros' n inp = do
        case inp of
           Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
            Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos name ts
+             -> handleMacros macros n spos name ts <|> return inp
           Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
            Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos ("end" <> name) ts
+             -> handleMacros macros n spos ("end" <> name) ts <|> return inp
           Tok _ (CtrlSeq "expandafter") _ : t : ts
              -> combineTok t <$> doMacros' n ts
           Tok spos (CtrlSeq name) _ : ts
-             -> handleMacros macros n spos name ts
+             -> handleMacros macros n spos name ts <|> return inp
           _ -> return inp
-        <|> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
-- 
cgit v1.2.3


From 321343b2cf8a4a75abe1b6713aa40e278ca57997 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 22:49:20 -0800
Subject: HTML reader: small efficiency improvements.

Also, remove exported class NamedTag(..) [API change].
This was just intended to smooth over the transition from String to Text
and is no longer needed.

The functions isInlineTag and isBlockTag are no longer
polymorphic.
---
 src/Text/Pandoc/Readers/HTML.hs | 43 +++++++++++++++++------------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 47856d2f7..50201fe77 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -19,7 +19,6 @@ module Text.Pandoc.Readers.HTML ( readHtml
                                 , htmlInBalanced
                                 , isInlineTag
                                 , isBlockTag
-                                , NamedTag(..)
                                 , isTextTag
                                 , isCommentTag
                                 ) where
@@ -579,9 +578,9 @@ tagToText (TagOpen "br" _) = "\n"
 tagToText _                = ""
 
 inline :: PandocMonad m => TagParser m Inlines
-inline = do
+inline = pTagText <|> do
+  tag <- lookAhead (pSatisfy isInlineTag)
   exts <- getOption readerExtensions
-  tag <- lookAhead pAny
   case tag of
     TagOpen name attr ->
       case name of
@@ -935,27 +934,21 @@ pSpace = many1 (satisfy isSpace) >>= \xs ->
                then return B.softbreak
                else return B.space
 
-class NamedTag a where
-  getTagName :: a -> Maybe Text
-
-instance NamedTag (Tag Text) where
-  getTagName (TagOpen t _) = Just t
-  getTagName (TagClose t)  = Just t
-  getTagName _             = Nothing
+getTagName :: Tag Text -> Maybe Text
+getTagName (TagOpen t _) = Just t
+getTagName (TagClose t)  = Just t
+getTagName _             = Nothing
 
-instance NamedTag (Tag String) where
-  getTagName (TagOpen t _) = Just (T.pack t)
-  getTagName (TagClose t)  = Just (T.pack t)
-  getTagName _             = Nothing
-
-isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
+isInlineTag :: Tag Text -> Bool
 isInlineTag t =
-  isCommentTag t || case getTagName t of
-                           Nothing  -> False
-                           Just x   -> x `Set.notMember` blockTags ||
-                                       T.take 1 x == "?" -- processing instr.
-
-isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
+  isCommentTag t ||
+  case getTagName t of
+    Nothing  -> False
+    Just "script" -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+    Just x   -> x `Set.notMember` blockTags ||
+                T.take 1 x == "?" -- processing instr.
+
+isBlockTag :: Tag Text -> Bool
 isBlockTag t = isBlockTagName || isTagComment t
                  where isBlockTagName =
                          case getTagName t of
@@ -966,10 +959,10 @@ isBlockTag t = isBlockTagName || isTagComment t
                                     || x `Set.member` eitherBlockOrInline
                               Nothing -> False
 
-isTextTag :: Tag a -> Bool
+isTextTag :: Tag Text -> Bool
 isTextTag = tagText (const True)
 
-isCommentTag :: Tag a -> Bool
+isCommentTag :: Tag Text -> Bool
 isCommentTag = tagComment (const True)
 
 --- parsers for use in markdown, textile readers
@@ -1018,7 +1011,7 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
         go n (t:ts') = (t :) <$> go n ts'
         go _ [] = mzero
 
-hasTagWarning :: [Tag a] -> Bool
+hasTagWarning :: [Tag Text] -> Bool
 hasTagWarning (TagWarning _:_) = True
 hasTagWarning _                = False
 
-- 
cgit v1.2.3


From d8ef383692a167c97c67114107878a60d0aee6e1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 23:01:07 -0800
Subject: T.P.Shared: remove some obsolete functions [API change].

Removed:

- `splitByIndices`
- `splitStringByIndicies`
- `substitute`
- `underlineSpan`

None of these are used elsewhere in the code base.
---
 src/Text/Pandoc/Shared.hs | 44 +-------------------------------------------
 1 file changed, 1 insertion(+), 43 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index a579681b1..922df7922 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -21,10 +21,7 @@ module Text.Pandoc.Shared (
                      -- * List processing
                      splitBy,
                      splitTextBy,
-                     splitByIndices,
-                     splitStringByIndices,
                      splitTextByIndices,
-                     substitute,
                      ordNub,
                      findM,
                      -- * Text processing
@@ -74,7 +71,6 @@ module Text.Pandoc.Shared (
                      addMetaField,
                      makeMeta,
                      eastAsianLineBreakFilter,
-                     underlineSpan,
                      htmlSpanLikeElements,
                      splitSentences,
                      filterIpynbOutput,
@@ -113,7 +109,7 @@ import qualified Data.Bifunctor as Bifunctor
 import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                   generalCategory, GeneralCategory(NonSpacingMark,
                   SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
-import Data.List (find, intercalate, intersperse, stripPrefix, sortOn)
+import Data.List (find, intercalate, intersperse, sortOn)
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Monoid (Any (..))
@@ -160,27 +156,6 @@ splitTextBy isSep t
   | otherwise = let (first, rest) = T.break isSep t
                 in  first : splitTextBy isSep (T.dropWhile isSep rest)
 
-{-# DEPRECATED splitByIndices "This function is slated for removal" #-}
-splitByIndices :: [Int] -> [a] -> [[a]]
-splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x)  xs) rest
-  where (first, rest) = splitAt x lst
-
-{-# DEPRECATED splitStringByIndices "This function is slated for removal" #-}
--- | Split string into chunks divided at specified indices.
-splitStringByIndices :: [Int] -> [Char] -> [[Char]]
-splitStringByIndices [] lst = [lst]
-splitStringByIndices (x:xs) lst =
-  let (first, rest) = splitAt' x lst in
-  first : splitStringByIndices (map (\y -> y - x) xs) rest
-
--- DEPRECATED: can be removed when splitStringByIndices is
-splitAt' :: Int -> [Char] -> ([Char],[Char])
-splitAt' _ []          = ([],[])
-splitAt' n xs | n <= 0 = ([],xs)
-splitAt' n (x:xs)      = (x:ys,zs)
-  where (ys,zs) = splitAt' (n - charWidth x) xs
-
 splitTextByIndices :: [Int] -> T.Text -> [T.Text]
 splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns))
  where
@@ -189,16 +164,6 @@ splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns))
     let (first, rest) = T.splitAt x t
      in first : splitTextByRelIndices xs rest
 
-{-# DEPRECATED substitute "This function is slated for removal" #-}
--- | Replace each occurrence of one sublist in a list with another.
-substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
-substitute _ _ [] = []
-substitute [] _ xs = xs
-substitute target replacement lst@(x:xs) =
-    case stripPrefix target lst of
-      Just lst' -> replacement ++ substitute target replacement lst'
-      Nothing   -> x : substitute target replacement xs
-
 ordNub :: (Ord a) => [a] -> [a]
 ordNub l = go Set.empty l
   where
@@ -765,13 +730,6 @@ eastAsianLineBreakFilter = bottomUp go
         go xs
           = xs
 
-{-# DEPRECATED underlineSpan "Use Text.Pandoc.Builder.underline instead" #-}
--- | Builder for underline (deprecated).
--- This probably belongs in Builder.hs in pandoc-types.
--- Will be replaced once Underline is an element.
-underlineSpan :: Inlines -> Inlines
-underlineSpan = B.underline
-
 -- | Set of HTML elements that are represented as Span with a class equal as
 -- the element tag itself.
 htmlSpanLikeElements :: Set.Set T.Text
-- 
cgit v1.2.3


From c0c8865eaac7089b0304c9b21d981f82ea4c2ebd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Feb 2021 23:40:02 -0800
Subject: HTML reader: small performance tweak.

---
 src/Text/Pandoc/Readers/HTML.hs | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 50201fe77..b73c138ab 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -157,11 +157,10 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
           return mempty
 
 block :: PandocMonad m => TagParser m Blocks
-block = do
+block = ((do
+  tag <- lookAhead (pSatisfy isBlockTag)
   exts <- getOption readerExtensions
-  tag <- lookAhead pAny
-  res <-
-   (case tag of
+  case tag of
     TagOpen name attr ->
       let type' = fromMaybe "" $
                      lookup "type" attr <|> lookup "epub:type" attr
@@ -214,11 +213,8 @@ block = do
           | epubExts
           -> eSwitch B.para block
         _ -> mzero
-    _ -> mzero)
-   <|> pPlain
-   <|> pRawHtmlBlock
-  trace (T.take 60 $ tshow $ B.toList res)
-  return res
+    _ -> mzero) <|> pPlain <|> pRawHtmlBlock) >>= \res ->
+        res <$ trace (T.take 60 $ tshow $ B.toList res)
 
 namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
 namespaces = [(mathMLNamespace, pMath True)]
-- 
cgit v1.2.3


From f43cb5ddcf56ab9387b24ad55c2c30eceb606fad Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 10:26:48 -0800
Subject: LaTeX reader: further performance optimization.

Avoid unnecessary 'doMacros'.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 42 +++++++++++++++-----------------
 1 file changed, 19 insertions(+), 23 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 953747d2f..20311651b 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -446,30 +446,25 @@ satisfyTok f = do
 
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
-  expanded <- sExpanded <$> getState
-  verbatimMode <- sVerbatimMode <$> getState
-  unless (expanded || verbatimMode) $ do
-      getInput >>= doMacros' 1 >>= setInput
-      updateState $ \st -> st{ sExpanded = True }
+  st <- getState
+  unless (sExpanded st || sVerbatimMode st || M.null (sMacros st)) $ do
+    getInput >>= doMacros' 1 >>= setInput
+    updateState $ \s -> s{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
-doMacros' n inp = do
-  macros <- sMacros <$> getState
-  if M.null macros
-     then return inp
-     else
-       case inp of
-          Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
-           Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos name ts <|> return inp
-          Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
-           Tok _ Word name : Tok _ Symbol "}" : ts
-             -> handleMacros macros n spos ("end" <> name) ts <|> return inp
-          Tok _ (CtrlSeq "expandafter") _ : t : ts
-             -> combineTok t <$> doMacros' n ts
-          Tok spos (CtrlSeq name) _ : ts
-             -> handleMacros macros n spos name ts <|> return inp
-          _ -> return inp
+doMacros' n inp =
+  case inp of
+     Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" :
+      Tok _ Word name : Tok _ Symbol "}" : ts
+        -> handleMacros n spos name ts <|> return inp
+     Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" :
+      Tok _ Word name : Tok _ Symbol "}" : ts
+        -> handleMacros n spos ("end" <> name) ts <|> return inp
+     Tok _ (CtrlSeq "expandafter") _ : t : ts
+        -> combineTok t <$> doMacros' n ts
+     Tok spos (CtrlSeq name) _ : ts
+        -> handleMacros n spos name ts <|> return inp
+     _ -> return inp
 
   where
     combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts)
@@ -510,9 +505,10 @@ doMacros' n inp = do
         Tok spos (CtrlSeq x) (txt <> " ") : acc
     addTok _ _ spos t acc = setpos spos t : acc
 
-    handleMacros macros n' spos name ts = do
+    handleMacros n' spos name ts = do
       when (n' > 20)  -- detect macro expansion loops
         $ throwError $ PandocMacroLoop name
+      macros <- sMacros <$> getState
       case M.lookup name macros of
            Nothing -> mzero
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
-- 
cgit v1.2.3


From db4f8823152578d199691f8084fddcc0f04f679b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 11:23:04 -0800
Subject: LaTeX reader: removed sExpanded in state.

This isn't actually needed and checking it doesn't change
anything.

Also remove an unnecessary `doMacros` before `satisfyTok`,
which does it anyway.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 20311651b..0e098d9d9 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -155,7 +155,6 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sLabels        :: M.Map Text [Inline]
                             , sHasChapters   :: Bool
                             , sToggles       :: M.Map Text Bool
-                            , sExpanded      :: Bool
                             , sFileContents  :: M.Map Text Text
                             , sEnableWithRaw :: Bool
                             , sRawTokens     :: IntMap.IntMap [Tok]
@@ -183,7 +182,6 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sLabels        = M.empty
                               , sHasChapters   = False
                               , sToggles       = M.empty
-                              , sExpanded      = False
                               , sFileContents  = M.empty
                               , sEnableWithRaw = True
                               , sRawTokens     = IntMap.empty
@@ -256,7 +254,6 @@ rawLaTeXParser toks retokenize parser valParser = do
        Right toks' -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
-                                        doMacros
                                         ts <- many (satisfyTok (const True))
                                         setInput ts
                                       rawparser)
@@ -432,8 +429,7 @@ satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sExpanded = False
-                           , sRawTokens =
+    updateState $ \st -> st{ sRawTokens =
                               if sEnableWithRaw st
                                  then IntMap.map (res:) $ sRawTokens st
                                  else sRawTokens st }
@@ -447,9 +443,8 @@ satisfyTok f = do
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
   st <- getState
-  unless (sExpanded st || sVerbatimMode st || M.null (sMacros st)) $ do
+  unless (sVerbatimMode st || M.null (sMacros st)) $ do
     getInput >>= doMacros' 1 >>= setInput
-    updateState $ \s -> s{ sExpanded = True }
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
 doMacros' n inp =
-- 
cgit v1.2.3


From 2b37ed9f212b711a6381dc89ff7d3431d7c5d916 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Feb 2021 11:29:38 -0800
Subject: LaTeX reader: further optimizations in satisfyTok.

Benchmarks show 2/3 of the run time and 2/3 of the allocation
of the Feb. 10 benchmarks.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 0e098d9d9..c2e10570d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -429,11 +429,11 @@ satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok
 satisfyTok f = do
     doMacros -- apply macros on remaining input stream
     res <- tokenPrim (T.unpack . untoken) updatePos matcher
-    updateState $ \st -> st{ sRawTokens =
-                              if sEnableWithRaw st
-                                 then IntMap.map (res:) $ sRawTokens st
-                                 else sRawTokens st }
-    return res
+    updateState $ \st ->
+      if sEnableWithRaw st
+         then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st }
+         else st
+    return $! res
   where matcher t | f t       = Just t
                   | otherwise = Nothing
         updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
-- 
cgit v1.2.3


From 80fde18fb1d983b938476ed5b3771ed5d6158d44 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 22 Feb 2021 11:30:07 -0800
Subject: Text.Pandoc.UTF8: change IO functions to return Text, not String.

[API change] This affects `readFile`, `getContents`, `writeFileWith`,
`writeFile`, `putStrWith`, `putStr`, `putStrLnWith`, `putStrLn`.
`hPutStrWith`, `hPutStr`, `hPutStrLnWith`, `hPutStrLn`, `hGetContents`.

This avoids the need to uselessly create a linked list of characters
when emiting output.
---
 src/Text/Pandoc/App.hs                    | 10 ++++----
 src/Text/Pandoc/App/CommandLineOptions.hs | 38 +++++++++++++++-------------
 src/Text/Pandoc/App/OutputSettings.hs     |  4 +--
 src/Text/Pandoc/Class/IO.hs               |  6 ++---
 src/Text/Pandoc/Error.hs                  |  2 +-
 src/Text/Pandoc/PDF.hs                    | 25 +++++++++---------
 src/Text/Pandoc/Parsing.hs                |  2 +-
 src/Text/Pandoc/UTF8.hs                   | 42 +++++++++++++++----------------
 test/Tests/Command.hs                     |  2 +-
 test/Tests/Old.hs                         |  5 ++--
 test/Tests/Readers/LaTeX.hs               |  2 +-
 11 files changed, 70 insertions(+), 68 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 6a071ad5a..63996828e 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -73,8 +73,9 @@ convertWithOpts opts = do
   let verbosity = optVerbosity opts
 
   when (optDumpArgs opts) $
-    do UTF8.hPutStrLn stdout outputFile
-       mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts)
+    do UTF8.hPutStrLn stdout (T.pack outputFile)
+       mapM_ (UTF8.hPutStrLn stdout . T.pack)
+             (fromMaybe ["-"] $ optInputFiles opts)
        exitSuccess
 
   let sources = case optInputFiles opts of
@@ -354,6 +355,5 @@ writeFnBinary "-" = liftIO . BL.putStr
 writeFnBinary f   = liftIO . BL.writeFile (UTF8.encodePath f)
 
 writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
--- TODO this implementation isn't maximally efficient:
-writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack
-writerFn eol f   = liftIO . UTF8.writeFileWith eol f . T.unpack
+writerFn eol "-" = liftIO . UTF8.putStrWith eol
+writerFn eol f   = liftIO . UTF8.writeFileWith eol f
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index a4c510d97..0a8193f6c 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -812,10 +812,10 @@ options =
                            map (\c -> ['-',c]) shorts ++
                            map ("--" ++) longs
                      let allopts = unwords (concatMap optnames options)
-                     UTF8.hPutStrLn stdout $ printf tpl allopts
-                         (unwords readersNames)
-                         (unwords writersNames)
-                         (unwords $ map (T.unpack . fst) highlightingStyles)
+                     UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts
+                         (T.unpack $ T.unwords readersNames)
+                         (T.unpack $ T.unwords writersNames)
+                         (T.unpack $ T.unwords $ map fst highlightingStyles)
                          (unwords datafiles)
                      exitSuccess ))
                  "" -- "Print bash completion script"
@@ -854,7 +854,7 @@ options =
                                else if extensionEnabled x allExts
                                        then '-'
                                        else ' ') : drop 4 (show x)
-                     mapM_ (UTF8.hPutStrLn stdout . showExt)
+                     mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt)
                        [ex | ex <- extList, extensionEnabled ex allExts]
                      exitSuccess )
                   "FORMAT")
@@ -868,14 +868,14 @@ options =
                                  , sShortname s `notElem`
                                     [T.pack "Alert", T.pack "Alert_indent"]
                                  ]
-                     mapM_ (UTF8.hPutStrLn stdout) (sort langs)
+                     mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs)
                      exitSuccess ))
                  ""
 
     , Option "" ["list-highlight-styles"]
                  (NoArg
                   (\_ -> do
-                     mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles
+                     mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles
                      exitSuccess ))
                  ""
 
@@ -893,7 +893,7 @@ options =
                             | T.null t -> -- e.g. for docx, odt, json:
                                 E.throwIO $ PandocCouldNotFindDataFileError $ T.pack
                                   ("templates/default." ++ arg)
-                            | otherwise -> write . T.unpack $ t
+                            | otherwise -> write t
                           Left e  -> E.throwIO e
                      exitSuccess)
                   "FORMAT")
@@ -940,11 +940,13 @@ options =
                   (\_ -> do
                      prg <- getProgName
                      defaultDatadirs <- defaultUserDataDirs
-                     UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++
-                       compileInfo ++
-                       "\nUser data directory: " ++
-                       intercalate " or " defaultDatadirs ++
-                       ('\n':copyrightMessage))
+                     UTF8.hPutStrLn stdout
+                      $ T.pack
+                      $ prg ++ " " ++ T.unpack pandocVersion ++
+                        compileInfo ++
+                        "\nUser data directory: " ++
+                        intercalate " or " defaultDatadirs ++
+                        ('\n':copyrightMessage)
                      exitSuccess ))
                  "" -- "Print version"
 
@@ -952,7 +954,7 @@ options =
                  (NoArg
                   (\_ -> do
                      prg <- getProgName
-                     UTF8.hPutStr stdout (usageMessage prg options)
+                     UTF8.hPutStr stdout (T.pack $ usageMessage prg options)
                      exitSuccess ))
                  "" -- "Show help"
     ]
@@ -1013,12 +1015,12 @@ handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw"
 handleUnrecognizedOption x =
   (("Unknown option " ++ x ++ ".") :)
 
-readersNames :: [String]
-readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)]))
+readersNames :: [Text]
+readersNames = sort (map fst (readers :: [(Text, Reader PandocIO)]))
 
-writersNames :: [String]
+writersNames :: [Text]
 writersNames = sort
-  ("pdf" : map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)]))
+  ("pdf" : map fst (writers :: [(Text, Writer PandocIO)]))
 
 splitField :: String -> (String, String)
 splitField = second (tailDef "true") . break (`elemText` ":=")
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 53c7d82ef..3864ab188 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -59,8 +59,8 @@ optToOutputSettings opts = do
   let outputFile = fromMaybe "-" (optOutputFile opts)
 
   when (optDumpArgs opts) . liftIO $ do
-    UTF8.hPutStrLn stdout outputFile
-    mapM_ (UTF8.hPutStrLn stdout) (fromMaybe [] $ optInputFiles opts)
+    UTF8.hPutStrLn stdout (T.pack outputFile)
+    mapM_ (UTF8.hPutStrLn stdout . T.pack) (fromMaybe [] $ optInputFiles opts)
     exitSuccess
 
   epubMetadata <- traverse readUtf8File $ optEpubMetadata opts
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index eecda5711..bb4e2b732 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -183,7 +183,7 @@ getModificationTime = liftIOError System.Directory.getModificationTime
 logOutput :: (PandocMonad m, MonadIO m) => LogMessage -> m ()
 logOutput msg = liftIO $ do
   UTF8.hPutStr stderr $
-      "[" ++ show (messageVerbosity msg) ++ "] "
+      "[" <> T.pack (show (messageVerbosity msg)) <> "] "
   alertIndent $ T.lines $ showLogMessage msg
 
 -- | Prints the list of lines to @stderr@, indenting every but the first
@@ -191,10 +191,10 @@ logOutput msg = liftIO $ do
 alertIndent :: [Text] -> IO ()
 alertIndent [] = return ()
 alertIndent (l:ls) = do
-  UTF8.hPutStrLn stderr $ unpack l
+  UTF8.hPutStrLn stderr l
   mapM_ go ls
   where go l' = do UTF8.hPutStr stderr "  "
-                   UTF8.hPutStrLn stderr $ unpack l'
+                   UTF8.hPutStrLn stderr l'
 
 -- | Extract media from the mediabag into a directory.
 extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 94c013cdb..0fdb7bfe5 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -191,6 +191,6 @@ handleError (Left e) =
 
 err :: Int -> Text -> IO a
 err exitCode msg = do
-  UTF8.hPutStrLn stderr (T.unpack msg)
+  UTF8.hPutStrLn stderr msg
   exitWith $ ExitFailure exitCode
   return undefined
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 3f9dd8dad..6f462aad5 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -270,7 +270,7 @@ missingCharacterWarnings verbosity log' = do
         | isAscii c   = T.singleton c
         | otherwise   = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")"
   let addCodePoint = T.concatMap toCodePoint
-  let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l))
+  let warnings = [ addCodePoint (utf8ToText (BC.drop 19 l))
                  | l <- ls
                  , isMissingCharacterWarning l
                  ]
@@ -314,7 +314,7 @@ runTectonic verbosity program args' tmpDir' source = do
     env <- liftIO getEnvironment
     when (verbosity >= INFO) $ liftIO $
       showVerboseInfo (Just tmpDir) program programArgs env
-         (utf8ToString sourceBL)
+         (utf8ToText sourceBL)
     (exit, out) <- liftIO $ E.catch
       (pipeProcess (Just env) program programArgs sourceBL)
       (handlePDFProgramNotFound program)
@@ -385,7 +385,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do
             (pipeProcess (Just env'') program programArgs BL.empty)
             (handlePDFProgramNotFound program)
           when (verbosity >= INFO) $ liftIO $ do
-            UTF8.hPutStrLn stderr $ "[makePDF] Run #" ++ show runNumber
+            UTF8.hPutStrLn stderr $ "[makePDF] Run #" <> tshow runNumber
             BL.hPutStr stderr out
             UTF8.hPutStr stderr "\n"
           if runNumber < numRuns
@@ -405,7 +405,7 @@ generic2pdf :: Verbosity
 generic2pdf verbosity program args source = do
   env' <- getEnvironment
   when (verbosity >= INFO) $
-    showVerboseInfo Nothing program args env' (T.unpack source)
+    showVerboseInfo Nothing program args env' source
   (exit, out) <- E.catch
     (pipeProcess (Just env') program args
                      (BL.fromStrict $ UTF8.fromText source))
@@ -494,19 +494,20 @@ showVerboseInfo :: Maybe FilePath
                 -> String
                 -> [String]
                 -> [(String, String)]
-                -> String
+                -> Text
                 -> IO ()
 showVerboseInfo mbTmpDir program programArgs env source = do
   case mbTmpDir of
     Just tmpDir -> do
       UTF8.hPutStrLn stderr "[makePDF] temp dir:"
-      UTF8.hPutStrLn stderr tmpDir
+      UTF8.hPutStrLn stderr (T.pack tmpDir)
     Nothing -> return ()
   UTF8.hPutStrLn stderr "[makePDF] Command line:"
-  UTF8.hPutStrLn stderr $ program ++ " " ++ unwords (map show programArgs)
+  UTF8.hPutStrLn stderr $
+       T.pack program <> " " <> T.pack (unwords (map show programArgs))
   UTF8.hPutStr stderr "\n"
   UTF8.hPutStrLn stderr "[makePDF] Environment:"
-  mapM_ (UTF8.hPutStrLn stderr . show) env
+  mapM_ (UTF8.hPutStrLn stderr . tshow) env
   UTF8.hPutStr stderr "\n"
   UTF8.hPutStrLn stderr "[makePDF] Source:"
   UTF8.hPutStrLn stderr source
@@ -517,8 +518,8 @@ handlePDFProgramNotFound program e
       E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program
   | otherwise = E.throwIO e
 
-utf8ToString :: ByteString -> String
-utf8ToString lbs =
+utf8ToText :: ByteString -> Text
+utf8ToText lbs =
   case decodeUtf8' lbs of
-    Left _  -> BC.unpack lbs  -- if decoding fails, treat as latin1
-    Right t -> TL.unpack t
+    Left _  -> T.pack $ BC.unpack lbs  -- if decoding fails, treat as latin1
+    Right t -> TL.toStrict t
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 4c4dd531d..8d3799c3e 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1148,7 +1148,7 @@ testStringWith :: Show a
                => ParserT Text ParserState Identity a
                -> Text
                -> IO ()
-testStringWith parser str = UTF8.putStrLn $ show $
+testStringWith parser str = UTF8.putStrLn $ tshow $
                             readWith parser defaultParserState str
 
 -- | Parsing options.
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 567f5abe5..4d5921faf 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -39,67 +39,65 @@ where
 
 import qualified Data.ByteString.Char8 as B
 import qualified Data.ByteString.Lazy.Char8 as BL
+import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.IO as TIO
 import qualified Data.Text.Encoding as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Encoding as TL
 import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile)
 import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr,
                   putStrLn, readFile, writeFile)
-import qualified System.IO as IO
 
-readFile :: FilePath -> IO String
+readFile :: FilePath -> IO Text
 readFile f = do
   h <- openFile (encodePath f) ReadMode
   hGetContents h
 
-getContents :: IO String
+getContents :: IO Text
 getContents = hGetContents stdin
 
-writeFileWith :: Newline -> FilePath -> String -> IO ()
+writeFileWith :: Newline -> FilePath -> Text -> IO ()
 writeFileWith eol f s =
   withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s
 
-writeFile :: FilePath -> String -> IO ()
+writeFile :: FilePath -> Text -> IO ()
 writeFile = writeFileWith nativeNewline
 
-putStrWith :: Newline -> String -> IO ()
+putStrWith :: Newline -> Text -> IO ()
 putStrWith eol s = hPutStrWith eol stdout s
 
-putStr :: String -> IO ()
+putStr :: Text -> IO ()
 putStr = putStrWith nativeNewline
 
-putStrLnWith :: Newline -> String -> IO ()
+putStrLnWith :: Newline -> Text -> IO ()
 putStrLnWith eol s = hPutStrLnWith eol stdout s
 
-putStrLn :: String -> IO ()
+putStrLn :: Text -> IO ()
 putStrLn = putStrLnWith nativeNewline
 
-hPutStrWith :: Newline -> Handle -> String -> IO ()
+hPutStrWith :: Newline -> Handle -> Text -> IO ()
 hPutStrWith eol h s =
   hSetNewlineMode h (NewlineMode eol eol) >>
-  hSetEncoding h utf8 >> IO.hPutStr h s
+  hSetEncoding h utf8 >> TIO.hPutStr h s
 
-hPutStr :: Handle -> String -> IO ()
+hPutStr :: Handle -> Text -> IO ()
 hPutStr = hPutStrWith nativeNewline
 
-hPutStrLnWith :: Newline -> Handle -> String -> IO ()
+hPutStrLnWith :: Newline -> Handle -> Text -> IO ()
 hPutStrLnWith eol h s =
   hSetNewlineMode h (NewlineMode eol eol) >>
-  hSetEncoding h utf8 >> IO.hPutStrLn h s
+  hSetEncoding h utf8 >> TIO.hPutStrLn h s
 
-hPutStrLn :: Handle -> String -> IO ()
+hPutStrLn :: Handle -> Text -> IO ()
 hPutStrLn = hPutStrLnWith nativeNewline
 
-hGetContents :: Handle -> IO String
-hGetContents = fmap toString . B.hGetContents
--- hGetContents h = hSetEncoding h utf8_bom
---                   >> hSetNewlineMode h universalNewlineMode
---                   >> IO.hGetContents h
+hGetContents :: Handle -> IO Text
+hGetContents = fmap toText . B.hGetContents
 
 -- | Convert UTF8-encoded ByteString to Text, also
 -- removing '\r' characters.
-toText :: B.ByteString -> T.Text
+toText :: B.ByteString -> Text
 toText = T.decodeUtf8 . filterCRs . dropBOM
   where dropBOM bs =
          if "\xEF\xBB\xBF" `B.isPrefixOf` bs
@@ -127,7 +125,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM
 toStringLazy :: BL.ByteString -> String
 toStringLazy = TL.unpack . toTextLazy
 
-fromText :: T.Text -> B.ByteString
+fromText :: Text -> B.ByteString
 fromText = T.encodeUtf8
 
 fromTextLazy :: TL.Text -> BL.ByteString
diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs
index 07d825f73..59b04eac1 100644
--- a/test/Tests/Command.hs
+++ b/test/Tests/Command.hs
@@ -130,7 +130,7 @@ runCommandTest testExePath fp num code =
     let cmdline = "% " <> cmd
     let x = cmdline <> "\n" <> input <> "^D\n" <> norm
     let y = cmdline <> "\n" <> input <> "^D\n" <> newnorm
-    let updated = T.unpack $ T.replace (T.pack x) (T.pack y) (T.pack raw)
+    let updated = T.replace (T.pack x) (T.pack y) raw
     UTF8.writeFile fp' updated
 
 extractCommandTest :: FilePath -> FilePath -> TestTree
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs
index 17ece49fd..160086be2 100644
--- a/test/Tests/Old.hs
+++ b/test/Tests/Old.hs
@@ -22,6 +22,7 @@ import Test.Tasty (TestTree, testGroup)
 import Test.Tasty.Golden.Advanced (goldenTest)
 import Tests.Helpers hiding (test)
 import qualified Text.Pandoc.UTF8 as UTF8
+import qualified Data.Text as T
 
 tests :: FilePath -> [TestTree]
 tests pandocPath =
@@ -231,7 +232,7 @@ tests pandocPath =
 -- makes sure file is fully closed after reading
 readFile' :: FilePath -> IO String
 readFile' f = do s <- UTF8.readFile f
-                 return $! (length s `seq` s)
+                 return $! (T.length s `seq` T.unpack s)
 
 lhsWriterTests :: FilePath -> String -> [TestTree]
 lhsWriterTests pandocPath format
@@ -333,7 +334,7 @@ testWithNormalize normalizer pandocPath testname opts inp norm =
                              $ UTF8.toStringLazy out
                    -- filter \r so the tests will work on Windows machines
                  else fail $ "Pandoc failed with error code " ++ show ec
-        updateGolden = UTF8.writeFile norm
+        updateGolden = UTF8.writeFile norm . T.pack
         options = ["--data-dir=../data","--quiet"] ++ [inp] ++ opts
 
 compareValues :: FilePath -> [String] -> String -> String -> IO (Maybe String)
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index 77104c853..8385b751e 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -54,7 +54,7 @@ tokUntokRt s = untokenize (tokenize "random" t) == t
 tests :: [TestTree]
 tests = [ testGroup "tokenization"
           [ testCase "tokenizer round trip on test case" $ do
-                 orig <- T.pack <$> UTF8.readFile "../test/latex-reader.latex"
+                 orig <- UTF8.readFile "../test/latex-reader.latex"
                  let new = untokenize $ tokenize "../test/latex-reader.latex"
                              orig
                  assertEqual "untokenize . tokenize is identity" orig new
-- 
cgit v1.2.3


From 4617f229ea051fea50bce6307fe8221b246a23fe Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 22 Feb 2021 13:28:47 -0800
Subject: Text.Pandoc.MIME: add exported function getCharset.

[API change]
---
 src/Text/Pandoc/MIME.hs | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 53c5cd018..3d06e1579 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -10,8 +10,13 @@
 
 Mime type lookup.
 -}
-module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef,
-                          extensionFromMimeType, mediaCategory ) where
+module Text.Pandoc.MIME (
+  MimeType,
+  getMimeType,
+  getMimeTypeDef,
+  getCharset,
+  extensionFromMimeType,
+  mediaCategory ) where
 import Data.List (isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
 import qualified Data.Text as T
@@ -54,6 +59,14 @@ reverseMimeTypes = M.fromList $ map swap mimeTypesList
 mimeTypes :: M.Map T.Text MimeType
 mimeTypes = M.fromList mimeTypesList
 
+-- | Get the charset from a mime type, if one is present.
+getCharset :: MimeType -> Maybe T.Text
+getCharset mt =
+  let (_,y) = T.breakOn "charset=" mt
+   in if T.null y
+         then Nothing
+         else Just $ T.toUpper $ T.takeWhile (/= ';') $ T.drop 8 y
+
 -- | Collection of common mime types.
 -- Except for first entry, list borrowed from
 -- <https://github.com/Happstack/happstack-server/blob/master/src/Happstack/Server/FileServe/BuildingBlocks.hs happstack-server>
-- 
cgit v1.2.3


From bafccd5aa2dc977e5e49b67c587e1507dd73417c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 22 Feb 2021 13:59:38 -0800
Subject: T.P.Error: Add PandocUnsupportedCharsetError constructor...

...for PandocError.  [API change]
---
 MANUAL.txt               | 1 +
 src/Text/Pandoc/Error.hs | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 51898f224..ba031bcb2 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -1477,6 +1477,7 @@ Nonzero exit codes have the following meanings:
    91 PandocMacroLoop
    92 PandocUTF8DecodingError
    93 PandocIpynbDecodingError
+   94 PandocUnsupportedCharsetError
    97 PandocCouldNotFindDataFileError
    99 PandocResourceNotFound
 ----- ------------------------------------
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 0fdb7bfe5..8102f04cc 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -60,6 +60,7 @@ data PandocError = PandocIOError Text IOError
                  | PandocMacroLoop Text
                  | PandocUTF8DecodingError Text Int Word8
                  | PandocIpynbDecodingError Text
+                 | PandocUnsupportedCharsetError Text
                  | PandocUnknownReaderError Text
                  | PandocUnknownWriterError Text
                  | PandocUnsupportedExtensionError Text Text
@@ -124,6 +125,8 @@ renderError e =
       "The input must be a UTF-8 encoded text."
     PandocIpynbDecodingError w ->
       "ipynb decoding error: " <> w
+    PandocUnsupportedCharsetError charset ->
+      "Unsupported charset " <> charset
     PandocUnknownReaderError r ->
       "Unknown input format " <> r <>
       case r of
@@ -183,6 +186,7 @@ handleError (Left e) =
       PandocMacroLoop{} -> 91
       PandocUTF8DecodingError{} -> 92
       PandocIpynbDecodingError{} -> 93
+      PandocUnsupportedCharsetError{} -> 94
       PandocUnknownReaderError{} -> 21
       PandocUnknownWriterError{} -> 22
       PandocUnsupportedExtensionError{} -> 23
-- 
cgit v1.2.3


From 5a73c5d3f8136c7fba7429c3ae3a8ae31c58030b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 22 Feb 2021 14:01:10 -0800
Subject: When downloading content from URL arguments, be sensitive to...

the character encoding.  We can properly handle UTF-8 and
latin1 (ISO-8859-1); for others we raise an error.
See #5600.
---
 src/Text/Pandoc/App.hs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 63996828e..59af029b5 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -28,6 +28,7 @@ import Control.Monad ( (>=>), when )
 import Control.Monad.Trans ( MonadIO(..) )
 import Control.Monad.Except (throwError)
 import qualified Data.ByteString as BS
+import qualified Data.ByteString.Char8 as B8
 import qualified Data.ByteString.Lazy as BL
 import Data.Char (toLower)
 import Data.Maybe (fromMaybe, isJust, isNothing)
@@ -45,6 +46,7 @@ import System.FilePath ( takeBaseName, takeExtension )
 import System.IO (nativeNewline, stdout)
 import qualified System.IO as IO (Newline (..))
 import Text.Pandoc
+import Text.Pandoc.MIME (getCharset)
 import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
 import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
                             IpynbOutput (..) )
@@ -344,7 +346,13 @@ readSource src = case parseURI src of
                          _ -> PandocAppError (tshow e))
 
 readURI :: FilePath -> PandocIO Text
-readURI src = UTF8.toText . fst <$> openURL (T.pack src)
+readURI src = do
+  (bs, mt) <- openURL (T.pack src)
+  case mt >>= getCharset of
+    Just "UTF-8"      -> return $ UTF8.toText bs
+    Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs
+    Just charset      -> throwError $ PandocUnsupportedCharsetError charset
+    Nothing           -> return $ UTF8.toText bs
 
 readFile' :: MonadIO m => FilePath -> m BL.ByteString
 readFile' "-" = liftIO BL.getContents
-- 
cgit v1.2.3


From d30791a38166538be60a134196f1d2675275017d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 22 Feb 2021 14:17:22 -0800
Subject: Fall back to latin1 if UTF-8 decoding fails...

...when handling URL argument served with no charset in the mime type.
The assumption is that most pages that don't specify a charset
in the mime type are either UTF-8 or latin1.  I think that's a good
assumption, though I'm not sure.
---
 src/Text/Pandoc/App.hs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 59af029b5..40fb34834 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE LambdaCase          #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE CPP                 #-}
 {-# LANGUAGE ScopedTypeVariables #-}
@@ -352,7 +353,12 @@ readURI src = do
     Just "UTF-8"      -> return $ UTF8.toText bs
     Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs
     Just charset      -> throwError $ PandocUnsupportedCharsetError charset
-    Nothing           -> return $ UTF8.toText bs
+    Nothing           -> liftIO $ -- try first as UTF-8, then as latin1
+                          E.catch (return $! UTF8.toText bs)
+                                  (\case
+                                      TSE.DecodeError{} ->
+                                        return $ T.pack $ B8.unpack bs
+                                      e -> E.throwIO e)
 
 readFile' :: MonadIO m => FilePath -> m BL.ByteString
 readFile' "-" = liftIO BL.getContents
-- 
cgit v1.2.3


From f0a991a22be9b82e192d63bf80bbe39679bb2e07 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 22 Feb 2021 21:17:22 -0800
Subject: T.P.CSV: fix parsing of unquoted values.

Previously we didn't allow unescaped quotes in unquoted values,
but they are allowed. Closes #7112.
---
 src/Text/Pandoc/CSV.hs |  3 +--
 test/command/7112.md   | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 test/command/7112.md

(limited to 'src')

diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
index ec212fa9a..2bd21bcfb 100644
--- a/src/Text/Pandoc/CSV.hs
+++ b/src/Text/Pandoc/CSV.hs
@@ -68,8 +68,7 @@ escaped opts = try $
 
 pCSVUnquotedCell :: CSVOptions -> Parser Text
 pCSVUnquotedCell opts = T.pack <$>
-  many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n'
-                  && c /= csvQuote opts))
+  many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n'))
 
 pCSVDelim :: CSVOptions -> Parser ()
 pCSVDelim opts = do
diff --git a/test/command/7112.md b/test/command/7112.md
new file mode 100644
index 000000000..a75b9250a
--- /dev/null
+++ b/test/command/7112.md
@@ -0,0 +1,15 @@
+```
+% pandoc -f rst
+.. csv-table::
+
+   setting, ``echo PATH="path"``
+^D
+<table>
+<tbody>
+<tr class="odd">
+<td>setting</td>
+<td><code>echo PATH="path"</code></td>
+</tr>
+</tbody>
+</table>
+```
-- 
cgit v1.2.3


From fae6a204f1a072d5efe4dff35fd08fbad2071198 Mon Sep 17 00:00:00 2001
From: Salim B <salim@posteo.de>
Date: Sat, 27 Feb 2021 01:56:04 +0000
Subject: Fix/update URLs and use HTTP**S** where possible (#7122)

---
 CONTRIBUTING.md                     | 16 ++++++++--------
 COPYING.md                          |  2 +-
 COPYRIGHT                           |  6 +++---
 INSTALL.md                          | 29 ++++++++++++++---------------
 MANUAL.txt                          |  2 +-
 README.template                     | 10 +++++-----
 doc/lua-filters.md                  |  2 +-
 man/pandoc.1                        |  2 +-
 man/pandoc.1.after                  |  2 +-
 src/Text/Pandoc/Citeproc.hs         |  2 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs  |  8 ++++----
 src/Text/Pandoc/Readers/Textile.hs  |  2 +-
 src/Text/Pandoc/Writers/Markdown.hs |  2 +-
 test/txt2tags.t2t                   |  2 +-
 trypandoc/index.html                |  2 +-
 windows/Makefile                    |  2 +-
 windows/pandoc.wxs                  |  4 ++--
 17 files changed, 47 insertions(+), 48 deletions(-)

(limited to 'src')

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e87c96694..595fb94ff 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -372,7 +372,7 @@ The code
 --------
 
 Pandoc has a publicly accessible git repository on
-GitHub: <http://github.com/jgm/pandoc>.  To get a local copy of the source:
+GitHub: <https://github.com/jgm/pandoc>.  To get a local copy of the source:
 
     git clone https://github.com/jgm/pandoc.git
 
@@ -382,7 +382,7 @@ the pandoc library is in `src/`, the source for the tests is in
 
 The modules `Text.Pandoc.Definition`, `Text.Pandoc.Builder`, and
 `Text.Pandoc.Generic` are in a separate library `pandoc-types`.  The code can
-be found in <http://github.com/jgm/pandoc-types>.
+be found in <https://github.com/jgm/pandoc-types>.
 
 To build pandoc, you will need a working installation of the
 [Haskell platform].
@@ -445,14 +445,14 @@ you may want to consider submitting a pull request to the
 [closed issues]: https://github.com/jgm/pandoc/issues?q=is%3Aissue+is%3Aclosed
 [latest released version]: https://github.com/jgm/pandoc/releases/latest
 [Nightly builds]: https://github.com/jgm/pandoc/actions?query=workflow%3ANightly
-[pandoc-discuss]: http://groups.google.com/group/pandoc-discuss
+[pandoc-discuss]: https://groups.google.com/group/pandoc-discuss
 [issue tracker]: https://github.com/jgm/pandoc/issues
-[User's Guide]: http://pandoc.org/MANUAL.html
-[FAQs]:  http://pandoc.org/faqs.html
-[EditorConfig]: http://editorconfig.org/
-[Haskell platform]: http://www.haskell.org/platform/
+[User's Guide]: https://pandoc.org/MANUAL.html
+[FAQs]:  https://pandoc.org/faqs.html
+[EditorConfig]: https://editorconfig.org/
+[Haskell platform]: https://www.haskell.org/platform/
 [hlint]: https://hackage.haskell.org/package/hlint
-[hsb2hs]: http://hackage.haskell.org/package/hsb2hs
+[hsb2hs]: https://hackage.haskell.org/package/hsb2hs
 [pre-commit hook]: https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks
 [GitHub labels]: https://github.com/jgm/pandoc/labels
 [good first issue]:https://github.com/jgm/pandoc/labels/good%20first%20issue
diff --git a/COPYING.md b/COPYING.md
index af5153d8f..90ae12017 100644
--- a/COPYING.md
+++ b/COPYING.md
@@ -357,5 +357,5 @@ into proprietary programs. If your program is a subroutine library,
 you may consider it more useful to permit linking proprietary
 applications with the library. If this is what you want to do, use the
 [GNU Lesser General Public
-License](http://www.gnu.org/licenses/lgpl.html) instead of this
+License](https://www.gnu.org/licenses/lgpl.html) instead of this
 License.
\ No newline at end of file
diff --git a/COPYRIGHT b/COPYRIGHT
index a6e3a897c..9752c555d 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -22,11 +22,11 @@ The GNU General Public License is available in the file COPYING.md in
 the source distribution.  On Debian systems, the complete text of the
 GPL can be found in `/usr/share/common-licenses/GPL`.
 
-[GPL]: http://www.gnu.org/copyleft/gpl.html
+[GPL]: https://www.gnu.org/copyleft/gpl.html
 
 Pandoc's complete source code is available from the [Pandoc home page].
 
-[Pandoc home page]: http://pandoc.org
+[Pandoc home page]: https://pandoc.org
 
 Pandoc includes some code with different copyrights, or subject to different
 licenses.  The copyright and license statements for these sources are included
@@ -176,7 +176,7 @@ Released under the GNU General Public License version 2 or later.
 ----------------------------------------------------------------------
 The dzslides template contains JavaScript and CSS from Paul Rouget's
 dzslides template.
-http://github.com/paulrouget/dzslides
+https://github.com/paulrouget/dzslides
 
 Released under the Do What the Fuck You Want To Public License.
 
diff --git a/INSTALL.md b/INSTALL.md
index 721dc5d66..d23c349c1 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -45,7 +45,7 @@ by downloading [this script][uninstaller]
 and running it with `perl uninstall-pandoc.pl`.
 
 Alternatively, you can install pandoc using
-[Homebrew](http://brew.sh):
+[Homebrew](https://brew.sh):
 
      brew install pandoc
 
@@ -116,7 +116,7 @@ package repositories.  For example, on Debian/Ubuntu,
 you can install it with `apt-get install haskell-platform`.
 
 For PDF output, you'll need LaTeX.  We recommend installing
-[TeX Live](http://www.tug.org/texlive/) via your package
+[TeX Live](https://www.tug.org/texlive/) via your package
 manager.  (On Debian/Ubuntu, `apt-get install texlive`.)
 
 ## Chrome OS
@@ -238,7 +238,7 @@ The easiest way to build pandoc from source is to use [stack][stack]:
 
         pandoc --help
 
-    [Not sure where `$CABALDIR` is?](http://www.haskell.org/haskellwiki/Cabal-Install#The_cabal-install_configuration_file)
+    [Not sure where `$CABALDIR` is?](https://wiki.haskell.org/Cabal-Install#The_cabal-install_configuration_file)
 
 5.  By default `pandoc` uses the "i;unicode-casemap" method
     to sort bibliography entries (RFC 5051).  If you would like to
@@ -383,30 +383,29 @@ To run just the markdown benchmarks:
 
 
 [Arch]: https://www.archlinux.org/packages/community/x86_64/pandoc/
-[Cabal User's Guide]: http://www.haskell.org/cabal/release/latest/doc/users-guide/builders.html#setup-configure-paths
+[Cabal User's Guide]: https://cabal.readthedocs.io/
 [Debian]: https://packages.debian.org/pandoc
 [Fedora]: https://apps.fedoraproject.org/packages/pandoc
-[FreeBSD ports]: http://www.freshports.org/textproc/hs-pandoc/
-[GHC]:  http://www.haskell.org/ghc/
-[GPL]:  http://www.gnu.org/copyleft/gpl.html
-[Haskell platform]: http://hackage.haskell.org/platform/
-[MacPorts]: http://trac.macports.org/browser/trunk/dports/textproc/pandoc/Portfile
+[FreeBSD ports]: https://www.freshports.org/textproc/hs-pandoc/
+[GHC]:  https://www.haskell.org/ghc/
+[Haskell platform]: https://hackage.haskell.org/platform/
+[MacPorts]: https://trac.macports.org/browser/trunk/dports/textproc/pandoc/Portfile
 [MacTeX]: https://tug.org/mactex/
-[BasicTeX]: http://www.tug.org/mactex/morepackages.html
+[BasicTeX]: https://www.tug.org/mactex/morepackages.html
 [LaTeX]: https://www.latex-project.org
-[MiKTeX]: http://miktex.org/
+[MiKTeX]: https://miktex.org/
 [librsvg]: https://wiki.gnome.org/Projects/LibRsvg
 [Python]: https://www.python.org
-[NetBSD]: http://pkgsrc.se/wip/pandoc
+[NetBSD]: https://pkgsrc.se/wip/pandoc
 [NixOS]: https://nixos.org/nixos/packages.html
 [Slackware]: https://www.slackbuilds.org/result/?search=pandoc&sv=
 [Ubuntu]: https://packages.ubuntu.com/pandoc
 [download page]: https://github.com/jgm/pandoc/releases/latest
-[gentoo]: http://packages.gentoo.org/package/app-text/pandoc
+[gentoo]: https://packages.gentoo.org/package/app-text/pandoc
 [haskell repository]: https://wiki.archlinux.org/index.php/Haskell_Package_Guidelines#.5Bhaskell.5D
 [openSUSE]: https://software.opensuse.org/package/pandoc
-[source tarball]: http://hackage.haskell.org/package/pandoc
+[source tarball]: https://hackage.haskell.org/package/pandoc
 [stack]: https://docs.haskellstack.org/en/stable/install_and_upgrade.html
-[cabal-install]: http://hackage.haskell.org/package/cabal-install
+[cabal-install]: https://hackage.haskell.org/package/cabal-install
 [Void]: https://voidlinux.org/
 [uninstaller]: https://raw.githubusercontent.com/jgm/pandoc/master/macos/uninstall-pandoc.pl
diff --git a/MANUAL.txt b/MANUAL.txt
index ba031bcb2..a9465b20c 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -486,7 +486,7 @@ header when requesting a document from a URL:
 [Emacs Org mode]: https://orgmode.org
 [AsciiDoc]: https://www.methods.co.nz/asciidoc/
 [AsciiDoctor]: https://asciidoctor.org/
-[DZSlides]: http://paulrouget.com/dzslides/
+[DZSlides]: https://paulrouget.com/dzslides/
 [Word docx]: https://en.wikipedia.org/wiki/Office_Open_XML
 [PDF]: https://www.adobe.com/pdf/
 [reveal.js]: https://revealjs.com/
diff --git a/README.template b/README.template
index 53d14b584..4b469f22b 100644
--- a/README.template
+++ b/README.template
@@ -7,9 +7,9 @@ Pandoc
 ======
 
 [![github release](https://img.shields.io/github/release/jgm/pandoc.svg?label=current+release)](https://github.com/jgm/pandoc/releases)
-[![hackage release](https://img.shields.io/hackage/v/pandoc.svg?label=hackage)](http://hackage.haskell.org/package/pandoc)
-[![homebrew](https://img.shields.io/homebrew/v/pandoc.svg)](http://brewformulas.org/Pandoc)
-[![stackage LTS package](http://stackage.org/package/pandoc/badge/lts)](http://stackage.org/lts/package/pandoc)
+[![hackage release](https://img.shields.io/hackage/v/pandoc.svg?label=hackage)](https://hackage.haskell.org/package/pandoc)
+[![homebrew](https://img.shields.io/homebrew/v/pandoc.svg)](https://formulae.brew.sh/formula/pandoc)
+[![stackage LTS package](https://stackage.org/package/pandoc/badge/lts)](https://www.stackage.org/lts/package/pandoc-types)
 [![CI tests](https://github.com/jgm/pandoc/workflows/CI%20tests/badge.svg)](https://github.com/jgm/pandoc/actions)
 [![license](https://img.shields.io/badge/license-GPLv2+-lightgray.svg)](https://www.gnu.org/licenses/gpl.html)
 [![pandoc-discuss on google groups](https://img.shields.io/badge/pandoc-discuss-red.svg?style=social)](https://groups.google.com/forum/#!forum/pandoc-discuss)
@@ -85,5 +85,5 @@ License
 [GPL], version 2 or greater.  This software carries no warranty of
 any kind.  (See COPYRIGHT for full copyright and warranty notices.)
 
-[GPL]: http://www.gnu.org/copyleft/gpl.html "GNU General Public License"
-[Haskell]: http://haskell.org
+[GPL]: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html "GNU General Public License"
+[Haskell]: https://haskell.org
diff --git a/doc/lua-filters.md b/doc/lua-filters.md
index a99dc9008..df55dc44e 100644
--- a/doc/lua-filters.md
+++ b/doc/lua-filters.md
@@ -3478,7 +3478,7 @@ Contract a filename, based on a relative path. Note that the
 resulting path will usually not introduce `..` paths, as the
 presence of symlinks means `../b` may not reach `a/b` if it starts
 from `a/c`. For a worked example see [this blog
-post](http://neilmitchell.blogspot.co.uk/2015/10/filepaths-are-subtle-symlinks-are-hard.html).
+post](https://neilmitchell.blogspot.co.uk/2015/10/filepaths-are-subtle-symlinks-are-hard.html).
 
 Set `unsafe` to a truthy value to a allow `..` in paths.
 
diff --git a/man/pandoc.1 b/man/pandoc.1
index 491e24278..bcda05489 100644
--- a/man/pandoc.1
+++ b/man/pandoc.1
@@ -7495,4 +7495,4 @@ This software carries no warranty of any kind.
 of contributors, see the file AUTHORS.md in the pandoc source code.
 .PP
 The Pandoc source code and all documentation may be downloaded
-from <http://pandoc.org>.
+from <https://pandoc.org>.
diff --git a/man/pandoc.1.after b/man/pandoc.1.after
index e5eabb670..7cd7a93f0 100644
--- a/man/pandoc.1.after
+++ b/man/pandoc.1.after
@@ -1,3 +1,3 @@
 .PP
 The Pandoc source code and all documentation may be downloaded
-from <http://pandoc.org>.
+from <https://pandoc.org>.
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 6658c8c0c..38e992ba1 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -544,7 +544,7 @@ linkifyVariables ref =
   fixShortDOI x = let x' = extractText x
                   in  if "10/" `T.isPrefixOf` x'
                          then TextVal $ T.drop 3 x'
-                              -- see http://shortdoi.org
+                              -- see https://shortdoi.org
                          else TextVal x'
   tolink pref x = let x' = extractText x
                       x'' = if "://" `T.isInfixOf` x'
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 416fe439e..c0752dadc 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -577,10 +577,10 @@ itemToReference locale variant item = do
                  eprint <- getRawField "eprint"
                  let baseUrl =
                        case T.toLower etype of
-                         "arxiv"       -> "http://arxiv.org/abs/"
-                         "jstor"       -> "http://www.jstor.org/stable/"
-                         "pubmed"      -> "http://www.ncbi.nlm.nih.gov/pubmed/"
-                         "googlebooks" -> "http://books.google.com?id="
+                         "arxiv"       -> "https://arxiv.org/abs/"
+                         "jstor"       -> "https://www.jstor.org/stable/"
+                         "pubmed"      -> "https://www.ncbi.nlm.nih.gov/pubmed/"
+                         "googlebooks" -> "https://books.google.com?id="
                          _             -> ""
                  if T.null baseUrl
                     then mzero
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 4991c6308..860da2dc3 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -11,7 +11,7 @@
    Portability : portable
 
 Conversion from Textile to 'Pandoc' document, based on the spec
-available at http://redcloth.org/textile.
+available at https://www.promptworks.com/textile/.
 
 Implemented and parsed:
  - Paragraphs
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 898905603..d33246a63 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -14,7 +14,7 @@
 
 Conversion of 'Pandoc' documents to markdown-formatted plain text.
 
-Markdown:  <http://daringfireball.net/projects/markdown/>
+Markdown:  <https://daringfireball.net/projects/markdown/>
 -}
 module Text.Pandoc.Writers.Markdown (
   writeMarkdown,
diff --git a/test/txt2tags.t2t b/test/txt2tags.t2t
index f736cfa93..b636c1511 100644
--- a/test/txt2tags.t2t
+++ b/test/txt2tags.t2t
@@ -270,7 +270,7 @@ FTP.DOMAIN.COM
 [img.png]
 
 %%% Syntax: Image pointing to a link: [[img] link]
-[[img.png] http://txt2tags.org]
+[[img.png] https://txt2tags.org]
 
 %%% Align: Image position is preserved when inside paragraph
 [img.png] Image at the line beginning.
diff --git a/trypandoc/index.html b/trypandoc/index.html
index 792f522eb..5761153c9 100644
--- a/trypandoc/index.html
+++ b/trypandoc/index.html
@@ -14,7 +14,7 @@
   <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
   <script>
 "use strict";
-(function($) { // http://stackoverflow.com/questions/901115/how-can-i-get-query-string-values
+(function($) { // https://stackoverflow.com/questions/901115/how-can-i-get-query-string-values
 	$.QueryString = (function(a) {
 	    if (a == "") return {};
 	    var b = {};
diff --git a/windows/Makefile b/windows/Makefile
index 354c1b37c..34dda34c2 100644
--- a/windows/Makefile
+++ b/windows/Makefile
@@ -6,5 +6,5 @@ all: $(SIGNED)
 .PHONY: all
 
 pandoc-%.msi: pandoc-%-UNSIGNED.msi
-	osslsigncode sign -pkcs12 $$HOME/Private/SectigoCodeSigning.exp2023.p12 -in $< -i http://johnmacfarlane.net/ -t http://timestamp.comodoca.com/ -out $@ -askpass && rm $<
+	osslsigncode sign -pkcs12 $$HOME/Private/SectigoCodeSigning.exp2023.p12 -in $< -i https://johnmacfarlane.net/ -t http://timestamp.comodoca.com/ -out $@ -askpass && rm $<
 
diff --git a/windows/pandoc.wxs b/windows/pandoc.wxs
index d9f6b836e..a582c4de9 100644
--- a/windows/pandoc.wxs
+++ b/windows/pandoc.wxs
@@ -123,8 +123,8 @@
 
 
     <!-- Set properties for add/remove programs -->
-    <Property Id="ARPURLINFOABOUT" Value="http://pandoc.org" />
-    <Property Id="ARPHELPLINK" Value="http://pandoc.org" />
+    <Property Id="ARPURLINFOABOUT" Value="https://pandoc.org" />
+    <Property Id="ARPHELPLINK" Value="https://pandoc.org" />
     <Property Id="ARPNOREPAIR" Value="yes" Secure="yes" />    <!-- Remove repair -->
     <Property Id="ARPNOMODIFY" Value="yes" Secure="yes" />    <!-- Remove modify -->
 
-- 
cgit v1.2.3


From 3327b225a1ef96543f912d200229d08940936528 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 27 Feb 2021 21:35:41 +0100
Subject: Lua: use strict evaluation when retrieving AST value from the stack

Fixes: #6674
---
 src/Text/Pandoc/Lua/Marshaling/AST.hs | 156 +++++++++++++++++-----------------
 1 file changed, 77 insertions(+), 79 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs
index 6485da661..8e12d232c 100644
--- a/src/Text/Pandoc/Lua/Marshaling/AST.hs
+++ b/src/Text/Pandoc/Lua/Marshaling/AST.hs
@@ -1,4 +1,5 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE BangPatterns         #-}
 {-# LANGUAGE LambdaCase           #-}
 {- |
    Module      : Text.Pandoc.Lua.Marshaling.AST
@@ -17,6 +18,7 @@ module Text.Pandoc.Lua.Marshaling.AST
   ) where
 
 import Control.Applicative ((<|>))
+import Control.Monad ((<$!>))
 import Foreign.Lua (Lua, Peekable, Pushable, StackIndex)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError)
@@ -32,17 +34,16 @@ instance Pushable Pandoc where
     pushViaConstructor "Pandoc" blocks meta
 
 instance Peekable Pandoc where
-  peek idx = defineHowTo "get Pandoc value" $ do
-    blocks <- LuaUtil.rawField idx "blocks"
-    meta   <- LuaUtil.rawField idx "meta"
-    return $ Pandoc meta blocks
+  peek idx = defineHowTo "get Pandoc value" $! Pandoc
+    <$!> LuaUtil.rawField idx "meta"
+    <*>  LuaUtil.rawField idx "blocks"
 
 instance Pushable Meta where
   push (Meta mmap) =
     pushViaConstructor "Meta" mmap
 instance Peekable Meta where
-  peek idx = defineHowTo "get Meta value" $
-    Meta <$> Lua.peek idx
+  peek idx = defineHowTo "get Meta value" $!
+    Meta <$!> Lua.peek idx
 
 instance Pushable MetaValue where
   push = pushMetaValue
@@ -68,14 +69,13 @@ instance Pushable Citation where
     pushViaConstructor "Citation" cid mode prefix suffix noteNum hash
 
 instance Peekable Citation where
-  peek idx = do
-    id' <- LuaUtil.rawField idx "id"
-    prefix <- LuaUtil.rawField idx "prefix"
-    suffix <- LuaUtil.rawField idx "suffix"
-    mode <- LuaUtil.rawField idx "mode"
-    num <- LuaUtil.rawField idx "note_num"
-    hash <- LuaUtil.rawField idx "hash"
-    return $ Citation id' prefix suffix mode num hash
+  peek idx = Citation
+    <$!> LuaUtil.rawField idx "id"
+    <*> LuaUtil.rawField idx "prefix"
+    <*> LuaUtil.rawField idx "suffix"
+    <*> LuaUtil.rawField idx "mode"
+    <*> LuaUtil.rawField idx "note_num"
+    <*> LuaUtil.rawField idx "hash"
 
 instance Pushable Alignment where
   push = Lua.push . show
@@ -90,7 +90,7 @@ instance Peekable CitationMode where
 instance Pushable Format where
   push (Format f) = Lua.push f
 instance Peekable Format where
-  peek idx = Format <$> Lua.peek idx
+  peek idx = Format <$!> Lua.peek idx
 
 instance Pushable ListNumberDelim where
   push = Lua.push . show
@@ -130,26 +130,26 @@ peekMetaValue idx = defineHowTo "get MetaValue" $ do
       elementContent = Lua.peek idx
   luatype <- Lua.ltype idx
   case luatype of
-    Lua.TypeBoolean -> MetaBool <$> Lua.peek idx
-    Lua.TypeString  -> MetaString <$> Lua.peek idx
+    Lua.TypeBoolean -> MetaBool <$!> Lua.peek idx
+    Lua.TypeString  -> MetaString <$!> Lua.peek idx
     Lua.TypeTable   -> do
       tag <- try $ LuaUtil.getTag idx
       case tag of
-        Right "MetaBlocks"  -> MetaBlocks  <$> elementContent
-        Right "MetaBool"    -> MetaBool    <$> elementContent
-        Right "MetaMap"     -> MetaMap     <$> elementContent
-        Right "MetaInlines" -> MetaInlines <$> elementContent
-        Right "MetaList"    -> MetaList    <$> elementContent
-        Right "MetaString"  -> MetaString  <$> elementContent
+        Right "MetaBlocks"  -> MetaBlocks  <$!> elementContent
+        Right "MetaBool"    -> MetaBool    <$!> elementContent
+        Right "MetaMap"     -> MetaMap     <$!> elementContent
+        Right "MetaInlines" -> MetaInlines <$!> elementContent
+        Right "MetaList"    -> MetaList    <$!> elementContent
+        Right "MetaString"  -> MetaString  <$!> elementContent
         Right t             -> Lua.throwMessage ("Unknown meta tag: " <> t)
         Left _ -> do
           -- no meta value tag given, try to guess.
           len <- Lua.rawlen idx
           if len <= 0
-            then MetaMap <$> Lua.peek idx
-            else  (MetaInlines <$> Lua.peek idx)
-                  <|> (MetaBlocks <$> Lua.peek idx)
-                  <|> (MetaList <$> Lua.peek idx)
+            then MetaMap <$!> Lua.peek idx
+            else  (MetaInlines <$!> Lua.peek idx)
+                  <|> (MetaBlocks <$!> Lua.peek idx)
+                  <|> (MetaList <$!> Lua.peek idx)
     _        -> Lua.throwMessage "could not get meta value"
 
 -- | Push a block element to the top of the Lua stack.
@@ -174,25 +174,25 @@ pushBlock = \case
 
 -- | Return the value at the given index as block if possible.
 peekBlock :: StackIndex -> Lua Block
-peekBlock idx = defineHowTo "get Block value" $ do
+peekBlock idx = defineHowTo "get Block value" $! do
   tag <- LuaUtil.getTag idx
   case tag of
-      "BlockQuote"     -> BlockQuote <$> elementContent
-      "BulletList"     -> BulletList <$> elementContent
-      "CodeBlock"      -> withAttr CodeBlock <$> elementContent
-      "DefinitionList" -> DefinitionList <$> elementContent
-      "Div"            -> withAttr Div <$> elementContent
+      "BlockQuote"     -> BlockQuote <$!> elementContent
+      "BulletList"     -> BulletList <$!> elementContent
+      "CodeBlock"      -> withAttr CodeBlock <$!> elementContent
+      "DefinitionList" -> DefinitionList <$!> elementContent
+      "Div"            -> withAttr Div <$!> elementContent
       "Header"         -> (\(lvl, LuaAttr attr, lst) -> Header lvl attr lst)
-                          <$> elementContent
+                          <$!> elementContent
       "HorizontalRule" -> return HorizontalRule
-      "LineBlock"      -> LineBlock <$> elementContent
+      "LineBlock"      -> LineBlock <$!> elementContent
       "OrderedList"    -> (\(LuaListAttributes lstAttr, lst) ->
                              OrderedList lstAttr lst)
-                          <$> elementContent
+                          <$!> elementContent
       "Null"           -> return Null
-      "Para"           -> Para <$> elementContent
-      "Plain"          -> Plain <$> elementContent
-      "RawBlock"       -> uncurry RawBlock <$> elementContent
+      "Para"           -> Para <$!> elementContent
+      "Plain"          -> Plain <$!> elementContent
+      "RawBlock"       -> uncurry RawBlock <$!> elementContent
       "Table"          -> (\(attr, capt, colSpecs, thead, tbodies, tfoot) ->
                               Table (fromLuaAttr attr)
                                     capt
@@ -200,7 +200,7 @@ peekBlock idx = defineHowTo "get Block value" $ do
                                     thead
                                     tbodies
                                     tfoot)
-                          <$> elementContent
+                          <$!> elementContent
       _ -> Lua.throwMessage ("Unknown block type: " <> tag)
  where
    -- Get the contents of an AST element.
@@ -222,15 +222,14 @@ pushCaption (Caption shortCaption longCaption) = do
 
 -- | Peek Caption element
 peekCaption :: StackIndex -> Lua Caption
-peekCaption idx = do
-  short <- Lua.fromOptional <$> LuaUtil.rawField idx "short"
-  long  <- LuaUtil.rawField idx "long"
-  return $ Caption short long
+peekCaption idx = Caption
+  <$!> (Lua.fromOptional <$!> LuaUtil.rawField idx "short")
+  <*>  LuaUtil.rawField idx "long"
 
 instance Peekable ColWidth where
   peek idx = do
-    width <- Lua.fromOptional <$> Lua.peek idx
-    return $ maybe ColWidthDefault ColWidth width
+    width <- Lua.fromOptional <$!> Lua.peek idx
+    return $! maybe ColWidthDefault ColWidth width
 
 instance Pushable ColWidth where
   push = \case
@@ -252,12 +251,11 @@ instance Pushable TableBody where
     LuaUtil.addField "body" body
 
 instance Peekable TableBody where
-  peek idx = do
-    attr <- LuaUtil.rawField idx "attr"
-    rowHeadColumns <- LuaUtil.rawField idx "row_head_columns"
-    head' <- LuaUtil.rawField idx "head"
-    body <- LuaUtil.rawField idx "body"
-    return $ TableBody attr (RowHeadColumns rowHeadColumns) head' body
+  peek idx = TableBody
+    <$!> LuaUtil.rawField idx "attr"
+    <*>  (RowHeadColumns <$!> LuaUtil.rawField idx "row_head_columns")
+    <*>  LuaUtil.rawField idx "head"
+    <*>  LuaUtil.rawField idx "body"
 
 instance Pushable TableHead where
   push (TableHead attr rows) = Lua.push (attr, rows)
@@ -287,13 +285,12 @@ pushCell (Cell attr align (RowSpan rowSpan) (ColSpan colSpan) contents) = do
   LuaUtil.addField "contents" contents
 
 peekCell :: StackIndex -> Lua Cell
-peekCell idx = do
-  attr <- fromLuaAttr <$> LuaUtil.rawField idx "attr"
-  align <- LuaUtil.rawField idx "alignment"
-  rowSpan <- LuaUtil.rawField idx "row_span"
-  colSpan <- LuaUtil.rawField idx "col_span"
-  contents <- LuaUtil.rawField idx "contents"
-  return $ Cell attr align (RowSpan rowSpan) (ColSpan colSpan) contents
+peekCell idx = Cell
+  <$!> (fromLuaAttr <$!> LuaUtil.rawField idx "attr")
+  <*>  LuaUtil.rawField idx "alignment"
+  <*>  (RowSpan <$!> LuaUtil.rawField idx "row_span")
+  <*>  (ColSpan <$!> LuaUtil.rawField idx "col_span")
+  <*>  LuaUtil.rawField idx "contents"
 
 -- | Push an inline element to the top of the lua stack.
 pushInline :: Inline -> Lua ()
@@ -324,28 +321,29 @@ peekInline :: StackIndex -> Lua Inline
 peekInline idx = defineHowTo "get Inline value" $ do
   tag <- LuaUtil.getTag idx
   case tag of
-    "Cite"       -> uncurry Cite <$> elementContent
-    "Code"       -> withAttr Code <$> elementContent
-    "Emph"       -> Emph <$> elementContent
-    "Underline"  -> Underline <$> elementContent
-    "Image"      -> (\(LuaAttr attr, lst, tgt) -> Image attr lst tgt)
-                    <$> elementContent
-    "Link"       -> (\(LuaAttr attr, lst, tgt) -> Link attr lst tgt)
-                    <$> elementContent
+    "Cite"       -> uncurry Cite <$!> elementContent
+    "Code"       -> withAttr Code <$!> elementContent
+    "Emph"       -> Emph <$!> elementContent
+    "Underline"  -> Underline <$!> elementContent
+    "Image"      -> (\(LuaAttr !attr, !lst, !tgt) -> Image attr lst tgt)
+                    <$!> elementContent
+    "Link"       -> (\(LuaAttr !attr, !lst, !tgt) -> Link attr lst tgt)
+                    <$!> elementContent
     "LineBreak"  -> return LineBreak
-    "Note"       -> Note <$> elementContent
-    "Math"       -> uncurry Math <$> elementContent
-    "Quoted"     -> uncurry Quoted <$> elementContent
-    "RawInline"  -> uncurry RawInline <$> elementContent
-    "SmallCaps"  -> SmallCaps <$> elementContent
+    "Note"       -> Note <$!> elementContent
+    "Math"       -> uncurry Math <$!> elementContent
+    "Quoted"     -> uncurry Quoted <$!> elementContent
+    "RawInline"  -> uncurry RawInline <$!> elementContent
+    "SmallCaps"  -> SmallCaps <$!> elementContent
     "SoftBreak"  -> return SoftBreak
     "Space"      -> return Space
-    "Span"       -> withAttr Span <$> elementContent
-    "Str"        -> Str <$> elementContent
-    "Strikeout"  -> Strikeout <$> elementContent
-    "Strong"     -> Strong <$> elementContent
-    "Subscript"  -> Subscript <$> elementContent
-    "Superscript"-> Superscript <$> elementContent
+    "Span"       -> withAttr Span <$!> elementContent
+    -- strict to Lua string is copied before gc
+    "Str"        -> Str <$!> elementContent
+    "Strikeout"  -> Strikeout <$!> elementContent
+    "Strong"     -> Strong <$!> elementContent
+    "Subscript"  -> Subscript <$!> elementContent
+    "Superscript"-> Superscript <$!> elementContent
     _ -> Lua.throwMessage ("Unknown inline type: " <> tag)
  where
    -- Get the contents of an AST element.
@@ -366,7 +364,7 @@ instance Pushable LuaAttr where
     pushViaConstructor "Attr" id' classes kv
 
 instance Peekable LuaAttr where
-  peek idx = defineHowTo "get Attr value" (LuaAttr <$> Lua.peek idx)
+  peek idx = defineHowTo "get Attr value" $! (LuaAttr <$!> Lua.peek idx)
 
 -- | Wrapper for ListAttributes
 newtype LuaListAttributes = LuaListAttributes  ListAttributes
-- 
cgit v1.2.3


From 925815bb33b462e1a4c19a8e2c617d403dec0ce7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 27 Feb 2021 17:02:44 -0800
Subject: Split off T.P.Readers.LaTeX.Accent.

To help reduce memory demands compiling the main LaTeX reader.
---
 pandoc.cabal                            |  1 +
 src/Text/Pandoc/Readers/LaTeX.hs        | 68 ++++------------------------
 src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 +++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 60 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Accent.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 83c5c0120..68edb2b64 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -630,6 +630,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Parsing,
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
+                   Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 7d8dfab0e..51c031f78 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -57,6 +57,7 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
 import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
 import Text.Pandoc.Readers.LaTeX.SIunitx
@@ -64,7 +65,6 @@ import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
 import qualified Text.Pandoc.Builder as B
-import qualified Data.Text.Normalize as Normalize
 import Safe
 
 -- for debugging:
@@ -247,9 +247,6 @@ doxspace =
         startsWithLetter _ = False
 
 
-lit :: Text -> LP m Inlines
-lit = pure . str
-
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
   Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -296,6 +293,9 @@ quoted' f starter ender = do
                               cs   -> cs)
      else lit startchs
 
+lit :: Text -> LP m Inlines
+lit = pure . str
+
 enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
 enquote starred mblang = do
   skipopts
@@ -631,7 +631,10 @@ inlineEnvironments = M.fromList [
   ]
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands = M.union inlineLanguageCommands $ M.fromList
+inlineCommands =
+  M.union inlineLanguageCommands $
+  M.union (accentCommands tok) $
+  M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
   , ("textsl", extractSpaces emph <$> tok)
@@ -703,48 +706,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("MakeTextLowercase", makeLowercase <$> tok)
   , ("lowercase", makeLowercase <$> tok)
   , ("/", pure mempty) -- italic correction
-  , ("aa", lit "å")
-  , ("AA", lit "Å")
-  , ("ss", lit "ß")
-  , ("o", lit "ø")
-  , ("O", lit "Ø")
-  , ("L", lit "Ł")
-  , ("l", lit "ł")
-  , ("ae", lit "æ")
-  , ("AE", lit "Æ")
-  , ("oe", lit "œ")
-  , ("OE", lit "Œ")
-  , ("pounds", lit "£")
-  , ("euro", lit "€")
-  , ("copyright", lit "©")
-  , ("textasciicircum", lit "^")
-  , ("textasciitilde", lit "~")
-  , ("H", accent '\779' Nothing) -- hungarumlaut
-  , ("`", accent '\768' (Just '`')) -- grave
-  , ("'", accent '\769' (Just '\'')) -- acute
-  , ("^", accent '\770' (Just '^')) -- circ
-  , ("~", accent '\771' (Just '~')) -- tilde
-  , ("\"", accent '\776' Nothing) -- umlaut
-  , (".", accent '\775' Nothing) -- dot
-  , ("=", accent '\772' Nothing) -- macron
-  , ("|", accent '\781' Nothing) -- vertical line above
-  , ("b", accent '\817' Nothing) -- macron below
-  , ("c", accent '\807' Nothing) -- cedilla
-  , ("G", accent '\783' Nothing) -- doublegrave
-  , ("h", accent '\777' Nothing) -- hookabove
-  , ("d", accent '\803' Nothing) -- dotbelow
-  , ("f", accent '\785' Nothing)  -- inverted breve
-  , ("r", accent '\778' Nothing)  -- ringabove
-  , ("t", accent '\865' Nothing)  -- double inverted breve
-  , ("U", accent '\782' Nothing)  -- double vertical line above
-  , ("v", accent '\780' Nothing) -- hacek
-  , ("u", accent '\774' Nothing) -- breve
-  , ("k", accent '\808' Nothing) -- ogonek
-  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
-  , ("i", lit "ı")  -- dotless i
-  , ("j", lit "ȷ")  -- dotless j
-  , ("newtie", accent '\785' Nothing) -- inverted breve
-  , ("textcircled", accent '\8413' Nothing) -- combining circle
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
                             optional opt
@@ -960,19 +921,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("hyphen", pure (str "-"))
   ]
 
-accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
-accent combiningAccent fallBack = try $ do
-  ils <- tok
-  case toList ils of
-       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-         -- try to normalize to the combined character:
-         Str (Normalize.normalize Normalize.NFC
-               (T.pack [x, combiningAccent]) <> xs) : ys
-       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       _                 -> return ils
-
-
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
   optional opt
diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
new file mode 100644
index 000000000..f8c53491c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
@@ -0,0 +1,78 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Accent
+  ( accentCommands )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Parsing
+import qualified Data.Text as T
+import qualified Data.Text.Normalize as Normalize
+
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+  let accent = accentWith tok
+      lit = pure . str
+  in M.fromList
+  [ ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("textasciicircum", lit "^")
+  , ("textasciitilde", lit "~")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
+  ]
+
+accentWith :: PandocMonad m
+           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+  ils <- tok
+  case toList ils of
+       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (Normalize.normalize Normalize.NFC
+               (T.pack [x, combiningAccent]) <> xs) : ys
+       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+       _                 -> return ils
+
-- 
cgit v1.2.3


From 08231f5cdd16e31d38d9d6bf59bc5ca12638b438 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 27 Feb 2021 21:40:56 -0800
Subject: Factor out T.P.Readers.LaTeX.Table.

---
 pandoc.cabal                             |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs         | 368 +-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  33 +++
 src/Text/Pandoc/Readers/LaTeX/Table.hs   | 373 +++++++++++++++++++++++++++++++
 4 files changed, 412 insertions(+), 363 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Table.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 68edb2b64..9149c4f8f 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -631,6 +631,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 51c031f78..831c5df05 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE BangPatterns          #-}
 {-# LANGUAGE CPP                   #-}
 {-# LANGUAGE FlexibleInstances     #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
@@ -32,7 +31,6 @@ import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isDigit, isLetter, toUpper, chr)
 import Data.Default
-import Data.Functor (($>))
 import Data.List (intercalate)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, maybeToList)
@@ -58,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
+import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
 import Text.Pandoc.Readers.LaTeX.SIunitx
@@ -551,12 +550,8 @@ inlineCommand' = try $ do
          <|> ignore rawcommand
   lookupListDefault raw names inlineCommands
 
-
 tok :: PandocMonad m => LP m Inlines
-tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
-  where singleChar' = do
-          Tok _ _ t <- singleChar
-          return $ str t
+tok = tokWith inline
 
 opt :: PandocMonad m => LP m Inlines
 opt = do
@@ -1118,12 +1113,6 @@ treatAsInline = Set.fromList
   , "pagebreak"
   ]
 
-label :: PandocMonad m => LP m ()
-label = do
-  controlSeq "label"
-  t <- braced
-  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
-
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -1421,13 +1410,6 @@ bracketedNum = do
        Just i -> return i
        _      -> return 0
 
-setCaption :: PandocMonad m => LP m ()
-setCaption = try $ do
-  skipopts
-  ils <- tok
-  optional $ try $ spaces *> label
-  updateState $ \st -> st{ sCaption = Just ils }
-
 looseItem :: PandocMonad m => LP m Blocks
 looseItem = do
   inListItem <- sInListItem <$> getState
@@ -1441,10 +1423,6 @@ epigraph = do
   p2 <- grouped block
   return $ divWith ("", ["epigraph"], []) (p1 <> p2)
 
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption   = Nothing
-                                      , sLastLabel = Nothing }
-
 section :: PandocMonad m => Attr -> Int -> LP m Blocks
 section (ident, classes, kvs) lvl = do
   skipopts
@@ -1585,7 +1563,7 @@ blockCommands = M.fromList
    , ("item", looseItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
-   , ("caption", mempty <$ setCaption)
+   , ("caption", mempty <$ setCaption inline)
    , ("bibliography", mempty <$ (skipopts *> braced >>=
          addMeta "bibliography" . splitBibs . untokenize))
    , ("addbibresource", mempty <$ (skipopts *> braced >>=
@@ -1623,7 +1601,8 @@ blockCommands = M.fromList
 
 
 environments :: PandocMonad m => M.Map Text (LP m Blocks)
-environments = M.fromList
+environments = M.union (tableEnvironments blocks inline) $
+   M.fromList
    [ ("document", env "document" blocks <* skipMany anyTok)
    , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
    , ("sloppypar", env "sloppypar" blocks)
@@ -1633,13 +1612,6 @@ environments = M.fromList
    , ("figure", env "figure" $ skipopts *> figure)
    , ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
    , ("center", divWith ("", ["center"], []) <$> env "center" blocks)
-   , ("longtable",  env "longtable" $
-          resetCaption *> simpTable "longtable" False >>= addTableCaption)
-   , ("table",  env "table" $
-          skipopts *> resetCaption *> blocks >>= addTableCaption)
-   , ("tabular*", env "tabular*" $ simpTable "tabular*" True)
-   , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
-   , ("tabular", env "tabular"  $ simpTable "tabular" False)
    , ("quote", blockQuote <$> env "quote" blocks)
    , ("quotation", blockQuote <$> env "quotation" blocks)
    , ("verse", blockQuote <$> env "verse" blocks)
@@ -1805,9 +1777,6 @@ italicize (Para ils) = Para [Emph ils]
 italicize (Plain ils) = Plain [Emph ils]
 italicize x = x
 
-env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
-
 rawEnv :: PandocMonad m => Text -> LP m Blocks
 rawEnv name = do
   exts <- getOption readerExtensions
@@ -2045,333 +2014,6 @@ orderedList' = try $ do
   bs <- listenv "enumerate" (many item)
   return $ orderedListWith (start, style, delim) bs
 
--- tables
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
-  spaces
-  controlSeq "hline" <|>
-    -- booktabs rules:
-    controlSeq "toprule" <|>
-    controlSeq "bottomrule" <|>
-    controlSeq "midrule" <|>
-    controlSeq "endhead" <|>
-    controlSeq "endfirsthead"
-  spaces
-  optional opt
-  return ()
-
-lbreak :: PandocMonad m => LP m Tok
-lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
-         <* skipopts <* spaces
-
-amp :: PandocMonad m => LP m Tok
-amp = symbol '&'
-
--- Split a Word into individual Symbols (for parseAligns)
-splitWordTok :: PandocMonad m => LP m ()
-splitWordTok = do
-  inp <- getInput
-  case inp of
-       (Tok spos Word t : rest) ->
-         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
-       _ -> return ()
-
-parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
-parseAligns = try $ do
-  let maybeBar = skipMany
-        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
-  let cAlign = AlignCenter <$ symbol 'c'
-  let lAlign = AlignLeft <$ symbol 'l'
-  let rAlign = AlignRight <$ symbol 'r'
-  let parAlign = AlignLeft <$ symbol 'p'
-  -- aligns from tabularx
-  let xAlign = AlignLeft <$ symbol 'X'
-  let mAlign = AlignLeft <$ symbol 'm'
-  let bAlign = AlignLeft <$ symbol 'b'
-  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
-                                 <|> xAlign <|> mAlign <|> bAlign )
-  let alignPrefix = symbol '>' >> braced
-  let alignSuffix = symbol '<' >> braced
-  let colWidth = try $ do
-        symbol '{'
-        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
-        spaces
-        symbol '}'
-        return $ safeRead ds
-  let alignSpec = do
-        pref <- option [] alignPrefix
-        spaces
-        al <- alignChar
-        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
-                                                 pos <- getPosition
-                                                 report $ SkippedContent s pos
-                                                 return Nothing)
-        spaces
-        suff <- option [] alignSuffix
-        return (al, width, (pref, suff))
-  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
-        symbol '*'
-        spaces
-        ds <- trim . untokenize <$> braced
-        spaces
-        spec <- braced
-        case safeRead ds of
-             Just n  ->
-               getInput >>= setInput . (mconcat (replicate n spec) ++)
-             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
-  bgroup
-  spaces
-  maybeBar
-  aligns' <- many $ try $ spaces >> optional starAlign >>
-                            (alignSpec <* maybeBar)
-  spaces
-  egroup
-  spaces
-  return $ map toSpec aligns'
-  where
-    toColWidth (Just w) | w > 0 = ColWidth w
-    toColWidth _                = ColWidthDefault
-    toSpec (x, y, z) = (x, toColWidth y, z)
-
--- N.B. this parser returns a Row that may have erroneous empty cells
--- in it. See the note above fixTableHead for details.
-parseTableRow :: PandocMonad m
-              => Text   -- ^ table environment name
-              -> [([Tok], [Tok])] -- ^ pref/suffixes
-              -> LP m Row
-parseTableRow envname prefsufs = do
-  notFollowedBy (spaces *> end_ envname)
-  -- add prefixes and suffixes in token stream:
-  let celltoks (pref, suff) = do
-        prefpos <- getPosition
-        contents <- mconcat <$>
-            many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711
-                  <|>
-                   snd <$> withRaw (inlineEnvironment <|> dollarsMath)
-                  <|>
-                   (do notFollowedBy
-                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
-                       count 1 anyTok) )
-
-        suffpos <- getPosition
-        option [] (count 1 amp)
-        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
-  rawcells <- mapM celltoks prefsufs
-  cells <- mapM (parseFromToks parseTableCell) rawcells
-  spaces
-  return $ Row nullAttr cells
-
-parseTableCell :: PandocMonad m => LP m Cell
-parseTableCell = do
-  spaces
-  updateState $ \st -> st{ sInTableCell = True }
-  cell' <-   multicolumnCell
-         <|> multirowCell
-         <|> parseSimpleCell
-         <|> parseEmptyCell
-  updateState $ \st -> st{ sInTableCell = False }
-  spaces
-  return cell'
-  where
-    -- The parsing of empty cells is important in LaTeX, especially when dealing
-    -- with multirow/multicolumn. See #6603.
-    parseEmptyCell = spaces $> emptyCell
-
-cellAlignment :: PandocMonad m => LP m Alignment
-cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
-  where
-    alignment = do
-      c <- untoken <$> singleChar
-      return $ case c of
-        "l" -> AlignLeft
-        "r" -> AlignRight
-        "c" -> AlignCenter
-        "*" -> AlignDefault
-        _   -> AlignDefault
-
-plainify :: Blocks -> Blocks
-plainify bs = case toList bs of
-                [Para ils] -> plain (fromList ils)
-                _          -> bs
-
-multirowCell :: PandocMonad m => LP m Cell
-multirowCell = controlSeq "multirow" >> do
-  -- Full prototype for \multirow macro is:
-  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
-  -- However, everything except `nrows` and `text` make
-  -- sense in the context of the Pandoc AST
-  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
-  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
-  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
-  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
-  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
-  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
-
-multicolumnCell :: PandocMonad m => LP m Cell
-multicolumnCell = controlSeq "multicolumn" >> do
-  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
-  alignment <- symbol '{' *> cellAlignment <* symbol '}'
-
-  let singleCell = do
-        content <- plainify <$> blocks
-        return $ cell alignment (RowSpan 1) (ColSpan span') content
-
-  -- Two possible contents: either a \multirow cell, or content.
-  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
-  -- Note that a \multirow cell can be nested in a \multicolumn,
-  -- but not the other way around. See #6603
-  let nestedCell = do
-        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell
-        return $ cell
-                  alignment
-                  (RowSpan rs)
-                  (ColSpan span')
-                  (fromList bs)
-
-  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
-
--- Parse a simple cell, i.e. not multirow/multicol
-parseSimpleCell :: PandocMonad m => LP m Cell
-parseSimpleCell = simpleCell <$> (plainify <$> blocks)
-
--- LaTeX tables are stored with empty cells underneath multirow cells
--- denoting the grid spaces taken up by them. More specifically, if a
--- cell spans m rows, then it will overwrite all the cells in the
--- columns it spans for (m-1) rows underneath it, requiring padding
--- cells in these places. These padding cells need to be removed for
--- proper table reading. See #6603.
---
--- These fixTable functions do not otherwise fix up malformed
--- input tables: that is left to the table builder.
-fixTableHead :: TableHead -> TableHead
-fixTableHead (TableHead attr rows) = TableHead attr rows'
-  where
-    rows' = fixTableRows rows
-
-fixTableBody :: TableBody -> TableBody
-fixTableBody (TableBody attr rhc th tb)
-  = TableBody attr rhc th' tb'
-  where
-    th' = fixTableRows th
-    tb' = fixTableRows tb
-
-fixTableRows :: [Row] -> [Row]
-fixTableRows = fixTableRows' $ repeat Nothing
-  where
-    fixTableRows' oldHang (Row attr cells : rs)
-      = let (newHang, cells') = fixTableRow oldHang cells
-            rs'               = fixTableRows' newHang rs
-        in Row attr cells' : rs'
-    fixTableRows' _ [] = []
-
--- The overhang is represented as Just (relative cell dimensions) or
--- Nothing for an empty grid space.
-fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
-fixTableRow oldHang cells
-  -- If there's overhang, drop cells until their total width meets the
-  -- width of the occupied grid spaces (or we run out)
-  | (n, prefHang, restHang) <- splitHang oldHang
-  , n > 0
-  = let cells' = dropToWidth getCellW n cells
-        (restHang', cells'') = fixTableRow restHang cells'
-    in (prefHang restHang', cells'')
-  -- Otherwise record the overhang of a pending cell and fix the rest
-  -- of the row
-  | c@(Cell _ _ h w _):cells' <- cells
-  = let h' = max 1 h
-        w' = max 1 w
-        oldHang' = dropToWidth getHangW w' oldHang
-        (newHang, cells'') = fixTableRow oldHang' cells'
-    in (toHang w' h' <> newHang, c : cells'')
-  | otherwise
-  = (oldHang, [])
-  where
-    getCellW (Cell _ _ _ w _) = w
-    getHangW = maybe 1 fst
-    getCS (ColSpan n) = n
-
-    toHang c r
-      | r > 1     = [Just (c, r)]
-      | otherwise = replicate (getCS c) Nothing
-
-    -- Take the prefix of the overhang list representing filled grid
-    -- spaces. Also return the remainder and the length of this prefix.
-    splitHang = splitHang' 0 id
-
-    splitHang' !n l (Just (c, r):xs)
-      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
-    splitHang' n l xs = (n, l, xs)
-
-    -- Drop list items until the total width of the dropped items
-    -- exceeds the passed width.
-    dropToWidth _     n l | n < 1 = l
-    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
-    dropToWidth _     _ []        = []
-
-simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
-simpTable envname hasWidthParameter = try $ do
-  when hasWidthParameter $ () <$ (spaces >> tok)
-  skipopts
-  colspecs <- parseAligns
-  let (aligns, widths, prefsufs) = unzip3 colspecs
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  skipMany hline
-  spaces
-  header' <- option [] . try . fmap (:[]) $
-             parseTableRow envname prefsufs <* lbreak <* many1 hline
-  spaces
-  rows <- sepEndBy (parseTableRow envname prefsufs)
-                    (lbreak <* optional (skipMany hline))
-  spaces
-  optional $ controlSeq "caption" *> setCaption
-  spaces
-  optional label
-  spaces
-  optional lbreak
-  spaces
-  lookAhead $ controlSeq "end" -- make sure we're at end
-  let th  = fixTableHead $ TableHead nullAttr header'
-  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
-  let tf  = TableFoot nullAttr []
-  return $ table emptyCaption (zip aligns widths) th tbs tf
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
-  where go (Table attr c spec th tb tf) = do
-          st <- getState
-          let mblabel = sLastLabel st
-          capt <- case (sCaption st, mblabel) of
-                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
-                   (Just ils, Just lab) -> do
-                     num <- getNextNumber sLastTableNum
-                     setState
-                       st{ sLastTableNum = num
-                         , sLabels = M.insert lab
-                                    [Str (renderDottedNum num)]
-                                    (sLabels st) }
-                     return $ caption Nothing (plain ils) -- add number??
-                   (Nothing, _)  -> return c
-          let attr' = case (attr, mblabel) of
-                        ((_,classes,kvs), Just ident) ->
-                           (ident,classes,kvs)
-                        _ -> attr
-          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
-        go x = return x
-
--- TODO: For now we add a Div to contain table attributes, since
--- most writers don't do anything yet with attributes on Table.
--- This can be removed when that changes.
-addAttrDiv :: Attr -> Block -> Block
-addAttrDiv ("",[],[]) b = b
-addAttrDiv attr b       = Div attr [b]
-
 block :: PandocMonad m => LP m Blocks
 block = do
   res <- (mempty <$ spaces1)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index c2e10570d..4a9fa03ad 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -54,6 +54,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , comment
   , anyTok
   , singleChar
+  , tokWith
   , specialChars
   , endline
   , blankline
@@ -80,6 +81,10 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawopt
   , overlaySpecification
   , getNextNumber
+  , label
+  , setCaption
+  , resetCaption
+  , env
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -914,3 +919,31 @@ getNextNumber getCurrentNum = do
                Just n  -> [n, 1]
                Nothing -> [1]
 
+label :: PandocMonad m => LP m ()
+label = do
+  controlSeq "label"
+  t <- braced
+  updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
+
+setCaption :: PandocMonad m => LP m Inlines -> LP m ()
+setCaption inline = try $ do
+  skipopts
+  ils <- tokWith inline
+  optional $ try $ spaces *> label
+  updateState $ \st -> st{ sCaption = Just ils }
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption   = Nothing
+                                      , sLastLabel = Nothing }
+
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
+
+tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
+tokWith inlineParser = try $ spaces >>
+                                 grouped inlineParser
+                            <|> (lookAhead anyControlSeq >> inlineParser)
+                            <|> singleChar'
+  where singleChar' = do
+          Tok _ _ t <- singleChar
+          return $ str t
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
new file mode 100644
index 000000000..2ea9caf58
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -0,0 +1,373 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Table
+  ( tableEnvironments )
+where
+
+import Data.Functor (($>))
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (when, void)
+import Text.Pandoc.Shared (safeRead, trim)
+import Text.Pandoc.Logging (LogMessage(SkippedContent))
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+tableEnvironments :: PandocMonad m
+                  => LP m Blocks
+                  -> LP m Inlines
+                  -> M.Map Text (LP m Blocks)
+tableEnvironments blocks inline =
+  M.fromList
+  [ ("longtable",  env "longtable" $
+          resetCaption *>
+            simpTable blocks inline "longtable" False >>= addTableCaption)
+  , ("table",  env "table" $
+          skipopts *> resetCaption *> blocks >>= addTableCaption)
+  , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
+  , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
+  , ("tabular", env "tabular"  $ simpTable blocks inline "tabular" False)
+  ]
+
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+  spaces
+  controlSeq "hline" <|>
+    -- booktabs rules:
+    controlSeq "toprule" <|>
+    controlSeq "bottomrule" <|>
+    controlSeq "midrule" <|>
+    controlSeq "endhead" <|>
+    controlSeq "endfirsthead"
+  spaces
+  optional rawopt
+  return ()
+
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
+         <* skipopts <* spaces
+
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
+
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+  inp <- getInput
+  case inp of
+       (Tok spos Word t : rest) ->
+         setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
+       _ -> return ()
+
+parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
+parseAligns = try $ do
+  let maybeBar = skipMany
+        (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
+  let cAlign = AlignCenter <$ symbol 'c'
+  let lAlign = AlignLeft <$ symbol 'l'
+  let rAlign = AlignRight <$ symbol 'r'
+  let parAlign = AlignLeft <$ symbol 'p'
+  -- aligns from tabularx
+  let xAlign = AlignLeft <$ symbol 'X'
+  let mAlign = AlignLeft <$ symbol 'm'
+  let bAlign = AlignLeft <$ symbol 'b'
+  let alignChar = splitWordTok *> (  cAlign <|> lAlign <|> rAlign <|> parAlign
+                                 <|> xAlign <|> mAlign <|> bAlign )
+  let alignPrefix = symbol '>' >> braced
+  let alignSuffix = symbol '<' >> braced
+  let colWidth = try $ do
+        symbol '{'
+        ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
+        spaces
+        symbol '}'
+        return $ safeRead ds
+  let alignSpec = do
+        pref <- option [] alignPrefix
+        spaces
+        al <- alignChar
+        width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
+                                                 pos <- getPosition
+                                                 report $ SkippedContent s pos
+                                                 return Nothing)
+        spaces
+        suff <- option [] alignSuffix
+        return (al, width, (pref, suff))
+  let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+        symbol '*'
+        spaces
+        ds <- trim . untokenize <$> braced
+        spaces
+        spec <- braced
+        case safeRead ds of
+             Just n  ->
+               getInput >>= setInput . (mconcat (replicate n spec) ++)
+             Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
+  bgroup
+  spaces
+  maybeBar
+  aligns' <- many $ try $ spaces >> optional starAlign >>
+                            (alignSpec <* maybeBar)
+  spaces
+  egroup
+  spaces
+  return $ map toSpec aligns'
+  where
+    toColWidth (Just w) | w > 0 = ColWidth w
+    toColWidth _                = ColWidthDefault
+    toSpec (x, y, z) = (x, toColWidth y, z)
+
+-- N.B. this parser returns a Row that may have erroneous empty cells
+-- in it. See the note above fixTableHead for details.
+parseTableRow :: PandocMonad m
+              => LP m Blocks -- ^ block parser
+              -> LP m Inlines -- ^ inline parser
+              -> Text   -- ^ table environment name
+              -> [([Tok], [Tok])] -- ^ pref/suffixes
+              -> LP m Row
+parseTableRow blocks inline envname prefsufs = do
+  notFollowedBy (spaces *> end_ envname)
+  -- add prefixes and suffixes in token stream:
+  let celltoks (pref, suff) = do
+        prefpos <- getPosition
+        contents <- mconcat <$>
+            many ( snd <$> withRaw
+                     ((lookAhead (controlSeq "parbox") >>
+                       void blocks) -- #5711
+                      <|>
+                      (lookAhead (controlSeq "begin") >> void inline)
+                      <|>
+                      (lookAhead (symbol '$') >> void inline))
+                  <|>
+                   (do notFollowedBy
+                         (() <$ amp <|> () <$ lbreak <|> end_ envname)
+                       count 1 anyTok) )
+
+        suffpos <- getPosition
+        option [] (count 1 amp)
+        return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+  rawcells <- mapM celltoks prefsufs
+  cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
+  spaces
+  return $ Row nullAttr cells
+
+parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
+parseTableCell blocks = do
+  spaces
+  updateState $ \st -> st{ sInTableCell = True }
+  cell' <-   multicolumnCell blocks
+         <|> multirowCell blocks
+         <|> parseSimpleCell
+         <|> parseEmptyCell
+  updateState $ \st -> st{ sInTableCell = False }
+  spaces
+  return cell'
+  where
+    -- The parsing of empty cells is important in LaTeX, especially when dealing
+    -- with multirow/multicolumn. See #6603.
+    parseEmptyCell = spaces $> emptyCell
+    parseSimpleCell = simpleCell <$> (plainify <$> blocks)
+
+
+cellAlignment :: PandocMonad m => LP m Alignment
+cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
+  where
+    alignment = do
+      c <- untoken <$> singleChar
+      return $ case c of
+        "l" -> AlignLeft
+        "r" -> AlignRight
+        "c" -> AlignCenter
+        "*" -> AlignDefault
+        _   -> AlignDefault
+
+plainify :: Blocks -> Blocks
+plainify bs = case toList bs of
+                [Para ils] -> plain (fromList ils)
+                _          -> bs
+
+multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multirowCell blocks = controlSeq "multirow" >> do
+  -- Full prototype for \multirow macro is:
+  --     \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
+  -- However, everything except `nrows` and `text` make
+  -- sense in the context of the Pandoc AST
+  _ <- optional $ symbol '[' *> cellAlignment <* symbol ']'   -- vertical position
+  nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- bigstrut-related
+  _ <- symbol '{' *> manyTill anyTok (symbol '}')             -- Cell width
+  _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']')  -- Length used for fine-tuning
+  content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
+  return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
+
+multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multicolumnCell blocks = controlSeq "multicolumn" >> do
+  span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+  alignment <- symbol '{' *> cellAlignment <* symbol '}'
+
+  let singleCell = do
+        content <- plainify <$> blocks
+        return $ cell alignment (RowSpan 1) (ColSpan span') content
+
+  -- Two possible contents: either a \multirow cell, or content.
+  -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
+  -- Note that a \multirow cell can be nested in a \multicolumn,
+  -- but not the other way around. See #6603
+  let nestedCell = do
+        (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
+        return $ cell
+                  alignment
+                  (RowSpan rs)
+                  (ColSpan span')
+                  (fromList bs)
+
+  symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
+
+-- LaTeX tables are stored with empty cells underneath multirow cells
+-- denoting the grid spaces taken up by them. More specifically, if a
+-- cell spans m rows, then it will overwrite all the cells in the
+-- columns it spans for (m-1) rows underneath it, requiring padding
+-- cells in these places. These padding cells need to be removed for
+-- proper table reading. See #6603.
+--
+-- These fixTable functions do not otherwise fix up malformed
+-- input tables: that is left to the table builder.
+fixTableHead :: TableHead -> TableHead
+fixTableHead (TableHead attr rows) = TableHead attr rows'
+  where
+    rows' = fixTableRows rows
+
+fixTableBody :: TableBody -> TableBody
+fixTableBody (TableBody attr rhc th tb)
+  = TableBody attr rhc th' tb'
+  where
+    th' = fixTableRows th
+    tb' = fixTableRows tb
+
+fixTableRows :: [Row] -> [Row]
+fixTableRows = fixTableRows' $ repeat Nothing
+  where
+    fixTableRows' oldHang (Row attr cells : rs)
+      = let (newHang, cells') = fixTableRow oldHang cells
+            rs'               = fixTableRows' newHang rs
+        in Row attr cells' : rs'
+    fixTableRows' _ [] = []
+
+-- The overhang is represented as Just (relative cell dimensions) or
+-- Nothing for an empty grid space.
+fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
+fixTableRow oldHang cells
+  -- If there's overhang, drop cells until their total width meets the
+  -- width of the occupied grid spaces (or we run out)
+  | (n, prefHang, restHang) <- splitHang oldHang
+  , n > 0
+  = let cells' = dropToWidth getCellW n cells
+        (restHang', cells'') = fixTableRow restHang cells'
+    in (prefHang restHang', cells'')
+  -- Otherwise record the overhang of a pending cell and fix the rest
+  -- of the row
+  | c@(Cell _ _ h w _):cells' <- cells
+  = let h' = max 1 h
+        w' = max 1 w
+        oldHang' = dropToWidth getHangW w' oldHang
+        (newHang, cells'') = fixTableRow oldHang' cells'
+    in (toHang w' h' <> newHang, c : cells'')
+  | otherwise
+  = (oldHang, [])
+  where
+    getCellW (Cell _ _ _ w _) = w
+    getHangW = maybe 1 fst
+    getCS (ColSpan n) = n
+
+    toHang c r
+      | r > 1     = [Just (c, r)]
+      | otherwise = replicate (getCS c) Nothing
+
+    -- Take the prefix of the overhang list representing filled grid
+    -- spaces. Also return the remainder and the length of this prefix.
+    splitHang = splitHang' 0 id
+
+    splitHang' !n l (Just (c, r):xs)
+      = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
+    splitHang' n l xs = (n, l, xs)
+
+    -- Drop list items until the total width of the dropped items
+    -- exceeds the passed width.
+    dropToWidth _     n l | n < 1 = l
+    dropToWidth wproj n (c:cs)    = dropToWidth wproj (n - wproj c) cs
+    dropToWidth _     _ []        = []
+
+simpTable :: PandocMonad m
+          => LP m Blocks
+          -> LP m Inlines
+          -> Text
+          -> Bool
+          -> LP m Blocks
+simpTable blocks inline envname hasWidthParameter = try $ do
+  when hasWidthParameter $ () <$ tokWith inline
+  skipopts
+  colspecs <- parseAligns
+  let (aligns, widths, prefsufs) = unzip3 colspecs
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  skipMany hline
+  spaces
+  header' <- option [] . try . fmap (:[]) $
+             parseTableRow blocks inline envname prefsufs <*
+               lbreak <* many1 hline
+  spaces
+  rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
+                    (lbreak <* optional (skipMany hline))
+  spaces
+  optional $ controlSeq "caption" *> setCaption inline
+  spaces
+  optional label
+  spaces
+  optional lbreak
+  spaces
+  lookAhead $ controlSeq "end" -- make sure we're at end
+  let th  = fixTableHead $ TableHead nullAttr header'
+  let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
+  let tf  = TableFoot nullAttr []
+  return $ table emptyCaption (zip aligns widths) th tbs tf
+
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+  where go (Table attr c spec th tb tf) = do
+          st <- getState
+          let mblabel = sLastLabel st
+          capt <- case (sCaption st, mblabel) of
+                   (Just ils, Nothing)  -> return $ caption Nothing (plain ils)
+                   (Just ils, Just lab) -> do
+                     num <- getNextNumber sLastTableNum
+                     setState
+                       st{ sLastTableNum = num
+                         , sLabels = M.insert lab
+                                    [Str (renderDottedNum num)]
+                                    (sLabels st) }
+                     return $ caption Nothing (plain ils) -- add number??
+                   (Nothing, _)  -> return c
+          let attr' = case (attr, mblabel) of
+                        ((_,classes,kvs), Just ident) ->
+                           (ident,classes,kvs)
+                        _ -> attr
+          return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+        go x = return x
+
+-- TODO: For now we add a Div to contain table attributes, since
+-- most writers don't do anything yet with attributes on Table.
+-- This can be removed when that changes.
+addAttrDiv :: Attr -> Block -> Block
+addAttrDiv ("",[],[]) b = b
+addAttrDiv attr b       = Div attr [b]
-- 
cgit v1.2.3


From 2faa57e8e96d9905676e30f62d34c06b074acf76 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 09:12:09 -0800
Subject: Factor out T.P.Readers.LaTeX.Citation.

---
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs          | 202 +++-------------------------
 src/Text/Pandoc/Readers/LaTeX/Citation.hs | 210 ++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs  |   5 +
 4 files changed, 232 insertions(+), 186 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Citation.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 9149c4f8f..567b650a1 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -631,6 +631,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 831c5df05..2d1b83486 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -56,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
+import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47)
@@ -169,7 +170,7 @@ rawLaTeXInline = do
   let toks = tokenize "source" inp
   raw <- snd <$>
           (   rawLaTeXParser toks True
-              (mempty <$ (controlSeq "input" >> skipMany opt >> braced))
+              (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
               inlines
           <|> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
               inlines
@@ -311,7 +312,7 @@ blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
 blockquote cvariant mblang = do
   citepar <- if cvariant
                 then (\xs -> para (cite xs mempty))
-                       <$> cites NormalCitation False
+                       <$> cites inline NormalCitation False
                 else option mempty $ para <$> bracketed inline
   let lang = mblang >>= babelLangToBCP47
   let langdiv = case lang of
@@ -425,116 +426,6 @@ pDollarsMath n = do
                 else mzero
        _ -> (tk :) <$> pDollarsMath n
 
--- citations
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _      = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) =
-  let k = last ks
-  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-simpleCiteArgs :: PandocMonad m => LP m [Citation]
-simpleCiteArgs = try $ do
-  first  <- optionMaybe $ toList <$> opt
-  second <- optionMaybe $ toList <$> opt
-  keys <- try $ bgroup *> manyTill citationLabel egroup
-  let (pre, suf) = case (first  , second ) of
-        (Just s , Nothing) -> (mempty, s )
-        (Just s , Just t ) -> (s , t )
-        _                  -> (mempty, mempty)
-      conv k = Citation { citationId      = k
-                        , citationPrefix  = []
-                        , citationSuffix  = []
-                        , citationMode    = NormalCitation
-                        , citationHash    = 0
-                        , citationNoteNum = 0
-                        }
-  return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-citationLabel :: PandocMonad m => LP m Text
-citationLabel  = do
-  sp
-  untokenize <$>
-    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
-          <* sp
-          <* optional (symbol ',')
-          <* sp)
-  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
-
-cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation]
-cites mode multi = try $ do
-  cits <- if multi
-             then do
-               multiprenote <- optionMaybe $ toList <$> paropt
-               multipostnote <- optionMaybe $ toList <$> paropt
-               let (pre, suf) = case (multiprenote, multipostnote) of
-                     (Just s , Nothing) -> (mempty, s)
-                     (Nothing , Just t) -> (mempty, t)
-                     (Just s , Just t ) -> (s, t)
-                     _                  -> (mempty, mempty)
-               tempCits <- many1 simpleCiteArgs
-               case tempCits of
-                 (k:ks) -> case ks of
-                             (_:_) -> return $ (addMprenote pre k : init ks) ++
-                                                 [addMpostnote suf (last ks)]
-                             _ -> return [addMprenote pre (addMpostnote suf k)]
-                 _ -> return [[]]
-             else count 1 simpleCiteArgs
-  let cs = concat cits
-  return $ case mode of
-        AuthorInText -> case cs of
-                             (c:rest) -> c {citationMode = mode} : rest
-                             []       -> []
-        _            -> map (\a -> a {citationMode = mode}) cs
-  where mprenote (k:ks) = (k:ks) ++ [Space]
-        mprenote _ = mempty
-        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
-        mpostnote _ = mempty
-        addMprenote mpn (k:ks) =
-          let mpnfinal = case citationPrefix k of
-                           (_:_) -> mprenote mpn
-                           _ -> mpn
-          in addPrefix mpnfinal (k:ks)
-        addMprenote _ _ = []
-        addMpostnote = addSuffix . mpostnote
-
-citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines
-citation name mode multi = do
-  (c,raw) <- withRaw $ cites mode multi
-  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
-
-handleCitationPart :: Inlines -> [Citation]
-handleCitationPart ils =
-  let isCite Cite{} = True
-      isCite _      = False
-      (pref, rest) = break isCite (toList ils)
-  in case rest of
-          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
-          _                -> []
-
-complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines
-complexNatbibCitation mode = try $ do
-  (cs, raw) <-
-    withRaw $ concat <$> do
-      bgroup
-      items <- mconcat <$>
-                many1 (notFollowedBy (symbol ';') >> inline)
-                  `sepBy1` symbol ';'
-      egroup
-      return $ map handleCitationPart items
-  case cs of
-       []       -> mzero
-       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
-                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
-
-inNote :: Inlines -> Inlines
-inNote ils =
-  note $ para $ ils <> str "."
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -553,19 +444,6 @@ inlineCommand' = try $ do
 tok :: PandocMonad m => LP m Inlines
 tok = tokWith inline
 
-opt :: PandocMonad m => LP m Inlines
-opt = do
-  toks <- try (sp *> bracketedToks <* sp)
-  -- now parse the toks as inlines
-  st <- getState
-  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
-  case parsed of
-    Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
-
-paropt :: PandocMonad m => LP m Inlines
-paropt = parenWrapped inline
-
 inBrackets :: Inlines -> Inlines
 inBrackets x = str "[" <> x <> str "]"
 
@@ -629,6 +507,7 @@ inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands =
   M.union inlineLanguageCommands $
   M.union (accentCommands tok) $
+  M.union (citationCommands inline) $
   M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
@@ -703,7 +582,7 @@ inlineCommands =
   , ("/", pure mempty) -- italic correction
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
-                            optional opt
+                            optional rawopt
                             spaces))
   , (",", lit "\8198")
   , ("@", pure mempty)
@@ -761,61 +640,6 @@ inlineCommands =
   , ("proofname", doTerm Translations.Proof)
   , ("glossaryname", doTerm Translations.Glossary)
   , ("lstlistingname", doTerm Translations.Listing)
-  , ("cite", citation "cite" NormalCitation False)
-  , ("Cite", citation "Cite" NormalCitation False)
-  , ("citep", citation "citep" NormalCitation False)
-  , ("citep*", citation "citep*" NormalCitation False)
-  , ("citeal", citation "citeal" NormalCitation False)
-  , ("citealp", citation "citealp" NormalCitation False)
-  , ("citealp*", citation "citealp*" NormalCitation False)
-  , ("autocite", citation "autocite" NormalCitation False)
-  , ("smartcite", citation "smartcite" NormalCitation False)
-  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
-  , ("parencite", citation "parencite" NormalCitation False)
-  , ("supercite", citation "supercite" NormalCitation False)
-  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
-  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
-  , ("citeyear", citation "citeyear" SuppressAuthor False)
-  , ("autocite*", citation "autocite*" SuppressAuthor False)
-  , ("cite*", citation "cite*" SuppressAuthor False)
-  , ("parencite*", citation "parencite*" SuppressAuthor False)
-  , ("textcite", citation "textcite" AuthorInText False)
-  , ("citet", citation "citet" AuthorInText False)
-  , ("citet*", citation "citet*" AuthorInText False)
-  , ("citealt", citation "citealt" AuthorInText False)
-  , ("citealt*", citation "citealt*" AuthorInText False)
-  , ("textcites", citation "textcites" AuthorInText True)
-  , ("cites", citation "cites" NormalCitation True)
-  , ("autocites", citation "autocites" NormalCitation True)
-  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
-  , ("parencites", citation "parencites" NormalCitation True)
-  , ("supercites", citation "supercites" NormalCitation True)
-  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
-  , ("Autocite", citation "Autocite" NormalCitation False)
-  , ("Smartcite", citation "Smartcite" NormalCitation False)
-  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
-  , ("Parencite", citation "Parencite" NormalCitation False)
-  , ("Supercite", citation "Supercite" NormalCitation False)
-  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
-  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
-  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
-  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
-  , ("Cite*", citation "Cite*" SuppressAuthor False)
-  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
-  , ("Textcite", citation "Textcite" AuthorInText False)
-  , ("Textcites", citation "Textcites" AuthorInText True)
-  , ("Cites", citation "Cites" NormalCitation True)
-  , ("Autocites", citation "Autocites" NormalCitation True)
-  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
-  , ("Parencites", citation "Parencites" NormalCitation True)
-  , ("Supercites", citation "Supercites" NormalCitation True)
-  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
-  , ("citetext", complexNatbibCitation NormalCitation)
-  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
-                        complexNatbibCitation AuthorInText)
-                   <|> citation "citeauthor" AuthorInText False)
-  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
-                          addMeta "nocite"))
   , ("hyperlink", hyperlink)
   , ("hypertarget", hypertargetInline)
   -- glossaries package
@@ -918,7 +742,7 @@ inlineCommands =
 
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
-  optional opt
+  optional rawopt
   x <- tok
   y <- tok
   return $ extractSpaces (spanWith ("",["lettrine"],[])) x <> smallcaps y
@@ -1168,6 +992,16 @@ inline = (mempty <$ comment)
 inlines :: PandocMonad m => LP m Inlines
 inlines = mconcat <$> many inline
 
+opt :: PandocMonad m => LP m Inlines
+opt = do
+  toks <- try (sp *> bracketedToks <* sp)
+  -- now parse the toks as inlines
+  st <- getState
+  parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
+  case parsed of
+    Right result -> return result
+    Left e       -> throwError $ PandocParsecError (untokenize toks) e
+
 -- block elements:
 
 preamble :: PandocMonad m => LP m Blocks
@@ -1261,10 +1095,6 @@ insertIncluded defaultExtension f' = do
   getInput >>= setInput . (tokenize f contents ++)
   updateState dropLatestIncludeFile
 
-addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
-addMeta field val = updateState $ \st ->
-   st{ sMeta = addMetaField field val $ sMeta st }
-
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
new file mode 100644
index 000000000..655823dab
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Citation
+  ( citationCommands
+  , cites
+  )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (mzero)
+import Control.Monad.Trans (lift)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParsecError))
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+
+citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+citationCommands inline =
+  let citation = citationWith inline
+      tok = spaces *> grouped inline
+   in M.fromList
+  [ ("cite", citation "cite" NormalCitation False)
+  , ("Cite", citation "Cite" NormalCitation False)
+  , ("citep", citation "citep" NormalCitation False)
+  , ("citep*", citation "citep*" NormalCitation False)
+  , ("citeal", citation "citeal" NormalCitation False)
+  , ("citealp", citation "citealp" NormalCitation False)
+  , ("citealp*", citation "citealp*" NormalCitation False)
+  , ("autocite", citation "autocite" NormalCitation False)
+  , ("smartcite", citation "smartcite" NormalCitation False)
+  , ("footcite", inNote <$> citation "footcite" NormalCitation False)
+  , ("parencite", citation "parencite" NormalCitation False)
+  , ("supercite", citation "supercite" NormalCitation False)
+  , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False)
+  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+  , ("citeyear", citation "citeyear" SuppressAuthor False)
+  , ("autocite*", citation "autocite*" SuppressAuthor False)
+  , ("cite*", citation "cite*" SuppressAuthor False)
+  , ("parencite*", citation "parencite*" SuppressAuthor False)
+  , ("textcite", citation "textcite" AuthorInText False)
+  , ("citet", citation "citet" AuthorInText False)
+  , ("citet*", citation "citet*" AuthorInText False)
+  , ("citealt", citation "citealt" AuthorInText False)
+  , ("citealt*", citation "citealt*" AuthorInText False)
+  , ("textcites", citation "textcites" AuthorInText True)
+  , ("cites", citation "cites" NormalCitation True)
+  , ("autocites", citation "autocites" NormalCitation True)
+  , ("footcites", inNote <$> citation "footcites" NormalCitation True)
+  , ("parencites", citation "parencites" NormalCitation True)
+  , ("supercites", citation "supercites" NormalCitation True)
+  , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
+  , ("Autocite", citation "Autocite" NormalCitation False)
+  , ("Smartcite", citation "Smartcite" NormalCitation False)
+  , ("Footcite", inNote <$> citation "Footcite" NormalCitation False)
+  , ("Parencite", citation "Parencite" NormalCitation False)
+  , ("Supercite", citation "Supercite" NormalCitation False)
+  , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False)
+  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+  , ("Cite*", citation "Cite*" SuppressAuthor False)
+  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+  , ("Textcite", citation "Textcite" AuthorInText False)
+  , ("Textcites", citation "Textcites" AuthorInText True)
+  , ("Cites", citation "Cites" NormalCitation True)
+  , ("Autocites", citation "Autocites" NormalCitation True)
+  , ("Footcites", inNote <$> citation "Footcites" NormalCitation True)
+  , ("Parencites", citation "Parencites" NormalCitation True)
+  , ("Supercites", citation "Supercites" NormalCitation True)
+  , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True)
+  , ("citetext", complexNatbibCitation inline NormalCitation)
+  , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *>
+                        complexNatbibCitation inline AuthorInText)
+                   <|> citation "citeauthor" AuthorInText False)
+  , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>=
+                          addMeta "nocite"))
+  ]
+
+-- citations
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _      = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+  let k = last ks
+  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation]
+simpleCiteArgs inline = try $ do
+  first  <- optionMaybe $ toList <$> opt
+  second <- optionMaybe $ toList <$> opt
+  keys <- try $ bgroup *> manyTill citationLabel egroup
+  let (pre, suf) = case (first  , second ) of
+        (Just s , Nothing) -> (mempty, s )
+        (Just s , Just t ) -> (s , t )
+        _                  -> (mempty, mempty)
+      conv k = Citation { citationId      = k
+                        , citationPrefix  = []
+                        , citationSuffix  = []
+                        , citationMode    = NormalCitation
+                        , citationHash    = 0
+                        , citationNoteNum = 0
+                        }
+  return $ addPrefix pre $ addSuffix suf $ map conv keys
+ where
+  opt :: PandocMonad m => LP m Inlines
+  opt = do
+    toks <- try (sp *> bracketedToks <* sp)
+    -- now parse the toks as inlines
+    st <- getState
+    parsed <- lift $
+      runParserT (mconcat <$> many inline) st "bracketed option" toks
+    case parsed of
+      Right result -> return result
+      Left e       -> throwError $ PandocParsecError (untokenize toks) e
+
+
+
+citationLabel :: PandocMonad m => LP m Text
+citationLabel  = do
+  sp
+  untokenize <$>
+    (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
+          <* sp
+          <* optional (symbol ',')
+          <* sp)
+  where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char]
+
+cites :: PandocMonad m
+      => LP m Inlines -> CitationMode -> Bool -> LP m [Citation]
+cites inline mode multi = try $ do
+  let paropt = parenWrapped inline
+  cits <- if multi
+             then do
+               multiprenote <- optionMaybe $ toList <$> paropt
+               multipostnote <- optionMaybe $ toList <$> paropt
+               let (pre, suf) = case (multiprenote, multipostnote) of
+                     (Just s , Nothing) -> (mempty, s)
+                     (Nothing , Just t) -> (mempty, t)
+                     (Just s , Just t ) -> (s, t)
+                     _                  -> (mempty, mempty)
+               tempCits <- many1 $ simpleCiteArgs inline
+               case tempCits of
+                 (k:ks) -> case ks of
+                             (_:_) -> return $ (addMprenote pre k : init ks) ++
+                                                 [addMpostnote suf (last ks)]
+                             _ -> return [addMprenote pre (addMpostnote suf k)]
+                 _ -> return [[]]
+             else count 1 $ simpleCiteArgs inline
+  let cs = concat cits
+  return $ case mode of
+        AuthorInText -> case cs of
+                             (c:rest) -> c {citationMode = mode} : rest
+                             []       -> []
+        _            -> map (\a -> a {citationMode = mode}) cs
+  where mprenote (k:ks) = (k:ks) ++ [Space]
+        mprenote _ = mempty
+        mpostnote (k:ks) = [Str ",", Space] ++ (k:ks)
+        mpostnote _ = mempty
+        addMprenote mpn (k:ks) =
+          let mpnfinal = case citationPrefix k of
+                           (_:_) -> mprenote mpn
+                           _ -> mpn
+          in addPrefix mpnfinal (k:ks)
+        addMprenote _ _ = []
+        addMpostnote = addSuffix . mpostnote
+
+citationWith :: PandocMonad m
+             => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines
+citationWith inline name mode multi = do
+  (c,raw) <- withRaw $ cites inline mode multi
+  return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
+
+handleCitationPart :: Inlines -> [Citation]
+handleCitationPart ils =
+  let isCite Cite{} = True
+      isCite _      = False
+      (pref, rest) = break isCite (toList ils)
+  in case rest of
+          (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs
+          _                -> []
+
+complexNatbibCitation :: PandocMonad m
+                      => LP m Inlines -> CitationMode -> LP m Inlines
+complexNatbibCitation inline mode = try $ do
+  (cs, raw) <-
+    withRaw $ concat <$> do
+      bgroup
+      items <- mconcat <$>
+                many1 (notFollowedBy (symbol ';') >> inline)
+                  `sepBy1` symbol ';'
+      egroup
+      return $ map handleCitationPart items
+  case cs of
+       []       -> mzero
+       (c:cits) -> return $ cite (c{ citationMode = mode }:cits)
+                      (rawInline "latex" $ "\\citetext" <> untokenize raw)
+
+inNote :: Inlines -> Inlines
+inNote ils =
+  note $ para $ ils <> str "."
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 4a9fa03ad..a5a39d3c9 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -85,6 +85,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , setCaption
   , resetCaption
   , env
+  , addMeta
   ) where
 
 import Control.Applicative (many, (<|>))
@@ -947,3 +948,7 @@ tokWith inlineParser = try $ spaces >>
   where singleChar' = do
           Tok _ _ t <- singleChar
           return $ str t
+
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
+addMeta field val = updateState $ \st ->
+   st{ sMeta = addMetaField field val $ sMeta st }
-- 
cgit v1.2.3


From 5e571d963587866957a26d382aeab9935311fb9d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 09:39:01 -0800
Subject: LaTeX reader: remove two unnecessary parsers in inline.

These are handled anyway by regularSymbol.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2d1b83486..3935c92ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -975,9 +975,7 @@ inline = (mempty <$ comment)
      <|> doubleQuote
      <|> singleQuote
      <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
-     <|> (str "”" <$ symbol '”')
      <|> (str "’" <$ symbol '\'')
-     <|> (str "’" <$ symbol '’')
      <|> (str "\160" <$ symbol '~')
      <|> dollarsMath
      <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
-- 
cgit v1.2.3


From 564c39beef36bf008fa5d2c840560ef064152e7d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 09:49:34 -0800
Subject: Move setDefaultLanguage to T.P.Readers.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs      | 16 ++--------------
 src/Text/Pandoc/Readers/LaTeX/Lang.hs | 22 ++++++++++++++++++++--
 2 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 3935c92ef..2155379db 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -44,7 +44,7 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
                                       readFileFromDirs, report, setResourcePath,
-                                      setTranslations, translateTerm)
+                                      translateTerm)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
 import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
@@ -59,7 +59,7 @@ import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
-                                       babelLangToBCP47)
+                                       babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
@@ -1856,15 +1856,3 @@ block = do
 blocks :: PandocMonad m => LP m Blocks
 blocks = mconcat <$> many block
 
-setDefaultLanguage :: PandocMonad m => LP m Blocks
-setDefaultLanguage = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  polylang <- untokenize <$> braced
-  case M.lookup polylang polyglossiaLangToBCP47 of
-       Nothing -> return mempty -- TODO mzero? warning?
-       Just langFunc -> do
-         let l = langFunc o
-         setTranslations l
-         updateState $ setMeta "lang" $ str (renderLang l)
-         return mempty
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 5f634818e..adbeaa6d4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -12,13 +12,31 @@ Functions for parsing polyglossia and babel language specifiers to
 BCP47 'Lang'.
 -}
 module Text.Pandoc.Readers.LaTeX.Lang
-  ( polyglossiaLangToBCP47
+  ( setDefaultLanguage
+  , polyglossiaLangToBCP47
   , babelLangToBCP47
   )
 where
 import qualified Data.Map as M
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang(..))
+import Text.Pandoc.BCP47 (Lang(..), renderLang)
+import Text.Pandoc.Class (PandocMonad(..), setTranslations)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, option)
+import Text.Pandoc.Builder (Blocks, setMeta, str)
+
+setDefaultLanguage :: PandocMonad m => LP m Blocks
+setDefaultLanguage = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  polylang <- untokenize <$> braced
+  case M.lookup polylang polyglossiaLangToBCP47 of
+       Nothing -> return mempty -- TODO mzero? warning?
+       Just langFunc -> do
+         let l = langFunc o
+         setTranslations l
+         updateState $ setMeta "lang" $ str (renderLang l)
+         return mempty
 
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
-- 
cgit v1.2.3


From f6cf03857b59776f4f44ea831787231f7f93da96 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 12:52:41 -0800
Subject: LaTeX reader efficiency improvements.

In conjunction with other changes this makes the reader
almost twice as fast on our benchmark as it was on Feb. 10.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 73 +++++++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 31 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2155379db..4956b90cb 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -191,12 +191,6 @@ inlineCommand = do
 word :: PandocMonad m => LP m Inlines
 word = str . untoken <$> satisfyTok isWordTok
 
-regularSymbol :: PandocMonad m => LP m Inlines
-regularSymbol = str . untoken <$> satisfyTok isRegularSymbol
-  where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
-        isRegularSymbol _                = False
-        isSpecial c = c `Set.member` specialChars
-
 inlineGroup :: PandocMonad m => LP m Inlines
 inlineGroup = do
   ils <- grouped inline
@@ -961,31 +955,48 @@ lookupListDefault d = (fromMaybe d .) . lookupList
   where lookupList l m = msum $ map (`M.lookup` m) l
 
 inline :: PandocMonad m => LP m Inlines
-inline = (mempty <$ comment)
-     <|> (space  <$ whitespace)
-     <|> (softbreak <$ endline)
-     <|> word
-     <|> macroDef (rawInline "latex")
-     <|> inlineCommand'
-     <|> inlineEnvironment
-     <|> inlineGroup
-     <|> (symbol '-' *>
-           option (str "-") (symbol '-' *>
-             option (str "–") (str "—" <$ symbol '-')))
-     <|> doubleQuote
-     <|> singleQuote
-     <|> (str "”" <$ try (symbol '\'' >> symbol '\''))
-     <|> (str "’" <$ symbol '\'')
-     <|> (str "\160" <$ symbol '~')
-     <|> dollarsMath
-     <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
-     <|> (str . T.singleton <$> primEscape)
-     <|> regularSymbol
-     <|> (do res <- symbolIn "#^'`\"[]&"
-             pos <- getPosition
-             let s = untoken res
-             report $ ParsingUnescaped s pos
-             return $ str s)
+inline = do
+  Tok pos toktype t <- lookAhead anyTok
+  let symbolAsString = str . untoken <$> anySymbol
+  let unescapedSymbolAsString =
+        do s <- untoken <$> anySymbol
+           report $ ParsingUnescaped s pos
+           return $ str s
+  case toktype of
+    Comment     -> mempty <$ comment
+    Spaces      -> space <$ whitespace
+    Newline     -> softbreak <$ endline
+    Word        -> word
+    Esc1        -> str . T.singleton <$> primEscape
+    Esc2        -> str . T.singleton <$> primEscape
+    Symbol      ->
+      case t of
+        "-"     -> symbol '-' *>
+                    option (str "-") (symbol '-' *>
+                      option (str "–") (str "—" <$ symbol '-'))
+        "'"     -> symbol '\'' *>
+                  option (str "’") (str  "”" <$ symbol '\'')
+        "~"     -> str "\160" <$ symbol '~'
+        "`"     -> doubleQuote <|> singleQuote <|> symbolAsString
+        "\""    -> doubleQuote <|> singleQuote <|> symbolAsString
+        "“"     -> doubleQuote <|> symbolAsString
+        "‘"     -> singleQuote <|> symbolAsString
+        "$"     -> dollarsMath <|> unescapedSymbolAsString
+        "|"     -> (guardEnabled Ext_literate_haskell *>
+                    symbol '|' *> doLHSverb) <|> symbolAsString
+        "{"     -> inlineGroup
+        "#"     -> unescapedSymbolAsString
+        "&"     -> unescapedSymbolAsString
+        "_"     -> unescapedSymbolAsString
+        "^"     -> unescapedSymbolAsString
+        "\\"    -> mzero
+        "}"     -> mzero
+        _       -> symbolAsString
+    CtrlSeq _   -> macroDef (rawInline "latex")
+                  <|> inlineCommand'
+                  <|> inlineEnvironment
+                  <|> inlineGroup
+    _           -> mzero
 
 inlines :: PandocMonad m => LP m Inlines
 inlines = mconcat <$> many inline
-- 
cgit v1.2.3


From cc543cf5b64cccc91ad6a1a455831b8c3f6d80b2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 14:34:04 -0800
Subject: LaTeX reader: another small efficiency improvement.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4956b90cb..4062e8a53 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1855,12 +1855,18 @@ orderedList' = try $ do
 
 block :: PandocMonad m => LP m Blocks
 block = do
-  res <- (mempty <$ spaces1)
-    <|> environment
-    <|> macroDef (rawBlock "latex")
-    <|> blockCommand
-    <|> paragraph
-    <|> grouped block
+  Tok _ toktype _ <- lookAhead anyTok
+  res <- (case toktype of
+            Newline           -> mempty <$ spaces1
+            Spaces            -> mempty <$ spaces1
+            Comment           -> mempty <$ spaces1
+            Word              -> paragraph
+            CtrlSeq "begin"   -> environment
+            CtrlSeq _         -> macroDef (rawBlock "latex")
+                               <|> blockCommand
+            _                 -> mzero)
+          <|> paragraph
+          <|> grouped block
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-- 
cgit v1.2.3


From 7229d068c9f63b6f5cda198815a1bd03473cfdcf Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 15:18:31 -0800
Subject: Markdown reader efficiency improvements.

Benchmarks show that these make the reader 13-17% faster,
depending on extensions.
---
 src/Text/Pandoc/Readers/Markdown.hs | 390 +++++++++++++++++++-----------------
 1 file changed, 208 insertions(+), 182 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e46553dd8..91691c675 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -247,51 +247,60 @@ titleBlock :: PandocMonad m => MarkdownParser m ()
 titleBlock = pandocTitleBlock <|> mmdTitleBlock
 
 pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
-pandocTitleBlock = try $ do
+pandocTitleBlock = do
   guardEnabled Ext_pandoc_title_block
   lookAhead (char '%')
-  title <- option mempty titleLine
-  author <- option (return []) authorsLine
-  date <- option mempty dateLine
-  optional blanklines
-  let meta' = do title' <- title
-                 author' <- author
-                 date' <- date
-                 return $
-                     (if null title' then id else B.setMeta "title" title')
-                   . (if null author' then id else B.setMeta "author" author')
-                   . (if null date' then id else B.setMeta "date" date')
-                   $ nullMeta
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
+  try $ do
+    title <- option mempty titleLine
+    author <- option (return []) authorsLine
+    date <- option mempty dateLine
+    optional blanklines
+    let meta' = do title' <- title
+                   author' <- author
+                   date' <- date
+                   return $
+                       (if null title'
+                           then id
+                           else B.setMeta "title" title')
+                     . (if null author'
+                           then id
+                           else B.setMeta "author" author')
+                     . (if null date'
+                           then id
+                           else B.setMeta "date" date')
+                     $ nullMeta
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
 yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-yamlMetaBlock = try $ do
+yamlMetaBlock = do
   guardEnabled Ext_yaml_metadata_block
-  string "---"
-  blankline
-  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
-  rawYamlLines <- manyTill anyLine stopLine
-  -- by including --- and ..., we allow yaml blocks with just comments:
-  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
-  optional blanklines
-  newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
-              $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-  -- Since `<>` is left-biased, existing values are not touched:
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
-  return mempty
+  try $ do
+    string "---"
+    blankline
+    notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+    rawYamlLines <- manyTill anyLine stopLine
+    -- by including --- and ..., we allow yaml blocks with just comments:
+    let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+    optional blanklines
+    newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
+                $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+    -- Since `<>` is left-biased, existing values are not touched:
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
+    return mempty
 
 stopLine :: PandocMonad m => MarkdownParser m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
 
 mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
-mmdTitleBlock = try $ do
+mmdTitleBlock = do
   guardEnabled Ext_mmd_title_block
-  firstPair <- kvPair False
-  restPairs <- many (kvPair True)
-  let kvPairs = firstPair : restPairs
-  blanklines
-  updateState $ \st -> st{ stateMeta' = stateMeta' st <>
-                             return (Meta $ M.fromList kvPairs) }
+  try $ do
+    firstPair <- kvPair False
+    restPairs <- many (kvPair True)
+    let kvPairs = firstPair : restPairs
+    blanklines
+    updateState $ \st -> st{ stateMeta' = stateMeta' st <>
+                               return (Meta $ M.fromList kvPairs) }
 
 kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue)
 kvPair allowEmpty = try $ do
@@ -661,15 +670,15 @@ codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
 codeBlockFenced = try $ do
   indentchars <- nonindentSpaces
   let indentLevel = T.length indentchars
-  c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
+  c <- (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
      <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
   size <- blockDelimiter (== c) Nothing
   skipMany spaceChar
   rawattr <-
-     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+     (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
      (Right <$> option ("",[],[])
-         (try (guardEnabled Ext_fenced_code_attributes >> attributes)
+         ((guardEnabled Ext_fenced_code_attributes >> try attributes)
           <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar)))
   blankline
   contents <- T.intercalate "\n" <$>
@@ -1157,11 +1166,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
 --
 
 lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-lineBlock = try $ do
+lineBlock = do
   guardEnabled Ext_line_blocks
-  lines' <- lineBlockLines >>=
-            mapM (parseFromString' (trimInlinesF <$> inlines))
-  return $ B.lineBlock <$> sequence lines'
+  try $ do
+    lines' <- lineBlockLines >>=
+              mapM (parseFromString' (trimInlinesF <$> inlines))
+    return $ B.lineBlock <$> sequence lines'
 
 --
 -- Tables
@@ -1263,11 +1273,12 @@ multilineRow indices = do
 -- Parses a table caption:  inlines beginning with 'Table:'
 -- and followed by blank lines.
 tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
-tableCaption = try $ do
+tableCaption = do
   guardEnabled Ext_table_captions
-  skipNonindentSpaces
-  (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
-  trimInlinesF <$> inlines1 <* blanklines
+  try $ do
+    skipNonindentSpaces
+    (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
+    trimInlinesF <$> inlines1 <* blanklines
 
 -- Parse a simple table with '---' header and one line per row.
 simpleTable :: PandocMonad m
@@ -1436,15 +1447,14 @@ table :: PandocMonad m => MarkdownParser m (F Blocks)
 table = try $ do
   frontCaption <- option Nothing (Just <$> tableCaption)
   (aligns, widths, heads, lns) <-
-         try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
-         try (guardEnabled Ext_multiline_tables >>
-                multilineTable False) <|>
-         try (guardEnabled Ext_simple_tables >>
-                (simpleTable True <|> simpleTable False)) <|>
-         try (guardEnabled Ext_multiline_tables >>
-                multilineTable True) <|>
-         try (guardEnabled Ext_grid_tables >>
-                (gridTable False <|> gridTable True)) <?> "table"
+         (guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|>
+         (guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|>
+         (guardEnabled Ext_simple_tables >>
+                try (simpleTable True <|> simpleTable False)) <|>
+         (guardEnabled Ext_multiline_tables >>
+                try (multilineTable True)) <|>
+         (guardEnabled Ext_grid_tables >>
+                try (gridTable False <|> gridTable True)) <?> "table"
   optional blanklines
   caption <- case frontCaption of
                   Nothing -> option (return mempty) tableCaption
@@ -1478,35 +1488,37 @@ inlines1 :: PandocMonad m => MarkdownParser m (F Inlines)
 inlines1 = mconcat <$> many1 inline
 
 inline :: PandocMonad m => MarkdownParser m (F Inlines)
-inline = choice [ whitespace
-                , bareURL
-                , str
-                , endline
-                , code
-                , strongOrEmph
-                , note
-                , cite
-                , bracketedSpan
-                , link
-                , image
-                , math
-                , strikeout
-                , subscript
-                , superscript
-                , inlineNote  -- after superscript because of ^[link](/foo)^
-                , autoLink
-                , spanHtml
-                , rawHtmlInline
-                , escapedNewline
-                , escapedChar
-                , rawLaTeXInline'
-                , exampleRef
-                , smart
-                , return . B.singleton <$> charRef
-                , emoji
-                , symbol
-                , ltSign
-                ] <?> "inline"
+inline = do
+  c <- lookAhead anyChar
+  ((case c of
+     ' '     -> whitespace
+     '\t'    -> whitespace
+     '\n'    -> endline
+     '`'     -> code
+     '_'     -> strongOrEmph
+     '*'     -> strongOrEmph
+     '^'     -> superscript <|> inlineNote -- in this order bc ^[link](/foo)^
+     '['     -> note <|> cite <|> bracketedSpan <|> link
+     '!'     -> image
+     '$'     -> math
+     '~'     -> strikeout <|> subscript
+     '<'     -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
+     '\\'    -> escapedNewline <|> escapedChar <|> rawLaTeXInline'
+     '@'     -> exampleRef
+     '"'     -> smart
+     '\''    -> smart
+     '\8216' -> smart
+     '\145'  -> smart
+     '\8220' -> smart
+     '\147'  -> smart
+     '-'     -> smart
+     '.'     -> smart
+     '&'     -> return . B.singleton <$> charRef
+     ':'     -> emoji
+     _       -> mzero)
+   <|> bareURL
+   <|> str
+   <|> symbol) <?> "inline"
 
 escapedChar' :: PandocMonad m => MarkdownParser m Char
 escapedChar' = try $ do
@@ -1517,11 +1529,12 @@ escapedChar' = try $ do
      <|> oneOf "\\`*_{}[]()>#+-.!~\""
 
 escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
-escapedNewline = try $ do
+escapedNewline = do
   guardEnabled Ext_escaped_line_breaks
-  char '\\'
-  lookAhead (char '\n') -- don't consume the newline (see #3730)
-  return $ return B.linebreak
+  try $ do
+    char '\\'
+    lookAhead (char '\n') -- don't consume the newline (see #3730)
+    return $ return B.linebreak
 
 escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
 escapedChar = do
@@ -1543,19 +1556,20 @@ ltSign = do
 -- whole document has been parsed.  But we need this parser
 -- here in case citations is disabled.
 exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
-exampleRef = try $ do
+exampleRef = do
   guardEnabled Ext_example_lists
-  char '@'
-  lab <- mconcat . map T.pack <$>
-                    many (many1 alphaNum <|>
-                          try (do c <- char '_' <|> char '-'
-                                  cs <- many1 alphaNum
-                                  return (c:cs)))
-  return $ do
-    st <- askF
-    return $ case M.lookup lab (stateExamples st) of
-                  Just n  -> B.str $ tshow n
-                  Nothing -> B.str $ "@" <> lab
+  try $ do
+    char '@'
+    lab <- mconcat . map T.pack <$>
+                      many (many1 alphaNum <|>
+                            try (do c <- char '_' <|> char '-'
+                                    cs <- many1 alphaNum
+                                    return (c:cs)))
+    return $ do
+      st <- askF
+      return $ case M.lookup lab (stateExamples st) of
+                    Just n  -> B.str $ tshow n
+                    Nothing -> B.str $ "@" <> lab
 
 symbol :: PandocMonad m => MarkdownParser m (F Inlines)
 symbol = do
@@ -1582,10 +1596,10 @@ code = try $ do
                   >> count (length starts) (char '`')
                   >> notFollowedBy (char '`'))
   rawattr <-
-     (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
+     (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute))
     <|>
      (Right <$> option ("",[],[])
-         (try (guardEnabled Ext_inline_code_attributes >> attributes)))
+         (guardEnabled Ext_inline_code_attributes >> try attributes))
   return $ return $
     case rawattr of
          Left syn   -> B.rawInline syn result
@@ -1678,20 +1692,22 @@ strikeout = fmap B.strikeout <$>
           strikeEnd   = try $ string "~~"
 
 superscript :: PandocMonad m => MarkdownParser m (F Inlines)
-superscript = fmap B.superscript <$> try (do
+superscript = do
   guardEnabled Ext_superscript
-  char '^'
-  mconcat <$> many1Till (do notFollowedBy spaceChar
-                            notFollowedBy newline
-                            inline) (char '^'))
+  fmap B.superscript <$> try (do
+    char '^'
+    mconcat <$> many1Till (do notFollowedBy spaceChar
+                              notFollowedBy newline
+                              inline) (char '^'))
 
 subscript :: PandocMonad m => MarkdownParser m (F Inlines)
-subscript = fmap B.subscript <$> try (do
+subscript = do
   guardEnabled Ext_subscript
-  char '~'
-  mconcat <$> many1Till (do notFollowedBy spaceChar
-                            notFollowedBy newline
-                            inline) (char '~'))
+  fmap B.subscript <$> try (do
+    char '~'
+    mconcat <$> many1Till (do notFollowedBy spaceChar
+                              notFollowedBy newline
+                              inline) (char '~'))
 
 whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
 whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
@@ -1792,15 +1808,16 @@ link = try $ do
   regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
 
 bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
-bracketedSpan = try $ do
+bracketedSpan = do
   guardEnabled Ext_bracketed_spans
-  (lab,_) <- reference
-  attr <- attributes
-  return $ if isSmallCaps attr
-              then B.smallcaps <$> lab
-              else if isUnderline attr
-                      then B.underline <$> lab
-                      else B.spanWith attr <$> lab
+  try $ do
+    (lab,_) <- reference
+    attr <- attributes
+    return $ if isSmallCaps attr
+                then B.smallcaps <$> lab
+                else if isUnderline attr
+                        then B.underline <$> lab
+                        else B.spanWith attr <$> lab
 
 -- | We treat a span as SmallCaps if class is "smallcaps" (and
 -- no other attributes are set or if style is "font-variant:small-caps"
@@ -1879,12 +1896,13 @@ dropBrackets = dropRB . dropLB
         dropLB xs                          = xs
 
 bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
-bareURL = try $ do
+bareURL = do
   guardEnabled Ext_autolink_bare_uris
   getState >>= guard . stateAllowLinks
-  (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
-  notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
-  return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
+  try $ do
+    (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
+    notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
+    return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
 
 autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
 autoLink = try $ do
@@ -1937,21 +1955,23 @@ note = try $ do
           return $ B.note $ walk adjustCite contents'
 
 inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
-inlineNote = try $ do
+inlineNote = do
   guardEnabled Ext_inline_notes
-  char '^'
-  updateState $ \st -> st{ stateInNote = True
-                         , stateNoteNumber = stateNoteNumber st + 1 }
-  contents <- inlinesInBalancedBrackets
-  updateState $ \st -> st{ stateInNote = False }
-  return $ B.note . B.para <$> contents
+  try $ do
+    char '^'
+    updateState $ \st -> st{ stateInNote = True
+                           , stateNoteNumber = stateNoteNumber st + 1 }
+    contents <- inlinesInBalancedBrackets
+    updateState $ \st -> st{ stateInNote = False }
+    return $ B.note . B.para <$> contents
 
 rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
-rawLaTeXInline' = try $ do
+rawLaTeXInline' = do
   guardEnabled Ext_raw_tex
   notFollowedBy' rawConTeXtEnvironment
-  s <- rawLaTeXInline
-  return $ return $ B.rawInline "tex" s -- "tex" because it might be context
+  try $ do
+    s <- rawLaTeXInline
+    return $ return $ B.rawInline "tex" s -- "tex" because it might be context
 
 rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
 rawConTeXtEnvironment = try $ do
@@ -1970,55 +1990,60 @@ inBrackets parser = do
   return $ "[" <> contents <> "]"
 
 spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
-spanHtml = try $ do
+spanHtml = do
   guardEnabled Ext_native_spans
-  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
-  contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
-  let ident = fromMaybe "" $ lookup "id" attrs
-  let classes = maybe [] T.words $ lookup "class" attrs
-  let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-  return $ if isSmallCaps (ident, classes, keyvals)
-              then B.smallcaps <$> contents
-              else if isUnderline (ident, classes, keyvals)
-                      then B.underline <$> contents
-                      else B.spanWith (ident, classes, keyvals) <$> contents
+  try $ do
+    (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
+    contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
+    let ident = fromMaybe "" $ lookup "id" attrs
+    let classes = maybe [] T.words $ lookup "class" attrs
+    let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+    return $ if isSmallCaps (ident, classes, keyvals)
+                then B.smallcaps <$> contents
+                else if isUnderline (ident, classes, keyvals)
+                        then B.underline <$> contents
+                        else B.spanWith (ident, classes, keyvals) <$> contents
 
 divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
-divHtml = try $ do
+divHtml = do
   guardEnabled Ext_native_divs
-  (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
-  -- we set stateInHtmlBlock so that closing tags that can be either block or
-  -- inline will not be parsed as inline tags
-  oldInHtmlBlock <- stateInHtmlBlock <$> getState
-  updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
-  bls <- option "" (blankline >> option "" blanklines)
-  contents <- mconcat <$>
-              many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
-  closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
-  if closed
-     then do
-       updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
-       let ident = fromMaybe "" $ lookup "id" attrs
-       let classes = maybe [] T.words $ lookup "class" attrs
-       let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
-       return $ B.divWith (ident, classes, keyvals) <$> contents
-     else -- avoid backtracing
-       return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
+  try $ do
+    (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
+    -- we set stateInHtmlBlock so that closing tags that can be either block
+    -- or inline will not be parsed as inline tags
+    oldInHtmlBlock <- stateInHtmlBlock <$> getState
+    updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
+    bls <- option "" (blankline >> option "" blanklines)
+    contents <- mconcat <$>
+                many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
+    closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
+    if closed
+       then do
+         updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
+         let ident = fromMaybe "" $ lookup "id" attrs
+         let classes = maybe [] T.words $ lookup "class" attrs
+         let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+         return $ B.divWith (ident, classes, keyvals) <$> contents
+       else -- avoid backtracing
+         return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
 
 divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
-divFenced = try $ do
+divFenced = do
   guardEnabled Ext_fenced_divs
-  string ":::"
-  skipMany (char ':')
-  skipMany spaceChar
-  attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
-  skipMany spaceChar
-  skipMany (char ':')
-  blankline
-  updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
-  bs <- mconcat <$> manyTill block divFenceEnd
-  updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
-  return $ B.divWith attribs <$> bs
+  try $ do
+    string ":::"
+    skipMany (char ':')
+    skipMany spaceChar
+    attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
+    skipMany spaceChar
+    skipMany (char ':')
+    blankline
+    updateState $ \st ->
+      st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
+    bs <- mconcat <$> manyTill block divFenceEnd
+    updateState $ \st ->
+      st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
+    return $ B.divWith attribs <$> bs
 
 divFenceEnd :: PandocMonad m => MarkdownParser m ()
 divFenceEnd = try $ do
@@ -2050,14 +2075,15 @@ emojiChars :: [Char]
 emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
 
 emoji :: PandocMonad m => MarkdownParser m (F Inlines)
-emoji = try $ do
+emoji = do
   guardEnabled Ext_emoji
-  char ':'
-  emojikey <- many1Char (oneOf emojiChars)
-  char ':'
-  case emojiToInline emojikey of
-    Just i -> return (return $ B.singleton i)
-    Nothing -> mzero
+  try $ do
+    char ':'
+    emojikey <- many1Char (oneOf emojiChars)
+    char ':'
+    case emojiToInline emojikey of
+      Just i -> return (return $ B.singleton i)
+      Nothing -> mzero
 
 -- Citations
 
-- 
cgit v1.2.3


From 36456070c4cf8a6a122bc2ec05e86dc75fe49551 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 15:36:46 -0800
Subject: Fix bug in last commit.

---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 91691c675..b1b99dfe5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1504,7 +1504,7 @@ inline = do
      '~'     -> strikeout <|> subscript
      '<'     -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
      '\\'    -> escapedNewline <|> escapedChar <|> rawLaTeXInline'
-     '@'     -> exampleRef
+     '@'     -> cite <|> exampleRef
      '"'     -> smart
      '\''    -> smart
      '\8216' -> smart
-- 
cgit v1.2.3


From d2bb0c7c8d599e6cd2aaef787b207bbfa66d4b9e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 21:05:25 -0800
Subject: Factor out T.P.Readers.LaTeX.Math.

---
 pandoc.cabal                          |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs      | 201 ++-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Math.hs | 221 ++++++++++++++++++++++++++++++++++
 3 files changed, 230 insertions(+), 193 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Math.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 61b1de0dd..56a9491f2 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -632,6 +632,7 @@ library
                    Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.LaTeX.Citation,
+                   Text.Pandoc.Readers.LaTeX.Math,
                    Text.Pandoc.Readers.LaTeX.Table,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4062e8a53..772263578 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,7 +35,6 @@ import Data.List (intercalate)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, maybeToList)
 import qualified Data.Set as Set
-import qualified Data.Sequence as Seq
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
@@ -57,6 +56,11 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
+import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
+                                       inlineEnvironment,
+                                       mathDisplay, mathInline,
+                                       newtheorem, theoremstyle, proof,
+                                       theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
@@ -387,39 +391,6 @@ nlToSpace :: Char -> Char
 nlToSpace '\n' = ' '
 nlToSpace x    = x
 
-mathDisplay :: Text -> Inlines
-mathDisplay = displayMath . trimMath
-
-mathInline :: Text -> Inlines
-mathInline = math . trimMath
-
-dollarsMath :: PandocMonad m => LP m Inlines
-dollarsMath = do
-  symbol '$'
-  display <- option False (True <$ symbol '$')
-  (do contents <- try $ untokenize <$> pDollarsMath 0
-      if display
-         then mathDisplay contents <$ symbol '$'
-         else return $ mathInline contents)
-   <|> (guard display >> return (mathInline ""))
-
--- Int is number of embedded groupings
-pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
-pDollarsMath n = do
-  tk@(Tok _ toktype t) <- anyTok
-  case toktype of
-       Symbol | t == "$"
-              , n == 0 -> return []
-              | t == "\\" -> do
-                  tk' <- anyTok
-                  (tk :) . (tk' :) <$> pDollarsMath n
-              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
-              | t == "}" ->
-                if n > 0
-                then (tk :) <$> pDollarsMath (n-1)
-                else mzero
-       _ -> (tk :) <$> pDollarsMath n
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -452,51 +423,6 @@ unescapeURL = T.concat . go . T.splitOn "\\"
       , isEscapable c = t
       | otherwise = "\\" <> t
 
-mathEnvWith :: PandocMonad m
-            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
-mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
-   where inner x = case innerEnv of
-                        Nothing -> x
-                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
-                                   "\\end{" <> y <> "}"
-
-mathEnv :: PandocMonad m => Text -> LP m Text
-mathEnv name = do
-  skipopts
-  optional blankline
-  res <- manyTill anyTok (end_ name)
-  return $ stripTrailingNewlines $ untokenize res
-
-inlineEnvironment :: PandocMonad m => LP m Inlines
-inlineEnvironment = try $ do
-  controlSeq "begin"
-  name <- untokenize <$> braced
-  M.findWithDefault mzero name inlineEnvironments
-
-inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineEnvironments = M.fromList [
-    ("displaymath", mathEnvWith id Nothing "displaymath")
-  , ("math", math <$> mathEnv "math")
-  , ("equation", mathEnvWith id Nothing "equation")
-  , ("equation*", mathEnvWith id Nothing "equation*")
-  , ("gather", mathEnvWith id (Just "gathered") "gather")
-  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
-  , ("multline", mathEnvWith id (Just "gathered") "multline")
-  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
-  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
-  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
-  , ("align", mathEnvWith id (Just "aligned") "align")
-  , ("align*", mathEnvWith id (Just "aligned") "align*")
-  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
-  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
-  , ("dmath", mathEnvWith id Nothing "dmath")
-  , ("dmath*", mathEnvWith id Nothing "dmath*")
-  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
-  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
-  , ("darray", mathEnvWith id (Just "aligned") "darray")
-  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
-  ]
-
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands =
   M.union inlineLanguageCommands $
@@ -1354,7 +1280,7 @@ blockCommands = M.fromList
    , ("address", mempty <$ (skipopts *> tok >>= addMeta "address"))
    , ("signature", mempty <$ (skipopts *> authors))
    , ("date", mempty <$ (skipopts *> tok >>= addMeta "date"))
-   , ("newtheorem", newtheorem)
+   , ("newtheorem", newtheorem inline)
    , ("theoremstyle", theoremstyle)
    -- KOMA-Script metadata commands
    , ("extratitle", mempty <$ (skipopts *> tok >>= addMeta "extratitle"))
@@ -1473,7 +1399,7 @@ environments = M.union (tableEnvironments blocks inline) $
    , ("lilypond", rawVerbEnv "lilypond")
    , ("ly", rawVerbEnv "ly")
    -- amsthm
-   , ("proof", proof)
+   , ("proof", proof blocks opt)
    -- etoolbox
    , ("ifstrequal", ifstrequal)
    , ("newtoggle", braced >>= newToggle)
@@ -1494,128 +1420,17 @@ filecontents = try $ do
     st{ sFileContents = M.insert fp txt (sFileContents st) }
   return mempty
 
-theoremstyle :: PandocMonad m => LP m Blocks
-theoremstyle = do
-  stylename <- untokenize <$> braced
-  let mbstyle = case stylename of
-                  "plain"      -> Just PlainStyle
-                  "definition" -> Just DefinitionStyle
-                  "remark"     -> Just RemarkStyle
-                  _            -> Nothing
-  case mbstyle of
-    Nothing  -> return ()
-    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
-  return mempty
-
-newtheorem :: PandocMonad m => LP m Blocks
-newtheorem = do
-  number <- option True (False <$ symbol '*' <* sp)
-  name <- untokenize <$> braced
-  sp
-  series <- option Nothing $ Just . untokenize <$> bracketedToks
-  sp
-  showName <- tok
-  sp
-  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
-  sty <- sLastTheoremStyle <$> getState
-  let spec = TheoremSpec { theoremName = showName
-                         , theoremStyle = sty
-                         , theoremSeries = series
-                         , theoremSyncTo = syncTo
-                         , theoremNumber = number
-                         , theoremLastNum = DottedNum [0] }
-  tmap <- sTheoremMap <$> getState
-  updateState $ \s -> s{ sTheoremMap =
-                            M.insert name spec tmap }
-  return mempty
-
-proof :: PandocMonad m => LP m Blocks
-proof = do
-  title <- option (B.text "Proof") opt
-  bs <- env "proof" blocks
-  return $
-    B.divWith ("", ["proof"], []) $
-      addQed $ addTitle (B.emph (title <> ".")) bs
-
-addTitle :: Inlines -> Blocks -> Blocks
-addTitle ils bs =
-  case B.toList bs of
-    (Para xs : rest)
-      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
-    _ -> B.para ils <> bs
-
-addQed :: Blocks -> Blocks
-addQed bs =
-  case Seq.viewr (B.unMany bs) of
-    s Seq.:> Para ils
-      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
-    _ -> bs <> B.para qedSign
- where
-  qedSign = B.str "\xa0\x25FB"
-
 environment :: PandocMonad m => LP m Blocks
 environment = try $ do
   controlSeq "begin"
   name <- untokenize <$> braced
   M.findWithDefault mzero name environments <|>
-    theoremEnvironment name <|>
+    theoremEnvironment blocks opt name <|>
     if M.member name (inlineEnvironments
                        :: M.Map Text (LP PandocPure Inlines))
        then mzero
        else try (rawEnv name) <|> rawVerbEnv name
 
-theoremEnvironment :: PandocMonad m => Text -> LP m Blocks
-theoremEnvironment name = do
-  tmap <- sTheoremMap <$> getState
-  case M.lookup name tmap of
-    Nothing -> mzero
-    Just tspec -> do
-       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
-       mblabel <- option Nothing $ Just . untokenize <$>
-                   try (spaces >> controlSeq "label" >> spaces >> braced)
-       bs <- env name blocks
-       number <-
-         if theoremNumber tspec
-            then do
-               let name' = fromMaybe name $ theoremSeries tspec
-               num <- getNextNumber
-                   (maybe (DottedNum [0]) theoremLastNum .
-                    M.lookup name' . sTheoremMap)
-               updateState $ \s ->
-                 s{ sTheoremMap =
-                       M.adjust
-                       (\spec -> spec{ theoremLastNum = num })
-                       name'
-                       (sTheoremMap s)
-                  }
-
-               case mblabel of
-                 Just ident ->
-                   updateState $ \s ->
-                     s{ sLabels = M.insert ident
-                         (B.toList $
-                           theoremName tspec <> "\160" <>
-                           str (renderDottedNum num)) (sLabels s) }
-                 Nothing -> return ()
-               return $ space <> B.text (renderDottedNum num)
-            else return mempty
-       let titleEmph = case theoremStyle tspec of
-                         PlainStyle      -> B.strong
-                         DefinitionStyle -> B.strong
-                         RemarkStyle     -> B.emph
-       let title = titleEmph (theoremName tspec <> number)
-                      <> optTitle <> "." <> space
-       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
-              $ case theoremStyle tspec of
-                  PlainStyle -> walk italicize bs
-                  _          -> bs
-
-italicize :: Block -> Block
-italicize x@(Para [Image{}]) = x -- see #6925
-italicize (Para ils) = Para [Emph ils]
-italicize (Plain ils) = Plain [Emph ils]
-italicize x = x
-
 rawEnv :: PandocMonad m => Text -> LP m Blocks
 rawEnv name = do
   exts <- getOption readerExtensions
diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs
new file mode 100644
index 000000000..5b49a0376
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs
@@ -0,0 +1,221 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Math
+  ( dollarsMath
+  , inlineEnvironments
+  , inlineEnvironment
+  , mathInline
+  , mathDisplay
+  , theoremstyle
+  , theoremEnvironment
+  , newtheorem
+  , proof
+  )
+where
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Builder as B
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (trimMath, stripTrailingNewlines)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import Control.Monad (guard, mzero)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+dollarsMath :: PandocMonad m => LP m Inlines
+dollarsMath = do
+  symbol '$'
+  display <- option False (True <$ symbol '$')
+  (do contents <- try $ untokenize <$> pDollarsMath 0
+      if display
+         then mathDisplay contents <$ symbol '$'
+         else return $ mathInline contents)
+   <|> (guard display >> return (mathInline ""))
+
+-- Int is number of embedded groupings
+pDollarsMath :: PandocMonad m => Int -> LP m [Tok]
+pDollarsMath n = do
+  tk@(Tok _ toktype t) <- anyTok
+  case toktype of
+       Symbol | t == "$"
+              , n == 0 -> return []
+              | t == "\\" -> do
+                  tk' <- anyTok
+                  (tk :) . (tk' :) <$> pDollarsMath n
+              | t == "{" -> (tk :) <$> pDollarsMath (n+1)
+              | t == "}" ->
+                if n > 0
+                then (tk :) <$> pDollarsMath (n-1)
+                else mzero
+       _ -> (tk :) <$> pDollarsMath n
+
+mathDisplay :: Text -> Inlines
+mathDisplay = displayMath . trimMath
+
+mathInline :: Text -> Inlines
+mathInline = math . trimMath
+
+mathEnvWith :: PandocMonad m
+            => (Inlines -> a) -> Maybe Text -> Text -> LP m a
+mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
+   where inner x = case innerEnv of
+                        Nothing -> x
+                        Just y  -> "\\begin{" <> y <> "}\n" <> x <>
+                                   "\\end{" <> y <> "}"
+
+mathEnv :: PandocMonad m => Text -> LP m Text
+mathEnv name = do
+  skipopts
+  optional blankline
+  res <- manyTill anyTok (end_ name)
+  return $ stripTrailingNewlines $ untokenize res
+
+inlineEnvironment :: PandocMonad m => LP m Inlines
+inlineEnvironment = try $ do
+  controlSeq "begin"
+  name <- untokenize <$> braced
+  M.findWithDefault mzero name inlineEnvironments
+
+inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines)
+inlineEnvironments = M.fromList [
+    ("displaymath", mathEnvWith id Nothing "displaymath")
+  , ("math", math <$> mathEnv "math")
+  , ("equation", mathEnvWith id Nothing "equation")
+  , ("equation*", mathEnvWith id Nothing "equation*")
+  , ("gather", mathEnvWith id (Just "gathered") "gather")
+  , ("gather*", mathEnvWith id (Just "gathered") "gather*")
+  , ("multline", mathEnvWith id (Just "gathered") "multline")
+  , ("multline*", mathEnvWith id (Just "gathered") "multline*")
+  , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray")
+  , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*")
+  , ("align", mathEnvWith id (Just "aligned") "align")
+  , ("align*", mathEnvWith id (Just "aligned") "align*")
+  , ("alignat", mathEnvWith id (Just "aligned") "alignat")
+  , ("alignat*", mathEnvWith id (Just "aligned") "alignat*")
+  , ("dmath", mathEnvWith id Nothing "dmath")
+  , ("dmath*", mathEnvWith id Nothing "dmath*")
+  , ("dgroup", mathEnvWith id (Just "aligned") "dgroup")
+  , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*")
+  , ("darray", mathEnvWith id (Just "aligned") "darray")
+  , ("darray*", mathEnvWith id (Just "aligned") "darray*")
+  ]
+
+theoremstyle :: PandocMonad m => LP m Blocks
+theoremstyle = do
+  stylename <- untokenize <$> braced
+  let mbstyle = case stylename of
+                  "plain"      -> Just PlainStyle
+                  "definition" -> Just DefinitionStyle
+                  "remark"     -> Just RemarkStyle
+                  _            -> Nothing
+  case mbstyle of
+    Nothing  -> return ()
+    Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty }
+  return mempty
+
+newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks
+newtheorem inline = do
+  number <- option True (False <$ symbol '*' <* sp)
+  name <- untokenize <$> braced
+  sp
+  series <- option Nothing $ Just . untokenize <$> bracketedToks
+  sp
+  showName <- tokWith inline
+  sp
+  syncTo <- option Nothing $ Just . untokenize <$> bracketedToks
+  sty <- sLastTheoremStyle <$> getState
+  let spec = TheoremSpec { theoremName = showName
+                         , theoremStyle = sty
+                         , theoremSeries = series
+                         , theoremSyncTo = syncTo
+                         , theoremNumber = number
+                         , theoremLastNum = DottedNum [0] }
+  tmap <- sTheoremMap <$> getState
+  updateState $ \s -> s{ sTheoremMap =
+                            M.insert name spec tmap }
+  return mempty
+
+theoremEnvironment :: PandocMonad m
+                   => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks
+theoremEnvironment blocks opt name = do
+  tmap <- sTheoremMap <$> getState
+  case M.lookup name tmap of
+    Nothing -> mzero
+    Just tspec -> do
+       optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt
+       mblabel <- option Nothing $ Just . untokenize <$>
+                   try (spaces >> controlSeq "label" >> spaces >> braced)
+       bs <- env name blocks
+       number <-
+         if theoremNumber tspec
+            then do
+               let name' = fromMaybe name $ theoremSeries tspec
+               num <- getNextNumber
+                   (maybe (DottedNum [0]) theoremLastNum .
+                    M.lookup name' . sTheoremMap)
+               updateState $ \s ->
+                 s{ sTheoremMap =
+                       M.adjust
+                       (\spec -> spec{ theoremLastNum = num })
+                       name'
+                       (sTheoremMap s)
+                  }
+
+               case mblabel of
+                 Just ident ->
+                   updateState $ \s ->
+                     s{ sLabels = M.insert ident
+                         (B.toList $
+                           theoremName tspec <> "\160" <>
+                           str (renderDottedNum num)) (sLabels s) }
+                 Nothing -> return ()
+               return $ space <> B.text (renderDottedNum num)
+            else return mempty
+       let titleEmph = case theoremStyle tspec of
+                         PlainStyle      -> B.strong
+                         DefinitionStyle -> B.strong
+                         RemarkStyle     -> B.emph
+       let title = titleEmph (theoremName tspec <> number)
+                      <> optTitle <> "." <> space
+       return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title
+              $ case theoremStyle tspec of
+                  PlainStyle -> walk italicize bs
+                  _          -> bs
+
+
+
+proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks
+proof blocks opt = do
+  title <- option (B.text "Proof") opt
+  bs <- env "proof" blocks
+  return $
+    B.divWith ("", ["proof"], []) $
+      addQed $ addTitle (B.emph (title <> ".")) bs
+
+addTitle :: Inlines -> Blocks -> Blocks
+addTitle ils bs =
+  case B.toList bs of
+    (Para xs : rest)
+      -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest)
+    _ -> B.para ils <> bs
+
+addQed :: Blocks -> Blocks
+addQed bs =
+  case Seq.viewr (B.unMany bs) of
+    s Seq.:> Para ils
+      -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign))
+    _ -> bs <> B.para qedSign
+ where
+  qedSign = B.str "\xa0\x25FB"
+
+italicize :: Block -> Block
+italicize x@(Para [Image{}]) = x -- see #6925
+italicize (Para ils) = Para [Emph ils]
+italicize (Plain ils) = Plain [Emph ils]
+italicize x = x
+
+
-- 
cgit v1.2.3


From 2463fbf61d2ea8636e70c44624dc5bc1668fa4fd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 21:43:11 -0800
Subject: LaTeX writer: use function instead of map for accent lookup.

---
 src/Text/Pandoc/Writers/LaTeX.hs | 52 +++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e8a187599..e31ec9d52 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -262,7 +262,7 @@ stringToLaTeX context zs = do
         isUrl = ctx == URLString
         mbAccentCmd =
           if writerPreferAscii opts && ctx == TextString
-             then uncons xs >>= \(c,_) -> M.lookup c accents
+             then uncons xs >>= \(c,_) -> lookupAccent c
              else Nothing
         emits s =
           case mbAccentCmd of
@@ -350,32 +350,30 @@ stringToLaTeX context zs = do
                   _   -> emitc x
            | otherwise -> emitc x
 
-accents :: M.Map Char String
-accents = M.fromList
-  [ ('\779' , "\\H")
-  , ('\768' , "\\`")
-  , ('\769' , "\\'")
-  , ('\770' , "\\^")
-  , ('\771' , "\\~")
-  , ('\776' , "\\\"")
-  , ('\775' , "\\.")
-  , ('\772' , "\\=")
-  , ('\781' , "\\|")
-  , ('\817' , "\\b")
-  , ('\807' , "\\c")
-  , ('\783' , "\\G")
-  , ('\777' , "\\h")
-  , ('\803' , "\\d")
-  , ('\785' , "\\f")
-  , ('\778' , "\\r")
-  , ('\865' , "\\t")
-  , ('\782' , "\\U")
-  , ('\780' , "\\v")
-  , ('\774' , "\\u")
-  , ('\808' , "\\k")
-  , ('\785' , "\\newtie")
-  , ('\8413', "\\textcircled")
-  ]
+lookupAccent :: Char -> Maybe String
+lookupAccent '\779'  = Just "\\H"
+lookupAccent '\768'  = Just "\\`"
+lookupAccent '\769'  = Just "\\'"
+lookupAccent '\770'  = Just "\\^"
+lookupAccent '\771'  = Just "\\~"
+lookupAccent '\776'  = Just "\\\""
+lookupAccent '\775'  = Just "\\."
+lookupAccent '\772'  = Just "\\="
+lookupAccent '\781'  = Just "\\|"
+lookupAccent '\817'  = Just "\\b"
+lookupAccent '\807'  = Just "\\c"
+lookupAccent '\783'  = Just "\\G"
+lookupAccent '\777'  = Just "\\h"
+lookupAccent '\803'  = Just "\\d"
+lookupAccent '\785'  = Just "\\f"
+lookupAccent '\778'  = Just "\\r"
+lookupAccent '\865'  = Just "\\t"
+lookupAccent '\782'  = Just "\\U"
+lookupAccent '\780'  = Just "\\v"
+lookupAccent '\774'  = Just "\\u"
+lookupAccent '\808'  = Just "\\k"
+lookupAccent '\8413' = Just "\\textcircled"
+lookupAccent _       = Nothing
 
 toLabel :: PandocMonad m => Text -> LW m Text
 toLabel z = go `fmap` stringToLaTeX URLString z
-- 
cgit v1.2.3


From 7e38b8e55a49f027b6aaa690d84225b0374eb057 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 22:51:26 -0800
Subject: T.P.Readers.LaTeX: Don't export tokenize, untokenize.

[API change]

These were only exported for testing, which seems the
wrong thing to do.  They don't belong in the public
API and are not really usable as they are, without access
to the Tok type which is not exported.

Removed the tokenize/untokenize roundtrip test.

We put a quickcheck property in the comments which
may be used when this code is touched (if it is).
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  2 --
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  9 +++++++++
 test/Tests/Readers/LaTeX.hs              | 17 +----------------
 3 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 772263578..e63fbc185 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -22,8 +22,6 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
                                    inlineCommand,
-                                   tokenize,
-                                   untokenize
                                  ) where
 
 import Control.Applicative (many, optional, (<|>))
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index a5a39d3c9..db58b333d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -292,6 +292,15 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
            Left e   -> Prelude.fail (show e)
            Right s' -> return s'
 
+{-
+When tokenize or untokenize change, test with this
+QuickCheck property:
+
+> tokUntokRoundtrip :: String -> Bool
+> tokUntokRoundtrip s =
+>   let t = T.pack s in untokenize (tokenize "random" t) == t
+-}
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs
index 8385b751e..9388fd040 100644
--- a/test/Tests/Readers/LaTeX.hs
+++ b/test/Tests/Readers/LaTeX.hs
@@ -15,10 +15,8 @@ module Tests.Readers.LaTeX (tests) where
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Pandoc.Readers.LaTeX (tokenize, untokenize)
 import Test.Tasty
 import Test.Tasty.HUnit
-import Test.Tasty.QuickCheck
 import Tests.Helpers
 import Text.Pandoc
 import Text.Pandoc.Arbitrary ()
@@ -47,21 +45,8 @@ simpleTable' aligns rows
   where
     toRow = Row nullAttr . map simpleCell
 
-tokUntokRt :: String -> Bool
-tokUntokRt s = untokenize (tokenize "random" t) == t
-  where t = T.pack s
-
 tests :: [TestTree]
-tests = [ testGroup "tokenization"
-          [ testCase "tokenizer round trip on test case" $ do
-                 orig <- UTF8.readFile "../test/latex-reader.latex"
-                 let new = untokenize $ tokenize "../test/latex-reader.latex"
-                             orig
-                 assertEqual "untokenize . tokenize is identity" orig new
-          , testProperty "untokenize . tokenize is identity" tokUntokRt
-          ]
-
-        , testGroup "basic"
+tests = [ testGroup "basic"
           [ "simple" =:
             "word" =?> para "word"
           , "space" =:
-- 
cgit v1.2.3


From 6a6291d9e3c1ce9c3453318d87239b6f9260d924 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 23:05:35 -0800
Subject: Change T.P.Readers.LaTeX.SIunitx to export a command map...

instead of individual commands.
---
 src/Text/Pandoc/Readers/LaTeX.hs         | 11 ++---------
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 21 ++++++++++++++-------
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index e63fbc185..304584072 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
-import Text.Pandoc.Readers.LaTeX.SIunitx
+import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
@@ -426,6 +426,7 @@ inlineCommands =
   M.union inlineLanguageCommands $
   M.union (accentCommands tok) $
   M.union (citationCommands inline) $
+  M.union (siunitxCommands tok) $
   M.fromList
   [ ("emph", extractSpaces emph <$> tok)
   , ("textit", extractSpaces emph <$> tok)
@@ -594,14 +595,6 @@ inlineCommands =
   , ("Acfp", doAcronymPlural "full")
   , ("Acsp", doAcronymPlural "abbrv")
   , ("Aclp", doAcronymPlural "long")
-  -- siuntix
-  , ("si", skipopts *> dosi tok)
-  , ("SI", doSI tok)
-  , ("SIrange", doSIrange True tok)
-  , ("numrange", doSIrange False tok)
-  , ("numlist", doSInumlist)
-  , ("num", doSInum)
-  , ("ang", doSIang)
   -- hyphenat
   , ("bshyp", lit "\\\173")
   , ("fshyp", lit "/\173")
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index db9c276e7..1952f4e1a 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,12 +1,6 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Readers.LaTeX.SIunitx
-  ( dosi
-  , doSI
-  , doSIrange
-  , doSInum
-  , doSInumlist
-  , doSIang
-  )
+  ( siunitxCommands )
 where
 import Text.Pandoc.Builder
 import Text.Pandoc.Readers.LaTeX.Parsing
@@ -21,6 +15,19 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
 
+
+siunitxCommands :: PandocMonad m
+                 => LP m Inlines -> M.Map Text (LP m Inlines)
+siunitxCommands tok = M.fromList
+  [ ("si", skipopts *> dosi tok)
+  , ("SI", doSI tok)
+  , ("SIrange", doSIrange True tok)
+  , ("numrange", doSIrange False tok)
+  , ("numlist", doSInumlist)
+  , ("num", doSInum)
+  , ("ang", doSIang)
+  ]
+
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
 dosi tok = grouped (siUnit tok) <|> siUnit tok
 
-- 
cgit v1.2.3


From 3793ed8beb5da0a8afbe8d3b121c2a1cd5bece44 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 28 Feb 2021 23:43:55 -0800
Subject: Removed unnecessary pragmas.

---
 src/Text/Pandoc/Readers/LaTeX.hs | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 304584072..9ad94e417 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,6 +1,4 @@
 {-# LANGUAGE CPP                   #-}
-{-# LANGUAGE FlexibleInstances     #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE PatternGuards         #-}
 {-# LANGUAGE ScopedTypeVariables   #-}
-- 
cgit v1.2.3


From e1454fe0d0e2f1cb4e9c5753f095a1f0a8580ffe Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 1 Mar 2021 14:14:58 +0100
Subject: Jira writer: use Span identifiers as anchors

Closes: tarleb/jira-wiki-markup#3.
---
 src/Text/Pandoc/Writers/Jira.hs | 4 +++-
 test/Tests/Writers/Jira.hs      | 9 ++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index c21085a4f..131896201 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -292,7 +292,9 @@ quotedToJira qtype xs = do
 spanToJira :: PandocMonad m
            => Attr -> [Inline]
            -> JiraConverter m [Jira.Inline]
-spanToJira (_, _classes, _) = toJiraInlines
+spanToJira (ident, _classes, _attribs) inls = case ident of
+  "" -> toJiraInlines inls
+  _  -> (Jira.Anchor ident :) <$> toJiraInlines inls
 
 registerNotes :: PandocMonad m => [Block] -> JiraConverter m [Jira.Inline]
 registerNotes contents = do
diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs
index 93d830c94..aff8348d4 100644
--- a/test/Tests/Writers/Jira.hs
+++ b/test/Tests/Writers/Jira.hs
@@ -3,6 +3,7 @@ module Tests.Writers.Jira (tests) where
 
 import Data.Text (unpack)
 import Test.Tasty
+import Test.Tasty.HUnit (HasCallStack)
 import Tests.Helpers
 import Text.Pandoc
 import Text.Pandoc.Arbitrary ()
@@ -12,7 +13,7 @@ jira :: (ToPandoc a) => a -> String
 jira = unpack . purely (writeJira def) . toPandoc
 
 infix 4 =:
-(=:) :: (ToString a, ToPandoc a)
+(=:) :: (ToString a, ToPandoc a, HasCallStack)
      => String -> (a, String) -> TestTree
 (=:) = test jira
 
@@ -61,5 +62,11 @@ tests =
         linkWith ("", ["user-account"], []) "~johndoe" "" "~johndoe" =?>
         "[~johndoe]"
       ]
+
+    , testGroup "spans"
+      [ "id is used as anchor" =:
+        spanWith ("unicorn", [], []) (str "Unicorn") =?>
+        "{anchor:unicorn}Unicorn"
+      ]
     ]
   ]
-- 
cgit v1.2.3


From 382f0e23d22b15aaa9fe2aeb6117ef0a102e379d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 1 Mar 2021 08:55:42 -0800
Subject: Factor out T.P.Readers.LaTeX.Macro.

---
 pandoc.cabal                           |   1 +
 src/Text/Pandoc/Readers/LaTeX.hs       | 141 +-----------------------------
 src/Text/Pandoc/Readers/LaTeX/Macro.hs | 153 +++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+), 139 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Macro.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 56a9491f2..c3317f24b 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -634,6 +634,7 @@ library
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Math,
                    Text.Pandoc.Readers.LaTeX.Table,
+                   Text.Pandoc.Readers.LaTeX.Macro,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad94e417..fa77595b9 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -47,8 +47,7 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
-import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
-                                        ArgSpec (..), Tok (..), TokType (..))
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
@@ -58,6 +57,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        newtheorem, theoremstyle, proof,
                                        theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
+import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
@@ -1027,143 +1027,6 @@ authors = try $ do
   egroup
   addMeta "author" (map trimInlines auths)
 
-macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
-macroDef constructor = do
-    (_, s) <- withRaw (commandDef <|> environmentDef)
-    (constructor (untokenize s) <$
-      guardDisabled Ext_latex_macros)
-     <|> return mempty
-  where commandDef = do
-          (name, macro') <- newcommand <|> letmacro <|> defmacro
-          guardDisabled Ext_latex_macros <|>
-           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
-        environmentDef = do
-          mbenv <- newenvironment
-          case mbenv of
-            Nothing -> return ()
-            Just (name, macro1, macro2) ->
-              guardDisabled Ext_latex_macros <|>
-                do updateState $ \s -> s{ sMacros =
-                    M.insert name macro1 (sMacros s) }
-                   updateState $ \s -> s{ sMacros =
-                    M.insert ("end" <> name) macro2 (sMacros s) }
-        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
-        -- is equivalent to
-        -- @\newcommand{\envname}[n-args][default]{begin}@
-        -- @\newcommand{\endenvname}@
-
-letmacro :: PandocMonad m => LP m (Text, Macro)
-letmacro = do
-  controlSeq "let"
-  (name, contents) <- withVerbatimMode $ do
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    optional $ symbol '='
-    spaces
-    -- we first parse in verbatim mode, and then expand macros,
-    -- because we don't want \let\foo\bar to turn into
-    -- \let\foo hello if we have previously \def\bar{hello}
-    contents <- bracedOrToken
-    return (name, contents)
-  contents' <- doMacros' 0 contents
-  return (name, Macro ExpandWhenDefined [] Nothing contents')
-
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
-  -- we use withVerbatimMode, because macros are to be expanded
-  -- at point of use, not point of definition
-  withVerbatimMode $ do
-    controlSeq "def"
-    Tok _ (CtrlSeq name) _ <- anyControlSeq
-    argspecs <- many (argspecArg <|> argspecPattern)
-    contents <- bracedOrToken
-    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
-
-argspecArg :: PandocMonad m => LP m ArgSpec
-argspecArg = do
-  Tok _ (Arg i) _ <- satisfyTok isArgTok
-  return $ ArgNum i
-
-argspecPattern :: PandocMonad m => LP m ArgSpec
-argspecPattern =
-  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
-                              (toktype' == Symbol || toktype' == Word) &&
-                              (txt /= "{" && txt /= "\\" && txt /= "}")))
-
-newcommand :: PandocMonad m => LP m (Text, Macro)
-newcommand = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
-                             controlSeq "renewcommand" <|>
-                             controlSeq "providecommand" <|>
-                             controlSeq "DeclareMathOperator" <|>
-                             controlSeq "DeclareRobustCommand"
-  withVerbatimMode $ do
-    Tok _ (CtrlSeq name) txt <- do
-      optional (symbol '*')
-      anyControlSeq <|>
-        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    let argspecs = map ArgNum [1..numargs]
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    spaces
-    contents' <- bracedOrToken
-    let contents =
-         case mtype of
-              "DeclareMathOperator" ->
-                 Tok pos (CtrlSeq "mathop") "\\mathop"
-                 : Tok pos Symbol "{"
-                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
-                 : Tok pos Symbol "{"
-                 : (contents' ++
-                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
-              _                     -> contents'
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-        Just macro
-          | mtype == "newcommand" -> do
-              report $ MacroAlreadyDefined txt pos
-              return (name, macro)
-          | mtype == "providecommand" -> return (name, macro)
-        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
-
-newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
-newenvironment = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
-                             controlSeq "renewenvironment" <|>
-                             controlSeq "provideenvironment"
-  withVerbatimMode $ do
-    optional $ symbol '*'
-    spaces
-    name <- untokenize <$> braced
-    spaces
-    numargs <- option 0 $ try bracketedNum
-    spaces
-    optarg <- option Nothing $ Just <$> try bracketedToks
-    let argspecs = map (\i -> ArgNum i) [1..numargs]
-    startcontents <- spaces >> bracedOrToken
-    endcontents <- spaces >> bracedOrToken
-    macros <- sMacros <$> getState
-    case M.lookup name macros of
-         Just _
-           | mtype == "newenvironment" -> do
-               report $ MacroAlreadyDefined name pos
-               return Nothing
-           | mtype == "provideenvironment" ->
-               return Nothing
-         _ -> return $ Just (name,
-                      Macro ExpandWhenUsed argspecs optarg startcontents,
-                      Macro ExpandWhenUsed [] Nothing endcontents)
-
-bracketedNum :: PandocMonad m => LP m Int
-bracketedNum = do
-  ds <- untokenize <$> bracketedToks
-  case safeRead ds of
-       Just i -> return i
-       _      -> return 0
-
 looseItem :: PandocMonad m => LP m Blocks
 looseItem = do
   inListItem <- sInListItem <$> getState
diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
new file mode 100644
index 000000000..607f5438c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -0,0 +1,153 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Macro
+  ( macroDef
+  )
+where
+import Text.Pandoc.Extensions (Extension(..))
+import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined))
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Class
+import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
+                            optional, space, spaces, withRaw, (<|>))
+import Control.Applicative ((<|>), optional)
+import qualified Data.Map as M
+import Data.Text (Text)
+
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
+macroDef constructor = do
+    (_, s) <- withRaw (commandDef <|> environmentDef)
+    (constructor (untokenize s) <$
+      guardDisabled Ext_latex_macros)
+     <|> return mempty
+  where commandDef = do
+          (name, macro') <- newcommand <|> letmacro <|> defmacro
+          guardDisabled Ext_latex_macros <|>
+           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
+        environmentDef = do
+          mbenv <- newenvironment
+          case mbenv of
+            Nothing -> return ()
+            Just (name, macro1, macro2) ->
+              guardDisabled Ext_latex_macros <|>
+                do updateState $ \s -> s{ sMacros =
+                    M.insert name macro1 (sMacros s) }
+                   updateState $ \s -> s{ sMacros =
+                    M.insert ("end" <> name) macro2 (sMacros s) }
+        -- @\newenvironment{envname}[n-args][default]{begin}{end}@
+        -- is equivalent to
+        -- @\newcommand{\envname}[n-args][default]{begin}@
+        -- @\newcommand{\endenvname}@
+
+letmacro :: PandocMonad m => LP m (Text, Macro)
+letmacro = do
+  controlSeq "let"
+  (name, contents) <- withVerbatimMode $ do
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    optional $ symbol '='
+    spaces
+    -- we first parse in verbatim mode, and then expand macros,
+    -- because we don't want \let\foo\bar to turn into
+    -- \let\foo hello if we have previously \def\bar{hello}
+    contents <- bracedOrToken
+    return (name, contents)
+  contents' <- doMacros' 0 contents
+  return (name, Macro ExpandWhenDefined [] Nothing contents')
+
+defmacro :: PandocMonad m => LP m (Text, Macro)
+defmacro = try $
+  -- we use withVerbatimMode, because macros are to be expanded
+  -- at point of use, not point of definition
+  withVerbatimMode $ do
+    controlSeq "def"
+    Tok _ (CtrlSeq name) _ <- anyControlSeq
+    argspecs <- many (argspecArg <|> argspecPattern)
+    contents <- bracedOrToken
+    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
+
+argspecArg :: PandocMonad m => LP m ArgSpec
+argspecArg = do
+  Tok _ (Arg i) _ <- satisfyTok isArgTok
+  return $ ArgNum i
+
+argspecPattern :: PandocMonad m => LP m ArgSpec
+argspecPattern =
+  Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) ->
+                              (toktype' == Symbol || toktype' == Word) &&
+                              (txt /= "{" && txt /= "\\" && txt /= "}")))
+
+newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand = do
+  pos <- getPosition
+  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+                             controlSeq "renewcommand" <|>
+                             controlSeq "providecommand" <|>
+                             controlSeq "DeclareMathOperator" <|>
+                             controlSeq "DeclareRobustCommand"
+  withVerbatimMode $ do
+    Tok _ (CtrlSeq name) txt <- do
+      optional (symbol '*')
+      anyControlSeq <|>
+        (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}')
+    spaces
+    numargs <- option 0 $ try bracketedNum
+    let argspecs = map ArgNum [1..numargs]
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks
+    spaces
+    contents' <- bracedOrToken
+    let contents =
+         case mtype of
+              "DeclareMathOperator" ->
+                 Tok pos (CtrlSeq "mathop") "\\mathop"
+                 : Tok pos Symbol "{"
+                 : Tok pos (CtrlSeq "mathrm") "\\mathrm"
+                 : Tok pos Symbol "{"
+                 : (contents' ++
+                   [ Tok pos Symbol "}", Tok pos Symbol "}" ])
+              _                     -> contents'
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+        Just macro
+          | mtype == "newcommand" -> do
+              report $ MacroAlreadyDefined txt pos
+              return (name, macro)
+          | mtype == "providecommand" -> return (name, macro)
+        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
+
+newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
+newenvironment = do
+  pos <- getPosition
+  Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|>
+                             controlSeq "renewenvironment" <|>
+                             controlSeq "provideenvironment"
+  withVerbatimMode $ do
+    optional $ symbol '*'
+    spaces
+    name <- untokenize <$> braced
+    spaces
+    numargs <- option 0 $ try bracketedNum
+    spaces
+    optarg <- option Nothing $ Just <$> try bracketedToks
+    let argspecs = map (\i -> ArgNum i) [1..numargs]
+    startcontents <- spaces >> bracedOrToken
+    endcontents <- spaces >> bracedOrToken
+    macros <- sMacros <$> getState
+    case M.lookup name macros of
+         Just _
+           | mtype == "newenvironment" -> do
+               report $ MacroAlreadyDefined name pos
+               return Nothing
+           | mtype == "provideenvironment" ->
+               return Nothing
+         _ -> return $ Just (name,
+                      Macro ExpandWhenUsed argspecs optarg startcontents,
+                      Macro ExpandWhenUsed [] Nothing endcontents)
+
+bracketedNum :: PandocMonad m => LP m Int
+bracketedNum = do
+  ds <- untokenize <$> bracketedToks
+  case safeRead ds of
+       Just i -> return i
+       _      -> return 0
-- 
cgit v1.2.3


From 7f1b933aaacf4a01fb23fa4989f190098e96e702 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 1 Mar 2021 09:02:10 -0800
Subject: Make T.P.Readers.LaTeX.Types an unexported module.

[API change]

This is really an implementation detail that shouldn't be
exposed in the public API.
---
 pandoc.cabal                     | 2 +-
 src/Text/Pandoc/Readers/LaTeX.hs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index c3317f24b..ad325ee24 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -518,7 +518,6 @@ library
                    Text.Pandoc.Readers,
                    Text.Pandoc.Readers.HTML,
                    Text.Pandoc.Readers.LaTeX,
-                   Text.Pandoc.Readers.LaTeX.Types,
                    Text.Pandoc.Readers.Markdown,
                    Text.Pandoc.Readers.CommonMark,
                    Text.Pandoc.Readers.Creole,
@@ -627,6 +626,7 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
+                   Text.Pandoc.Readers.LaTeX.Types,
                    Text.Pandoc.Readers.LaTeX.Parsing,
                    Text.Pandoc.Readers.LaTeX.Lang,
                    Text.Pandoc.Readers.LaTeX.SIunitx,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index fa77595b9..fc85f0545 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -19,7 +19,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
                                    applyMacros,
                                    rawLaTeXInline,
                                    rawLaTeXBlock,
-                                   inlineCommand,
+                                   inlineCommand
                                  ) where
 
 import Control.Applicative (many, optional, (<|>))
-- 
cgit v1.2.3


From 2097411e4f4da0f0cd2fb4fdbb4759b6da600289 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 2 Mar 2021 21:06:56 -0800
Subject: Split up T.P.Writers.Markdown...

with T.P.Writers.Markdown.Types and T.P.Writers.Markdown.Inline.
The module was difficult to compile on low-memory system.s
---
 pandoc.cabal                               |   2 +
 src/Text/Pandoc/Writers/Markdown.hs        | 597 +---------------------------
 src/Text/Pandoc/Writers/Markdown/Inline.hs | 601 +++++++++++++++++++++++++++++
 src/Text/Pandoc/Writers/Markdown/Types.hs  |  81 ++++
 4 files changed, 690 insertions(+), 591 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/Markdown/Inline.hs
 create mode 100644 src/Text/Pandoc/Writers/Markdown/Types.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index ad325ee24..3aa29b477 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -665,6 +665,8 @@ library
                    Text.Pandoc.Writers.LaTeX.Notes,
                    Text.Pandoc.Writers.LaTeX.Table,
                    Text.Pandoc.Writers.LaTeX.Types,
+                   Text.Pandoc.Writers.Markdown.Types,
+                   Text.Pandoc.Writers.Markdown.Inline,
                    Text.Pandoc.Writers.Roff,
                    Text.Pandoc.Writers.Powerpoint.Presentation,
                    Text.Pandoc.Writers.Powerpoint.Output,
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index d33246a63..533bcc071 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -22,15 +22,13 @@ module Text.Pandoc.Writers.Markdown (
   writePlain) where
 import Control.Monad.Reader
 import Control.Monad.State.Strict
-import Data.Char (isAlphaNum, isDigit)
 import Data.Default
-import Data.List (find, intersperse, sortOn, transpose)
+import Data.List (intersperse, sortOn, transpose)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, mapMaybe)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import Network.HTTP (urlEncode)
 import Text.HTML.TagSoup (Tag (..), isTagText, parseTags)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Definition
@@ -44,59 +42,11 @@ import Text.Pandoc.Templates (renderTemplate)
 import Text.DocTemplates (Val(..), Context(..), FromContext(..))
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.HTML (writeHtml5String)
-import Text.Pandoc.Writers.Math (texMathToInlines)
-import Text.Pandoc.XML (toHtml5Entities)
-import Data.Coerce (coerce)
-
-type Notes = [[Block]]
-type Ref   = (Text, Target, Attr)
-type Refs  = [Ref]
-
-type MD m = ReaderT WriterEnv (StateT WriterState m)
-
-evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a
-evalMD md env st = evalStateT (runReaderT md env) st
-
-data WriterEnv = WriterEnv { envInList          :: Bool
-                           , envVariant         :: MarkdownVariant
-                           , envRefShortcutable :: Bool
-                           , envBlockLevel      :: Int
-                           , envEscapeSpaces    :: Bool
-                           }
-
-data MarkdownVariant =
-      PlainText
-    | Commonmark
-    | Markdown
-    deriving (Show, Eq)
-
-instance Default WriterEnv
-  where def = WriterEnv { envInList          = False
-                        , envVariant         = Markdown
-                        , envRefShortcutable = True
-                        , envBlockLevel      = 0
-                        , envEscapeSpaces    = False
-                        }
-
-data WriterState = WriterState { stNotes   :: Notes
-                               , stPrevRefs :: Refs
-                               , stRefs    :: Refs
-                               , stKeys    :: M.Map Key
-                                                (M.Map (Target, Attr) Int)
-                               , stLastIdx  :: Int
-                               , stIds     :: Set.Set Text
-                               , stNoteNum :: Int
-                               }
-
-instance Default WriterState
-  where def = WriterState{ stNotes = []
-                         , stPrevRefs = []
-                         , stRefs = []
-                         , stKeys = M.empty
-                         , stLastIdx = 0
-                         , stIds = Set.empty
-                         , stNoteNum = 1
-                         }
+import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown)
+import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..),
+                                           WriterState(..),
+                                           WriterEnv(..),
+                                           Ref, Refs, MD, evalMD)
 
 -- | Convert Pandoc to Markdown.
 writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text
@@ -297,49 +247,6 @@ noteToMarkdown opts num blocks = do
               then hang (writerTabStop opts) (marker <> spacer) contents
               else marker <> spacer <> contents
 
--- | Escape special characters for Markdown.
-escapeText :: WriterOptions -> Text -> Text
-escapeText opts = T.pack . go . T.unpack
-  where
-  go [] = []
-  go (c:cs) =
-    case c of
-       '<' | isEnabled Ext_all_symbols_escapable opts ->
-              '\\' : '<' : go cs
-           | otherwise -> "&lt;" ++ go cs
-       '>' | isEnabled Ext_all_symbols_escapable opts ->
-              '\\' : '>' : go cs
-           | otherwise -> "&gt;" ++ go cs
-       '@' | isEnabled Ext_citations opts ->
-               case cs of
-                    (d:_)
-                      | isAlphaNum d || d == '_'
-                         -> '\\':'@':go cs
-                    _ -> '@':go cs
-       _ | c `elem` ['\\','`','*','_','[',']','#'] ->
-              '\\':c:go cs
-       '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs
-       '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs
-       '~' | isEnabled Ext_subscript opts ||
-             isEnabled Ext_strikeout opts -> '\\':'~':go cs
-       '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':go cs
-       '\'' | isEnabled Ext_smart opts -> '\\':'\'':go cs
-       '"' | isEnabled Ext_smart opts -> '\\':'"':go cs
-       '-' | isEnabled Ext_smart opts ->
-              case cs of
-                   '-':_ -> '\\':'-':go cs
-                   _     -> '-':go cs
-       '.' | isEnabled Ext_smart opts ->
-              case cs of
-                   '.':'.':rest -> '\\':'.':'.':'.':go rest
-                   _            -> '.':go cs
-       _   -> case cs of
-                '_':x:xs
-                  | isEnabled Ext_intraword_underscores opts
-                  , isAlphaNum c
-                  , isAlphaNum x -> c : '_' : x : go xs
-                _                -> c : go cs
-
 attrsToMarkdown :: Attr -> Doc Text
 attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
         where attribId = case attribs of
@@ -912,499 +819,7 @@ blockListToMarkdown opts blocks = do
         | otherwise                   = RawBlock "markdown" "&nbsp;\n"
   mconcat <$> mapM (blockToMarkdown opts) (fixBlocks blocks)
 
-getKey :: Doc Text -> Key
-getKey = toKey . render Nothing
-
-findUsableIndex :: [Text] -> Int -> Int
-findUsableIndex lbls i = if tshow i `elem` lbls
-                         then findUsableIndex lbls (i + 1)
-                         else i
-
-getNextIndex :: PandocMonad m => MD m Int
-getNextIndex = do
-  prevRefs <- gets stPrevRefs
-  refs <- gets stRefs
-  i <- (+ 1) <$> gets stLastIdx
-  modify $ \s -> s{ stLastIdx = i }
-  let refLbls = map (\(r,_,_) -> r) $ prevRefs ++ refs
-  return $ findUsableIndex refLbls i
-
--- | Get reference for target; if none exists, create unique one and return.
---   Prefer label if possible; otherwise, generate a unique key.
-getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text
-getReference attr label target = do
-  refs <- gets stRefs
-  case find (\(_,t,a) -> t == target && a == attr) refs of
-    Just (ref, _, _) -> return ref
-    Nothing       -> do
-      keys <- gets stKeys
-      let key = getKey label
-      let rawkey = coerce key
-      case M.lookup key keys of
-           Nothing -> do -- no other refs with this label
-             (lab', idx) <- if T.null rawkey ||
-                                 T.length rawkey > 999 ||
-                                 T.any (\c -> c == '[' || c == ']') rawkey
-                               then do
-                                 i <- getNextIndex
-                                 return (tshow i, i)
-                               else
-                                 return (render Nothing label, 0)
-             modify (\s -> s{
-               stRefs = (lab', target, attr) : refs,
-               stKeys = M.insert (getKey label)
-                           (M.insert (target, attr) idx mempty)
-                                 (stKeys s) })
-             return lab'
-
-           Just km ->    -- we have refs with this label
-             case M.lookup (target, attr) km of
-                  Just i -> do
-                    let lab' = render Nothing $
-                               label <> if i == 0
-                                           then mempty
-                                           else literal (tshow i)
-                    -- make sure it's in stRefs; it may be
-                    -- a duplicate that was printed in a previous
-                    -- block:
-                    when ((lab', target, attr) `notElem` refs) $
-                       modify (\s -> s{
-                         stRefs = (lab', target, attr) : refs })
-                    return lab'
-                  Nothing -> do -- but this one is to a new target
-                    i <- getNextIndex
-                    let lab' = tshow i
-                    modify (\s -> s{
-                       stRefs = (lab', target, attr) : refs,
-                       stKeys = M.insert key
-                                   (M.insert (target, attr) i km)
-                                         (stKeys s) })
-                    return lab'
-
--- | Convert list of Pandoc inline elements to markdown.
-inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text)
-inlineListToMarkdown opts lst = do
-  inlist <- asks envInList
-  go (if inlist then avoidBadWrapsInList lst else lst)
-  where go [] = return empty
-        go (x@Math{}:y@(Str t):zs)
-          | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058
-          = liftM2 (<>) (inlineToMarkdown opts x)
-              (go (RawInline (Format "html") "<!-- -->" : y : zs))
-        go (i:is) = case i of
-            Link {} -> case is of
-                -- If a link is followed by another link, or '[', '(' or ':'
-                -- then we don't shortcut
-                Link {}:_                                       -> unshortcutable
-                Space:Link {}:_                                 -> unshortcutable
-                Space:(Str(thead -> Just '[')):_                -> unshortcutable
-                Space:(RawInline _ (thead -> Just '[')):_       -> unshortcutable
-                Space:(Cite _ _):_                              -> unshortcutable
-                SoftBreak:Link {}:_                             -> unshortcutable
-                SoftBreak:(Str(thead -> Just '[')):_            -> unshortcutable
-                SoftBreak:(RawInline _ (thead -> Just '[')):_   -> unshortcutable
-                SoftBreak:(Cite _ _):_                          -> unshortcutable
-                LineBreak:Link {}:_                             -> unshortcutable
-                LineBreak:(Str(thead -> Just '[')):_            -> unshortcutable
-                LineBreak:(RawInline _ (thead -> Just '[')):_   -> unshortcutable
-                LineBreak:(Cite _ _):_                          -> unshortcutable
-                (Cite _ _):_                                    -> unshortcutable
-                Str (thead -> Just '['):_                       -> unshortcutable
-                Str (thead -> Just '('):_                       -> unshortcutable
-                Str (thead -> Just ':'):_                       -> unshortcutable
-                (RawInline _ (thead -> Just '[')):_             -> unshortcutable
-                (RawInline _ (thead -> Just '(')):_             -> unshortcutable
-                (RawInline _ (thead -> Just ':')):_             -> unshortcutable
-                (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable
-                _                                               -> shortcutable
-            _ -> shortcutable
-          where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
-                unshortcutable = do
-                    iMark <- local
-                             (\env -> env { envRefShortcutable = False })
-                             (inlineToMarkdown opts i)
-                    fmap (iMark <>) (go is)
-                thead = fmap fst . T.uncons
-
-isSp :: Inline -> Bool
-isSp Space     = True
-isSp SoftBreak = True
-isSp _         = False
-
-avoidBadWrapsInList :: [Inline] -> [Inline]
-avoidBadWrapsInList [] = []
-avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s =
-  Str (" >" <> cs) : avoidBadWrapsInList xs
-avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))]
-  | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]]
-avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs)
-  | T.null cs && isSp s && c `elem` ['-','*','+'] =
-    Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs
-avoidBadWrapsInList (s:Str cs:Space:xs)
-  | isSp s && isOrderedListMarker cs =
-    Str (" " <> cs) : Space : avoidBadWrapsInList xs
-avoidBadWrapsInList [s, Str cs]
-  | isSp s && isOrderedListMarker cs = [Str $ " " <> cs]
-avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
-
-isOrderedListMarker :: Text -> Bool
-isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) &&
-              isRight (runParser (anyOrderedListMarker >> eof)
-                       defaultParserState "" xs)
-
-isRight :: Either a b -> Bool
-isRight (Right _) = True
-isRight (Left  _) = False
-
--- | Convert Pandoc inline element to markdown.
-inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text)
-inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) =
-  case lookup "data-emoji" kvs of
-       Just emojiname | isEnabled Ext_emoji opts ->
-            return $ ":" <> literal emojiname <> ":"
-       _ -> inlineToMarkdown opts (Str s)
-inlineToMarkdown opts (Span attrs ils) = do
-  variant <- asks envVariant
-  contents <- inlineListToMarkdown opts ils
-  return $ case attrs of
-             (_,["csl-block"],_) -> (cr <>)
-             (_,["csl-left-margin"],_) -> (cr <>)
-             (_,["csl-indent"],_) -> (cr <>)
-             _ -> id
-         $ case variant of
-                PlainText -> contents
-                _     | attrs == nullAttr -> contents
-                      | isEnabled Ext_bracketed_spans opts ->
-                        let attrs' = if attrs /= nullAttr
-                                        then attrsToMarkdown attrs
-                                        else empty
-                        in "[" <> contents <> "]" <> attrs'
-                      | isEnabled Ext_raw_html opts ||
-                        isEnabled Ext_native_spans opts ->
-                        tagWithAttrs "span" attrs <> contents <> literal "</span>"
-                      | otherwise -> contents
-inlineToMarkdown _ (Emph []) = return empty
-inlineToMarkdown opts (Emph lst) = do
-  variant <- asks envVariant
-  contents <- inlineListToMarkdown opts lst
-  return $ case variant of
-             PlainText
-               | isEnabled Ext_gutenberg opts -> "_" <> contents <> "_"
-               | otherwise ->  contents
-             _ -> "*" <> contents <> "*"
-inlineToMarkdown _ (Underline []) = return empty
-inlineToMarkdown opts (Underline lst) = do
-  variant <- asks envVariant
-  contents <- inlineListToMarkdown opts lst
-  case variant of
-    PlainText -> return contents
-    _     | isEnabled Ext_bracketed_spans opts ->
-            return $ "[" <> contents <> "]" <> "{.ul}"
-          | isEnabled Ext_native_spans opts ->
-            return $ tagWithAttrs "span" ("", ["underline"], [])
-              <> contents
-              <> literal "</span>"
-          | isEnabled Ext_raw_html opts ->
-            return $ "<u>" <> contents <> "</u>"
-          | otherwise -> inlineToMarkdown opts (Emph lst)
-inlineToMarkdown _ (Strong []) = return empty
-inlineToMarkdown opts (Strong lst) = do
-  variant <- asks envVariant
-  case variant of
-    PlainText ->
-             inlineListToMarkdown opts $
-               if isEnabled Ext_gutenberg opts
-                  then capitalize lst
-                  else lst
-    _ -> do
-       contents <- inlineListToMarkdown opts lst
-       return $ "**" <> contents <> "**"
-inlineToMarkdown _ (Strikeout []) = return empty
-inlineToMarkdown opts (Strikeout lst) = do
-  contents <- inlineListToMarkdown opts lst
-  return $ if isEnabled Ext_strikeout opts
-              then "~~" <> contents <> "~~"
-              else if isEnabled Ext_raw_html opts
-                       then "<s>" <> contents <> "</s>"
-                       else contents
-inlineToMarkdown _ (Superscript []) = return empty
-inlineToMarkdown opts (Superscript lst) =
-  local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do
-    contents <- inlineListToMarkdown opts lst
-    if isEnabled Ext_superscript opts
-       then return $ "^" <> contents <> "^"
-       else if isEnabled Ext_raw_html opts
-                then return $ "<sup>" <> contents <> "</sup>"
-                else
-                  case traverse toSuperscriptInline lst of
-                    Just xs' | not (writerPreferAscii opts)
-                      -> inlineListToMarkdown opts xs'
-                    _ -> do
-                      let rendered = render Nothing contents
-                      return $
-                        case mapM toSuperscript (T.unpack rendered) of
-                           Just r  -> literal $ T.pack r
-                           Nothing -> literal $ "^(" <> rendered <> ")"
-inlineToMarkdown _ (Subscript []) = return empty
-inlineToMarkdown opts (Subscript lst) =
-  local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do
-    contents <- inlineListToMarkdown opts lst
-    if isEnabled Ext_subscript opts
-       then return $ "~" <> contents <> "~"
-       else if isEnabled Ext_raw_html opts
-                then return $ "<sub>" <> contents <> "</sub>"
-                else
-                  case traverse toSubscriptInline lst of
-                    Just xs' | not (writerPreferAscii opts)
-                      -> inlineListToMarkdown opts xs'
-                    _ -> do
-                      let rendered = render Nothing contents
-                      return $
-                        case mapM toSuperscript (T.unpack rendered) of
-                           Just r  -> literal $ T.pack r
-                           Nothing -> literal $ "_(" <> rendered <> ")"
-inlineToMarkdown opts (SmallCaps lst) = do
-  variant <- asks envVariant
-  if variant /= PlainText &&
-     (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
-     then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst)
-     else inlineListToMarkdown opts $ capitalize lst
-inlineToMarkdown opts (Quoted SingleQuote lst) = do
-  contents <- inlineListToMarkdown opts lst
-  return $ if isEnabled Ext_smart opts
-              then "'" <> contents <> "'"
-              else
-                if writerPreferAscii opts
-                   then "&lsquo;" <> contents <> "&rsquo;"
-                   else "‘" <> contents <> "’"
-inlineToMarkdown opts (Quoted DoubleQuote lst) = do
-  contents <- inlineListToMarkdown opts lst
-  return $ if isEnabled Ext_smart opts
-              then "\"" <> contents <> "\""
-              else
-                if writerPreferAscii opts
-                   then "&ldquo;" <> contents <> "&rdquo;"
-                   else "“" <> contents <> "”"
-inlineToMarkdown opts (Code attr str) = do
-  let tickGroups = filter (T.any (== '`')) $ T.group str
-  let longest    = if null tickGroups
-                     then 0
-                     else maximum $ map T.length tickGroups
-  let marker     = T.replicate (longest + 1) "`"
-  let spacer     = if longest == 0 then "" else " "
-  let attrs      = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
-                      then attrsToMarkdown attr
-                      else empty
-  variant <- asks envVariant
-  case variant of
-     PlainText -> return $ literal str
-     _     ->  return $ literal
-                  (marker <> spacer <> str <> spacer <> marker) <> attrs
-inlineToMarkdown opts (Str str) = do
-  variant <- asks envVariant
-  let str' = (if writerPreferAscii opts
-                 then toHtml5Entities
-                 else id) .
-             (if isEnabled Ext_smart opts
-                 then unsmartify opts
-                 else id) .
-             (if variant == PlainText
-                 then id
-                 else escapeText opts) $ str
-  return $ literal str'
-inlineToMarkdown opts (Math InlineMath str) =
-  case writerHTMLMathMethod opts of
-       WebTeX url -> inlineToMarkdown opts
-                       (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str))
-       _ | isEnabled Ext_tex_math_dollars opts ->
-             return $ "$" <> literal str <> "$"
-         | isEnabled Ext_tex_math_single_backslash opts ->
-             return $ "\\(" <> literal str <> "\\)"
-         | isEnabled Ext_tex_math_double_backslash opts ->
-             return $ "\\\\(" <> literal str <> "\\\\)"
-         | otherwise -> do
-             variant <- asks envVariant
-             texMathToInlines InlineMath str >>=
-               inlineListToMarkdown opts .
-                 (if variant == PlainText then makeMathPlainer else id)
-inlineToMarkdown opts (Math DisplayMath str) =
-  case writerHTMLMathMethod opts of
-      WebTeX url -> (\x -> blankline <> x <> blankline) `fmap`
-             inlineToMarkdown opts (Image nullAttr [Str str]
-                    (url <> T.pack (urlEncode $ T.unpack str), str))
-      _ | isEnabled Ext_tex_math_dollars opts ->
-            return $ "$$" <> literal str <> "$$"
-        | isEnabled Ext_tex_math_single_backslash opts ->
-            return $ "\\[" <> literal str <> "\\]"
-        | isEnabled Ext_tex_math_double_backslash opts ->
-            return $ "\\\\[" <> literal str <> "\\\\]"
-        | otherwise -> (\x -> cr <> x <> cr) `fmap`
-            (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
-inlineToMarkdown opts il@(RawInline f str) = do
-  let tickGroups = filter (T.any (== '`')) $ T.group str
-  let numticks   = if null tickGroups
-                     then 1
-                     else 1 + maximum (map T.length tickGroups)
-  variant <- asks envVariant
-  let Format fmt = f
-  let rawAttribInline = return $
-         literal (T.replicate numticks "`") <> literal str <>
-         literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}"
-  let renderEmpty = mempty <$ report (InlineNotRendered il)
-  case variant of
-    PlainText -> renderEmpty
-    Commonmark
-      | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"]
-         -> return $ literal str
-    Markdown
-      | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
-                  "markdown_mmd", "markdown_strict"]
-         -> return $ literal str
-    _ | isEnabled Ext_raw_attribute opts -> rawAttribInline
-      | f `elem` ["html", "html5", "html4"]
-      , isEnabled Ext_raw_html opts
-         -> return $ literal str
-      | f `elem` ["latex", "tex"]
-      , isEnabled Ext_raw_tex opts
-         -> return $ literal str
-    _ -> renderEmpty
-
-
-inlineToMarkdown opts LineBreak = do
-  variant <- asks envVariant
-  if variant == PlainText || isEnabled Ext_hard_line_breaks opts
-     then return cr
-     else return $
-          if isEnabled Ext_escaped_line_breaks opts
-             then "\\" <> cr
-             else "  " <> cr
-inlineToMarkdown _ Space = do
-  escapeSpaces <- asks envEscapeSpaces
-  return $ if escapeSpaces then "\\ " else space
-inlineToMarkdown opts SoftBreak = do
-  escapeSpaces <- asks envEscapeSpaces
-  let space' = if escapeSpaces then "\\ " else space
-  return $ case writerWrapText opts of
-                WrapNone     -> space'
-                WrapAuto     -> space'
-                WrapPreserve -> cr
-inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
-inlineToMarkdown opts (Cite (c:cs) lst)
-  | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst
-  | otherwise =
-      if citationMode c == AuthorInText
-         then do
-           suffs <- inlineListToMarkdown opts $ citationSuffix c
-           rest <- mapM convertOne cs
-           let inbr = suffs <+> joincits rest
-               br   = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
-           return $ literal ("@" <> citationId c) <+> br
-         else do
-           cits <- mapM convertOne (c:cs)
-           return $ literal "[" <> joincits cits <> literal "]"
-  where
-        joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty)
-        convertOne Citation { citationId      = k
-                            , citationPrefix  = pinlines
-                            , citationSuffix  = sinlines
-                            , citationMode    = m }
-                               = do
-           pdoc <- inlineListToMarkdown opts pinlines
-           sdoc <- inlineListToMarkdown opts sinlines
-           let k' = literal (modekey m <> "@" <> k)
-               r = case sinlines of
-                        Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
-                        _                                         -> k' <+> sdoc
-           return $ pdoc <+> r
-        modekey SuppressAuthor = "-"
-        modekey _              = ""
-inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do
-  variant <- asks envVariant
-  linktext <- inlineListToMarkdown opts txt
-  let linktitle = if T.null tit
-                     then empty
-                     else literal $ " \"" <> tit <> "\""
-  let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src)
-  let useAuto = isURI src &&
-                case txt of
-                      [Str s] | escapeURI s == srcSuffix -> True
-                      _       -> False
-  let useRefLinks = writerReferenceLinks opts && not useAuto
-  shortcutable <- asks envRefShortcutable
-  let useShortcutRefLinks = shortcutable &&
-                            isEnabled Ext_shortcut_reference_links opts
-  reftext <- if useRefLinks
-                then literal <$> getReference attr linktext (src, tit)
-                else return mempty
-  case variant of
-    PlainText
-      | useAuto -> return $ literal srcSuffix
-      | otherwise -> return linktext
-    _ | useAuto -> return $ "<" <> literal srcSuffix <> ">"
-      | useRefLinks ->
-           let first  = "[" <> linktext <> "]"
-               second = if getKey linktext == getKey reftext
-                           then if useShortcutRefLinks
-                                   then ""
-                                   else "[]"
-                           else "[" <> reftext <> "]"
-           in  return $ first <> second
-      | isEnabled Ext_raw_html opts
-      , not (isEnabled Ext_link_attributes opts)
-      , attr /= nullAttr -> -- use raw HTML to render attributes
-          literal . T.strip <$>
-            writeHtml5String opts{ writerTemplate = Nothing }
-            (Pandoc nullMeta [Plain [lnk]])
-      | otherwise -> return $
-         "[" <> linktext <> "](" <> literal src <> linktitle <> ")" <>
-         linkAttributes opts attr
-inlineToMarkdown opts img@(Image attr alternate (source, tit))
-  | isEnabled Ext_raw_html opts &&
-    not (isEnabled Ext_link_attributes opts) &&
-    attr /= nullAttr = -- use raw HTML
-    literal . T.strip <$>
-      writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]])
-  | otherwise = do
-  variant <- asks envVariant
-  let txt = if null alternate || alternate == [Str source]
-                                 -- to prevent autolinks
-               then [Str ""]
-               else alternate
-  linkPart <- inlineToMarkdown opts (Link attr txt (source, tit))
-  return $ case variant of
-             PlainText -> "[" <> linkPart <> "]"
-             _     -> "!" <> linkPart
-inlineToMarkdown opts (Note contents) = do
-  modify (\st -> st{ stNotes = contents : stNotes st })
-  st <- get
-  let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + length (stNotes st) - 1)
-  if isEnabled Ext_footnotes opts
-     then return $ "[^" <> ref <> "]"
-     else return $ "[" <> ref <> "]"
-
-makeMathPlainer :: [Inline] -> [Inline]
-makeMathPlainer = walk go
-  where
-  go (Emph xs) = Span nullAttr xs
-  go x         = x
-
 lineBreakToSpace :: Inline -> Inline
 lineBreakToSpace LineBreak = Space
 lineBreakToSpace SoftBreak = Space
 lineBreakToSpace x         = x
-
-toSubscriptInline :: Inline -> Maybe Inline
-toSubscriptInline Space = Just Space
-toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
-toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s)
-toSubscriptInline LineBreak = Just LineBreak
-toSubscriptInline SoftBreak = Just SoftBreak
-toSubscriptInline _ = Nothing
-
-toSuperscriptInline :: Inline -> Maybe Inline
-toSuperscriptInline Space = Just Space
-toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils
-toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s)
-toSuperscriptInline LineBreak = Just LineBreak
-toSuperscriptInline SoftBreak = Just SoftBreak
-toSuperscriptInline _ = Nothing
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
new file mode 100644
index 000000000..19157701e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -0,0 +1,601 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns        #-}
+{- |
+   Module      : Text.Pandoc.Writers.Markdown.Inline
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.Markdown.Inline (
+  inlineListToMarkdown
+  ) where
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import Data.Char (isAlphaNum, isDigit)
+import Data.List (find, intersperse)
+import qualified Data.Map as M
+import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.HTTP (urlEncode)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
+import Text.DocLayout
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.HTML (writeHtml5String)
+import Text.Pandoc.Writers.Math (texMathToInlines)
+import Text.Pandoc.XML (toHtml5Entities)
+import Data.Coerce (coerce)
+import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..),
+                                           WriterState(..),
+                                           WriterEnv(..), MD)
+
+-- | Escape special characters for Markdown.
+escapeText :: WriterOptions -> Text -> Text
+escapeText opts = T.pack . go . T.unpack
+  where
+  go [] = []
+  go (c:cs) =
+    case c of
+       '<' | isEnabled Ext_all_symbols_escapable opts ->
+              '\\' : '<' : go cs
+           | otherwise -> "&lt;" ++ go cs
+       '>' | isEnabled Ext_all_symbols_escapable opts ->
+              '\\' : '>' : go cs
+           | otherwise -> "&gt;" ++ go cs
+       '@' | isEnabled Ext_citations opts ->
+               case cs of
+                    (d:_)
+                      | isAlphaNum d || d == '_'
+                         -> '\\':'@':go cs
+                    _ -> '@':go cs
+       _ | c `elem` ['\\','`','*','_','[',']','#'] ->
+              '\\':c:go cs
+       '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs
+       '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs
+       '~' | isEnabled Ext_subscript opts ||
+             isEnabled Ext_strikeout opts -> '\\':'~':go cs
+       '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':go cs
+       '\'' | isEnabled Ext_smart opts -> '\\':'\'':go cs
+       '"' | isEnabled Ext_smart opts -> '\\':'"':go cs
+       '-' | isEnabled Ext_smart opts ->
+              case cs of
+                   '-':_ -> '\\':'-':go cs
+                   _     -> '-':go cs
+       '.' | isEnabled Ext_smart opts ->
+              case cs of
+                   '.':'.':rest -> '\\':'.':'.':'.':go rest
+                   _            -> '.':go cs
+       _   -> case cs of
+                '_':x:xs
+                  | isEnabled Ext_intraword_underscores opts
+                  , isAlphaNum c
+                  , isAlphaNum x -> c : '_' : x : go xs
+                _                -> c : go cs
+
+attrsToMarkdown :: Attr -> Doc Text
+attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
+        where attribId = case attribs of
+                                ("",_,_) -> empty
+                                (i,_,_)  -> "#" <> escAttr i
+              attribClasses = case attribs of
+                                (_,[],_) -> empty
+                                (_,cs,_) -> hsep $
+                                            map (escAttr . ("."<>))
+                                            cs
+              attribKeys = case attribs of
+                                (_,_,[]) -> empty
+                                (_,_,ks) -> hsep $
+                                            map (\(k,v) -> escAttr k
+                                              <> "=\"" <>
+                                              escAttr v <> "\"") ks
+              escAttr          = mconcat . map escAttrChar . T.unpack
+              escAttrChar '"'  = literal "\\\""
+              escAttrChar '\\' = literal "\\\\"
+              escAttrChar c    = literal $ T.singleton c
+
+linkAttributes :: WriterOptions -> Attr -> Doc Text
+linkAttributes opts attr =
+  if isEnabled Ext_link_attributes opts && attr /= nullAttr
+     then attrsToMarkdown attr
+     else empty
+
+getKey :: Doc Text -> Key
+getKey = toKey . render Nothing
+
+findUsableIndex :: [Text] -> Int -> Int
+findUsableIndex lbls i = if tshow i `elem` lbls
+                         then findUsableIndex lbls (i + 1)
+                         else i
+
+getNextIndex :: PandocMonad m => MD m Int
+getNextIndex = do
+  prevRefs <- gets stPrevRefs
+  refs <- gets stRefs
+  i <- (+ 1) <$> gets stLastIdx
+  modify $ \s -> s{ stLastIdx = i }
+  let refLbls = map (\(r,_,_) -> r) $ prevRefs ++ refs
+  return $ findUsableIndex refLbls i
+
+-- | Get reference for target; if none exists, create unique one and return.
+--   Prefer label if possible; otherwise, generate a unique key.
+getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text
+getReference attr label target = do
+  refs <- gets stRefs
+  case find (\(_,t,a) -> t == target && a == attr) refs of
+    Just (ref, _, _) -> return ref
+    Nothing       -> do
+      keys <- gets stKeys
+      let key = getKey label
+      let rawkey = coerce key
+      case M.lookup key keys of
+           Nothing -> do -- no other refs with this label
+             (lab', idx) <- if T.null rawkey ||
+                                 T.length rawkey > 999 ||
+                                 T.any (\c -> c == '[' || c == ']') rawkey
+                               then do
+                                 i <- getNextIndex
+                                 return (tshow i, i)
+                               else
+                                 return (render Nothing label, 0)
+             modify (\s -> s{
+               stRefs = (lab', target, attr) : refs,
+               stKeys = M.insert (getKey label)
+                           (M.insert (target, attr) idx mempty)
+                                 (stKeys s) })
+             return lab'
+
+           Just km ->    -- we have refs with this label
+             case M.lookup (target, attr) km of
+                  Just i -> do
+                    let lab' = render Nothing $
+                               label <> if i == 0
+                                           then mempty
+                                           else literal (tshow i)
+                    -- make sure it's in stRefs; it may be
+                    -- a duplicate that was printed in a previous
+                    -- block:
+                    when ((lab', target, attr) `notElem` refs) $
+                       modify (\s -> s{
+                         stRefs = (lab', target, attr) : refs })
+                    return lab'
+                  Nothing -> do -- but this one is to a new target
+                    i <- getNextIndex
+                    let lab' = tshow i
+                    modify (\s -> s{
+                       stRefs = (lab', target, attr) : refs,
+                       stKeys = M.insert key
+                                   (M.insert (target, attr) i km)
+                                         (stKeys s) })
+                    return lab'
+
+-- | Convert list of Pandoc inline elements to markdown.
+inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text)
+inlineListToMarkdown opts lst = do
+  inlist <- asks envInList
+  go (if inlist then avoidBadWrapsInList lst else lst)
+  where go [] = return empty
+        go (x@Math{}:y@(Str t):zs)
+          | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058
+          = liftM2 (<>) (inlineToMarkdown opts x)
+              (go (RawInline (Format "html") "<!-- -->" : y : zs))
+        go (i:is) = case i of
+            Link {} -> case is of
+                -- If a link is followed by another link, or '[', '(' or ':'
+                -- then we don't shortcut
+                Link {}:_                                       -> unshortcutable
+                Space:Link {}:_                                 -> unshortcutable
+                Space:(Str(thead -> Just '[')):_                -> unshortcutable
+                Space:(RawInline _ (thead -> Just '[')):_       -> unshortcutable
+                Space:(Cite _ _):_                              -> unshortcutable
+                SoftBreak:Link {}:_                             -> unshortcutable
+                SoftBreak:(Str(thead -> Just '[')):_            -> unshortcutable
+                SoftBreak:(RawInline _ (thead -> Just '[')):_   -> unshortcutable
+                SoftBreak:(Cite _ _):_                          -> unshortcutable
+                LineBreak:Link {}:_                             -> unshortcutable
+                LineBreak:(Str(thead -> Just '[')):_            -> unshortcutable
+                LineBreak:(RawInline _ (thead -> Just '[')):_   -> unshortcutable
+                LineBreak:(Cite _ _):_                          -> unshortcutable
+                (Cite _ _):_                                    -> unshortcutable
+                Str (thead -> Just '['):_                       -> unshortcutable
+                Str (thead -> Just '('):_                       -> unshortcutable
+                Str (thead -> Just ':'):_                       -> unshortcutable
+                (RawInline _ (thead -> Just '[')):_             -> unshortcutable
+                (RawInline _ (thead -> Just '(')):_             -> unshortcutable
+                (RawInline _ (thead -> Just ':')):_             -> unshortcutable
+                (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable
+                _                                               -> shortcutable
+            _ -> shortcutable
+          where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
+                unshortcutable = do
+                    iMark <- local
+                             (\env -> env { envRefShortcutable = False })
+                             (inlineToMarkdown opts i)
+                    fmap (iMark <>) (go is)
+                thead = fmap fst . T.uncons
+
+isSp :: Inline -> Bool
+isSp Space     = True
+isSp SoftBreak = True
+isSp _         = False
+
+avoidBadWrapsInList :: [Inline] -> [Inline]
+avoidBadWrapsInList [] = []
+avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s =
+  Str (" >" <> cs) : avoidBadWrapsInList xs
+avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))]
+  | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]]
+avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs)
+  | T.null cs && isSp s && c `elem` ['-','*','+'] =
+    Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList (s:Str cs:Space:xs)
+  | isSp s && isOrderedListMarker cs =
+    Str (" " <> cs) : Space : avoidBadWrapsInList xs
+avoidBadWrapsInList [s, Str cs]
+  | isSp s && isOrderedListMarker cs = [Str $ " " <> cs]
+avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
+
+isOrderedListMarker :: Text -> Bool
+isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) &&
+              isRight (runParser (anyOrderedListMarker >> eof)
+                       defaultParserState "" xs)
+ where
+  isRight (Right _) = True
+  isRight (Left  _) = False
+
+-- | Convert Pandoc inline element to markdown.
+inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text)
+inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) =
+  case lookup "data-emoji" kvs of
+       Just emojiname | isEnabled Ext_emoji opts ->
+            return $ ":" <> literal emojiname <> ":"
+       _ -> inlineToMarkdown opts (Str s)
+inlineToMarkdown opts (Span attrs ils) = do
+  variant <- asks envVariant
+  contents <- inlineListToMarkdown opts ils
+  return $ case attrs of
+             (_,["csl-block"],_) -> (cr <>)
+             (_,["csl-left-margin"],_) -> (cr <>)
+             (_,["csl-indent"],_) -> (cr <>)
+             _ -> id
+         $ case variant of
+                PlainText -> contents
+                _     | attrs == nullAttr -> contents
+                      | isEnabled Ext_bracketed_spans opts ->
+                        let attrs' = if attrs /= nullAttr
+                                        then attrsToMarkdown attrs
+                                        else empty
+                        in "[" <> contents <> "]" <> attrs'
+                      | isEnabled Ext_raw_html opts ||
+                        isEnabled Ext_native_spans opts ->
+                        tagWithAttrs "span" attrs <> contents <> literal "</span>"
+                      | otherwise -> contents
+inlineToMarkdown _ (Emph []) = return empty
+inlineToMarkdown opts (Emph lst) = do
+  variant <- asks envVariant
+  contents <- inlineListToMarkdown opts lst
+  return $ case variant of
+             PlainText
+               | isEnabled Ext_gutenberg opts -> "_" <> contents <> "_"
+               | otherwise ->  contents
+             _ -> "*" <> contents <> "*"
+inlineToMarkdown _ (Underline []) = return empty
+inlineToMarkdown opts (Underline lst) = do
+  variant <- asks envVariant
+  contents <- inlineListToMarkdown opts lst
+  case variant of
+    PlainText -> return contents
+    _     | isEnabled Ext_bracketed_spans opts ->
+            return $ "[" <> contents <> "]" <> "{.ul}"
+          | isEnabled Ext_native_spans opts ->
+            return $ tagWithAttrs "span" ("", ["underline"], [])
+              <> contents
+              <> literal "</span>"
+          | isEnabled Ext_raw_html opts ->
+            return $ "<u>" <> contents <> "</u>"
+          | otherwise -> inlineToMarkdown opts (Emph lst)
+inlineToMarkdown _ (Strong []) = return empty
+inlineToMarkdown opts (Strong lst) = do
+  variant <- asks envVariant
+  case variant of
+    PlainText ->
+             inlineListToMarkdown opts $
+               if isEnabled Ext_gutenberg opts
+                  then capitalize lst
+                  else lst
+    _ -> do
+       contents <- inlineListToMarkdown opts lst
+       return $ "**" <> contents <> "**"
+inlineToMarkdown _ (Strikeout []) = return empty
+inlineToMarkdown opts (Strikeout lst) = do
+  contents <- inlineListToMarkdown opts lst
+  return $ if isEnabled Ext_strikeout opts
+              then "~~" <> contents <> "~~"
+              else if isEnabled Ext_raw_html opts
+                       then "<s>" <> contents <> "</s>"
+                       else contents
+inlineToMarkdown _ (Superscript []) = return empty
+inlineToMarkdown opts (Superscript lst) =
+  local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do
+    contents <- inlineListToMarkdown opts lst
+    if isEnabled Ext_superscript opts
+       then return $ "^" <> contents <> "^"
+       else if isEnabled Ext_raw_html opts
+                then return $ "<sup>" <> contents <> "</sup>"
+                else
+                  case traverse toSuperscriptInline lst of
+                    Just xs' | not (writerPreferAscii opts)
+                      -> inlineListToMarkdown opts xs'
+                    _ -> do
+                      let rendered = render Nothing contents
+                      return $
+                        case mapM toSuperscript (T.unpack rendered) of
+                           Just r  -> literal $ T.pack r
+                           Nothing -> literal $ "^(" <> rendered <> ")"
+inlineToMarkdown _ (Subscript []) = return empty
+inlineToMarkdown opts (Subscript lst) =
+  local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do
+    contents <- inlineListToMarkdown opts lst
+    if isEnabled Ext_subscript opts
+       then return $ "~" <> contents <> "~"
+       else if isEnabled Ext_raw_html opts
+                then return $ "<sub>" <> contents <> "</sub>"
+                else
+                  case traverse toSubscriptInline lst of
+                    Just xs' | not (writerPreferAscii opts)
+                      -> inlineListToMarkdown opts xs'
+                    _ -> do
+                      let rendered = render Nothing contents
+                      return $
+                        case mapM toSuperscript (T.unpack rendered) of
+                           Just r  -> literal $ T.pack r
+                           Nothing -> literal $ "_(" <> rendered <> ")"
+inlineToMarkdown opts (SmallCaps lst) = do
+  variant <- asks envVariant
+  if variant /= PlainText &&
+     (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts)
+     then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst)
+     else inlineListToMarkdown opts $ capitalize lst
+inlineToMarkdown opts (Quoted SingleQuote lst) = do
+  contents <- inlineListToMarkdown opts lst
+  return $ if isEnabled Ext_smart opts
+              then "'" <> contents <> "'"
+              else
+                if writerPreferAscii opts
+                   then "&lsquo;" <> contents <> "&rsquo;"
+                   else "‘" <> contents <> "’"
+inlineToMarkdown opts (Quoted DoubleQuote lst) = do
+  contents <- inlineListToMarkdown opts lst
+  return $ if isEnabled Ext_smart opts
+              then "\"" <> contents <> "\""
+              else
+                if writerPreferAscii opts
+                   then "&ldquo;" <> contents <> "&rdquo;"
+                   else "“" <> contents <> "”"
+inlineToMarkdown opts (Code attr str) = do
+  let tickGroups = filter (T.any (== '`')) $ T.group str
+  let longest    = if null tickGroups
+                     then 0
+                     else maximum $ map T.length tickGroups
+  let marker     = T.replicate (longest + 1) "`"
+  let spacer     = if longest == 0 then "" else " "
+  let attrs      = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
+                      then attrsToMarkdown attr
+                      else empty
+  variant <- asks envVariant
+  case variant of
+     PlainText -> return $ literal str
+     _     ->  return $ literal
+                  (marker <> spacer <> str <> spacer <> marker) <> attrs
+inlineToMarkdown opts (Str str) = do
+  variant <- asks envVariant
+  let str' = (if writerPreferAscii opts
+                 then toHtml5Entities
+                 else id) .
+             (if isEnabled Ext_smart opts
+                 then unsmartify opts
+                 else id) .
+             (if variant == PlainText
+                 then id
+                 else escapeText opts) $ str
+  return $ literal str'
+inlineToMarkdown opts (Math InlineMath str) =
+  case writerHTMLMathMethod opts of
+       WebTeX url -> inlineToMarkdown opts
+                       (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str))
+       _ | isEnabled Ext_tex_math_dollars opts ->
+             return $ "$" <> literal str <> "$"
+         | isEnabled Ext_tex_math_single_backslash opts ->
+             return $ "\\(" <> literal str <> "\\)"
+         | isEnabled Ext_tex_math_double_backslash opts ->
+             return $ "\\\\(" <> literal str <> "\\\\)"
+         | otherwise -> do
+             variant <- asks envVariant
+             texMathToInlines InlineMath str >>=
+               inlineListToMarkdown opts .
+                 (if variant == PlainText then makeMathPlainer else id)
+inlineToMarkdown opts (Math DisplayMath str) =
+  case writerHTMLMathMethod opts of
+      WebTeX url -> (\x -> blankline <> x <> blankline) `fmap`
+             inlineToMarkdown opts (Image nullAttr [Str str]
+                    (url <> T.pack (urlEncode $ T.unpack str), str))
+      _ | isEnabled Ext_tex_math_dollars opts ->
+            return $ "$$" <> literal str <> "$$"
+        | isEnabled Ext_tex_math_single_backslash opts ->
+            return $ "\\[" <> literal str <> "\\]"
+        | isEnabled Ext_tex_math_double_backslash opts ->
+            return $ "\\\\[" <> literal str <> "\\\\]"
+        | otherwise -> (\x -> cr <> x <> cr) `fmap`
+            (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
+inlineToMarkdown opts il@(RawInline f str) = do
+  let tickGroups = filter (T.any (== '`')) $ T.group str
+  let numticks   = if null tickGroups
+                     then 1
+                     else 1 + maximum (map T.length tickGroups)
+  variant <- asks envVariant
+  let Format fmt = f
+  let rawAttribInline = return $
+         literal (T.replicate numticks "`") <> literal str <>
+         literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}"
+  let renderEmpty = mempty <$ report (InlineNotRendered il)
+  case variant of
+    PlainText -> renderEmpty
+    Commonmark
+      | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"]
+         -> return $ literal str
+    Markdown
+      | f `elem` ["markdown", "markdown_github", "markdown_phpextra",
+                  "markdown_mmd", "markdown_strict"]
+         -> return $ literal str
+    _ | isEnabled Ext_raw_attribute opts -> rawAttribInline
+      | f `elem` ["html", "html5", "html4"]
+      , isEnabled Ext_raw_html opts
+         -> return $ literal str
+      | f `elem` ["latex", "tex"]
+      , isEnabled Ext_raw_tex opts
+         -> return $ literal str
+    _ -> renderEmpty
+
+
+inlineToMarkdown opts LineBreak = do
+  variant <- asks envVariant
+  if variant == PlainText || isEnabled Ext_hard_line_breaks opts
+     then return cr
+     else return $
+          if isEnabled Ext_escaped_line_breaks opts
+             then "\\" <> cr
+             else "  " <> cr
+inlineToMarkdown _ Space = do
+  escapeSpaces <- asks envEscapeSpaces
+  return $ if escapeSpaces then "\\ " else space
+inlineToMarkdown opts SoftBreak = do
+  escapeSpaces <- asks envEscapeSpaces
+  let space' = if escapeSpaces then "\\ " else space
+  return $ case writerWrapText opts of
+                WrapNone     -> space'
+                WrapAuto     -> space'
+                WrapPreserve -> cr
+inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst
+inlineToMarkdown opts (Cite (c:cs) lst)
+  | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst
+  | otherwise =
+      if citationMode c == AuthorInText
+         then do
+           suffs <- inlineListToMarkdown opts $ citationSuffix c
+           rest <- mapM convertOne cs
+           let inbr = suffs <+> joincits rest
+               br   = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
+           return $ literal ("@" <> citationId c) <+> br
+         else do
+           cits <- mapM convertOne (c:cs)
+           return $ literal "[" <> joincits cits <> literal "]"
+  where
+        joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty)
+        convertOne Citation { citationId      = k
+                            , citationPrefix  = pinlines
+                            , citationSuffix  = sinlines
+                            , citationMode    = m }
+                               = do
+           pdoc <- inlineListToMarkdown opts pinlines
+           sdoc <- inlineListToMarkdown opts sinlines
+           let k' = literal (modekey m <> "@" <> k)
+               r = case sinlines of
+                        Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
+                        _                                         -> k' <+> sdoc
+           return $ pdoc <+> r
+        modekey SuppressAuthor = "-"
+        modekey _              = ""
+inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do
+  variant <- asks envVariant
+  linktext <- inlineListToMarkdown opts txt
+  let linktitle = if T.null tit
+                     then empty
+                     else literal $ " \"" <> tit <> "\""
+  let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src)
+  let useAuto = isURI src &&
+                case txt of
+                      [Str s] | escapeURI s == srcSuffix -> True
+                      _       -> False
+  let useRefLinks = writerReferenceLinks opts && not useAuto
+  shortcutable <- asks envRefShortcutable
+  let useShortcutRefLinks = shortcutable &&
+                            isEnabled Ext_shortcut_reference_links opts
+  reftext <- if useRefLinks
+                then literal <$> getReference attr linktext (src, tit)
+                else return mempty
+  case variant of
+    PlainText
+      | useAuto -> return $ literal srcSuffix
+      | otherwise -> return linktext
+    _ | useAuto -> return $ "<" <> literal srcSuffix <> ">"
+      | useRefLinks ->
+           let first  = "[" <> linktext <> "]"
+               second = if getKey linktext == getKey reftext
+                           then if useShortcutRefLinks
+                                   then ""
+                                   else "[]"
+                           else "[" <> reftext <> "]"
+           in  return $ first <> second
+      | isEnabled Ext_raw_html opts
+      , not (isEnabled Ext_link_attributes opts)
+      , attr /= nullAttr -> -- use raw HTML to render attributes
+          literal . T.strip <$>
+            writeHtml5String opts{ writerTemplate = Nothing }
+            (Pandoc nullMeta [Plain [lnk]])
+      | otherwise -> return $
+         "[" <> linktext <> "](" <> literal src <> linktitle <> ")" <>
+         linkAttributes opts attr
+inlineToMarkdown opts img@(Image attr alternate (source, tit))
+  | isEnabled Ext_raw_html opts &&
+    not (isEnabled Ext_link_attributes opts) &&
+    attr /= nullAttr = -- use raw HTML
+    literal . T.strip <$>
+      writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]])
+  | otherwise = do
+  variant <- asks envVariant
+  let txt = if null alternate || alternate == [Str source]
+                                 -- to prevent autolinks
+               then [Str ""]
+               else alternate
+  linkPart <- inlineToMarkdown opts (Link attr txt (source, tit))
+  return $ case variant of
+             PlainText -> "[" <> linkPart <> "]"
+             _     -> "!" <> linkPart
+inlineToMarkdown opts (Note contents) = do
+  modify (\st -> st{ stNotes = contents : stNotes st })
+  st <- get
+  let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + length (stNotes st) - 1)
+  if isEnabled Ext_footnotes opts
+     then return $ "[^" <> ref <> "]"
+     else return $ "[" <> ref <> "]"
+
+makeMathPlainer :: [Inline] -> [Inline]
+makeMathPlainer = walk go
+  where
+  go (Emph xs) = Span nullAttr xs
+  go x         = x
+
+toSubscriptInline :: Inline -> Maybe Inline
+toSubscriptInline Space = Just Space
+toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils
+toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s)
+toSubscriptInline LineBreak = Just LineBreak
+toSubscriptInline SoftBreak = Just SoftBreak
+toSubscriptInline _ = Nothing
+
+toSuperscriptInline :: Inline -> Maybe Inline
+toSuperscriptInline Space = Just Space
+toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils
+toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s)
+toSuperscriptInline LineBreak = Just LineBreak
+toSuperscriptInline SoftBreak = Just SoftBreak
+toSuperscriptInline _ = Nothing
diff --git a/src/Text/Pandoc/Writers/Markdown/Types.hs b/src/Text/Pandoc/Writers/Markdown/Types.hs
new file mode 100644
index 000000000..a1d0d14e4
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown/Types.hs
@@ -0,0 +1,81 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{- |
+   Module      : Text.Pandoc.Writers.Markdown.Types
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.Markdown.Types (
+  MarkdownVariant(..),
+  WriterState(..),
+  WriterEnv(..),
+  Notes,
+  Ref,
+  Refs,
+  MD,
+  evalMD
+  ) where
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import Data.Default
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import Data.Text (Text)
+import Text.Pandoc.Parsing (Key)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+
+type Notes = [[Block]]
+type Ref   = (Text, Target, Attr)
+type Refs  = [Ref]
+
+type MD m = ReaderT WriterEnv (StateT WriterState m)
+
+evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a
+evalMD md env st = evalStateT (runReaderT md env) st
+
+data WriterEnv = WriterEnv { envInList          :: Bool
+                           , envVariant         :: MarkdownVariant
+                           , envRefShortcutable :: Bool
+                           , envBlockLevel      :: Int
+                           , envEscapeSpaces    :: Bool
+                           }
+
+data MarkdownVariant =
+      PlainText
+    | Commonmark
+    | Markdown
+    deriving (Show, Eq)
+
+instance Default WriterEnv
+  where def = WriterEnv { envInList          = False
+                        , envVariant         = Markdown
+                        , envRefShortcutable = True
+                        , envBlockLevel      = 0
+                        , envEscapeSpaces    = False
+                        }
+
+data WriterState = WriterState { stNotes   :: Notes
+                               , stPrevRefs :: Refs
+                               , stRefs    :: Refs
+                               , stKeys    :: M.Map Key
+                                                (M.Map (Target, Attr) Int)
+                               , stLastIdx  :: Int
+                               , stIds     :: Set.Set Text
+                               , stNoteNum :: Int
+                               }
+
+instance Default WriterState
+  where def = WriterState{ stNotes = []
+                         , stPrevRefs = []
+                         , stRefs = []
+                         , stKeys = M.empty
+                         , stLastIdx = 0
+                         , stIds = Set.empty
+                         , stNoteNum = 1
+                         }
+
+
-- 
cgit v1.2.3


From 827ecdd2de935d377399a68aaa52b3fb6ab5b607 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 2 Mar 2021 21:33:58 -0800
Subject: Split out T.P.Writers.LaTeX.Lang.

---
 pandoc.cabal                          |   1 +
 src/Text/Pandoc/Writers/LaTeX.hs      | 192 +++-------------------------------
 src/Text/Pandoc/Writers/LaTeX/Lang.hs | 191 +++++++++++++++++++++++++++++++++
 3 files changed, 204 insertions(+), 180 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/LaTeX/Lang.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 3aa29b477..416641a96 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -664,6 +664,7 @@ library
                    Text.Pandoc.Writers.LaTeX.Caption,
                    Text.Pandoc.Writers.LaTeX.Notes,
                    Text.Pandoc.Writers.LaTeX.Table,
+                   Text.Pandoc.Writers.LaTeX.Lang,
                    Text.Pandoc.Writers.LaTeX.Types,
                    Text.Pandoc.Writers.Markdown.Types,
                    Text.Pandoc.Writers.Markdown.Inline,
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e31ec9d52..6b1d44b23 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -45,6 +45,8 @@ import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
 import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX)
 import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState)
+import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv, toPolyglossia,
+                                       toBabel)
 import Text.Pandoc.Writers.Shared
 import Text.Printf (printf)
 import qualified Data.Text.Normalize as Normalize
@@ -926,6 +928,16 @@ labelFor ident = do
   ref <- literal `fmap` toLabel ident
   return $ text "\\label" <> braces ref
 
+-- Determine listings language from list of class attributes.
+getListingsLanguage :: [Text] -> Maybe Text
+getListingsLanguage xs
+  = foldr ((<|>) . toListingsLanguage) Nothing xs
+
+mbBraced :: Text -> Text
+mbBraced x = if not (T.all isAlphaNum x)
+                then "{" <> x <> "}"
+                else x
+
 -- | Convert list of inline elements to LaTeX.
 inlineListToLaTeX :: PandocMonad m
                   => [Inline]  -- ^ Inlines to convert
@@ -1363,16 +1375,6 @@ citationsToBiblatex (c:cs)
 
 citationsToBiblatex _ = return empty
 
--- Determine listings language from list of class attributes.
-getListingsLanguage :: [Text] -> Maybe Text
-getListingsLanguage xs
-  = foldr ((<|>) . toListingsLanguage) Nothing xs
-
-mbBraced :: Text -> Text
-mbBraced x = if not (T.all isAlphaNum x)
-                then "{" <> x <> "}"
-                else x
-
 -- Extract a key from divs and spans
 extract :: Text -> Block -> [Text]
 extract key (Div attr _)     = lookKey key attr
@@ -1390,174 +1392,4 @@ extractInline _ _               = []
 lookKey :: Text -> Attr -> [Text]
 lookKey key (_,_,kvs) =  maybe [] T.words $ lookup key kvs
 
--- In environments \Arabic instead of \arabic is used
-toPolyglossiaEnv :: Lang -> (Text, Text)
-toPolyglossiaEnv l =
-  case toPolyglossia l of
-    ("arabic", o) -> ("Arabic", o)
-    x             -> x
-
--- Takes a list of the constituents of a BCP 47 language code and
--- converts it to a Polyglossia (language, options) tuple
--- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
-toPolyglossia :: Lang -> (Text, Text)
-toPolyglossia (Lang "ar" _ "DZ" _)        = ("arabic", "locale=algeria")
-toPolyglossia (Lang "ar" _ "IQ" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "JO" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "LB" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "LY" _)        = ("arabic", "locale=libya")
-toPolyglossia (Lang "ar" _ "MA" _)        = ("arabic", "locale=morocco")
-toPolyglossia (Lang "ar" _ "MR" _)        = ("arabic", "locale=mauritania")
-toPolyglossia (Lang "ar" _ "PS" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "SY" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "TN" _)        = ("arabic", "locale=tunisia")
-toPolyglossia (Lang "de" _ _ vars)
-  | "1901" `elem` vars                    = ("german", "spelling=old")
-toPolyglossia (Lang "de" _ "AT" vars)
-  | "1901" `elem` vars                    = ("german", "variant=austrian, spelling=old")
-toPolyglossia (Lang "de" _ "AT" _)        = ("german", "variant=austrian")
-toPolyglossia (Lang "de" _ "CH" vars)
-  | "1901" `elem` vars                    = ("german", "variant=swiss, spelling=old")
-toPolyglossia (Lang "de" _ "CH" _)        = ("german", "variant=swiss")
-toPolyglossia (Lang "de" _ _ _)           = ("german", "")
-toPolyglossia (Lang "dsb" _ _ _)          = ("lsorbian", "")
-toPolyglossia (Lang "el" _ "polyton" _)   = ("greek",   "variant=poly")
-toPolyglossia (Lang "en" _ "AU" _)        = ("english", "variant=australian")
-toPolyglossia (Lang "en" _ "CA" _)        = ("english", "variant=canadian")
-toPolyglossia (Lang "en" _ "GB" _)        = ("english", "variant=british")
-toPolyglossia (Lang "en" _ "NZ" _)        = ("english", "variant=newzealand")
-toPolyglossia (Lang "en" _ "UK" _)        = ("english", "variant=british")
-toPolyglossia (Lang "en" _ "US" _)        = ("english", "variant=american")
-toPolyglossia (Lang "grc" _ _ _)          = ("greek",   "variant=ancient")
-toPolyglossia (Lang "hsb" _ _  _)         = ("usorbian", "")
-toPolyglossia (Lang "la" _ _ vars)
-  | "x-classic" `elem` vars               = ("latin", "variant=classic")
-toPolyglossia (Lang "pt" _ "BR" _)        = ("portuguese", "variant=brazilian")
-toPolyglossia (Lang "sl" _ _ _)           = ("slovenian", "")
-toPolyglossia x                           = (commonFromBcp47 x, "")
 
--- Takes a list of the constituents of a BCP 47 language code and
--- converts it to a Babel language string.
--- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
--- List of supported languages (slightly outdated):
--- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
-toBabel :: Lang -> Text
-toBabel (Lang "de" _ "AT" vars)
-  | "1901" `elem` vars                  = "austrian"
-  | otherwise                           = "naustrian"
-toBabel (Lang "de" _ "CH" vars)
-  | "1901" `elem` vars                  = "swissgerman"
-  | otherwise                           = "nswissgerman"
-toBabel (Lang "de" _ _ vars)
-  | "1901" `elem` vars                  = "german"
-  | otherwise                           = "ngerman"
-toBabel (Lang "dsb" _ _ _)              = "lowersorbian"
-toBabel (Lang "el" _ _ vars)
-  | "polyton" `elem` vars               = "polutonikogreek"
-toBabel (Lang "en" _ "AU" _)            = "australian"
-toBabel (Lang "en" _ "CA" _)            = "canadian"
-toBabel (Lang "en" _ "GB" _)            = "british"
-toBabel (Lang "en" _ "NZ" _)            = "newzealand"
-toBabel (Lang "en" _ "UK" _)            = "british"
-toBabel (Lang "en" _ "US" _)            = "american"
-toBabel (Lang "fr" _ "CA" _)            = "canadien"
-toBabel (Lang "fra" _ _ vars)
-  | "aca" `elem` vars                   = "acadian"
-toBabel (Lang "grc" _ _ _)              = "polutonikogreek"
-toBabel (Lang "hsb" _ _ _)              = "uppersorbian"
-toBabel (Lang "la" _ _ vars)
-  | "x-classic" `elem` vars             = "classiclatin"
-toBabel (Lang "pt" _ "BR" _)            = "brazilian"
-toBabel (Lang "sl" _ _ _)               = "slovene"
-toBabel x                               = commonFromBcp47 x
-
--- Takes a list of the constituents of a BCP 47 language code
--- and converts it to a string shared by Babel and Polyglossia.
--- https://tools.ietf.org/html/bcp47#section-2.1
-commonFromBcp47 :: Lang -> Text
-commonFromBcp47 (Lang "sr" "Cyrl" _ _)          = "serbianc"
-commonFromBcp47 (Lang "zh" "Latn" _ vars)
-  | "pinyin" `elem` vars                        = "pinyin"
-commonFromBcp47 (Lang l _ _ _) = fromIso l
-  where
-    fromIso "af"  = "afrikaans"
-    fromIso "am"  = "amharic"
-    fromIso "ar"  = "arabic"
-    fromIso "as"  = "assamese"
-    fromIso "ast" = "asturian"
-    fromIso "bg"  = "bulgarian"
-    fromIso "bn"  = "bengali"
-    fromIso "bo"  = "tibetan"
-    fromIso "br"  = "breton"
-    fromIso "ca"  = "catalan"
-    fromIso "cy"  = "welsh"
-    fromIso "cs"  = "czech"
-    fromIso "cop" = "coptic"
-    fromIso "da"  = "danish"
-    fromIso "dv"  = "divehi"
-    fromIso "el"  = "greek"
-    fromIso "en"  = "english"
-    fromIso "eo"  = "esperanto"
-    fromIso "es"  = "spanish"
-    fromIso "et"  = "estonian"
-    fromIso "eu"  = "basque"
-    fromIso "fa"  = "farsi"
-    fromIso "fi"  = "finnish"
-    fromIso "fr"  = "french"
-    fromIso "fur" = "friulan"
-    fromIso "ga"  = "irish"
-    fromIso "gd"  = "scottish"
-    fromIso "gez" = "ethiopic"
-    fromIso "gl"  = "galician"
-    fromIso "he"  = "hebrew"
-    fromIso "hi"  = "hindi"
-    fromIso "hr"  = "croatian"
-    fromIso "hu"  = "magyar"
-    fromIso "hy"  = "armenian"
-    fromIso "ia"  = "interlingua"
-    fromIso "id"  = "indonesian"
-    fromIso "ie"  = "interlingua"
-    fromIso "is"  = "icelandic"
-    fromIso "it"  = "italian"
-    fromIso "ja"  = "japanese"
-    fromIso "km"  = "khmer"
-    fromIso "kmr" = "kurmanji"
-    fromIso "kn"  = "kannada"
-    fromIso "ko"  = "korean"
-    fromIso "la"  = "latin"
-    fromIso "lo"  = "lao"
-    fromIso "lt"  = "lithuanian"
-    fromIso "lv"  = "latvian"
-    fromIso "ml"  = "malayalam"
-    fromIso "mn"  = "mongolian"
-    fromIso "mr"  = "marathi"
-    fromIso "nb"  = "norsk"
-    fromIso "nl"  = "dutch"
-    fromIso "nn"  = "nynorsk"
-    fromIso "no"  = "norsk"
-    fromIso "nqo" = "nko"
-    fromIso "oc"  = "occitan"
-    fromIso "pa"  = "panjabi"
-    fromIso "pl"  = "polish"
-    fromIso "pms" = "piedmontese"
-    fromIso "pt"  = "portuguese"
-    fromIso "rm"  = "romansh"
-    fromIso "ro"  = "romanian"
-    fromIso "ru"  = "russian"
-    fromIso "sa"  = "sanskrit"
-    fromIso "se"  = "samin"
-    fromIso "sk"  = "slovak"
-    fromIso "sq"  = "albanian"
-    fromIso "sr"  = "serbian"
-    fromIso "sv"  = "swedish"
-    fromIso "syr" = "syriac"
-    fromIso "ta"  = "tamil"
-    fromIso "te"  = "telugu"
-    fromIso "th"  = "thai"
-    fromIso "ti"  = "ethiopic"
-    fromIso "tk"  = "turkmen"
-    fromIso "tr"  = "turkish"
-    fromIso "uk"  = "ukrainian"
-    fromIso "ur"  = "urdu"
-    fromIso "vi"  = "vietnamese"
-    fromIso _     = ""
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
new file mode 100644
index 000000000..41aafee48
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -0,0 +1,191 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Lang
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.LaTeX.Lang
+  ( toPolyglossiaEnv,
+    toPolyglossia,
+    toBabel
+  ) where
+import Data.Text (Text)
+import Text.Pandoc.BCP47 (Lang (..))
+
+
+-- In environments \Arabic instead of \arabic is used
+toPolyglossiaEnv :: Lang -> (Text, Text)
+toPolyglossiaEnv l =
+  case toPolyglossia l of
+    ("arabic", o) -> ("Arabic", o)
+    x             -> x
+
+-- Takes a list of the constituents of a BCP 47 language code and
+-- converts it to a Polyglossia (language, options) tuple
+-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
+toPolyglossia :: Lang -> (Text, Text)
+toPolyglossia (Lang "ar" _ "DZ" _)        = ("arabic", "locale=algeria")
+toPolyglossia (Lang "ar" _ "IQ" _)        = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "JO" _)        = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "LB" _)        = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "LY" _)        = ("arabic", "locale=libya")
+toPolyglossia (Lang "ar" _ "MA" _)        = ("arabic", "locale=morocco")
+toPolyglossia (Lang "ar" _ "MR" _)        = ("arabic", "locale=mauritania")
+toPolyglossia (Lang "ar" _ "PS" _)        = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "SY" _)        = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ "TN" _)        = ("arabic", "locale=tunisia")
+toPolyglossia (Lang "de" _ _ vars)
+  | "1901" `elem` vars                    = ("german", "spelling=old")
+toPolyglossia (Lang "de" _ "AT" vars)
+  | "1901" `elem` vars                    = ("german", "variant=austrian, spelling=old")
+toPolyglossia (Lang "de" _ "AT" _)        = ("german", "variant=austrian")
+toPolyglossia (Lang "de" _ "CH" vars)
+  | "1901" `elem` vars                    = ("german", "variant=swiss, spelling=old")
+toPolyglossia (Lang "de" _ "CH" _)        = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ _ _)           = ("german", "")
+toPolyglossia (Lang "dsb" _ _ _)          = ("lsorbian", "")
+toPolyglossia (Lang "el" _ "polyton" _)   = ("greek",   "variant=poly")
+toPolyglossia (Lang "en" _ "AU" _)        = ("english", "variant=australian")
+toPolyglossia (Lang "en" _ "CA" _)        = ("english", "variant=canadian")
+toPolyglossia (Lang "en" _ "GB" _)        = ("english", "variant=british")
+toPolyglossia (Lang "en" _ "NZ" _)        = ("english", "variant=newzealand")
+toPolyglossia (Lang "en" _ "UK" _)        = ("english", "variant=british")
+toPolyglossia (Lang "en" _ "US" _)        = ("english", "variant=american")
+toPolyglossia (Lang "grc" _ _ _)          = ("greek",   "variant=ancient")
+toPolyglossia (Lang "hsb" _ _  _)         = ("usorbian", "")
+toPolyglossia (Lang "la" _ _ vars)
+  | "x-classic" `elem` vars               = ("latin", "variant=classic")
+toPolyglossia (Lang "pt" _ "BR" _)        = ("portuguese", "variant=brazilian")
+toPolyglossia (Lang "sl" _ _ _)           = ("slovenian", "")
+toPolyglossia x                           = (commonFromBcp47 x, "")
+
+-- Takes a list of the constituents of a BCP 47 language code and
+-- converts it to a Babel language string.
+-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
+-- List of supported languages (slightly outdated):
+-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
+toBabel :: Lang -> Text
+toBabel (Lang "de" _ "AT" vars)
+  | "1901" `elem` vars                  = "austrian"
+  | otherwise                           = "naustrian"
+toBabel (Lang "de" _ "CH" vars)
+  | "1901" `elem` vars                  = "swissgerman"
+  | otherwise                           = "nswissgerman"
+toBabel (Lang "de" _ _ vars)
+  | "1901" `elem` vars                  = "german"
+  | otherwise                           = "ngerman"
+toBabel (Lang "dsb" _ _ _)              = "lowersorbian"
+toBabel (Lang "el" _ _ vars)
+  | "polyton" `elem` vars               = "polutonikogreek"
+toBabel (Lang "en" _ "AU" _)            = "australian"
+toBabel (Lang "en" _ "CA" _)            = "canadian"
+toBabel (Lang "en" _ "GB" _)            = "british"
+toBabel (Lang "en" _ "NZ" _)            = "newzealand"
+toBabel (Lang "en" _ "UK" _)            = "british"
+toBabel (Lang "en" _ "US" _)            = "american"
+toBabel (Lang "fr" _ "CA" _)            = "canadien"
+toBabel (Lang "fra" _ _ vars)
+  | "aca" `elem` vars                   = "acadian"
+toBabel (Lang "grc" _ _ _)              = "polutonikogreek"
+toBabel (Lang "hsb" _ _ _)              = "uppersorbian"
+toBabel (Lang "la" _ _ vars)
+  | "x-classic" `elem` vars             = "classiclatin"
+toBabel (Lang "pt" _ "BR" _)            = "brazilian"
+toBabel (Lang "sl" _ _ _)               = "slovene"
+toBabel x                               = commonFromBcp47 x
+
+-- Takes a list of the constituents of a BCP 47 language code
+-- and converts it to a string shared by Babel and Polyglossia.
+-- https://tools.ietf.org/html/bcp47#section-2.1
+commonFromBcp47 :: Lang -> Text
+commonFromBcp47 (Lang "sr" "Cyrl" _ _)          = "serbianc"
+commonFromBcp47 (Lang "zh" "Latn" _ vars)
+  | "pinyin" `elem` vars                        = "pinyin"
+commonFromBcp47 (Lang l _ _ _) = fromIso l
+  where
+    fromIso "af"  = "afrikaans"
+    fromIso "am"  = "amharic"
+    fromIso "ar"  = "arabic"
+    fromIso "as"  = "assamese"
+    fromIso "ast" = "asturian"
+    fromIso "bg"  = "bulgarian"
+    fromIso "bn"  = "bengali"
+    fromIso "bo"  = "tibetan"
+    fromIso "br"  = "breton"
+    fromIso "ca"  = "catalan"
+    fromIso "cy"  = "welsh"
+    fromIso "cs"  = "czech"
+    fromIso "cop" = "coptic"
+    fromIso "da"  = "danish"
+    fromIso "dv"  = "divehi"
+    fromIso "el"  = "greek"
+    fromIso "en"  = "english"
+    fromIso "eo"  = "esperanto"
+    fromIso "es"  = "spanish"
+    fromIso "et"  = "estonian"
+    fromIso "eu"  = "basque"
+    fromIso "fa"  = "farsi"
+    fromIso "fi"  = "finnish"
+    fromIso "fr"  = "french"
+    fromIso "fur" = "friulan"
+    fromIso "ga"  = "irish"
+    fromIso "gd"  = "scottish"
+    fromIso "gez" = "ethiopic"
+    fromIso "gl"  = "galician"
+    fromIso "he"  = "hebrew"
+    fromIso "hi"  = "hindi"
+    fromIso "hr"  = "croatian"
+    fromIso "hu"  = "magyar"
+    fromIso "hy"  = "armenian"
+    fromIso "ia"  = "interlingua"
+    fromIso "id"  = "indonesian"
+    fromIso "ie"  = "interlingua"
+    fromIso "is"  = "icelandic"
+    fromIso "it"  = "italian"
+    fromIso "ja"  = "japanese"
+    fromIso "km"  = "khmer"
+    fromIso "kmr" = "kurmanji"
+    fromIso "kn"  = "kannada"
+    fromIso "ko"  = "korean"
+    fromIso "la"  = "latin"
+    fromIso "lo"  = "lao"
+    fromIso "lt"  = "lithuanian"
+    fromIso "lv"  = "latvian"
+    fromIso "ml"  = "malayalam"
+    fromIso "mn"  = "mongolian"
+    fromIso "mr"  = "marathi"
+    fromIso "nb"  = "norsk"
+    fromIso "nl"  = "dutch"
+    fromIso "nn"  = "nynorsk"
+    fromIso "no"  = "norsk"
+    fromIso "nqo" = "nko"
+    fromIso "oc"  = "occitan"
+    fromIso "pa"  = "panjabi"
+    fromIso "pl"  = "polish"
+    fromIso "pms" = "piedmontese"
+    fromIso "pt"  = "portuguese"
+    fromIso "rm"  = "romansh"
+    fromIso "ro"  = "romanian"
+    fromIso "ru"  = "russian"
+    fromIso "sa"  = "sanskrit"
+    fromIso "se"  = "samin"
+    fromIso "sk"  = "slovak"
+    fromIso "sq"  = "albanian"
+    fromIso "sr"  = "serbian"
+    fromIso "sv"  = "swedish"
+    fromIso "syr" = "syriac"
+    fromIso "ta"  = "tamil"
+    fromIso "te"  = "telugu"
+    fromIso "th"  = "thai"
+    fromIso "ti"  = "ethiopic"
+    fromIso "tk"  = "turkmen"
+    fromIso "tr"  = "turkish"
+    fromIso "uk"  = "ukrainian"
+    fromIso "ur"  = "urdu"
+    fromIso "vi"  = "vietnamese"
+    fromIso _     = ""
-- 
cgit v1.2.3


From fe483c653b34897346e3ab6e0e26de88ecee4447 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 2 Mar 2021 21:57:13 -0800
Subject: Split out T.P.Writers.LaTeX.Citation.

---
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/Writers/LaTeX.hs          | 148 +-----------------------
 src/Text/Pandoc/Writers/LaTeX/Citation.hs | 181 ++++++++++++++++++++++++++++++
 3 files changed, 188 insertions(+), 142 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/LaTeX/Citation.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 416641a96..135ee2f26 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -666,6 +666,7 @@ library
                    Text.Pandoc.Writers.LaTeX.Table,
                    Text.Pandoc.Writers.LaTeX.Lang,
                    Text.Pandoc.Writers.LaTeX.Types,
+                   Text.Pandoc.Writers.LaTeX.Citation,
                    Text.Pandoc.Writers.Markdown.Types,
                    Text.Pandoc.Writers.Markdown.Inline,
                    Text.Pandoc.Writers.Roff,
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 6b1d44b23..84c96a507 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -20,9 +20,8 @@ module Text.Pandoc.Writers.LaTeX (
   ) where
 import Control.Applicative ((<|>))
 import Control.Monad.State.Strict
-import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace,
-                  isPunctuation, ord)
-import Data.List (foldl', intersperse, nubBy, (\\), uncons)
+import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, ord)
+import Data.List (intersperse, nubBy, (\\), uncons)
 import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing)
 import qualified Data.Map as M
 import Data.Text (Text)
@@ -44,6 +43,8 @@ import Text.Pandoc.Slides
 import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
 import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX)
+import Text.Pandoc.Writers.LaTeX.Citation (citationsToNatbib,
+                                           citationsToBiblatex)
 import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState)
 import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv, toPolyglossia,
                                        toBabel)
@@ -1026,8 +1027,8 @@ inlineToLaTeX (Cite cits lst) = do
   st <- get
   let opts = stOptions st
   case writerCiteMethod opts of
-     Natbib   -> citationsToNatbib cits
-     Biblatex -> citationsToBiblatex cits
+     Natbib   -> citationsToNatbib inlineListToLaTeX cits
+     Biblatex -> citationsToBiblatex inlineListToLaTeX cits
      _        -> inlineListToLaTeX lst
 
 inlineToLaTeX (Code (_,classes,kvs) str) = do
@@ -1238,143 +1239,6 @@ protectCode x = [x]
 setEmptyLine :: PandocMonad m => Bool -> LW m ()
 setEmptyLine b = modify $ \st -> st{ stEmptyLine = b }
 
-citationsToNatbib :: PandocMonad m => [Citation] -> LW m (Doc Text)
-citationsToNatbib
-            [one]
-  = citeCommand c p s k
-  where
-    Citation { citationId = k
-             , citationPrefix = p
-             , citationSuffix = s
-             , citationMode = m
-             }
-      = one
-    c = case m of
-             AuthorInText   -> "citet"
-             SuppressAuthor -> "citeyearpar"
-             NormalCitation -> "citep"
-
-citationsToNatbib cits
-  | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
-  = citeCommand "citep" p s ks
-  where
-     noPrefix  = all (null . citationPrefix)
-     noSuffix  = all (null . citationSuffix)
-     ismode m  = all ((==) m  . citationMode)
-     p         = citationPrefix  $
-                 head cits
-     s         = citationSuffix  $
-                 last cits
-     ks        = T.intercalate ", " $ map citationId cits
-
-citationsToNatbib (c:cs) | citationMode c == AuthorInText = do
-     author <- citeCommand "citeauthor" [] [] (citationId c)
-     cits   <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs)
-     return $ author <+> cits
-
-citationsToNatbib cits = do
-  cits' <- mapM convertOne cits
-  return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}"
-  where
-    combineTwo a b | isEmpty a = b
-                   | otherwise = a <> text "; " <> b
-    convertOne Citation { citationId = k
-                        , citationPrefix = p
-                        , citationSuffix = s
-                        , citationMode = m
-                        }
-        = case m of
-               AuthorInText   -> citeCommand "citealt"  p s k
-               SuppressAuthor -> citeCommand "citeyear" p s k
-               NormalCitation -> citeCommand "citealp"  p s k
-
-citeCommand :: PandocMonad m
-            => Text -> [Inline] -> [Inline] -> Text -> LW m (Doc Text)
-citeCommand c p s k = do
-  args <- citeArguments p s k
-  return $ literal ("\\" <> c) <> args
-
-type Prefix = [Inline]
-type Suffix = [Inline]
-type CiteId = Text
-data CiteGroup = CiteGroup Prefix Suffix [CiteId]
-
-citeArgumentsList :: PandocMonad m
-              => CiteGroup -> LW m (Doc Text)
-citeArgumentsList (CiteGroup _ _ []) = return empty
-citeArgumentsList (CiteGroup pfxs sfxs ids) = do
-      pdoc <- inlineListToLaTeX pfxs
-      sdoc <- inlineListToLaTeX sfxs'
-      return $ optargs pdoc sdoc <>
-              braces (literal (T.intercalate "," (reverse ids)))
-      where sfxs' = stripLocatorBraces $ case sfxs of
-                (Str t : r) -> case T.uncons t of
-                  Just (x, xs)
-                    | T.null xs
-                    , isPunctuation x -> dropWhile (== Space) r
-                    | isPunctuation x -> Str xs : r
-                  _ -> sfxs
-                _   -> sfxs
-            optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of
-                 (True, True ) -> empty
-                 (True, False) -> brackets sdoc
-                 (_   , _    ) -> brackets pdoc <> brackets sdoc
-
-citeArguments :: PandocMonad m
-              => [Inline] -> [Inline] -> Text -> LW m (Doc Text)
-citeArguments p s k = citeArgumentsList (CiteGroup p s [k])
-
--- strip off {} used to define locator in pandoc-citeproc; see #5722
-stripLocatorBraces :: [Inline] -> [Inline]
-stripLocatorBraces = walk go
-  where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs
-        go x        = x
-
-citationsToBiblatex :: PandocMonad m => [Citation] -> LW m (Doc Text)
-citationsToBiblatex
-            [one]
-  = citeCommand cmd p s k
-    where
-       Citation { citationId = k
-                , citationPrefix = p
-                , citationSuffix = s
-                , citationMode = m
-                } = one
-       cmd = case m of
-                  SuppressAuthor -> "autocite*"
-                  AuthorInText   -> "textcite"
-                  NormalCitation -> "autocite"
-
-citationsToBiblatex (c:cs)
-  | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs)
-    = do
-      let cmd = case citationMode c of
-                    SuppressAuthor -> "\\autocite*"
-                    AuthorInText   -> "\\textcite"
-                    NormalCitation -> "\\autocite"
-      return $ text cmd <>
-               braces (literal (T.intercalate "," (map citationId (c:cs))))
-  | otherwise
-    = do
-      let cmd = case citationMode c of
-                    SuppressAuthor -> "\\autocites*"
-                    AuthorInText   -> "\\textcites"
-                    NormalCitation -> "\\autocites"
-
-      groups <- mapM citeArgumentsList (reverse (foldl' grouper [] (c:cs)))
-
-      return $ text cmd <> mconcat groups
-
-  where grouper prev cit = case prev of
-         ((CiteGroup oPfx oSfx ids):rest)
-             | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest
-         _ -> CiteGroup pfx sfx [cid] : prev
-         where pfx = citationPrefix cit
-               sfx = citationSuffix cit
-               cid = citationId cit
-
-citationsToBiblatex _ = return empty
-
 -- Extract a key from divs and spans
 extract :: Text -> Block -> [Text]
 extract key (Div attr _)     = lookKey key attr
diff --git a/src/Text/Pandoc/Writers/LaTeX/Citation.hs b/src/Text/Pandoc/Writers/LaTeX/Citation.hs
new file mode 100644
index 000000000..f48a43d7a
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Citation.hs
@@ -0,0 +1,181 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Citation
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.LaTeX.Citation
+  ( citationsToNatbib,
+    citationsToBiblatex
+  ) where
+
+import Data.Text (Text)
+import Data.Char (isPunctuation)
+import qualified Data.Text as T
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Data.List (foldl')
+import Text.DocLayout (Doc, brackets, empty, (<+>), text, isEmpty, literal,
+                       braces)
+import Text.Pandoc.Walk
+import Text.Pandoc.Writers.LaTeX.Types ( LW )
+
+citationsToNatbib :: PandocMonad m
+                  => ([Inline] -> LW m (Doc Text))
+                  -> [Citation]
+                  -> LW m (Doc Text)
+citationsToNatbib inlineListToLaTeX [one]
+  = citeCommand inlineListToLaTeX c p s k
+  where
+    Citation { citationId = k
+             , citationPrefix = p
+             , citationSuffix = s
+             , citationMode = m
+             }
+      = one
+    c = case m of
+             AuthorInText   -> "citet"
+             SuppressAuthor -> "citeyearpar"
+             NormalCitation -> "citep"
+
+citationsToNatbib inlineListToLaTeX cits
+  | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
+  = citeCommand inlineListToLaTeX "citep" p s ks
+  where
+     noPrefix  = all (null . citationPrefix)
+     noSuffix  = all (null . citationSuffix)
+     ismode m  = all ((==) m  . citationMode)
+     p         = citationPrefix  $
+                 head cits
+     s         = citationSuffix  $
+                 last cits
+     ks        = T.intercalate ", " $ map citationId cits
+
+citationsToNatbib inlineListToLaTeX (c:cs)
+  | citationMode c == AuthorInText = do
+     author <- citeCommand inlineListToLaTeX "citeauthor" [] [] (citationId c)
+     cits   <- citationsToNatbib inlineListToLaTeX
+                  (c { citationMode = SuppressAuthor } : cs)
+     return $ author <+> cits
+
+citationsToNatbib inlineListToLaTeX cits = do
+  cits' <- mapM convertOne cits
+  return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}"
+  where
+    citeCommand' = citeCommand inlineListToLaTeX
+    combineTwo a b | isEmpty a = b
+                   | otherwise = a <> text "; " <> b
+    convertOne Citation { citationId = k
+                        , citationPrefix = p
+                        , citationSuffix = s
+                        , citationMode = m
+                        }
+        = case m of
+               AuthorInText   -> citeCommand' "citealt"  p s k
+               SuppressAuthor -> citeCommand' "citeyear" p s k
+               NormalCitation -> citeCommand' "citealp"  p s k
+
+citeCommand :: PandocMonad m
+            => ([Inline] -> LW m (Doc Text))
+            -> Text
+            -> [Inline]
+            -> [Inline]
+            -> Text
+            -> LW m (Doc Text)
+citeCommand inlineListToLaTeX c p s k = do
+  args <- citeArguments inlineListToLaTeX p s k
+  return $ literal ("\\" <> c) <> args
+
+type Prefix = [Inline]
+type Suffix = [Inline]
+type CiteId = Text
+data CiteGroup = CiteGroup Prefix Suffix [CiteId]
+
+citeArgumentsList :: PandocMonad m
+              => ([Inline] -> LW m (Doc Text))
+              -> CiteGroup
+              -> LW m (Doc Text)
+citeArgumentsList _inlineListToLaTeX (CiteGroup _ _ []) = return empty
+citeArgumentsList inlineListToLaTeX (CiteGroup pfxs sfxs ids) = do
+      pdoc <- inlineListToLaTeX pfxs
+      sdoc <- inlineListToLaTeX sfxs'
+      return $ optargs pdoc sdoc <>
+              braces (literal (T.intercalate "," (reverse ids)))
+      where sfxs' = stripLocatorBraces $ case sfxs of
+                (Str t : r) -> case T.uncons t of
+                  Just (x, xs)
+                    | T.null xs
+                    , isPunctuation x -> dropWhile (== Space) r
+                    | isPunctuation x -> Str xs : r
+                  _ -> sfxs
+                _   -> sfxs
+            optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of
+                 (True, True ) -> empty
+                 (True, False) -> brackets sdoc
+                 (_   , _    ) -> brackets pdoc <> brackets sdoc
+
+citeArguments :: PandocMonad m
+              => ([Inline] -> LW m (Doc Text))
+              -> [Inline]
+              -> [Inline]
+              -> Text
+              -> LW m (Doc Text)
+citeArguments inlineListToLaTeX p s k =
+  citeArgumentsList inlineListToLaTeX (CiteGroup p s [k])
+
+-- strip off {} used to define locator in pandoc-citeproc; see #5722
+stripLocatorBraces :: [Inline] -> [Inline]
+stripLocatorBraces = walk go
+  where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs
+        go x        = x
+
+citationsToBiblatex :: PandocMonad m
+                    => ([Inline] -> LW m (Doc Text))
+                    -> [Citation] -> LW m (Doc Text)
+citationsToBiblatex inlineListToLaTeX [one]
+  = citeCommand inlineListToLaTeX cmd p s k
+    where
+       Citation { citationId = k
+                , citationPrefix = p
+                , citationSuffix = s
+                , citationMode = m
+                } = one
+       cmd = case m of
+                  SuppressAuthor -> "autocite*"
+                  AuthorInText   -> "textcite"
+                  NormalCitation -> "autocite"
+
+citationsToBiblatex inlineListToLaTeX (c:cs)
+  | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs)
+    = do
+      let cmd = case citationMode c of
+                    SuppressAuthor -> "\\autocite*"
+                    AuthorInText   -> "\\textcite"
+                    NormalCitation -> "\\autocite"
+      return $ text cmd <>
+               braces (literal (T.intercalate "," (map citationId (c:cs))))
+  | otherwise
+    = do
+      let cmd = case citationMode c of
+                    SuppressAuthor -> "\\autocites*"
+                    AuthorInText   -> "\\textcites"
+                    NormalCitation -> "\\autocites"
+
+      groups <- mapM (citeArgumentsList inlineListToLaTeX)
+                     (reverse (foldl' grouper [] (c:cs)))
+
+      return $ text cmd <> mconcat groups
+
+  where grouper prev cit = case prev of
+         ((CiteGroup oPfx oSfx ids):rest)
+             | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest
+         _ -> CiteGroup pfx sfx [cid] : prev
+         where pfx = citationPrefix cit
+               sfx = citationSuffix cit
+               cid = citationId cit
+
+citationsToBiblatex _ _ = return empty
-- 
cgit v1.2.3


From e8e5ffe1f4ecaff6f4e21af04ba593d64f4061f4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 2 Mar 2021 22:36:13 -0800
Subject: Split out T.P.Writers.LaTeX.Util.

---
 pandoc.cabal                          |   1 +
 src/Text/Pandoc/Writers/LaTeX.hs      | 254 ++-----------------------------
 src/Text/Pandoc/Writers/LaTeX/Util.hs | 274 ++++++++++++++++++++++++++++++++++
 3 files changed, 286 insertions(+), 243 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/LaTeX/Util.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 135ee2f26..d457a0620 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -667,6 +667,7 @@ library
                    Text.Pandoc.Writers.LaTeX.Lang,
                    Text.Pandoc.Writers.LaTeX.Types,
                    Text.Pandoc.Writers.LaTeX.Citation,
+                   Text.Pandoc.Writers.LaTeX.Util,
                    Text.Pandoc.Writers.Markdown.Types,
                    Text.Pandoc.Writers.Markdown.Inline,
                    Text.Pandoc.Writers.Roff,
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 84c96a507..180aaa44d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -18,10 +18,9 @@ module Text.Pandoc.Writers.LaTeX (
     writeLaTeX
   , writeBeamer
   ) where
-import Control.Applicative ((<|>))
 import Control.Monad.State.Strict
-import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, ord)
-import Data.List (intersperse, nubBy, (\\), uncons)
+import Data.Char (isDigit)
+import Data.List (intersperse, nubBy, (\\))
 import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing)
 import qualified Data.Map as M
 import Data.Text (Text)
@@ -33,7 +32,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
-                                 styleToLaTeX, toListingsLanguage)
+                                 styleToLaTeX)
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
@@ -46,11 +45,12 @@ import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX)
 import Text.Pandoc.Writers.LaTeX.Citation (citationsToNatbib,
                                            citationsToBiblatex)
 import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState)
-import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv, toPolyglossia,
-                                       toBabel)
+import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossia, toBabel)
+import Text.Pandoc.Writers.LaTeX.Util (stringToLaTeX, StringContext(..),
+                                       toLabel, inCmd,
+                                       wrapDiv, hypertarget, labelFor,
+                                       getListingsLanguage, mbBraced)
 import Text.Pandoc.Writers.Shared
-import Text.Printf (printf)
-import qualified Data.Text.Normalize as Normalize
 import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
 
 -- | Convert Pandoc to LaTeX.
@@ -244,152 +244,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
        Nothing  -> main
        Just tpl -> renderTemplate tpl context'
 
-data StringContext = TextString
-                   | URLString
-                   | CodeString
-                   deriving (Eq)
-
--- escape things as needed for LaTeX
-stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text
-stringToLaTeX context zs = do
-  opts <- gets stOptions
-  return $ T.pack $
-    foldr (go opts context) mempty $ T.unpack $
-    if writerPreferAscii opts
-       then Normalize.normalize Normalize.NFD zs
-       else zs
- where
-  go :: WriterOptions -> StringContext -> Char -> String -> String
-  go opts ctx x xs   =
-    let ligatures = isEnabled Ext_smart opts && ctx == TextString
-        isUrl = ctx == URLString
-        mbAccentCmd =
-          if writerPreferAscii opts && ctx == TextString
-             then uncons xs >>= \(c,_) -> lookupAccent c
-             else Nothing
-        emits s =
-          case mbAccentCmd of
-               Just cmd ->
-                 cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent
-               Nothing  -> s <> xs
-        emitc c =
-          case mbAccentCmd of
-               Just cmd ->
-                 cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent
-               Nothing  -> c : xs
-        emitcseq cs =
-          case xs of
-            c:_ | isLetter c
-                , ctx == TextString
-                             -> cs <> " " <> xs
-                | isSpace c  -> cs <> "{}" <> xs
-                | ctx == TextString
-                             -> cs <> xs
-            _ -> cs <> "{}" <> xs
-        emitquote cs =
-          case xs of
-            '`':_  -> cs <> "\\," <> xs -- add thin space
-            '\'':_ -> cs <> "\\," <> xs -- add thin space
-            _      -> cs <> xs
-    in case x of
-         '?' | ligatures ->  -- avoid ?` ligature
-           case xs of
-             '`':_ -> emits "?{}"
-             _     -> emitc x
-         '!' | ligatures ->  -- avoid !` ligature
-           case xs of
-             '`':_ -> emits "!{}"
-             _     -> emitc x
-         '{' -> emits "\\{"
-         '}' -> emits "\\}"
-         '`' | ctx == CodeString -> emitcseq "\\textasciigrave"
-         '$' | not isUrl -> emits "\\$"
-         '%' -> emits "\\%"
-         '&' -> emits "\\&"
-         '_' | not isUrl -> emits "\\_"
-         '#' -> emits "\\#"
-         '-' | not isUrl -> case xs of
-                     -- prevent adjacent hyphens from forming ligatures
-                     ('-':_) -> emits "-\\/"
-                     _       -> emitc '-'
-         '~' | not isUrl -> emitcseq "\\textasciitilde"
-         '^' -> emits "\\^{}"
-         '\\'| isUrl     -> emitc '/' -- NB. / works as path sep even on Windows
-             | otherwise -> emitcseq "\\textbackslash"
-         '|' | not isUrl -> emitcseq "\\textbar"
-         '<' -> emitcseq "\\textless"
-         '>' -> emitcseq "\\textgreater"
-         '[' -> emits "{[}"  -- to avoid interpretation as
-         ']' -> emits "{]}"  -- optional arguments
-         '\'' | ctx == CodeString -> emitcseq "\\textquotesingle"
-         '\160' -> emits "~"
-         '\x200B' -> emits "\\hspace{0pt}"  -- zero-width space
-         '\x202F' -> emits "\\,"
-         '\x2026' -> emitcseq "\\ldots"
-         '\x2018' | ligatures -> emitquote "`"
-         '\x2019' | ligatures -> emitquote "'"
-         '\x201C' | ligatures -> emitquote "``"
-         '\x201D' | ligatures -> emitquote "''"
-         '\x2014' | ligatures -> emits "---"
-         '\x2013' | ligatures -> emits "--"
-         _ | writerPreferAscii opts
-             -> case x of
-                  'ı' -> emitcseq "\\i"
-                  'ȷ' -> emitcseq "\\j"
-                  'å' -> emitcseq "\\aa"
-                  'Å' -> emitcseq "\\AA"
-                  'ß' -> emitcseq "\\ss"
-                  'ø' -> emitcseq "\\o"
-                  'Ø' -> emitcseq "\\O"
-                  'Ł' -> emitcseq "\\L"
-                  'ł' -> emitcseq "\\l"
-                  'æ' -> emitcseq "\\ae"
-                  'Æ' -> emitcseq "\\AE"
-                  'œ' -> emitcseq "\\oe"
-                  'Œ' -> emitcseq "\\OE"
-                  '£' -> emitcseq "\\pounds"
-                  '€' -> emitcseq "\\euro"
-                  '©' -> emitcseq "\\copyright"
-                  _   -> emitc x
-           | otherwise -> emitc x
-
-lookupAccent :: Char -> Maybe String
-lookupAccent '\779'  = Just "\\H"
-lookupAccent '\768'  = Just "\\`"
-lookupAccent '\769'  = Just "\\'"
-lookupAccent '\770'  = Just "\\^"
-lookupAccent '\771'  = Just "\\~"
-lookupAccent '\776'  = Just "\\\""
-lookupAccent '\775'  = Just "\\."
-lookupAccent '\772'  = Just "\\="
-lookupAccent '\781'  = Just "\\|"
-lookupAccent '\817'  = Just "\\b"
-lookupAccent '\807'  = Just "\\c"
-lookupAccent '\783'  = Just "\\G"
-lookupAccent '\777'  = Just "\\h"
-lookupAccent '\803'  = Just "\\d"
-lookupAccent '\785'  = Just "\\f"
-lookupAccent '\778'  = Just "\\r"
-lookupAccent '\865'  = Just "\\t"
-lookupAccent '\782'  = Just "\\U"
-lookupAccent '\780'  = Just "\\v"
-lookupAccent '\774'  = Just "\\u"
-lookupAccent '\808'  = Just "\\k"
-lookupAccent '\8413' = Just "\\textcircled"
-lookupAccent _       = Nothing
-
-toLabel :: PandocMonad m => Text -> LW m Text
-toLabel z = go `fmap` stringToLaTeX URLString z
- where
-   go = T.concatMap $ \x -> case x of
-     _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x
-       | x `elemText` "_-+=:;." -> T.singleton x
-       | otherwise -> T.pack $ "ux" <> printf "%x" (ord x)
-
--- | Puts contents into LaTeX command.
-inCmd :: Text -> Doc Text -> Doc Text
-inCmd cmd contents = char '\\' <> literal cmd <> braces contents
-
 toSlides :: PandocMonad m => [Block] -> LW m [Block]
 toSlides bs = do
   opts <- gets stOptions
@@ -854,91 +708,6 @@ sectionHeader classes ident level lst = do
                                 braces txtNoNotes
                          else empty
 
-mapAlignment :: Text -> Text
-mapAlignment a = case a of
-                   "top" -> "T"
-                   "top-baseline" -> "t"
-                   "bottom" -> "b"
-                   "center" -> "c"
-                   _ -> a
-
-wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text)
-wrapDiv (_,classes,kvs) t = do
-  beamer <- gets stBeamer
-  let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
-  lang <- toLang $ lookup "lang" kvs
-  let wrapColumns = if beamer && "columns" `elem` classes
-                    then \contents ->
-                           let valign = maybe "T" mapAlignment (lookup "align" kvs)
-                               totalwidth = maybe [] (\x -> ["totalwidth=" <> x])
-                                 (lookup "totalwidth" kvs)
-                               onlytextwidth = filter ("onlytextwidth" ==) classes
-                               options = text $ T.unpack $ T.intercalate "," $
-                                 valign : totalwidth ++ onlytextwidth
-                           in inCmd "begin" "columns" <> brackets options
-                              $$ contents
-                              $$ inCmd "end" "columns"
-                    else id
-      wrapColumn  = if beamer && "column" `elem` classes
-                    then \contents ->
-                           let valign =
-                                 maybe ""
-                                 (brackets . text . T.unpack . mapAlignment)
-                                 (lookup "align" kvs)
-                               w = maybe "0.48" fromPct (lookup "width" kvs)
-                           in  inCmd "begin" "column" <>
-                               valign <>
-                               braces (literal w <> "\\textwidth")
-                               $$ contents
-                               $$ inCmd "end" "column"
-                    else id
-      fromPct xs =
-        case T.unsnoc xs of
-          Just (ds, '%') -> case safeRead ds of
-                              Just digits -> showFl (digits / 100 :: Double)
-                              Nothing -> xs
-          _              -> xs
-      wrapDir = case lookup "dir" kvs of
-                  Just "rtl" -> align "RTL"
-                  Just "ltr" -> align "LTR"
-                  _          -> id
-      wrapLang txt = case lang of
-                       Just lng -> let (l, o) = toPolyglossiaEnv lng
-                                       ops = if T.null o
-                                             then ""
-                                             else brackets $ literal o
-                                   in  inCmd "begin" (literal l) <> ops
-                                       $$ blankline <> txt <> blankline
-                                       $$ inCmd "end" (literal l)
-                       Nothing  -> txt
-  return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t
-
-hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text)
-hypertarget _ "" x    = return x
-hypertarget addnewline ident x = do
-  ref <- literal `fmap` toLabel ident
-  return $ text "\\hypertarget"
-              <> braces ref
-              <> braces ((if addnewline && not (isEmpty x)
-                             then "%" <> cr
-                             else empty) <> x)
-
-labelFor :: PandocMonad m => Text -> LW m (Doc Text)
-labelFor ""    = return empty
-labelFor ident = do
-  ref <- literal `fmap` toLabel ident
-  return $ text "\\label" <> braces ref
-
--- Determine listings language from list of class attributes.
-getListingsLanguage :: [Text] -> Maybe Text
-getListingsLanguage xs
-  = foldr ((<|>) . toListingsLanguage) Nothing xs
-
-mbBraced :: Text -> Text
-mbBraced x = if not (T.all isAlphaNum x)
-                then "{" <> x <> "}"
-                else x
-
 -- | Convert list of inline elements to LaTeX.
 inlineListToLaTeX :: PandocMonad m
                   => [Inline]  -- ^ Inlines to convert
@@ -963,10 +732,6 @@ inlineListToLaTeX lst = hcat <$>
            fixInitialLineBreaks xs
        fixInitialLineBreaks xs = xs
 
-isQuoted :: Inline -> Bool
-isQuoted (Quoted _ _) = True
-isQuoted _            = False
-
 -- | Convert inline element to LaTeX
 inlineToLaTeX :: PandocMonad m
               => Inline    -- ^ Inline to convert
@@ -1102,6 +867,9 @@ inlineToLaTeX (Quoted qt lst) = do
                    if isEnabled Ext_smart opts
                       then char '`' <> inner <> char '\''
                       else char '\x2018' <> inner <> char '\x2019'
+    where
+      isQuoted (Quoted _ _) = True
+      isQuoted _            = False
 inlineToLaTeX (Str str) = do
   setEmptyLine False
   liftM literal $ stringToLaTeX TextString str
diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs
new file mode 100644
index 000000000..56bb792ae
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs
@@ -0,0 +1,274 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{- |
+   Module      : Text.Pandoc.Writers.LaTeX.Util
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Writers.LaTeX.Util (
+    stringToLaTeX
+  , StringContext(..)
+  , toLabel
+  , inCmd
+  , wrapDiv
+  , hypertarget
+  , labelFor
+  , getListingsLanguage
+  , mbBraced
+  )
+where
+
+import Control.Applicative ((<|>))
+import Text.Pandoc.Class (PandocMonad, toLang)
+import Text.Pandoc.Options (WriterOptions(..), isEnabled)
+import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..))
+import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv)
+import Text.Pandoc.Highlighting (toListingsLanguage)
+import Text.DocLayout
+import Text.Pandoc.Definition
+import Text.Pandoc.ImageSize (showFl)
+import Control.Monad.State.Strict (gets)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Extensions (Extension(Ext_smart))
+import Data.Char (isLetter, isSpace, isDigit, isAscii, ord, isAlphaNum)
+import Text.Printf (printf)
+import Text.Pandoc.Shared (safeRead, elemText)
+import qualified Data.Text.Normalize as Normalize
+import Data.List (uncons)
+
+data StringContext = TextString
+                   | URLString
+                   | CodeString
+                   deriving (Eq)
+
+-- escape things as needed for LaTeX
+stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text
+stringToLaTeX context zs = do
+  opts <- gets stOptions
+  return $ T.pack $
+    foldr (go opts context) mempty $ T.unpack $
+    if writerPreferAscii opts
+       then Normalize.normalize Normalize.NFD zs
+       else zs
+ where
+  go :: WriterOptions -> StringContext -> Char -> String -> String
+  go opts ctx x xs   =
+    let ligatures = isEnabled Ext_smart opts && ctx == TextString
+        isUrl = ctx == URLString
+        mbAccentCmd =
+          if writerPreferAscii opts && ctx == TextString
+             then uncons xs >>= \(c,_) -> lookupAccent c
+             else Nothing
+        emits s =
+          case mbAccentCmd of
+               Just cmd ->
+                 cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent
+               Nothing  -> s <> xs
+        emitc c =
+          case mbAccentCmd of
+               Just cmd ->
+                 cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent
+               Nothing  -> c : xs
+        emitcseq cs =
+          case xs of
+            c:_ | isLetter c
+                , ctx == TextString
+                             -> cs <> " " <> xs
+                | isSpace c  -> cs <> "{}" <> xs
+                | ctx == TextString
+                             -> cs <> xs
+            _ -> cs <> "{}" <> xs
+        emitquote cs =
+          case xs of
+            '`':_  -> cs <> "\\," <> xs -- add thin space
+            '\'':_ -> cs <> "\\," <> xs -- add thin space
+            _      -> cs <> xs
+    in case x of
+         '?' | ligatures ->  -- avoid ?` ligature
+           case xs of
+             '`':_ -> emits "?{}"
+             _     -> emitc x
+         '!' | ligatures ->  -- avoid !` ligature
+           case xs of
+             '`':_ -> emits "!{}"
+             _     -> emitc x
+         '{' -> emits "\\{"
+         '}' -> emits "\\}"
+         '`' | ctx == CodeString -> emitcseq "\\textasciigrave"
+         '$' | not isUrl -> emits "\\$"
+         '%' -> emits "\\%"
+         '&' -> emits "\\&"
+         '_' | not isUrl -> emits "\\_"
+         '#' -> emits "\\#"
+         '-' | not isUrl -> case xs of
+                     -- prevent adjacent hyphens from forming ligatures
+                     ('-':_) -> emits "-\\/"
+                     _       -> emitc '-'
+         '~' | not isUrl -> emitcseq "\\textasciitilde"
+         '^' -> emits "\\^{}"
+         '\\'| isUrl     -> emitc '/' -- NB. / works as path sep even on Windows
+             | otherwise -> emitcseq "\\textbackslash"
+         '|' | not isUrl -> emitcseq "\\textbar"
+         '<' -> emitcseq "\\textless"
+         '>' -> emitcseq "\\textgreater"
+         '[' -> emits "{[}"  -- to avoid interpretation as
+         ']' -> emits "{]}"  -- optional arguments
+         '\'' | ctx == CodeString -> emitcseq "\\textquotesingle"
+         '\160' -> emits "~"
+         '\x200B' -> emits "\\hspace{0pt}"  -- zero-width space
+         '\x202F' -> emits "\\,"
+         '\x2026' -> emitcseq "\\ldots"
+         '\x2018' | ligatures -> emitquote "`"
+         '\x2019' | ligatures -> emitquote "'"
+         '\x201C' | ligatures -> emitquote "``"
+         '\x201D' | ligatures -> emitquote "''"
+         '\x2014' | ligatures -> emits "---"
+         '\x2013' | ligatures -> emits "--"
+         _ | writerPreferAscii opts
+             -> case x of
+                  'ı' -> emitcseq "\\i"
+                  'ȷ' -> emitcseq "\\j"
+                  'å' -> emitcseq "\\aa"
+                  'Å' -> emitcseq "\\AA"
+                  'ß' -> emitcseq "\\ss"
+                  'ø' -> emitcseq "\\o"
+                  'Ø' -> emitcseq "\\O"
+                  'Ł' -> emitcseq "\\L"
+                  'ł' -> emitcseq "\\l"
+                  'æ' -> emitcseq "\\ae"
+                  'Æ' -> emitcseq "\\AE"
+                  'œ' -> emitcseq "\\oe"
+                  'Œ' -> emitcseq "\\OE"
+                  '£' -> emitcseq "\\pounds"
+                  '€' -> emitcseq "\\euro"
+                  '©' -> emitcseq "\\copyright"
+                  _   -> emitc x
+           | otherwise -> emitc x
+
+lookupAccent :: Char -> Maybe String
+lookupAccent '\779'  = Just "\\H"
+lookupAccent '\768'  = Just "\\`"
+lookupAccent '\769'  = Just "\\'"
+lookupAccent '\770'  = Just "\\^"
+lookupAccent '\771'  = Just "\\~"
+lookupAccent '\776'  = Just "\\\""
+lookupAccent '\775'  = Just "\\."
+lookupAccent '\772'  = Just "\\="
+lookupAccent '\781'  = Just "\\|"
+lookupAccent '\817'  = Just "\\b"
+lookupAccent '\807'  = Just "\\c"
+lookupAccent '\783'  = Just "\\G"
+lookupAccent '\777'  = Just "\\h"
+lookupAccent '\803'  = Just "\\d"
+lookupAccent '\785'  = Just "\\f"
+lookupAccent '\778'  = Just "\\r"
+lookupAccent '\865'  = Just "\\t"
+lookupAccent '\782'  = Just "\\U"
+lookupAccent '\780'  = Just "\\v"
+lookupAccent '\774'  = Just "\\u"
+lookupAccent '\808'  = Just "\\k"
+lookupAccent '\8413' = Just "\\textcircled"
+lookupAccent _       = Nothing
+
+toLabel :: PandocMonad m => Text -> LW m Text
+toLabel z = go `fmap` stringToLaTeX URLString z
+ where
+   go = T.concatMap $ \x -> case x of
+     _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x
+       | x `elemText` "_-+=:;." -> T.singleton x
+       | otherwise -> T.pack $ "ux" <> printf "%x" (ord x)
+
+-- | Puts contents into LaTeX command.
+inCmd :: Text -> Doc Text -> Doc Text
+inCmd cmd contents = char '\\' <> literal cmd <> braces contents
+
+mapAlignment :: Text -> Text
+mapAlignment a = case a of
+                   "top" -> "T"
+                   "top-baseline" -> "t"
+                   "bottom" -> "b"
+                   "center" -> "c"
+                   _ -> a
+
+wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text)
+wrapDiv (_,classes,kvs) t = do
+  beamer <- gets stBeamer
+  let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
+  lang <- toLang $ lookup "lang" kvs
+  let wrapColumns = if beamer && "columns" `elem` classes
+                    then \contents ->
+                           let valign = maybe "T" mapAlignment (lookup "align" kvs)
+                               totalwidth = maybe [] (\x -> ["totalwidth=" <> x])
+                                 (lookup "totalwidth" kvs)
+                               onlytextwidth = filter ("onlytextwidth" ==) classes
+                               options = text $ T.unpack $ T.intercalate "," $
+                                 valign : totalwidth ++ onlytextwidth
+                           in inCmd "begin" "columns" <> brackets options
+                              $$ contents
+                              $$ inCmd "end" "columns"
+                    else id
+      wrapColumn  = if beamer && "column" `elem` classes
+                    then \contents ->
+                           let valign =
+                                 maybe ""
+                                 (brackets . text . T.unpack . mapAlignment)
+                                 (lookup "align" kvs)
+                               w = maybe "0.48" fromPct (lookup "width" kvs)
+                           in  inCmd "begin" "column" <>
+                               valign <>
+                               braces (literal w <> "\\textwidth")
+                               $$ contents
+                               $$ inCmd "end" "column"
+                    else id
+      fromPct xs =
+        case T.unsnoc xs of
+          Just (ds, '%') -> case safeRead ds of
+                              Just digits -> showFl (digits / 100 :: Double)
+                              Nothing -> xs
+          _              -> xs
+      wrapDir = case lookup "dir" kvs of
+                  Just "rtl" -> align "RTL"
+                  Just "ltr" -> align "LTR"
+                  _          -> id
+      wrapLang txt = case lang of
+                       Just lng -> let (l, o) = toPolyglossiaEnv lng
+                                       ops = if T.null o
+                                             then ""
+                                             else brackets $ literal o
+                                   in  inCmd "begin" (literal l) <> ops
+                                       $$ blankline <> txt <> blankline
+                                       $$ inCmd "end" (literal l)
+                       Nothing  -> txt
+  return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t
+
+hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text)
+hypertarget _ "" x    = return x
+hypertarget addnewline ident x = do
+  ref <- literal `fmap` toLabel ident
+  return $ text "\\hypertarget"
+              <> braces ref
+              <> braces ((if addnewline && not (isEmpty x)
+                             then "%" <> cr
+                             else empty) <> x)
+
+labelFor :: PandocMonad m => Text -> LW m (Doc Text)
+labelFor ""    = return empty
+labelFor ident = do
+  ref <- literal `fmap` toLabel ident
+  return $ text "\\label" <> braces ref
+
+-- Determine listings language from list of class attributes.
+getListingsLanguage :: [Text] -> Maybe Text
+getListingsLanguage xs
+  = foldr ((<|>) . toListingsLanguage) Nothing xs
+
+mbBraced :: Text -> Text
+mbBraced x = if not (T.all isAlphaNum x)
+                then "{" <> x <> "}"
+                else x
+
+
-- 
cgit v1.2.3


From bbcc1501a5fa6b40ded88f6738d35ce7a8079313 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 10:05:46 -0800
Subject: Split out T.P.Readers.LaTeX.Inline.

---
 pandoc.cabal                            |  11 +-
 src/Text/Pandoc/Readers/LaTeX.hs        | 474 ++++++++++----------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 275 ++++++++++++++++++
 3 files changed, 419 insertions(+), 341 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Inline.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index d457a0620..211327642 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,15 +626,16 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Types,
-                   Text.Pandoc.Readers.LaTeX.Parsing,
-                   Text.Pandoc.Readers.LaTeX.Lang,
-                   Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Accent,
+                   Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
+                   Text.Pandoc.Readers.LaTeX.Lang,
+                   Text.Pandoc.Readers.LaTeX.Macro,
                    Text.Pandoc.Readers.LaTeX.Math,
+                   Text.Pandoc.Readers.LaTeX.Parsing,
+                   Text.Pandoc.Readers.LaTeX.SIunitx,
                    Text.Pandoc.Readers.LaTeX.Table,
-                   Text.Pandoc.Readers.LaTeX.Macro,
+                   Text.Pandoc.Readers.LaTeX.Types,
                    Text.Pandoc.Readers.Odt.Base,
                    Text.Pandoc.Readers.Odt.Namespaces,
                    Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index fc85f0545..a27135fd2 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,13 +35,13 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
 import Text.Pandoc.BCP47 (Lang (..), renderLang)
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      readFileFromDirs, report, setResourcePath,
-                                      translateTerm)
+                                      readFileFromDirs, report,
+                                      setResourcePath)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
-import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
+import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
@@ -61,10 +61,12 @@ import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
+                                         nameCommands, charCommands,
+                                         verbCommands, rawInlineOr,
+                                         listingsLanguage)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.Translations as Translations
 import Text.Pandoc.Walk
-import qualified Text.Pandoc.Builder as B
 import Safe
 
 -- for debugging:
@@ -317,76 +319,6 @@ blockquote cvariant mblang = do
   optional $ symbolIn (".:;?!" :: [Char])  -- currently ignored
   return $ blockQuote . langdiv $ (bs <> citepar)
 
-doAcronym :: PandocMonad m => Text -> LP m Inlines
-doAcronym form = do
-  acro <- braced
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "singular+" <> form)])
-    $ str $ untokenize acro]
-
-doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
-doAcronymPlural form = do
-  acro <- braced
-  plural <- lit "s"
-  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
-    ("acronym-form", "plural+" <> form)]) $
-   mconcat [str $ untokenize acro, plural]]
-
-doverb :: PandocMonad m => LP m Inlines
-doverb = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  withVerbatimMode $
-    code . untokenize <$>
-      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
-
-verbTok :: PandocMonad m => Char -> LP m Tok
-verbTok stopchar = do
-  t@(Tok pos toktype txt) <- anyTok
-  case T.findIndex (== stopchar) txt of
-       Nothing -> return t
-       Just i  -> do
-         let (t1, t2) = T.splitAt i txt
-         inp <- getInput
-         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
-                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
-         return $ Tok pos toktype t1
-
-listingsLanguage :: [(Text, Text)] -> Maybe Text
-listingsLanguage opts =
-  case lookup "language" opts of
-    Nothing  -> Nothing
-    Just l   -> fromListingsLanguage l `mplus` Just l
-
-dolstinline :: PandocMonad m => LP m Inlines
-dolstinline = do
-  options <- option [] keyvals
-  let classes = maybeToList $ listingsLanguage options
-  doinlinecode classes
-
-domintinline :: PandocMonad m => LP m Inlines
-domintinline = do
-  skipopts
-  cls <- untokenize <$> braced
-  doinlinecode [cls]
-
-doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
-doinlinecode classes = do
-  Tok _ Symbol t <- anySymbol
-  marker <- case T.uncons t of
-              Just (c, ts) | T.null ts -> return c
-              _            -> mzero
-  let stopchar = if marker == '{' then '}' else marker
-  withVerbatimMode $
-    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
-      manyTill (verbTok stopchar) (symbol stopchar)
-
-nlToSpace :: Char -> Char
-nlToSpace '\n' = ' '
-nlToSpace x    = x
-
 inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
@@ -405,9 +337,6 @@ inlineCommand' = try $ do
 tok :: PandocMonad m => LP m Inlines
 tok = tokWith inline
 
-inBrackets :: Inlines -> Inlines
-inBrackets x = str "[" <> x <> str "]"
-
 unescapeURL :: Text -> Text
 unescapeURL = T.concat . go . T.splitOn "\\"
   where
@@ -420,234 +349,136 @@ unescapeURL = T.concat . go . T.splitOn "\\"
       | otherwise = "\\" <> t
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands =
-  M.union inlineLanguageCommands $
-  M.union (accentCommands tok) $
-  M.union (citationCommands inline) $
-  M.union (siunitxCommands tok) $
-  M.fromList
-  [ ("emph", extractSpaces emph <$> tok)
-  , ("textit", extractSpaces emph <$> tok)
-  , ("textsl", extractSpaces emph <$> tok)
-  , ("textsc", extractSpaces smallcaps <$> tok)
-  , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
-  , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
-  , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
-  , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
-  , ("texttt", ttfamily)
-  , ("sout", extractSpaces strikeout <$> tok)
-  , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
-  , ("lq", return (str "‘"))
-  , ("rq", return (str "’"))
-  , ("textquoteleft", return (str "‘"))
-  , ("textquoteright", return (str "’"))
-  , ("textquotedblleft", return (str "“"))
-  , ("textquotedblright", return (str "”"))
-  , ("textsuperscript", extractSpaces superscript <$> tok)
-  , ("textsubscript", extractSpaces subscript <$> tok)
-  , ("textbackslash", lit "\\")
-  , ("backslash", lit "\\")
-  , ("slash", lit "/")
-  , ("textbf", extractSpaces strong <$> tok)
-  , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
-  , ("underline", underline <$> tok)
-  , ("ldots", lit "…")
-  , ("vdots", lit "\8942")
-  , ("dots", lit "…")
-  , ("mdots", lit "…")
-  , ("sim", lit "~")
-  , ("sep", lit ",")
-  , ("label", rawInlineOr "label" dolabel)
-  , ("ref", rawInlineOr "ref" $ doref "ref")
-  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
-  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
-  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
-  , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
-  , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
-  , ("lettrine", rawInlineOr "lettrine" lettrine)
-  , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
-  , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
-  , ("ensuremath", mathInline . untokenize <$> braced)
-  , ("texorpdfstring", const <$> tok <*> tok)
-  , ("P", lit "¶")
-  , ("S", lit "§")
-  , ("$", lit "$")
-  , ("%", lit "%")
-  , ("&", lit "&")
-  , ("#", lit "#")
-  , ("_", lit "_")
-  , ("{", lit "{")
-  , ("}", lit "}")
-  , ("qed", lit "\a0\x25FB")
-  -- old TeX commands
-  , ("em", extractSpaces emph <$> inlines)
-  , ("it", extractSpaces emph <$> inlines)
-  , ("sl", extractSpaces emph <$> inlines)
-  , ("bf", extractSpaces strong <$> inlines)
-  , ("tt", code . stringify . toList <$> inlines)
-  , ("rm", inlines)
-  , ("itshape", extractSpaces emph <$> inlines)
-  , ("slshape", extractSpaces emph <$> inlines)
-  , ("scshape", extractSpaces smallcaps <$> inlines)
-  , ("bfseries", extractSpaces strong <$> inlines)
-  , ("MakeUppercase", makeUppercase <$> tok)
-  , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
-  , ("uppercase", makeUppercase <$> tok)
-  , ("MakeLowercase", makeLowercase <$> tok)
-  , ("MakeTextLowercase", makeLowercase <$> tok)
-  , ("lowercase", makeLowercase <$> tok)
-  , ("/", pure mempty) -- italic correction
-  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
-                            guard $ not inTableCell
-                            optional rawopt
-                            spaces))
-  , (",", lit "\8198")
-  , ("@", pure mempty)
-  , (" ", lit "\160")
-  , ("ps", pure $ str "PS." <> space)
-  , ("TeX", lit "TeX")
-  , ("LaTeX", lit "LaTeX")
-  , ("bar", lit "|")
-  , ("textless", lit "<")
-  , ("textgreater", lit ">")
-  , ("thanks", skipopts >> note <$> grouped block)
-  , ("footnote", skipopts >> note <$> grouped block)
-  , ("passthrough", tok) -- \passthrough macro used by latex writer
-                         -- for listings
-  , ("verb", doverb)
-  , ("lstinline", dolstinline)
-  , ("mintinline", domintinline)
-  , ("Verb", doverb)
-  , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
-                  bracedUrl)
-  , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
-  , ("href", do url <- bracedUrl
-                sp
-                link (unescapeURL $ untokenize url) "" <$> tok)
-  , ("includegraphics", do options <- option [] keyvals
-                           src <- braced
-                           mkImage options . unescapeURL . removeDoubleQuotes $
-                               untokenize src)
-  , ("enquote*", enquote True Nothing)
-  , ("enquote", enquote False Nothing)
-  -- foreignquote is supposed to use native quote marks
-  , ("foreignquote*", braced >>= enquote True . Just . untokenize)
-  , ("foreignquote", braced >>= enquote False . Just . untokenize)
-  -- hypehnquote uses regular quotes
-  , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
-  , ("hyphenquote", braced >>= enquote False . Just . untokenize)
-  , ("figurename", doTerm Translations.Figure)
-  , ("prefacename", doTerm Translations.Preface)
-  , ("refname", doTerm Translations.References)
-  , ("bibname", doTerm Translations.Bibliography)
-  , ("chaptername", doTerm Translations.Chapter)
-  , ("partname", doTerm Translations.Part)
-  , ("contentsname", doTerm Translations.Contents)
-  , ("listfigurename", doTerm Translations.ListOfFigures)
-  , ("listtablename", doTerm Translations.ListOfTables)
-  , ("indexname", doTerm Translations.Index)
-  , ("abstractname", doTerm Translations.Abstract)
-  , ("tablename", doTerm Translations.Table)
-  , ("enclname", doTerm Translations.Encl)
-  , ("ccname", doTerm Translations.Cc)
-  , ("headtoname", doTerm Translations.To)
-  , ("pagename", doTerm Translations.Page)
-  , ("seename", doTerm Translations.See)
-  , ("seealsoname", doTerm Translations.SeeAlso)
-  , ("proofname", doTerm Translations.Proof)
-  , ("glossaryname", doTerm Translations.Glossary)
-  , ("lstlistingname", doTerm Translations.Listing)
-  , ("hyperlink", hyperlink)
-  , ("hypertarget", hypertargetInline)
-  -- glossaries package
-  , ("gls", doAcronym "short")
-  , ("Gls", doAcronym "short")
-  , ("glsdesc", doAcronym "long")
-  , ("Glsdesc", doAcronym "long")
-  , ("GLSdesc", doAcronym "long")
-  , ("acrlong", doAcronym "long")
-  , ("Acrlong", doAcronym "long")
-  , ("acrfull", doAcronym "full")
-  , ("Acrfull", doAcronym "full")
-  , ("acrshort", doAcronym "abbrv")
-  , ("Acrshort", doAcronym "abbrv")
-  , ("glspl", doAcronymPlural "short")
-  , ("Glspl", doAcronymPlural "short")
-  , ("glsdescplural", doAcronymPlural "long")
-  , ("Glsdescplural", doAcronymPlural "long")
-  , ("GLSdescplural", doAcronymPlural "long")
-  -- acronyms package
-  , ("ac", doAcronym "short")
-  , ("acf", doAcronym "full")
-  , ("acs", doAcronym "abbrv")
-  , ("acl", doAcronym "long")
-  , ("acp", doAcronymPlural "short")
-  , ("acfp", doAcronymPlural "full")
-  , ("acsp", doAcronymPlural "abbrv")
-  , ("aclp", doAcronymPlural "long")
-  , ("Ac", doAcronym "short")
-  , ("Acf", doAcronym "full")
-  , ("Acs", doAcronym "abbrv")
-  , ("Acl", doAcronym "long")
-  , ("Acp", doAcronymPlural "short")
-  , ("Acfp", doAcronymPlural "full")
-  , ("Acsp", doAcronymPlural "abbrv")
-  , ("Aclp", doAcronymPlural "long")
-  -- hyphenat
-  , ("bshyp", lit "\\\173")
-  , ("fshyp", lit "/\173")
-  , ("dothyp", lit ".\173")
-  , ("colonhyp", lit ":\173")
-  , ("hyp", lit "-")
-  , ("nohyphens", tok)
-  , ("textnhtt", ttfamily)
-  , ("nhttfamily", ttfamily)
-  -- LaTeX colors
-  , ("textcolor", coloredInline "color")
-  , ("colorbox", coloredInline "background-color")
-  -- fontawesome
-  , ("faCheck", lit "\10003")
-  , ("faClose", lit "\10007")
-  -- xspace
-  , ("xspace", doxspace)
-  -- etoolbox
-  , ("ifstrequal", ifstrequal)
-  , ("newtoggle", braced >>= newToggle)
-  , ("toggletrue", braced >>= setToggle True)
-  , ("togglefalse", braced >>= setToggle False)
-  , ("iftoggle", try $ ifToggle >> inline)
-  -- biblatex misc
-  , ("RN", romanNumeralUpper)
-  , ("Rn", romanNumeralLower)
-  -- babel
-  , ("foreignlanguage", foreignlanguage)
-  -- include
-  , ("input", rawInlineOr "input" $ include "input")
-  -- soul package
-  , ("ul", underline <$> tok)
-  -- ulem package
-  , ("uline", underline <$> tok)
-  -- plain tex stuff that should just be passed through as raw tex
-  , ("ifdim", ifdim)
-  -- bibtex
-  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
-  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
-  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
-  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
-  , ("mkbibparens",
-       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
-  , ("mkbibbrackets",
-       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
-  , ("autocap", spanWith nullAttr <$> tok)
-  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
-  , ("bibstring",
-       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
-         <$> braced)
-  , ("adddot", pure (str "."))
-  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
-  , ("addabbrvspace", pure space)
-  , ("hyphen", pure (str "-"))
-  ]
+inlineCommands = M.unions
+  [ inlineLanguageCommands
+  , accentCommands tok
+  , citationCommands inline
+  , siunitxCommands tok
+  , acronymCommands
+  , refCommands
+  , nameCommands
+  , verbCommands
+  , charCommands
+  , rest ]
+ where
+  rest = M.fromList
+    [ ("emph", extractSpaces emph <$> tok)
+    , ("textit", extractSpaces emph <$> tok)
+    , ("textsl", extractSpaces emph <$> tok)
+    , ("textsc", extractSpaces smallcaps <$> tok)
+    , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok)
+    , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok)
+    , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok)
+    , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok)
+    , ("texttt", ttfamily)
+    , ("sout", extractSpaces strikeout <$> tok)
+    , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
+    , ("lq", return (str "‘"))
+    , ("rq", return (str "’"))
+    , ("textquoteleft", return (str "‘"))
+    , ("textquoteright", return (str "’"))
+    , ("textquotedblleft", return (str "“"))
+    , ("textquotedblright", return (str "”"))
+    , ("textsuperscript", extractSpaces superscript <$> tok)
+    , ("textsubscript", extractSpaces subscript <$> tok)
+    , ("textbf", extractSpaces strong <$> tok)
+    , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok)
+    , ("underline", underline <$> tok)
+    , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
+    , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
+    , ("lettrine", rawInlineOr "lettrine" lettrine)
+    , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
+    , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
+    , ("ensuremath", mathInline . untokenize <$> braced)
+    , ("texorpdfstring", const <$> tok <*> tok)
+    -- old TeX commands
+    , ("em", extractSpaces emph <$> inlines)
+    , ("it", extractSpaces emph <$> inlines)
+    , ("sl", extractSpaces emph <$> inlines)
+    , ("bf", extractSpaces strong <$> inlines)
+    , ("tt", code . stringify . toList <$> inlines)
+    , ("rm", inlines)
+    , ("itshape", extractSpaces emph <$> inlines)
+    , ("slshape", extractSpaces emph <$> inlines)
+    , ("scshape", extractSpaces smallcaps <$> inlines)
+    , ("bfseries", extractSpaces strong <$> inlines)
+    , ("MakeUppercase", makeUppercase <$> tok)
+    , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase
+    , ("uppercase", makeUppercase <$> tok)
+    , ("MakeLowercase", makeLowercase <$> tok)
+    , ("MakeTextLowercase", makeLowercase <$> tok)
+    , ("lowercase", makeLowercase <$> tok)
+    , ("thanks", skipopts >> note <$> grouped block)
+    , ("footnote", skipopts >> note <$> grouped block)
+    , ("passthrough", tok) -- \passthrough macro used by latex writer
+                           -- for listings
+    , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
+                    bracedUrl)
+    , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
+    , ("href", do url <- bracedUrl
+                  sp
+                  link (unescapeURL $ untokenize url) "" <$> tok)
+    , ("includegraphics", do options <- option [] keyvals
+                             src <- braced
+                             mkImage options . unescapeURL . removeDoubleQuotes $
+                                 untokenize src)
+    , ("enquote*", enquote True Nothing)
+    , ("enquote", enquote False Nothing)
+    -- foreignquote is supposed to use native quote marks
+    , ("foreignquote*", braced >>= enquote True . Just . untokenize)
+    , ("foreignquote", braced >>= enquote False . Just . untokenize)
+    -- hypehnquote uses regular quotes
+    , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
+    , ("hyphenquote", braced >>= enquote False . Just . untokenize)
+    , ("hyperlink", hyperlink)
+    , ("hypertarget", hypertargetInline)
+    -- hyphenat
+    , ("nohyphens", tok)
+    , ("textnhtt", ttfamily)
+    , ("nhttfamily", ttfamily)
+    -- LaTeX colors
+    , ("textcolor", coloredInline "color")
+    , ("colorbox", coloredInline "background-color")
+    -- xspace
+    , ("xspace", doxspace)
+    -- etoolbox
+    , ("ifstrequal", ifstrequal)
+    , ("newtoggle", braced >>= newToggle)
+    , ("toggletrue", braced >>= setToggle True)
+    , ("togglefalse", braced >>= setToggle False)
+    , ("iftoggle", try $ ifToggle >> inline)
+    -- biblatex misc
+    , ("RN", romanNumeralUpper)
+    , ("Rn", romanNumeralLower)
+    -- babel
+    , ("foreignlanguage", foreignlanguage)
+    -- include
+    , ("input", rawInlineOr "input" $ include "input")
+    -- soul package
+    , ("ul", underline <$> tok)
+    -- ulem package
+    , ("uline", underline <$> tok)
+    -- plain tex stuff that should just be passed through as raw tex
+    , ("ifdim", ifdim)
+    -- bibtex
+    , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+    , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+    , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+    , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+    , ("mkbibparens",
+         spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+    , ("mkbibbrackets",
+         spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+    , ("autocap", spanWith nullAttr <$> tok)
+    , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+    , ("bibstring",
+         (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+           <$> braced)
+    , ("adddot", pure (str "."))
+    , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+    , ("addabbrvspace", pure space)
+    , ("hyphen", pure (str "-"))
+    ]
 
 lettrine :: PandocMonad m => LP m Inlines
 lettrine = do
@@ -766,9 +597,6 @@ ifToggle = do
                   report $ UndefinedToggle name' pos
   return ()
 
-doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
-doTerm term = str <$> translateTerm term
-
 ifstrequal :: (PandocMonad m, Monoid a) => LP m a
 ifstrequal = do
   str1 <- tok
@@ -789,13 +617,6 @@ coloredInline stylename = do
 ttfamily :: PandocMonad m => LP m Inlines
 ttfamily = code . stringify . toList <$> tok
 
-rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
-rawInlineOr name' fallback = do
-  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
-  if parseRaw
-     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
-     else fallback
-
 processHBox :: Inlines -> Inlines
 processHBox = walk convert
   where
@@ -846,25 +667,6 @@ treatAsInline = Set.fromList
   , "pagebreak"
   ]
 
-dolabel :: PandocMonad m => LP m Inlines
-dolabel = do
-  v <- braced
-  let refstr = untokenize v
-  updateState $ \st ->
-    st{ sLastLabel = Just refstr }
-  return $ spanWith (refstr,[],[("label", refstr)])
-    $ inBrackets $ str $ untokenize v
-
-doref :: PandocMonad m => Text -> LP m Inlines
-doref cls = do
-  v <- braced
-  let refstr = untokenize v
-  return $ linkWith ("",[],[ ("reference-type", cls)
-                           , ("reference", refstr)])
-                    ("#" <> refstr)
-                    ""
-                    (inBrackets $ str refstr)
-
 lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v
 lookupListDefault d = (fromMaybe d .) . lookupList
   where lookupList l m = msum $ map (`M.lookup` m) l
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
new file mode 100644
index 000000000..66014a77f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -0,0 +1,275 @@
+{-# LANGUAGE OverloadedStrings     #-}
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Inline
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Inline
+  ( acronymCommands
+  , verbCommands
+  , charCommands
+  , nameCommands
+  , refCommands
+  , rawInlineOr
+  , listingsLanguage
+  )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Builder
+import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
+import Control.Applicative (optional)
+import Control.Monad (guard, mzero, mplus)
+import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
+import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
+                            manyTill, getInput, setInput, incSourceColumn,
+                            option)
+import Text.Pandoc.Highlighting (fromListingsLanguage,)
+import Data.Maybe (maybeToList)
+import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Text.Pandoc.Translations as Translations
+
+rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
+rawInlineOr name' fallback = do
+  parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions
+  if parseRaw
+     then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
+     else fallback
+
+dolabel :: PandocMonad m => LP m Inlines
+dolabel = do
+  v <- braced
+  let refstr = untokenize v
+  updateState $ \st ->
+    st{ sLastLabel = Just refstr }
+  return $ spanWith (refstr,[],[("label", refstr)])
+    $ inBrackets $ str $ untokenize v
+
+doref :: PandocMonad m => Text -> LP m Inlines
+doref cls = do
+  v <- braced
+  let refstr = untokenize v
+  return $ linkWith ("",[],[ ("reference-type", cls)
+                           , ("reference", refstr)])
+                    ("#" <> refstr)
+                    ""
+                    (inBrackets $ str refstr)
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = str "[" <> x <> str "]"
+
+doTerm :: PandocMonad m => Translations.Term -> LP m Inlines
+doTerm term = str <$> translateTerm term
+
+lit :: Text -> LP m Inlines
+lit = pure . str
+
+doverb :: PandocMonad m => LP m Inlines
+doverb = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  withVerbatimMode $
+    code . untokenize <$>
+      manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
+
+verbTok :: PandocMonad m => Char -> LP m Tok
+verbTok stopchar = do
+  t@(Tok pos toktype txt) <- anyTok
+  case T.findIndex (== stopchar) txt of
+       Nothing -> return t
+       Just i  -> do
+         let (t1, t2) = T.splitAt i txt
+         inp <- getInput
+         setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar)
+                  : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
+         return $ Tok pos toktype t1
+
+listingsLanguage :: [(Text, Text)] -> Maybe Text
+listingsLanguage opts =
+  case lookup "language" opts of
+    Nothing  -> Nothing
+    Just l   -> fromListingsLanguage l `mplus` Just l
+
+dolstinline :: PandocMonad m => LP m Inlines
+dolstinline = do
+  options <- option [] keyvals
+  let classes = maybeToList $ listingsLanguage options
+  doinlinecode classes
+
+domintinline :: PandocMonad m => LP m Inlines
+domintinline = do
+  skipopts
+  cls <- untokenize <$> braced
+  doinlinecode [cls]
+
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
+doinlinecode classes = do
+  Tok _ Symbol t <- anySymbol
+  marker <- case T.uncons t of
+              Just (c, ts) | T.null ts -> return c
+              _            -> mzero
+  let stopchar = if marker == '{' then '}' else marker
+  withVerbatimMode $
+    codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$>
+      manyTill (verbTok stopchar) (symbol stopchar)
+
+nlToSpace :: Char -> Char
+nlToSpace '\n' = ' '
+nlToSpace x    = x
+
+
+
+verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+verbCommands = M.fromList
+  [ ("verb", doverb)
+  , ("lstinline", dolstinline)
+  , ("mintinline", domintinline)
+  , ("Verb", doverb)
+  ]
+
+
+
+charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+charCommands = M.fromList
+  [ ("ldots", lit "…")
+  , ("vdots", lit "\8942")
+  , ("dots", lit "…")
+  , ("mdots", lit "…")
+  , ("sim", lit "~")
+  , ("sep", lit ",")
+  , ("P", lit "¶")
+  , ("S", lit "§")
+  , ("$", lit "$")
+  , ("%", lit "%")
+  , ("&", lit "&")
+  , ("#", lit "#")
+  , ("_", lit "_")
+  , ("{", lit "{")
+  , ("}", lit "}")
+  , ("qed", lit "\a0\x25FB")
+  , ("/", pure mempty) -- italic correction
+  , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
+                            guard $ not inTableCell
+                            optional rawopt
+                            spaces))
+  , (",", lit "\8198")
+  , ("@", pure mempty)
+  , (" ", lit "\160")
+  , ("ps", pure $ str "PS." <> space)
+  , ("TeX", lit "TeX")
+  , ("LaTeX", lit "LaTeX")
+  , ("bar", lit "|")
+  , ("textless", lit "<")
+  , ("textgreater", lit ">")
+  , ("textbackslash", lit "\\")
+  , ("backslash", lit "\\")
+  , ("slash", lit "/")
+  -- fontawesome
+  , ("faCheck", lit "\10003")
+  , ("faClose", lit "\10007")
+  -- hyphenat
+  , ("bshyp", lit "\\\173")
+  , ("fshyp", lit "/\173")
+  , ("dothyp", lit ".\173")
+  , ("colonhyp", lit ":\173")
+  , ("hyp", lit "-")
+  ]
+
+nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+nameCommands = M.fromList
+  [ ("figurename", doTerm Translations.Figure)
+  , ("prefacename", doTerm Translations.Preface)
+  , ("refname", doTerm Translations.References)
+  , ("bibname", doTerm Translations.Bibliography)
+  , ("chaptername", doTerm Translations.Chapter)
+  , ("partname", doTerm Translations.Part)
+  , ("contentsname", doTerm Translations.Contents)
+  , ("listfigurename", doTerm Translations.ListOfFigures)
+  , ("listtablename", doTerm Translations.ListOfTables)
+  , ("indexname", doTerm Translations.Index)
+  , ("abstractname", doTerm Translations.Abstract)
+  , ("tablename", doTerm Translations.Table)
+  , ("enclname", doTerm Translations.Encl)
+  , ("ccname", doTerm Translations.Cc)
+  , ("headtoname", doTerm Translations.To)
+  , ("pagename", doTerm Translations.Page)
+  , ("seename", doTerm Translations.See)
+  , ("seealsoname", doTerm Translations.SeeAlso)
+  , ("proofname", doTerm Translations.Proof)
+  , ("glossaryname", doTerm Translations.Glossary)
+  , ("lstlistingname", doTerm Translations.Listing)
+  ]
+
+refCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+refCommands = M.fromList
+  [ ("label", rawInlineOr "label" dolabel)
+  , ("ref", rawInlineOr "ref" $ doref "ref")
+  , ("cref", rawInlineOr "cref" $ doref "ref")       -- from cleveref.sty
+  , ("vref", rawInlineOr "vref" $ doref "ref+page")  -- from varioref.sty
+  , ("eqref", rawInlineOr "eqref" $ doref "eqref")   -- from amsmath.sty
+  ]
+
+acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines)
+acronymCommands = M.fromList
+  -- glossaries package
+  [ ("gls", doAcronym "short")
+  , ("Gls", doAcronym "short")
+  , ("glsdesc", doAcronym "long")
+  , ("Glsdesc", doAcronym "long")
+  , ("GLSdesc", doAcronym "long")
+  , ("acrlong", doAcronym "long")
+  , ("Acrlong", doAcronym "long")
+  , ("acrfull", doAcronym "full")
+  , ("Acrfull", doAcronym "full")
+  , ("acrshort", doAcronym "abbrv")
+  , ("Acrshort", doAcronym "abbrv")
+  , ("glspl", doAcronymPlural "short")
+  , ("Glspl", doAcronymPlural "short")
+  , ("glsdescplural", doAcronymPlural "long")
+  , ("Glsdescplural", doAcronymPlural "long")
+  , ("GLSdescplural", doAcronymPlural "long")
+  -- acronyms package
+  , ("ac", doAcronym "short")
+  , ("acf", doAcronym "full")
+  , ("acs", doAcronym "abbrv")
+  , ("acl", doAcronym "long")
+  , ("acp", doAcronymPlural "short")
+  , ("acfp", doAcronymPlural "full")
+  , ("acsp", doAcronymPlural "abbrv")
+  , ("aclp", doAcronymPlural "long")
+  , ("Ac", doAcronym "short")
+  , ("Acf", doAcronym "full")
+  , ("Acs", doAcronym "abbrv")
+  , ("Acl", doAcronym "long")
+  , ("Acp", doAcronymPlural "short")
+  , ("Acfp", doAcronymPlural "full")
+  , ("Acsp", doAcronymPlural "abbrv")
+  , ("Aclp", doAcronymPlural "long")
+  ]
+
+doAcronym :: PandocMonad m => Text -> LP m Inlines
+doAcronym form = do
+  acro <- braced
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "singular+" <> form)])
+    $ str $ untokenize acro]
+
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
+doAcronymPlural form = do
+  acro <- braced
+  let plural = str "s"
+  return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+    ("acronym-form", "plural+" <> form)]) $
+   mconcat [str $ untokenize acro, plural]]
+
+
-- 
cgit v1.2.3


From 044bc44fc621e421b74367765022f108494b4e2e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 11:08:02 -0800
Subject: Moved more into T.P.Readers.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs        | 84 +++------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 65 +++++++++++++++++++++++--
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 30 +++++++++++-
 3 files changed, 97 insertions(+), 82 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index a27135fd2..4ec038b94 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -34,7 +34,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (Lang (..), renderLang)
+import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
@@ -58,11 +58,12 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        theoremEnvironment)
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
-import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
+import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
+                                         biblatexInlineCommands,
                                          verbCommands, rawInlineOr,
                                          listingsLanguage)
 import Text.Pandoc.Shared
@@ -350,8 +351,7 @@ unescapeURL = T.concat . go . T.splitOn "\\"
 
 inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 inlineCommands = M.unions
-  [ inlineLanguageCommands
-  , accentCommands tok
+  [ accentCommands tok
   , citationCommands inline
   , siunitxCommands tok
   , acronymCommands
@@ -359,6 +359,8 @@ inlineCommands = M.unions
   , nameCommands
   , verbCommands
   , charCommands
+  , inlineLanguageCommands tok
+  , biblatexInlineCommands tok
   , rest ]
  where
   rest = M.fromList
@@ -373,12 +375,6 @@ inlineCommands = M.unions
     , ("texttt", ttfamily)
     , ("sout", extractSpaces strikeout <$> tok)
     , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer
-    , ("lq", return (str "‘"))
-    , ("rq", return (str "’"))
-    , ("textquoteleft", return (str "‘"))
-    , ("textquoteright", return (str "’"))
-    , ("textquotedblleft", return (str "“"))
-    , ("textquotedblright", return (str "”"))
     , ("textsuperscript", extractSpaces superscript <$> tok)
     , ("textsubscript", extractSpaces subscript <$> tok)
     , ("textbf", extractSpaces strong <$> tok)
@@ -447,11 +443,6 @@ inlineCommands = M.unions
     , ("toggletrue", braced >>= setToggle True)
     , ("togglefalse", braced >>= setToggle False)
     , ("iftoggle", try $ ifToggle >> inline)
-    -- biblatex misc
-    , ("RN", romanNumeralUpper)
-    , ("Rn", romanNumeralLower)
-    -- babel
-    , ("foreignlanguage", foreignlanguage)
     -- include
     , ("input", rawInlineOr "input" $ include "input")
     -- soul package
@@ -460,24 +451,6 @@ inlineCommands = M.unions
     , ("uline", underline <$> tok)
     -- plain tex stuff that should just be passed through as raw tex
     , ("ifdim", ifdim)
-    -- bibtex
-    , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
-    , ("mkbibemph", spanWith nullAttr . emph <$> tok)
-    , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
-    , ("mkbibbold", spanWith nullAttr . strong <$> tok)
-    , ("mkbibparens",
-         spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
-    , ("mkbibbrackets",
-         spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
-    , ("autocap", spanWith nullAttr <$> tok)
-    , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
-    , ("bibstring",
-         (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
-           <$> braced)
-    , ("adddot", pure (str "."))
-    , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
-    , ("addabbrvspace", pure space)
-    , ("hyphen", pure (str "-"))
     ]
 
 lettrine :: PandocMonad m => LP m Inlines
@@ -502,26 +475,6 @@ alterStr :: (Text -> Text) -> Inline -> Inline
 alterStr f (Str xs) = Str (f xs)
 alterStr _ x = x
 
-foreignlanguage :: PandocMonad m => LP m Inlines
-foreignlanguage = do
-  babelLang <- untokenize <$> braced
-  case babelLangToBCP47 babelLang of
-       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
-       _ -> tok
-
-inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
-  where
-    mk (polyglossia, bcp47Func) =
-      ("text" <> polyglossia, inlineLanguage bcp47Func)
-
-inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines
-inlineLanguage bcp47Func = do
-  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
-                <$> rawopt
-  let lang = renderLang $ bcp47Func o
-  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
-
 hyperlink :: PandocMonad m => LP m Inlines
 hyperlink = try $ do
   src <- untokenize <$> braced
@@ -542,31 +495,6 @@ hypertargetInline = try $ do
   ils <- grouped inline
   return $ spanWith (ref, [], []) ils
 
-romanNumeralUpper :: (PandocMonad m) => LP m Inlines
-romanNumeralUpper =
-  str . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralLower :: (PandocMonad m) => LP m Inlines
-romanNumeralLower =
-  str . T.toLower . toRomanNumeral <$> romanNumeralArg
-
-romanNumeralArg :: (PandocMonad m) => LP m Int
-romanNumeralArg = spaces *> (parser <|> inBraces)
-  where
-    inBraces = do
-      symbol '{'
-      spaces
-      res <- parser
-      spaces
-      symbol '}'
-      return res
-    parser = do
-      s <- untokenize <$> many1 (satisfyTok isWordTok)
-      let (digits, rest) = T.span isDigit s
-      unless (T.null rest) $
-        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
-      safeRead digits
-
 newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
 newToggle name = do
   updateState $ \st ->
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 66014a77f..37c29188e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -13,6 +13,7 @@ module Text.Pandoc.Readers.LaTeX.Inline
   , verbCommands
   , charCommands
   , nameCommands
+  , biblatexInlineCommands
   , refCommands
   , rawInlineOr
   , listingsLanguage
@@ -23,15 +24,17 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder
+import Text.Pandoc.Shared (toRomanNumeral, safeRead)
 import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
-import Control.Applicative (optional)
-import Control.Monad (guard, mzero, mplus)
+import Control.Applicative (optional, (<|>))
+import Control.Monad (guard, mzero, mplus, unless)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option)
+                            option, many1)
+import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -127,6 +130,31 @@ nlToSpace :: Char -> Char
 nlToSpace '\n' = ' '
 nlToSpace x    = x
 
+romanNumeralUpper :: (PandocMonad m) => LP m Inlines
+romanNumeralUpper =
+  str . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralLower :: (PandocMonad m) => LP m Inlines
+romanNumeralLower =
+  str . T.toLower . toRomanNumeral <$> romanNumeralArg
+
+romanNumeralArg :: (PandocMonad m) => LP m Int
+romanNumeralArg = spaces *> (parser <|> inBraces)
+  where
+    inBraces = do
+      symbol '{'
+      spaces
+      res <- parser
+      spaces
+      symbol '}'
+      return res
+    parser = do
+      s <- untokenize <$> many1 (satisfyTok isWordTok)
+      let (digits, rest) = T.span isDigit s
+      unless (T.null rest) $
+        Prelude.fail "Non-digits in argument to \\Rn or \\RN"
+      safeRead digits
+
 
 
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
@@ -157,6 +185,12 @@ charCommands = M.fromList
   , ("{", lit "{")
   , ("}", lit "}")
   , ("qed", lit "\a0\x25FB")
+  , ("lq", return (str "‘"))
+  , ("rq", return (str "’"))
+  , ("textquoteleft", return (str "‘"))
+  , ("textquoteright", return (str "’"))
+  , ("textquotedblleft", return (str "“"))
+  , ("textquotedblright", return (str "”"))
   , ("/", pure mempty) -- italic correction
   , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
                             guard $ not inTableCell
@@ -185,6 +219,31 @@ charCommands = M.fromList
   , ("hyp", lit "-")
   ]
 
+biblatexInlineCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+biblatexInlineCommands tok = M.fromList
+  -- biblatex misc
+  [ ("RN", romanNumeralUpper)
+  , ("Rn", romanNumeralLower)
+  , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+  , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+  , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+  , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+  , ("mkbibparens",
+       spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+  , ("mkbibbrackets",
+       spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+  , ("autocap", spanWith nullAttr <$> tok)
+  , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+  , ("bibstring",
+       (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+         <$> braced)
+  , ("adddot", pure (str "."))
+  , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+  , ("addabbrvspace", pure space)
+  , ("hyphen", pure (str "-"))
+  ]
+
 nameCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 nameCommands = M.fromList
   [ ("figurename", doTerm Translations.Figure)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index adbeaa6d4..24acbdbe4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -15,15 +15,43 @@ module Text.Pandoc.Readers.LaTeX.Lang
   ( setDefaultLanguage
   , polyglossiaLangToBCP47
   , babelLangToBCP47
+  , inlineLanguageCommands
   )
 where
 import qualified Data.Map as M
+import Data.Text (Text)
 import qualified Data.Text as T
+import Text.Pandoc.Shared (extractSpaces)
 import Text.Pandoc.BCP47 (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option)
-import Text.Pandoc.Builder (Blocks, setMeta, str)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith)
+
+foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
+foreignlanguage tok = do
+  babelLang <- untokenize <$> braced
+  case babelLangToBCP47 babelLang of
+       Just lang -> spanWith ("", [], [("lang",  renderLang lang)]) <$> tok
+       _ -> tok
+
+inlineLanguageCommands :: PandocMonad m
+                       => LP m Inlines -> M.Map Text (LP m Inlines)
+inlineLanguageCommands tok =
+  M.fromList $
+    ("foreignlanguage", foreignlanguage tok) :
+    (mk <$> M.toList polyglossiaLangToBCP47)
+  where
+    mk (polyglossia, bcp47Func) =
+      ("text" <> polyglossia, inlineLanguage tok bcp47Func)
+
+inlineLanguage :: PandocMonad m
+               => LP m Inlines -> (Text -> Lang) -> LP m Inlines
+inlineLanguage tok bcp47Func = do
+  o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
+                <$> rawopt
+  let lang = renderLang $ bcp47Func o
+  extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
 
 setDefaultLanguage :: PandocMonad m => LP m Blocks
 setDefaultLanguage = do
-- 
cgit v1.2.3


From da5e9e5956aae3ac83edef7831939553360b8964 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 11:22:42 -0800
Subject: Move enquote commands to T.P.LaTeX.Lang.

---
 src/Text/Pandoc/Readers/LaTeX.hs        | 22 ++-------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs |  2 --
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 34 +++++++++++++++++++++++++++++++--
 3 files changed, 34 insertions(+), 24 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 4ec038b94..a4261bbeb 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -59,6 +59,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
 import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
 import Text.Pandoc.Readers.LaTeX.Macro (macroDef)
 import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
+                                       enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
@@ -293,18 +294,6 @@ quoted' f starter ender = do
 lit :: Text -> LP m Inlines
 lit = pure . str
 
-enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
-enquote starred mblang = do
-  skipopts
-  let lang = mblang >>= babelLangToBCP47
-  let langspan = case lang of
-                      Nothing -> id
-                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
-  quoteContext <- sQuoteContext <$> getState
-  if starred || quoteContext == InDoubleQuote
-     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
-     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
-
 blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks
 blockquote cvariant mblang = do
   citepar <- if cvariant
@@ -359,6 +348,7 @@ inlineCommands = M.unions
   , nameCommands
   , verbCommands
   , charCommands
+  , enquoteCommands tok
   , inlineLanguageCommands tok
   , biblatexInlineCommands tok
   , rest ]
@@ -418,14 +408,6 @@ inlineCommands = M.unions
                              src <- braced
                              mkImage options . unescapeURL . removeDoubleQuotes $
                                  untokenize src)
-    , ("enquote*", enquote True Nothing)
-    , ("enquote", enquote False Nothing)
-    -- foreignquote is supposed to use native quote marks
-    , ("foreignquote*", braced >>= enquote True . Just . untokenize)
-    , ("foreignquote", braced >>= enquote False . Just . untokenize)
-    -- hypehnquote uses regular quotes
-    , ("hyphenquote*", braced >>= enquote True . Just . untokenize)
-    , ("hyphenquote", braced >>= enquote False . Just . untokenize)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 37c29188e..8bdff58f7 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -155,8 +155,6 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
       safeRead digits
 
-
-
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 verbCommands = M.fromList
   [ ("verb", doverb)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 24acbdbe4..08e217bdb 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Lang
   ( setDefaultLanguage
   , polyglossiaLangToBCP47
   , babelLangToBCP47
+  , enquoteCommands
   , inlineLanguageCommands
   )
 where
@@ -25,8 +26,37 @@ import Text.Pandoc.Shared (extractSpaces)
 import Text.Pandoc.BCP47 (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Parsing (updateState, option)
-import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith)
+import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
+                            withQuoteContext)
+import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith,
+                            singleQuoted, doubleQuoted)
+
+enquote :: PandocMonad m
+        => LP m Inlines
+        -> Bool -> Maybe Text -> LP m Inlines
+enquote tok starred mblang = do
+  skipopts
+  let lang = mblang >>= babelLangToBCP47
+  let langspan = case lang of
+                      Nothing -> id
+                      Just l  -> spanWith ("",[],[("lang", renderLang l)])
+  quoteContext <- sQuoteContext <$> getState
+  if starred || quoteContext == InDoubleQuote
+     then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok
+     else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok
+
+enquoteCommands :: PandocMonad m
+                => LP m Inlines -> M.Map Text (LP m Inlines)
+enquoteCommands tok = M.fromList
+  [ ("enquote*", enquote tok True Nothing)
+  , ("enquote", enquote tok False Nothing)
+  -- foreignquote is supposed to use native quote marks
+  , ("foreignquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("foreignquote", braced >>= enquote tok False . Just . untokenize)
+  -- hypehnquote uses regular quotes
+  , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize)
+  , ("hyphenquote", braced >>= enquote tok False . Just . untokenize)
+  ]
 
 foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines
 foreignlanguage tok = do
-- 
cgit v1.2.3


From 33e4c8dd6c2bbc8109880f43b379d074ceb38391 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 18:21:32 -0800
Subject: Remove T.P.Readers.LaTeX.Accent.

Incorporate accentCommands into T.P.Readers.LaTeX.Inline.
---
 pandoc.cabal                            |  1 -
 src/Text/Pandoc/Readers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 ---------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs | 71 ++++++++++++++++++++++++++++--
 4 files changed, 69 insertions(+), 83 deletions(-)
 delete mode 100644 src/Text/Pandoc/Readers/LaTeX/Accent.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 211327642..090c28287 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,7 +626,6 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Accent,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index a4261bbeb..552411db8 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -49,7 +49,6 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..))
 import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
 import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites)
 import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments,
                                        inlineEnvironment,
@@ -64,6 +63,7 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
+                                         accentCommands,
                                          biblatexInlineCommands,
                                          verbCommands, rawInlineOr,
                                          listingsLanguage)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
deleted file mode 100644
index f8c53491c..000000000
--- a/src/Text/Pandoc/Readers/LaTeX/Accent.hs
+++ /dev/null
@@ -1,78 +0,0 @@
-{-# LANGUAGE ViewPatterns #-}
-{-# LANGUAGE OverloadedStrings #-}
-module Text.Pandoc.Readers.LaTeX.Accent
-  ( accentCommands )
-where
-
-import Text.Pandoc.Class
-import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Builder as B
-import qualified Data.Map as M
-import Data.Text (Text)
-import Data.Maybe (fromMaybe)
-import Text.Pandoc.Parsing
-import qualified Data.Text as T
-import qualified Data.Text.Normalize as Normalize
-
-accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
-accentCommands tok =
-  let accent = accentWith tok
-      lit = pure . str
-  in M.fromList
-  [ ("aa", lit "å")
-  , ("AA", lit "Å")
-  , ("ss", lit "ß")
-  , ("o", lit "ø")
-  , ("O", lit "Ø")
-  , ("L", lit "Ł")
-  , ("l", lit "ł")
-  , ("ae", lit "æ")
-  , ("AE", lit "Æ")
-  , ("oe", lit "œ")
-  , ("OE", lit "Œ")
-  , ("pounds", lit "£")
-  , ("euro", lit "€")
-  , ("copyright", lit "©")
-  , ("textasciicircum", lit "^")
-  , ("textasciitilde", lit "~")
-  , ("H", accent '\779' Nothing) -- hungarumlaut
-  , ("`", accent '\768' (Just '`')) -- grave
-  , ("'", accent '\769' (Just '\'')) -- acute
-  , ("^", accent '\770' (Just '^')) -- circ
-  , ("~", accent '\771' (Just '~')) -- tilde
-  , ("\"", accent '\776' Nothing) -- umlaut
-  , (".", accent '\775' Nothing) -- dot
-  , ("=", accent '\772' Nothing) -- macron
-  , ("|", accent '\781' Nothing) -- vertical line above
-  , ("b", accent '\817' Nothing) -- macron below
-  , ("c", accent '\807' Nothing) -- cedilla
-  , ("G", accent '\783' Nothing) -- doublegrave
-  , ("h", accent '\777' Nothing) -- hookabove
-  , ("d", accent '\803' Nothing) -- dotbelow
-  , ("f", accent '\785' Nothing)  -- inverted breve
-  , ("r", accent '\778' Nothing)  -- ringabove
-  , ("t", accent '\865' Nothing)  -- double inverted breve
-  , ("U", accent '\782' Nothing)  -- double vertical line above
-  , ("v", accent '\780' Nothing) -- hacek
-  , ("u", accent '\774' Nothing) -- breve
-  , ("k", accent '\808' Nothing) -- ogonek
-  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
-  , ("i", lit "ı")  -- dotless i
-  , ("j", lit "ȷ")  -- dotless j
-  , ("newtie", accent '\785' Nothing) -- inverted breve
-  , ("textcircled", accent '\8413' Nothing) -- combining circle
-  ]
-
-accentWith :: PandocMonad m
-           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
-accentWith tok combiningAccent fallBack = try $ do
-  ils <- tok
-  case toList ils of
-       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-         -- try to normalize to the combined character:
-         Str (Normalize.normalize Normalize.NFC
-               (T.pack [x, combiningAccent]) <> xs) : ys
-       [Space]           -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       []                -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
-       _                 -> return ils
-
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 8bdff58f7..7b8bca4af 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings     #-}
+{-# LANGUAGE ViewPatterns          #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Inline
    Copyright   : Copyright (C) 2006-2021 John MacFarlane
@@ -12,6 +13,7 @@ module Text.Pandoc.Readers.LaTeX.Inline
   ( acronymCommands
   , verbCommands
   , charCommands
+  , accentCommands
   , nameCommands
   , biblatexInlineCommands
   , refCommands
@@ -33,11 +35,12 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1)
+                            option, many1, try)
 import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
-import Data.Maybe (maybeToList)
+import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
+import qualified Data.Text.Normalize as Normalize
 import qualified Text.Pandoc.Translations as Translations
 
 rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines
@@ -155,6 +158,22 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
         Prelude.fail "Non-digits in argument to \\Rn or \\RN"
       safeRead digits
 
+accentWith :: PandocMonad m
+           => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+  ils <- tok
+  case toList ils of
+       (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+         -- try to normalize to the combined character:
+         Str (Normalize.normalize Normalize.NFC
+               (T.pack [x, combiningAccent]) <> xs) : ys
+       [Space] -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       []      -> return $ str $ T.singleton
+                         $ fromMaybe combiningAccent fallBack
+       _       -> return ils
+
+
 verbCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 verbCommands = M.fromList
   [ ("verb", doverb)
@@ -163,7 +182,53 @@ verbCommands = M.fromList
   , ("Verb", doverb)
   ]
 
-
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+  let accent = accentWith tok
+  in  M.fromList
+  [ ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("oe", lit "œ")
+  , ("OE", lit "Œ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("textasciicircum", lit "^")
+  , ("textasciitilde", lit "~")
+  , ("H", accent '\779' Nothing) -- hungarumlaut
+  , ("`", accent '\768' (Just '`')) -- grave
+  , ("'", accent '\769' (Just '\'')) -- acute
+  , ("^", accent '\770' (Just '^')) -- circ
+  , ("~", accent '\771' (Just '~')) -- tilde
+  , ("\"", accent '\776' Nothing) -- umlaut
+  , (".", accent '\775' Nothing) -- dot
+  , ("=", accent '\772' Nothing) -- macron
+  , ("|", accent '\781' Nothing) -- vertical line above
+  , ("b", accent '\817' Nothing) -- macron below
+  , ("c", accent '\807' Nothing) -- cedilla
+  , ("G", accent '\783' Nothing) -- doublegrave
+  , ("h", accent '\777' Nothing) -- hookabove
+  , ("d", accent '\803' Nothing) -- dotbelow
+  , ("f", accent '\785' Nothing)  -- inverted breve
+  , ("r", accent '\778' Nothing)  -- ringabove
+  , ("t", accent '\865' Nothing)  -- double inverted breve
+  , ("U", accent '\782' Nothing)  -- double vertical line above
+  , ("v", accent '\780' Nothing) -- hacek
+  , ("u", accent '\774' Nothing) -- breve
+  , ("k", accent '\808' Nothing) -- ogonek
+  , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+  , ("i", lit "ı")  -- dotless i
+  , ("j", lit "ȷ")  -- dotless j
+  , ("newtie", accent '\785' Nothing) -- inverted breve
+  , ("textcircled", accent '\8413' Nothing) -- combining circle
+  ]
 
 charCommands :: PandocMonad m => M.Map Text (LP m Inlines)
 charCommands = M.fromList
-- 
cgit v1.2.3


From b569b0226d4bd5e0699077089d54fb03d4394b7d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 18:47:17 -0800
Subject: Add T.P.Readers.LaTeX.Include.

---
 pandoc.cabal                             |  1 +
 src/Text/Pandoc/Readers/LaTeX.hs         | 57 ++++-----------------------
 src/Text/Pandoc/Readers/LaTeX/Include.hs | 66 ++++++++++++++++++++++++++++++++
 src/Text/Pandoc/Readers/LaTeX/Inline.hs  | 15 +++++++-
 4 files changed, 87 insertions(+), 52 deletions(-)
 create mode 100644 src/Text/Pandoc/Readers/LaTeX/Include.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 090c28287..11d34a19a 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,6 +626,7 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
+                   Text.Pandoc.Readers.LaTeX.Include,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 552411db8..dd6c2a1fa 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,9 +38,8 @@ import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      readFileFromDirs, report,
-                                      setResourcePath)
-import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
+                                      report, setResourcePath)
+import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
@@ -61,6 +60,8 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
+import Text.Pandoc.Readers.LaTeX.Include (insertIncluded,
+                                          readFileFromTexinputs)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
                                          accentCommands,
@@ -235,19 +236,10 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
 
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
-  Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+  fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
 
 doubleQuote :: PandocMonad m => LP m Inlines
 doubleQuote =
@@ -406,8 +398,8 @@ inlineCommands = M.unions
                   link (unescapeURL $ untokenize url) "" <$> tok)
     , ("includegraphics", do options <- option [] keyvals
                              src <- braced
-                             mkImage options . unescapeURL . removeDoubleQuotes $
-                                 untokenize src)
+                             mkImage options . unescapeURL .
+                               removeDoubleQuotes $ untokenize src)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
@@ -417,8 +409,6 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
-    -- xspace
-    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
@@ -698,39 +688,6 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
-readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
-readFileFromTexinputs fp = do
-  fileContentsMap <- sFileContents <$> getState
-  case M.lookup (T.pack fp) fileContentsMap of
-    Just t -> return (Just t)
-    Nothing -> do
-      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
-               <$> lookupEnv "TEXINPUTS"
-      readFileFromDirs dirs fp
-
-insertIncluded :: PandocMonad m
-               => FilePath
-               -> FilePath
-               -> LP m ()
-insertIncluded defaultExtension f' = do
-  let f = case takeExtension f' of
-                ".tex" -> f'
-                ".sty" -> f'
-                _      -> addExtension f' defaultExtension
-  pos <- getPosition
-  containers <- getIncludeFiles <$> getState
-  when (T.pack f `elem` containers) $
-    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
-  updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromTexinputs f
-  contents <- case mbcontents of
-                   Just s -> return s
-                   Nothing -> do
-                     report $ CouldNotLoadIncludeFile (T.pack f) pos
-                     return ""
-  getInput >>= setInput . (tokenize f contents ++)
-  updateState dropLatestIncludeFile
-
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Include.hs b/src/Text/Pandoc/Readers/LaTeX/Include.hs
new file mode 100644
index 000000000..618a89284
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Include.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE OverloadedStrings     #-}
+{- |
+   Module      : Text.Pandoc.Readers.LaTeX.Include
+   Copyright   : Copyright (C) 2006-2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+-}
+module Text.Pandoc.Readers.LaTeX.Include
+  ( readFileFromTexinputs
+  , insertIncluded
+  )
+where
+
+import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Pandoc.Shared (splitTextBy)
+import System.FilePath (takeExtension, addExtension)
+import Control.Monad (when)
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(PandocParseError))
+import Text.Pandoc.Logging (LogMessage(CouldNotLoadIncludeFile))
+import Text.Pandoc.Class (PandocMonad (..), readFileFromDirs, report)
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Parsing (updateState, getState, getInput, setInput,
+                            getPosition, addIncludeFile, getIncludeFiles,
+                            dropLatestIncludeFile)
+import Data.Maybe (fromMaybe)
+
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
+insertIncluded :: PandocMonad m
+               => FilePath
+               -> FilePath
+               -> LP m ()
+insertIncluded defaultExtension f' = do
+  let f = case takeExtension f' of
+                ".tex" -> f'
+                ".sty" -> f'
+                _      -> addExtension f' defaultExtension
+  pos <- getPosition
+  containers <- getIncludeFiles <$> getState
+  when (T.pack f `elem` containers) $
+    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
+  updateState $ addIncludeFile $ T.pack f
+  mbcontents <- readFileFromTexinputs f
+  contents <- case mbcontents of
+                   Just s -> return s
+                   Nothing -> do
+                     report $ CouldNotLoadIncludeFile (T.pack f) pos
+                     return ""
+  getInput >>= setInput . (tokenize f contents ++)
+  updateState dropLatestIncludeFile
+
+
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 7b8bca4af..3b37ee50e 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -35,8 +35,8 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1, try)
-import Data.Char (isDigit)
+                            option, many1, try, lookAhead)
+import Data.Char (isDigit, isLetter)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -50,6 +50,15 @@ rawInlineOr name' fallback = do
      then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
      else fallback
 
+doxspace :: PandocMonad m => LP m Inlines
+doxspace =
+  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
+  where startsWithLetter (Tok _ Word t) =
+          case T.uncons t of
+               Just (c, _) | isLetter c -> True
+               _           -> False
+        startsWithLetter _ = False
+
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -280,6 +289,8 @@ charCommands = M.fromList
   , ("dothyp", lit ".\173")
   , ("colonhyp", lit ":\173")
   , ("hyp", lit "-")
+  -- xspace
+  , ("xspace", doxspace)
   ]
 
 biblatexInlineCommands :: PandocMonad m
-- 
cgit v1.2.3


From 92ea8a0cb63241dbc8f89e73a359ac5efca2ab87 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 3 Mar 2021 19:07:16 -0800
Subject: Revert "Add T.P.Readers.LaTeX.Include."

This reverts commit b569b0226d4bd5e0699077089d54fb03d4394b7d.

Memory usage improvement in compilation wasn't very significant.
---
 pandoc.cabal                             |  1 -
 src/Text/Pandoc/Readers/LaTeX.hs         | 57 +++++++++++++++++++++++----
 src/Text/Pandoc/Readers/LaTeX/Include.hs | 66 --------------------------------
 src/Text/Pandoc/Readers/LaTeX/Inline.hs  | 15 +-------
 4 files changed, 52 insertions(+), 87 deletions(-)
 delete mode 100644 src/Text/Pandoc/Readers/LaTeX/Include.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 11d34a19a..090c28287 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -626,7 +626,6 @@ library
                    Text.Pandoc.Readers.HTML.Table,
                    Text.Pandoc.Readers.HTML.TagCategories,
                    Text.Pandoc.Readers.HTML.Types,
-                   Text.Pandoc.Readers.LaTeX.Include,
                    Text.Pandoc.Readers.LaTeX.Inline,
                    Text.Pandoc.Readers.LaTeX.Citation,
                    Text.Pandoc.Readers.LaTeX.Lang,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index dd6c2a1fa..552411db8 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,8 +38,9 @@ import Text.Pandoc.BCP47 (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
-                                      report, setResourcePath)
-import Text.Pandoc.Error (PandocError (PandocParsecError))
+                                      readFileFromDirs, report,
+                                      setResourcePath)
+import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
 import Text.Pandoc.Highlighting (languagesByExtension)
 import Text.Pandoc.ImageSize (numUnit, showFl)
 import Text.Pandoc.Logging
@@ -60,8 +61,6 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands,
                                        enquoteCommands,
                                        babelLangToBCP47, setDefaultLanguage)
 import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands)
-import Text.Pandoc.Readers.LaTeX.Include (insertIncluded,
-                                          readFileFromTexinputs)
 import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          nameCommands, charCommands,
                                          accentCommands,
@@ -236,10 +235,19 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
+doxspace :: PandocMonad m => LP m Inlines
+doxspace =
+  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
+  where startsWithLetter (Tok _ Word t) =
+          case T.uncons t of
+               Just (c, _) | isLetter c -> True
+               _           -> False
+        startsWithLetter _ = False
+
 
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
-  fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
+  Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
 
 doubleQuote :: PandocMonad m => LP m Inlines
 doubleQuote =
@@ -398,8 +406,8 @@ inlineCommands = M.unions
                   link (unescapeURL $ untokenize url) "" <$> tok)
     , ("includegraphics", do options <- option [] keyvals
                              src <- braced
-                             mkImage options . unescapeURL .
-                               removeDoubleQuotes $ untokenize src)
+                             mkImage options . unescapeURL . removeDoubleQuotes $
+                                 untokenize src)
     , ("hyperlink", hyperlink)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
@@ -409,6 +417,8 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
+    -- xspace
+    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
@@ -688,6 +698,39 @@ include name = do
   mapM_ (insertIncluded defaultExt) fs
   return mempty
 
+readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
+readFileFromTexinputs fp = do
+  fileContentsMap <- sFileContents <$> getState
+  case M.lookup (T.pack fp) fileContentsMap of
+    Just t -> return (Just t)
+    Nothing -> do
+      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
+               <$> lookupEnv "TEXINPUTS"
+      readFileFromDirs dirs fp
+
+insertIncluded :: PandocMonad m
+               => FilePath
+               -> FilePath
+               -> LP m ()
+insertIncluded defaultExtension f' = do
+  let f = case takeExtension f' of
+                ".tex" -> f'
+                ".sty" -> f'
+                _      -> addExtension f' defaultExtension
+  pos <- getPosition
+  containers <- getIncludeFiles <$> getState
+  when (T.pack f `elem` containers) $
+    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
+  updateState $ addIncludeFile $ T.pack f
+  mbcontents <- readFileFromTexinputs f
+  contents <- case mbcontents of
+                   Just s -> return s
+                   Nothing -> do
+                     report $ CouldNotLoadIncludeFile (T.pack f) pos
+                     return ""
+  getInput >>= setInput . (tokenize f contents ++)
+  updateState dropLatestIncludeFile
+
 authors :: PandocMonad m => LP m ()
 authors = try $ do
   bgroup
diff --git a/src/Text/Pandoc/Readers/LaTeX/Include.hs b/src/Text/Pandoc/Readers/LaTeX/Include.hs
deleted file mode 100644
index 618a89284..000000000
--- a/src/Text/Pandoc/Readers/LaTeX/Include.hs
+++ /dev/null
@@ -1,66 +0,0 @@
-{-# LANGUAGE OverloadedStrings     #-}
-{- |
-   Module      : Text.Pandoc.Readers.LaTeX.Include
-   Copyright   : Copyright (C) 2006-2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
--}
-module Text.Pandoc.Readers.LaTeX.Include
-  ( readFileFromTexinputs
-  , insertIncluded
-  )
-where
-
-import qualified Data.Map as M
-import Data.Text (Text)
-import qualified Data.Text as T
-import Text.Pandoc.Shared (splitTextBy)
-import System.FilePath (takeExtension, addExtension)
-import Control.Monad (when)
-import Control.Monad.Except (throwError)
-import Text.Pandoc.Error (PandocError(PandocParseError))
-import Text.Pandoc.Logging (LogMessage(CouldNotLoadIncludeFile))
-import Text.Pandoc.Class (PandocMonad (..), readFileFromDirs, report)
-import Text.Pandoc.Readers.LaTeX.Parsing
-import Text.Pandoc.Parsing (updateState, getState, getInput, setInput,
-                            getPosition, addIncludeFile, getIncludeFiles,
-                            dropLatestIncludeFile)
-import Data.Maybe (fromMaybe)
-
-readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text)
-readFileFromTexinputs fp = do
-  fileContentsMap <- sFileContents <$> getState
-  case M.lookup (T.pack fp) fileContentsMap of
-    Just t -> return (Just t)
-    Nothing -> do
-      dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "."
-               <$> lookupEnv "TEXINPUTS"
-      readFileFromDirs dirs fp
-
-insertIncluded :: PandocMonad m
-               => FilePath
-               -> FilePath
-               -> LP m ()
-insertIncluded defaultExtension f' = do
-  let f = case takeExtension f' of
-                ".tex" -> f'
-                ".sty" -> f'
-                _      -> addExtension f' defaultExtension
-  pos <- getPosition
-  containers <- getIncludeFiles <$> getState
-  when (T.pack f `elem` containers) $
-    throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
-  updateState $ addIncludeFile $ T.pack f
-  mbcontents <- readFileFromTexinputs f
-  contents <- case mbcontents of
-                   Just s -> return s
-                   Nothing -> do
-                     report $ CouldNotLoadIncludeFile (T.pack f) pos
-                     return ""
-  getInput >>= setInput . (tokenize f contents ++)
-  updateState dropLatestIncludeFile
-
-
diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
index 3b37ee50e..7b8bca4af 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs
@@ -35,8 +35,8 @@ import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Extensions (extensionEnabled, Extension(..))
 import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy,
                             manyTill, getInput, setInput, incSourceColumn,
-                            option, many1, try, lookAhead)
-import Data.Char (isDigit, isLetter)
+                            option, many1, try)
+import Data.Char (isDigit)
 import Text.Pandoc.Highlighting (fromListingsLanguage,)
 import Data.Maybe (maybeToList, fromMaybe)
 import Text.Pandoc.Options (ReaderOptions(..))
@@ -50,15 +50,6 @@ rawInlineOr name' fallback = do
      then rawInline "latex" <$> getRawCommand name' ("\\" <> name')
      else fallback
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
 dolabel :: PandocMonad m => LP m Inlines
 dolabel = do
   v <- braced
@@ -289,8 +280,6 @@ charCommands = M.fromList
   , ("dothyp", lit ".\173")
   , ("colonhyp", lit ":\173")
   , ("hyp", lit "-")
-  -- xspace
-  , ("xspace", doxspace)
   ]
 
 biblatexInlineCommands :: PandocMonad m
-- 
cgit v1.2.3


From e461b7dd45f717f3317216c7d3207a1d24bf1c85 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 4 Mar 2021 16:01:40 -0800
Subject: Relax `--abbreviations` rules so that a period isn't required.

Partially addresses #7124.
---
 MANUAL.txt                          | 6 +++---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 52b82a3d0..667a784e0 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -691,9 +691,9 @@ header when requesting a document from a URL:
     system default, use
     `pandoc --print-default-data-file=abbreviations`.  The only
     use pandoc makes of this list is in the Markdown reader.
-    Strings ending in a period that are found in this list will
-    be followed by a nonbreaking space, so that the period will
-    not produce sentence-ending space in formats like LaTeX.
+    Strings found in this list will be followed by a nonbreaking
+    space, and the period will not produce sentence-ending space
+    in formats like LaTeX.  The strings may not contain spaces.
 
 [`pandocfilters`]: https://github.com/jgm/pandocfilters
 [PHP]: https://github.com/vinai/pandocfilters-php
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index b1b99dfe5..34edbcc17 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1725,7 +1725,7 @@ str = do
   updateLastStrPos
   (do guardEnabled Ext_smart
       abbrevs <- getOption readerAbbreviations
-      if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
+      if result `Set.member` abbrevs
          then try (do ils <- whitespace
                       notFollowedBy (() <$ cite <|> () <$ note)
                       -- ?? lookAhead alphaNum
-- 
cgit v1.2.3


From 916ce4d51121e0529b938fda71f37e947882abe5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 4 Mar 2021 16:22:08 -0800
Subject: Revert "Relax `--abbreviations` rules so that a period isn't
 required."

This reverts commit e461b7dd45f717f3317216c7d3207a1d24bf1c85.

Ill-advised change.  This doesn't work because we parse
strings in chunks.
---
 MANUAL.txt                          | 6 +++---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 667a784e0..52b82a3d0 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -691,9 +691,9 @@ header when requesting a document from a URL:
     system default, use
     `pandoc --print-default-data-file=abbreviations`.  The only
     use pandoc makes of this list is in the Markdown reader.
-    Strings found in this list will be followed by a nonbreaking
-    space, and the period will not produce sentence-ending space
-    in formats like LaTeX.  The strings may not contain spaces.
+    Strings ending in a period that are found in this list will
+    be followed by a nonbreaking space, so that the period will
+    not produce sentence-ending space in formats like LaTeX.
 
 [`pandocfilters`]: https://github.com/jgm/pandocfilters
 [PHP]: https://github.com/vinai/pandocfilters-php
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 34edbcc17..b1b99dfe5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1725,7 +1725,7 @@ str = do
   updateLastStrPos
   (do guardEnabled Ext_smart
       abbrevs <- getOption readerAbbreviations
-      if result `Set.member` abbrevs
+      if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
          then try (do ils <- whitespace
                       notFollowedBy (() <$ cite <|> () <$ note)
                       -- ?? lookAhead alphaNum
-- 
cgit v1.2.3


From 030209fc292bd51700189653fd38ae5b2a723ef1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 4 Mar 2021 16:25:13 -0800
Subject: Revert "Revert "Relax `--abbreviations` rules so that a period isn't
 required.

This reverts commit 916ce4d51121e0529b938fda71f37e947882abe5.

I was confused in thinking it wouldn't work.
---
 MANUAL.txt                          | 6 +++---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 52b82a3d0..667a784e0 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -691,9 +691,9 @@ header when requesting a document from a URL:
     system default, use
     `pandoc --print-default-data-file=abbreviations`.  The only
     use pandoc makes of this list is in the Markdown reader.
-    Strings ending in a period that are found in this list will
-    be followed by a nonbreaking space, so that the period will
-    not produce sentence-ending space in formats like LaTeX.
+    Strings found in this list will be followed by a nonbreaking
+    space, and the period will not produce sentence-ending space
+    in formats like LaTeX.  The strings may not contain spaces.
 
 [`pandocfilters`]: https://github.com/jgm/pandocfilters
 [PHP]: https://github.com/vinai/pandocfilters-php
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index b1b99dfe5..34edbcc17 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1725,7 +1725,7 @@ str = do
   updateLastStrPos
   (do guardEnabled Ext_smart
       abbrevs <- getOption readerAbbreviations
-      if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
+      if result `Set.member` abbrevs
          then try (do ils <- whitespace
                       notFollowedBy (() <$ cite <|> () <$ note)
                       -- ?? lookAhead alphaNum
-- 
cgit v1.2.3


From 5f9327cfc8143902bbd3fdb9d97a7995a19fd217 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 5 Mar 2021 10:20:16 -0800
Subject: Shared: Change defaultUserDataDirs -> defaultUserDataDir.

Rationale: the manual says that the XDG data directory will
be used if it exists, otherwise the legacy data directory.
So we should just determine this and use this directory,
rather than having a search path which could cause some
things to be taken from one data directory and others from
others.

[API change]
---
 MANUAL.txt                | 12 ++++++------
 src/Text/Pandoc/Shared.hs | 20 ++++++++++++--------
 2 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 667a784e0..d97cbcbc9 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -361,15 +361,15 @@ header when requesting a document from a URL:
     will be used.  On \*nix and macOS systems this will be the `pandoc`
     subdirectory of the XDG data directory (by default,
     `$HOME/.local/share`, overridable by setting the `XDG_DATA_HOME`
-    environment variable).  If that directory does not exist,
-    `$HOME/.pandoc` will be used (for backwards compatibility).
-    In Windows the default user data directory is
+    environment variable).  If that directory does not exist and
+    `$HOME/.pandoc` exists, it will be used (for backwards compatibility).
+    On Windows the default user data directory is
     `C:\Users\USERNAME\AppData\Roaming\pandoc`.
     You can find the default user data directory on your system by
     looking at the output of `pandoc --version`.
-    A `reference.odt`, `reference.docx`, `epub.css`, `templates`,
-    `slidy`, `slideous`, or `s5` directory
-    placed in this directory will override pandoc's normal defaults.
+    Data files placed in this directory (for example, `reference.odt`,
+    `reference.docx`, `epub.css`, `templates`) will override
+    pandoc's normal defaults.
 
 `-d` *FILE*, `--defaults=`*FILE*
 
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 922df7922..2aba9b2e1 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -95,7 +95,7 @@ module Text.Pandoc.Shared (
                      safeRead,
                      safeStrRead,
                      -- * User data directory
-                     defaultUserDataDirs,
+                     defaultUserDataDir,
                      -- * Version
                      pandocVersion
                     ) where
@@ -1012,12 +1012,16 @@ safeStrRead s = case reads s of
 --
 
 -- | Return appropriate user data directory for platform.  We use
--- XDG_DATA_HOME (or its default value), but fall back to the
--- legacy user data directory ($HOME/.pandoc on *nix) if this is
--- missing.
-defaultUserDataDirs :: IO [FilePath]
-defaultUserDataDirs = E.catch (do
+-- XDG_DATA_HOME (or its default value), but for backwards compatibility,
+-- we fall back to the legacy user data directory ($HOME/.pandoc on *nix)
+-- if the XDG_DATA_HOME is missing and this exists.  If neither directory
+-- is present, we return the XDG data directory.
+defaultUserDataDir :: IO FilePath
+defaultUserDataDir = do
   xdgDir <- getXdgDirectory XdgData "pandoc"
   legacyDir <- getAppUserDataDirectory "pandoc"
-  return $ ordNub [xdgDir, legacyDir])
- (\(_ :: E.SomeException) -> return [])
+  xdgExists <- doesDirectoryExist xdgDir
+  legacyDirExists <- doesDirectoryExist legacyDir
+  if not xdgExists && legacyDirExists
+     then return legacyDir
+     else return xdgDir
-- 
cgit v1.2.3


From ccc530c5884f9c36d63fb8a63ce6fadce166015c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 5 Mar 2021 10:28:31 -0800
Subject: Logging: Add EnvironmentVariableUndefined constructor to LogMessage.

[API change]
---
 src/Text/Pandoc/Logging.hs | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 3a3acedc7..1d7bc9d66 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -100,6 +100,7 @@ data LogMessage =
   | FilterCompleted FilePath Integer
   | CiteprocWarning Text
   | ATXHeadingInLHS Int Text
+  | EnvironmentVariableUndefined Text
   deriving (Show, Eq, Data, Ord, Typeable, Generic)
 
 instance ToJSON LogMessage where
@@ -229,6 +230,8 @@ instance ToJSON LogMessage where
       ATXHeadingInLHS lvl contents ->
            ["level" .= lvl
            ,"contents" .= contents]
+      EnvironmentVariableUndefined var ->
+           ["variable" .= var ]
 
 showPos :: SourcePos -> Text
 showPos pos = Text.pack $ sn ++ "line " ++
@@ -345,6 +348,8 @@ showLogMessage msg =
          if lvl < 3
             then " Consider using --markdown-headings=setext."
             else ""
+       EnvironmentVariableUndefined var ->
+         "Undefined environment variable " <> var <> " in defaults file."
 
 messageVerbosity :: LogMessage -> Verbosity
 messageVerbosity msg =
@@ -391,3 +396,4 @@ messageVerbosity msg =
        FilterCompleted{}             -> INFO
        CiteprocWarning{}             -> WARNING
        ATXHeadingInLHS{}             -> WARNING
+       EnvironmentVariableUndefined{}-> WARNING
-- 
cgit v1.2.3


From a8324690061d561583f2cb583cfb28591f500181 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 5 Mar 2021 10:42:33 -0800
Subject: Add fields for CSL optinos to Opt.

* Add `optCSL`, `optBibliography`, `optCitationAbbreviations` to
  `Opt` [API change].
* Move `addMeta` from T.P.App.Opt to T.P.App.CommandLineOptions.
---
 src/Text/Pandoc/App/CommandLineOptions.hs | 31 +++++++++++---
 src/Text/Pandoc/App/Opt.hs                | 68 +++++++++----------------------
 2 files changed, 46 insertions(+), 53 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index 0a8193f6c..ac92db0ae 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -48,12 +48,13 @@ import System.FilePath
 import System.IO (stdout)
 import Text.DocTemplates (Context (..), ToContext (toVal), Val (..))
 import Text.Pandoc
+import Text.Pandoc.Builder (setMeta)
 import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..),
-                            DefaultsState (..), addMeta, applyDefaults,
+                            DefaultsState (..), applyDefaults,
                             fullDefaultsPath)
 import Text.Pandoc.Filter (Filter (..))
 import Text.Pandoc.Highlighting (highlightingStyles)
-import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs)
+import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDir)
 import Text.Printf
 
 #ifdef EMBED_DATA_FILES
@@ -939,13 +940,12 @@ options =
                  (NoArg
                   (\_ -> do
                      prg <- getProgName
-                     defaultDatadirs <- defaultUserDataDirs
+                     defaultDatadir <- defaultUserDataDir
                      UTF8.hPutStrLn stdout
                       $ T.pack
                       $ prg ++ " " ++ T.unpack pandocVersion ++
                         compileInfo ++
-                        "\nUser data directory: " ++
-                        intercalate " or " defaultDatadirs ++
+                        "\nUser data directory: " ++ defaultDatadir ++
                         ('\n':copyrightMessage)
                      exitSuccess ))
                  "" -- "Print version"
@@ -1053,6 +1053,27 @@ setVariable key val (Context ctx) = Context $ M.alter go key ctx
         go (Just (ListVal xs)) = Just $ ListVal $ xs ++ [toVal val]
         go (Just x)            = Just $ ListVal [x, toVal val]
 
+addMeta :: String -> String -> Meta -> Meta
+addMeta k v meta =
+  case lookupMeta k' meta of
+       Nothing -> setMeta k' v' meta
+       Just (MetaList xs) ->
+                  setMeta k' (MetaList (xs ++ [v'])) meta
+       Just x  -> setMeta k' (MetaList [x, v']) meta
+ where
+  v' = readMetaValue v
+  k' = T.pack k
+
+readMetaValue :: String -> MetaValue
+readMetaValue s
+  | s == "true"  = MetaBool True
+  | s == "True"  = MetaBool True
+  | s == "TRUE"  = MetaBool True
+  | s == "false" = MetaBool False
+  | s == "False" = MetaBool False
+  | s == "FALSE" = MetaBool False
+  | otherwise    = MetaString $ T.pack s
+
 -- On Windows with ghc 8.6+, we need to rewrite paths
 -- beginning with \\ to \\?\UNC\. -- See #5127.
 normalizePath :: FilePath -> FilePath
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index 0d96ab67c..b69e4e51e 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -22,7 +22,6 @@ module Text.Pandoc.App.Opt (
           , IpynbOutput (..)
           , DefaultsState (..)
           , defaultOpts
-          , addMeta
           , applyDefaults
           , fullDefaultsPath
           ) where
@@ -32,7 +31,6 @@ import System.FilePath ( addExtension, (</>), takeExtension )
 import Data.Char (isLower, toLower)
 import Data.Maybe (fromMaybe)
 import GHC.Generics hiding (Meta)
-import Text.Pandoc.Builder (setMeta)
 import Text.Pandoc.Filter (Filter (..))
 import Text.Pandoc.Logging (Verbosity (WARNING))
 import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault),
@@ -52,7 +50,7 @@ import Data.Text (Text, unpack)
 import Data.Default (def)
 import qualified Data.Text as T
 import qualified Data.Map as M
-import Text.Pandoc.Definition (Meta(..), MetaValue(..), lookupMeta)
+import Text.Pandoc.Definition (Meta(..), MetaValue(..))
 import Data.Aeson (defaultOptions, Options(..))
 import Data.Aeson.TH (deriveJSON)
 import Control.Applicative ((<|>))
@@ -156,6 +154,9 @@ data Opt = Opt
     , optNoCheckCertificate    :: Bool       -- ^ Disable certificate validation
     , optEol                   :: LineEnding -- ^ Style of line-endings to use
     , optStripComments         :: Bool       -- ^ Skip HTML comments
+    , optCSL                   :: Maybe FilePath -- ^ CSL stylesheet
+    , optBibliography          :: [FilePath]  -- ^ Bibliography files
+    , optCitationAbbreviations :: Maybe FilePath -- ^ Citation abbreviations
     } deriving (Generic, Show)
 
 instance FromYAML (Opt -> Opt) where
@@ -428,26 +429,18 @@ doOpt (k',v) = do
       (parseYAML v >>= \x -> return (\o -> o{ optCss = optCss o <>
                                                 [unpack x] }))
     "bibliography" ->
-      do let addItem x o = o{ optMetadata =
-                                 addMeta "bibliography" (T.unpack x)
-                                    (optMetadata o) }
-         (parseYAML v >>= \(xs :: [Text]) -> return $ \o ->
-                                                    foldr addItem o xs)
-          <|>
-          (parseYAML v >>= \(x :: Text) -> return $ \o -> addItem x o)
+      (parseYAML v >>= \x -> return (\o ->
+                               o{ optBibliography = optBibliography o <>
+                                                      map unpack x }))
+      <|>
+      (parseYAML v >>= \x -> return (\o ->
+                               o{ optBibliography = optBibliography o <>
+                                                       [unpack x] }))
     "csl" ->
-      do let addItem x o = o{ optMetadata =
-                                 addMeta "csl" (T.unpack x)
-                                   (optMetadata o) }
-         (parseYAML v >>= \(xs :: [Text]) -> return $ \o ->
-                                                    foldr addItem o xs)
-          <|>
-          (parseYAML v >>= \(x :: Text) -> return $ \o -> addItem x o)
+      parseYAML v >>= \x -> return (\o -> o{ optCSL = unpack <$> x })
     "citation-abbreviations" ->
-      parseYAML v >>= \x ->
-             return (\o -> o{ optMetadata =
-                                addMeta "citation-abbreviations" (T.unpack x)
-                                  (optMetadata o) })
+      parseYAML v >>= \x -> return (\o -> o{ optCitationAbbreviations =
+                                                  unpack <$> x })
     "ipynb-output" ->
       parseYAML v >>= \x -> return (\o -> o{ optIpynbOutput = x })
     "include-before-body" ->
@@ -562,6 +555,9 @@ defaultOpts = Opt
     , optNoCheckCertificate    = False
     , optEol                   = Native
     , optStripComments         = False
+    , optCSL                   = Nothing
+    , optBibliography          = []
+    , optCitationAbbreviations = Nothing
     }
 
 parseStringKey ::  Node Pos -> Parser Text
@@ -579,27 +575,6 @@ yamlToMeta (Mapping _ _ m) =
       >>= fmap (Meta . flip P.runF def)
 yamlToMeta _ = return mempty
 
-addMeta :: String -> String -> Meta -> Meta
-addMeta k v meta =
-  case lookupMeta k' meta of
-       Nothing -> setMeta k' v' meta
-       Just (MetaList xs) ->
-                  setMeta k' (MetaList (xs ++ [v'])) meta
-       Just x  -> setMeta k' (MetaList [x, v']) meta
- where
-  v' = readMetaValue v
-  k' = T.pack k
-
-readMetaValue :: String -> MetaValue
-readMetaValue s
-  | s == "true"  = MetaBool True
-  | s == "True"  = MetaBool True
-  | s == "TRUE"  = MetaBool True
-  | s == "false" = MetaBool False
-  | s == "False" = MetaBool False
-  | s == "FALSE" = MetaBool False
-  | otherwise    = MetaString $ T.pack s
-
 -- | Apply defaults from --defaults file.
 applyDefaults :: (PandocMonad m, MonadIO m)
               => Opt
@@ -625,12 +600,9 @@ fullDefaultsPath dataDir file = do
   let fp = if null (takeExtension file)
               then addExtension file "yaml"
               else file
-  dataDirs <- liftIO defaultUserDataDirs
-  let fps = fp : case dataDir of
-                   Nothing -> map (</> ("defaults" </> fp))
-                                    dataDirs
-                   Just dd -> [dd </> "defaults" </> fp]
-  fromMaybe fp <$> findM fileExists fps
+  defaultDataDir <- liftIO defaultUserDataDir
+  let defaultFp = fromMaybe defaultDataDir dataDir </> "defaults" </> fp
+  fromMaybe fp <$> findM fileExists [fp, defaultFp]
 
 -- | In a list of lists, append another list in front of every list which
 -- starts with specific element.
-- 
cgit v1.2.3


From 6dd7520cc4b3816ae13ec486ce0909b9b881d240 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 5 Mar 2021 10:44:28 -0800
Subject: Implement environment variable interpolation in defaults files.

This allows the syntax `${HOME}` to be used, in fields that expect
file paths only.  Any environment variable may be interpolated
in this way. A warning will be raised for undefined variables.
The special variable `USERDATA` is automatically set to the
user data directory in force when the defaults file is parsed.
(Note: it may be different from the eventual user data directory,
if the defaults file or further command line options change that.)

Closes #5982.
Closes #5977.
Closes #6108 (path not taken).
---
 MANUAL.txt                 |  15 ++++++
 src/Text/Pandoc/App.hs     |  29 +++++++----
 src/Text/Pandoc/App/Opt.hs | 117 ++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 147 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index d97cbcbc9..f06293dd3 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -1672,6 +1672,21 @@ one line:
 verbosity: INFO
 ```
 
+In fields that expect a file path (or list of file paths), the
+following syntax may be used to interpolate environment variables:
+
+``` yaml
+csl:  ${HOME}/mycsldir/special.csl
+```
+
+`${USERDATA}` may also be used; this will always resolve to the
+user data directory that is current when the defaults file is
+parsed, regardless of the setting of the environment
+variable `USERDATA`.
+
+This environment variable interpolation syntax *only* works in
+fields that expect file paths.
+
 Default files can be placed in the `defaults` subdirectory of
 the user data directory and used from any directory.  For
 example, one could create a file specifying defaults for writing
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 40fb34834..6b45e5418 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -47,10 +47,11 @@ import System.FilePath ( takeBaseName, takeExtension )
 import System.IO (nativeNewline, stdout)
 import qualified System.IO as IO (Newline (..))
 import Text.Pandoc
+import Text.Pandoc.Builder (setMeta)
 import Text.Pandoc.MIME (getCharset)
 import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
 import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
-                            IpynbOutput (..) )
+                            IpynbOutput (..))
 import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
                                            options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
@@ -60,7 +61,7 @@ import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.SelfContained (makeSelfContained)
 import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
          headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput,
-         defaultUserDataDirs, tshow, findM)
+         defaultUserDataDir, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString)
 import Text.Pandoc.Readers.Markdown (yamlToMeta)
 import qualified Text.Pandoc.UTF8 as UTF8
@@ -71,6 +72,15 @@ import System.Posix.Terminal (queryTerminal)
 
 convertWithOpts :: Opt -> IO ()
 convertWithOpts opts = do
+  datadir <- case optDataDir opts of
+                  Nothing   -> do
+                    d <- defaultUserDataDir
+                    exists <- doesDirectoryExist d
+                    return $ if exists
+                                then Just d
+                                else Nothing
+                  Just _    -> return $ optDataDir opts
+
   let outputFile = fromMaybe "-" (optOutputFile opts)
   let filters = optFilters opts
   let verbosity = optVerbosity opts
@@ -85,12 +95,6 @@ convertWithOpts opts = do
                      Just xs | not (optIgnoreArgs opts) -> xs
                      _ -> ["-"]
 
-  datadir <- case optDataDir opts of
-                  Nothing   -> do
-                    ds <- defaultUserDataDirs
-                    findM doesDirectoryExist ds
-                  Just _    -> return $ optDataDir opts
-
   let runIO' :: PandocIO a -> IO a
       runIO' f = do
         (res, reports) <- runIOorExplode $ do
@@ -275,12 +279,21 @@ convertWithOpts opts = do
       report $ Deprecated "pandoc-citeproc filter"
                "Use --citeproc instead."
 
+    let cslMetadata =
+          maybe id (setMeta "csl") (optCSL opts) .
+          (case optBibliography opts of
+             [] -> id
+             xs -> setMeta "bibliography" xs) .
+          maybe id (setMeta "citation-abbreviations")
+                         (optCitationAbbreviations opts) $ mempty
+
     doc <- sourceToDoc sources >>=
               (   (if isJust (optExtractMedia opts)
                       then fillMediaBag
                       else return)
               >=> return . adjustMetadata (metadataFromFile <>)
               >=> return . adjustMetadata (<> optMetadata opts)
+              >=> return . adjustMetadata (<> cslMetadata)
               >=> applyTransforms transforms
               >=> applyFilters readerOpts filters [T.unpack format]
               >=> maybe return extractMedia (optExtractMedia opts)
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index b69e4e51e..b56b2c377 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -32,16 +32,18 @@ import Data.Char (isLower, toLower)
 import Data.Maybe (fromMaybe)
 import GHC.Generics hiding (Meta)
 import Text.Pandoc.Filter (Filter (..))
-import Text.Pandoc.Logging (Verbosity (WARNING))
+import Text.Pandoc.Logging (Verbosity (WARNING), LogMessage(..))
 import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault),
                             TrackChanges (AcceptChanges),
                             WrapOption (WrapAuto), HTMLMathMethod (PlainMath),
                             ReferenceLocation (EndOfDocument),
                             ObfuscationMethod (NoObfuscation),
                             CiteMethod (Citeproc))
-import Text.Pandoc.Class (readFileLazy, fileExists, setVerbosity, PandocMonad)
+import Text.Pandoc.Class (readFileLazy, fileExists, setVerbosity, report,
+                          PandocMonad(lookupEnv), getUserDataDir)
 import Text.Pandoc.Error (PandocError (PandocParseError, PandocSomeError))
-import Text.Pandoc.Shared (camelCaseStrToHyphenated, defaultUserDataDirs, findM, ordNub)
+import Text.Pandoc.Shared (camelCaseStrToHyphenated, defaultUserDataDir,
+                           findM, ordNub)
 import qualified Text.Pandoc.Parsing as P
 import Text.Pandoc.Readers.Metadata (yamlMap)
 import Text.Pandoc.Class.PandocPure
@@ -176,17 +178,120 @@ instance (PandocMonad m, MonadIO m)
     dataDir <- case M.lookup "data-dir" opts of
       Nothing -> return Nothing
       Just v -> Just . unpack <$> parseYAML v
-    f <- parseOptions $ M.toList m
+    f <- parseOptions (M.toList m)
     case M.lookup "defaults" opts of
       Just v -> do
         g <- parseDefaults v dataDir
-        return  $ g >=> f
-      Nothing -> return f
+        return  $ g >=> f >=> resolveVarsInOpt
+      Nothing -> return $ f >=> resolveVarsInOpt
     where
       toText (Scalar _ (SStr s)) = s
       toText _ = ""
   parseYAML n = failAtNode n "Expected a mapping"
 
+resolveVarsInOpt :: (PandocMonad m, MonadIO m) => Opt -> m Opt
+resolveVarsInOpt
+    opt@Opt
+    { optTemplate              = oTemplate
+    , optMetadataFiles         = oMetadataFiles
+    , optOutputFile            = oOutputFile
+    , optInputFiles            = oInputFiles
+    , optSyntaxDefinitions     = oSyntaxDefinitions
+    , optAbbreviations         = oAbbreviations
+    , optReferenceDoc          = oReferenceDoc
+    , optEpubMetadata          = oEpubMetadata
+    , optEpubFonts             = oEpubFonts
+    , optEpubCoverImage        = oEpubCoverImage
+    , optLogFile               = oLogFile
+    , optFilters               = oFilters
+    , optDataDir               = oDataDir
+    , optExtractMedia          = oExtractMedia
+    , optCss                   = oCss
+    , optIncludeBeforeBody     = oIncludeBeforeBody
+    , optIncludeAfterBody      = oIncludeAfterBody
+    , optIncludeInHeader       = oIncludeInHeader
+    , optResourcePath          = oResourcePath
+    , optCSL                   = oCSL
+    , optBibliography          = oBibliography
+    , optCitationAbbreviations = oCitationAbbreviations
+    }
+  = do
+      oTemplate' <- mapM resolveVars oTemplate
+      oMetadataFiles' <- mapM resolveVars oMetadataFiles
+      oOutputFile' <- mapM resolveVars oOutputFile
+      oInputFiles' <- mapM (mapM resolveVars) oInputFiles
+      oSyntaxDefinitions' <- mapM resolveVars oSyntaxDefinitions
+      oAbbreviations' <- mapM resolveVars oAbbreviations
+      oReferenceDoc' <- mapM resolveVars oReferenceDoc
+      oEpubMetadata' <- mapM resolveVars oEpubMetadata
+      oEpubFonts' <- mapM resolveVars oEpubFonts
+      oEpubCoverImage' <- mapM resolveVars oEpubCoverImage
+      oLogFile' <- mapM resolveVars oLogFile
+      oFilters' <- mapM resolveVarsInFilter oFilters
+      oDataDir' <- mapM resolveVars oDataDir
+      oExtractMedia' <- mapM resolveVars oExtractMedia
+      oCss' <- mapM resolveVars oCss
+      oIncludeBeforeBody' <- mapM resolveVars oIncludeBeforeBody
+      oIncludeAfterBody' <- mapM resolveVars oIncludeAfterBody
+      oIncludeInHeader' <- mapM resolveVars oIncludeInHeader
+      oResourcePath' <- mapM resolveVars oResourcePath
+      oCSL' <- mapM resolveVars oCSL
+      oBibliography' <- mapM resolveVars oBibliography
+      oCitationAbbreviations' <- mapM resolveVars oCitationAbbreviations
+      return opt{ optTemplate              = oTemplate'
+                , optMetadataFiles         = oMetadataFiles'
+                , optOutputFile            = oOutputFile'
+                , optInputFiles            = oInputFiles'
+                , optSyntaxDefinitions     = oSyntaxDefinitions'
+                , optAbbreviations         = oAbbreviations'
+                , optReferenceDoc          = oReferenceDoc'
+                , optEpubMetadata          = oEpubMetadata'
+                , optEpubFonts             = oEpubFonts'
+                , optEpubCoverImage        = oEpubCoverImage'
+                , optLogFile               = oLogFile'
+                , optFilters               = oFilters'
+                , optDataDir               = oDataDir'
+                , optExtractMedia          = oExtractMedia'
+                , optCss                   = oCss'
+                , optIncludeBeforeBody     = oIncludeBeforeBody'
+                , optIncludeAfterBody      = oIncludeAfterBody'
+                , optIncludeInHeader       = oIncludeInHeader'
+                , optResourcePath          = oResourcePath'
+                , optCSL                   = oCSL'
+                , optBibliography          = oBibliography'
+                , optCitationAbbreviations = oCitationAbbreviations'
+                }
+
+ where
+  resolveVars [] = return []
+  resolveVars ('$':'{':xs) =
+    let (ys, zs) = break (=='}') xs
+     in if null zs
+           then return $ '$':'{':xs
+           else do
+             val <- lookupEnv' ys
+             (val ++) <$> resolveVars (drop 1 zs)
+  resolveVars (c:cs) = (c:) <$> resolveVars cs
+  lookupEnv' "USERDATA" = do
+    mbodatadir <- mapM resolveVars oDataDir
+    mbdatadir  <- getUserDataDir
+    defdatadir <- liftIO defaultUserDataDir
+    return $ fromMaybe defdatadir (mbodatadir <|> mbdatadir)
+  lookupEnv' v = do
+    mbval <- fmap T.unpack <$> lookupEnv (T.pack v)
+    case mbval of
+      Nothing -> do
+        report $ EnvironmentVariableUndefined (T.pack v)
+        return mempty
+      Just x  -> return x
+  resolveVarsInFilter (JSONFilter fp) =
+    JSONFilter <$> resolveVars fp
+  resolveVarsInFilter (LuaFilter fp) =
+    LuaFilter <$> resolveVars fp
+  resolveVarsInFilter CiteprocFilter = return CiteprocFilter
+
+
+
 parseDefaults :: (PandocMonad m, MonadIO m)
               => Node Pos
               -> Maybe FilePath
-- 
cgit v1.2.3


From df00cf05cbf817b7d8e7de0a4a220dd70f8a9608 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 5 Mar 2021 11:56:41 -0800
Subject: Allow `${.}` in defaults files paths...

to refer to the directory where the default file is.
This will make it possible to create moveable
"packages" of resources in a directory.

Closes #5871.
---
 MANUAL.txt                 | 13 +++++++++++++
 src/Text/Pandoc/App/Opt.hs | 13 +++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index f06293dd3..8e98ee1eb 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -1684,6 +1684,19 @@ user data directory that is current when the defaults file is
 parsed, regardless of the setting of the environment
 variable `USERDATA`.
 
+`${.}` will resolve to the directory containing the default
+file itself.  This allows you to refer to resources contained
+in that directory:
+
+``` yaml
+epub-cover-image: ${.}/cover.jpg
+epub-metadata: ${.}/meta.xml
+resource-path:
+- .             # the working directory from which pandoc is run
+- ${.}/images   # the images subdirectory of the directory
+                # containing this defaults file
+```
+
 This environment variable interpolation syntax *only* works in
 fields that expect file paths.
 
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index b56b2c377..d09a6afc0 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -27,7 +27,8 @@ module Text.Pandoc.App.Opt (
           ) where
 import Control.Monad.Except (MonadIO, liftIO, throwError, (>=>), foldM)
 import Control.Monad.State.Strict (StateT, modify, gets)
-import System.FilePath ( addExtension, (</>), takeExtension )
+import System.FilePath ( addExtension, (</>), takeExtension, takeDirectory )
+import System.Directory ( canonicalizePath )
 import Data.Char (isLower, toLower)
 import Data.Maybe (fromMaybe)
 import GHC.Generics hiding (Meta)
@@ -189,7 +190,8 @@ instance (PandocMonad m, MonadIO m)
       toText _ = ""
   parseYAML n = failAtNode n "Expected a mapping"
 
-resolveVarsInOpt :: (PandocMonad m, MonadIO m) => Opt -> m Opt
+resolveVarsInOpt :: forall m. (PandocMonad m, MonadIO m)
+                 => Opt -> StateT DefaultsState m Opt
 resolveVarsInOpt
     opt@Opt
     { optTemplate              = oTemplate
@@ -263,6 +265,7 @@ resolveVarsInOpt
                 }
 
  where
+  resolveVars :: FilePath -> StateT DefaultsState m FilePath
   resolveVars [] = return []
   resolveVars ('$':'{':xs) =
     let (ys, zs) = break (=='}') xs
@@ -272,6 +275,12 @@ resolveVarsInOpt
              val <- lookupEnv' ys
              (val ++) <$> resolveVars (drop 1 zs)
   resolveVars (c:cs) = (c:) <$> resolveVars cs
+  lookupEnv' :: String -> StateT DefaultsState m String
+  lookupEnv' "." = do
+    mbCurDefaults <- gets curDefaults
+    maybe (return "")
+          (fmap takeDirectory . liftIO . canonicalizePath)
+          mbCurDefaults
   lookupEnv' "USERDATA" = do
     mbodatadir <- mapM resolveVars oDataDir
     mbdatadir  <- getUserDataDir
-- 
cgit v1.2.3


From 735a69de6b94e32ee9aceb44a893de4067f24354 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 6 Mar 2021 10:32:51 -0800
Subject: Allow `--resource-path` to accumulate.

Previously, if `--resource-path` were used multiple times, the last
resource path would replace the others.

With this change, each time `--resource-path` is used, it prepends
the specified path components to the existing resource path.

Similarly, when `resource-path` is specified in a defaults file,
the paths provided will be prepended to the existing resource
path.

This change also allows one to avoid using the OS-specific path
separator; instead, one can simply use `--resource-path`
a number of times with single paths. This form of command
will not have an OS-dependent behavior.

This change facilitates the use of multiple, small defaults
files: each can specify a directory containing its own
resources without clobbering the resource paths set by
the others.

Closes #6152.
---
 MANUAL.txt                                | 5 +++++
 src/Text/Pandoc/App/CommandLineOptions.hs | 3 ++-
 src/Text/Pandoc/App/Opt.hs                | 3 ++-
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 8e98ee1eb..a5036b361 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -891,6 +891,11 @@ header when requesting a document from a URL:
     will not be searched.  For example:
     `--resource-path=.:test` will search the working directory
     and the `test` subdirectory, in that order.
+    This option can be used repeatedly. Search path components
+    that come later on the command line will be searched before
+    those that come earlier, so
+    `--resource-path foo:bar --resource-path baz:bim` is
+    equivalent to `--resource-path baz:bim:foo:bar`.
 
 `--request-header=`*NAME*`:`*VAL*
 
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index ac92db0ae..b4483f756 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -288,7 +288,8 @@ options =
     , Option "" ["resource-path"]
                 (ReqArg
                   (\arg opt -> return opt { optResourcePath =
-                                   splitSearchPath arg })
+                                   splitSearchPath arg ++
+                                    optResourcePath opt })
                    "SEARCHPATH")
                   "" -- "Paths to search for images and other resources"
 
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index d09a6afc0..e5aaec9c5 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -583,7 +583,8 @@ doOpt (k',v) = do
                                 optIncludeInHeader o <> [unpack x] }))
     "resource-path" ->
       parseYAML v >>= \x ->
-             return (\o -> o{ optResourcePath = map unpack x })
+             return (\o -> o{ optResourcePath = map unpack x <>
+                                 optResourcePath o })
     "request-headers" ->
       parseYAML v >>= \x ->
              return (\o -> o{ optRequestHeaders = x })
-- 
cgit v1.2.3


From c652dcc16b208954ab30a27b1c72a889b20ef2e9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 7 Mar 2021 13:21:45 -0800
Subject: LaTeX reader: support hyperref command.

Closes #7127.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 552411db8..ceac261d2 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -398,17 +398,20 @@ inlineCommands = M.unions
     , ("footnote", skipopts >> note <$> grouped block)
     , ("passthrough", tok) -- \passthrough macro used by latex writer
                            -- for listings
+    , ("includegraphics", do options <- option [] keyvals
+                             src <- braced
+                             mkImage options .
+                               unescapeURL .
+                               removeDoubleQuotes $ untokenize src)
+    -- hyperref
     , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$>
                     bracedUrl)
     , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl)
     , ("href", do url <- bracedUrl
                   sp
                   link (unescapeURL $ untokenize url) "" <$> tok)
-    , ("includegraphics", do options <- option [] keyvals
-                             src <- braced
-                             mkImage options . unescapeURL . removeDoubleQuotes $
-                                 untokenize src)
     , ("hyperlink", hyperlink)
+    , ("hyperref", hyperref)
     , ("hypertarget", hypertargetInline)
     -- hyphenat
     , ("nohyphens", tok)
@@ -463,6 +466,12 @@ hyperlink = try $ do
   lab <- tok
   return $ link ("#" <> src) "" lab
 
+hyperref :: PandocMonad m => LP m Inlines
+hyperref = try $ do
+  url <- (("#" <>) . untokenize <$> try (sp *> bracketedToks <* sp))
+       <|> untokenize <$> (bracedUrl <* bracedUrl <* bracedUrl)
+  link url "" <$> tok
+
 hypertargetBlock :: PandocMonad m => LP m Blocks
 hypertargetBlock = try $ do
   ref <- untokenize <$> braced
-- 
cgit v1.2.3


From 5aa73bd0a2820a0c89b5990dbe53abfdd5ade32d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 7 Mar 2021 15:49:02 -0800
Subject: LaTeX reader: handle table cells containing `&` in `\verb`.

Closes #7129.
---
 src/Text/Pandoc/Readers/LaTeX/Table.hs |  7 ++++++-
 test/command/7129.md                   | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7129.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
index 2ea9caf58..7833da081 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -134,6 +134,11 @@ parseTableRow :: PandocMonad m
               -> LP m Row
 parseTableRow blocks inline envname prefsufs = do
   notFollowedBy (spaces *> end_ envname)
+  -- contexts that can contain & that is not colsep:
+  let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
+      canContainAmp (Tok _ (CtrlSeq "verb") _)  = True
+      canContainAmp (Tok _ (CtrlSeq "Verb") _)  = True
+      canContainAmp _       = False
   -- add prefixes and suffixes in token stream:
   let celltoks (pref, suff) = do
         prefpos <- getPosition
@@ -142,7 +147,7 @@ parseTableRow blocks inline envname prefsufs = do
                      ((lookAhead (controlSeq "parbox") >>
                        void blocks) -- #5711
                       <|>
-                      (lookAhead (controlSeq "begin") >> void inline)
+                      (lookAhead (satisfyTok canContainAmp) >> void inline)
                       <|>
                       (lookAhead (symbol '$') >> void inline))
                   <|>
diff --git a/test/command/7129.md b/test/command/7129.md
new file mode 100644
index 000000000..fef4ca2c3
--- /dev/null
+++ b/test/command/7129.md
@@ -0,0 +1,27 @@
+```
+% pandoc -f latex -t native
+\begin{tabular}{ll} \hline
+  FOO & BAR         \\ \hline
+  foo & \verb|b&r|  \\ \hline
+\end{tabular}
+^D
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignLeft,ColWidthDefault)
+ ,(AlignLeft,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "FOO"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "BAR"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "foo"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Code ("",[],[]) "b&r"]]]])]
+ (TableFoot ("",[],[])
+ [])]
+```
-- 
cgit v1.2.3


From eb184d9148e8a3e8c896a71550b1f0bee8da9a21 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 8 Mar 2021 12:40:25 +0100
Subject: Jira writer: use noformat instead of code for unknown languages.

Code blocks that are not marked as a language supported by Jira are
rendered as preformatted text with `{noformat}` blocks.

Fixes: tarleb/jira-wiki-markup#4
---
 src/Text/Pandoc/Writers/Jira.hs | 10 ++++----
 test/Tests/Writers/Jira.hs      | 10 ++++++++
 test/writer.jira                | 55 +++++++++++++++++------------------------
 3 files changed, 37 insertions(+), 38 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index 131896201..a714dac2e 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -126,13 +126,13 @@ toJiraCode :: PandocMonad m
            -> Text
            -> JiraConverter m [Jira.Block]
 toJiraCode (ident, classes, _attribs) code = do
-  let lang = case find (\c -> T.toLower c `elem` knownLanguages) classes of
-               Nothing -> Jira.Language "java"
-               Just l  -> Jira.Language l
   let addAnchor b = if T.null ident
                     then b
                     else [Jira.Para (singleton (Jira.Anchor ident))] <> b
-  return . addAnchor . singleton $ Jira.Code lang mempty code
+  return . addAnchor . singleton $
+    case find (\c -> T.toLower c `elem` knownLanguages) classes of
+      Nothing -> Jira.NoFormat mempty code
+      Just l  -> Jira.Code (Jira.Language l) mempty code
 
 -- | Creates a Jira definition list
 toJiraDefinitionList :: PandocMonad m
@@ -310,7 +310,7 @@ registerNotes contents = do
 knownLanguages :: [Text]
 knownLanguages =
   [ "actionscript", "ada", "applescript", "bash", "c", "c#", "c++"
-  , "css", "erlang", "go", "groovy", "haskell", "html", "javascript"
+  , "css", "erlang", "go", "groovy", "haskell", "html", "java", "javascript"
   , "json", "lua", "nyan", "objc", "perl", "php", "python", "r", "ruby"
   , "scala", "sql", "swift", "visualbasic", "xml", "yaml"
   ]
diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs
index aff8348d4..b618c3970 100644
--- a/test/Tests/Writers/Jira.hs
+++ b/test/Tests/Writers/Jira.hs
@@ -68,5 +68,15 @@ tests =
         spanWith ("unicorn", [], []) (str "Unicorn") =?>
         "{anchor:unicorn}Unicorn"
       ]
+
+    , testGroup "code"
+      [ "code block with known language" =:
+        codeBlockWith ("", ["java"], []) "Book book = new Book(\"Algebra\")" =?>
+        "{code:java}\nBook book = new Book(\"Algebra\")\n{code}"
+
+      , "code block without language" =:
+        codeBlockWith ("", [], []) "preformatted\n  text.\n" =?>
+        "{noformat}\npreformatted\n  text.\n{noformat}"
+      ]
     ]
   ]
diff --git a/test/writer.jira b/test/writer.jira
index aff0dc320..71b720d65 100644
--- a/test/writer.jira
+++ b/test/writer.jira
@@ -33,11 +33,10 @@ bq. This is a block quote. It is pretty short.
 {quote}
 Code in a block quote:
 
-{code:java}
+{noformat}
 sub status {
     print "working";
-}
-{code}
+}{noformat}
 A list:
 
 # item one
@@ -56,22 +55,20 @@ And a following paragraph.
 h1. {anchor:code-blocks}Code Blocks
 Code:
 
-{code:java}
+{noformat}
 ---- (should be four hyphens)
 
 sub status {
     print "working";
 }
 
-this code block is indented by one tab
-{code}
+this code block is indented by one tab{noformat}
 And:
 
-{code:java}
+{noformat}
     this code block is indented by two tabs
 
-These should not be escaped:  \$ \\ \> \[ \{
-{code}
+These should not be escaped:  \$ \\ \> \[ \{{noformat}
 ----
 h1. {anchor:lists}Lists
 h2. {anchor:unordered}Unordered
@@ -236,9 +233,8 @@ red fruit
 contains seeds, crisp, pleasant to taste
 * *_orange_*
 orange fruit
-{code:java}
-{ orange code block }
-{code}
+{noformat}
+{ orange code block }{noformat}
 bq. orange block quote
 
 Multiple definitions, tight:
@@ -292,16 +288,14 @@ foo
 
 This should be a code block, though:
 
-{code:java}
+{noformat}
 <div>
     foo
-</div>
-{code}
+</div>{noformat}
 As should this:
 
-{code:java}
-<div>foo</div>
-{code}
+{noformat}
+<div>foo</div>{noformat}
 Now, nested:
 
 foo
@@ -312,16 +306,14 @@ Multiline:
 
 Code block:
 
-{code:java}
-<!-- Comment -->
-{code}
+{noformat}
+<!-- Comment -->{noformat}
 Just plain comment, with trailing spaces on the line:
 
 Code:
 
-{code:java}
-<hr />
-{code}
+{noformat}
+<hr />{noformat}
 Hr’s:
 
 ----
@@ -478,9 +470,8 @@ Indented [thrice|/url].
 
 This should \[not\]\[\] be a link.
 
-{code:java}
-[not]: /url
-{code}
+{noformat}
+[not]: /url{noformat}
 Foo [bar|/url/].
 
 Foo [biz|/url/].
@@ -506,9 +497,8 @@ An e-mail address: [mailto:nobody@nowhere.net]
 bq. Blockquoted: [http://example.com/]
 Auto-links should not occur here: {{<http://example.com/>}}
 
-{code:java}
-or here: <http://example.com/>
-{code}
+{noformat}
+or here: <http://example.com/>{noformat}
 ----
 h1. {anchor:images}Images
 From "Voyage dans la Lune" by Georges Melies \(1902):
@@ -534,9 +524,8 @@ This paragraph should not be part of the note, as it is not indented.
 
 Subsequent blocks are indented to show that they belong to the footnote \(as with list items).
 
-{code:java}
-  { <code> }
-{code}
+{noformat}
+  { <code> }{noformat}
 If you want, you can indent every line, but you can also be lazy and just indent the first line of each block.
 
 
-- 
cgit v1.2.3


From b9b2586ed3e9aac9c5ba86127fbf984fb3149844 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 9 Mar 2021 18:01:08 +0100
Subject: Org writer: prevent unintended creation of ordered list items

Adjust line wrapping if default wrapping would cause a line to be read
as an ordered list item.

Fixes #7132
---
 src/Text/Pandoc/Writers/Org.hs | 11 +++++++----
 test/command/7132.md           | 10 ++++++++++
 2 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 test/command/7132.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 8dfc2749c..1b525831e 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -17,7 +17,7 @@ Org-Mode:  <http://orgmode.org>
 -}
 module Text.Pandoc.Writers.Org (writeOrg) where
 import Control.Monad.State.Strict
-import Data.Char (isAlphaNum)
+import Data.Char (isAlphaNum, isDigit)
 import Data.List (intersect, intersperse, partition, transpose)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -347,16 +347,19 @@ inlineListToOrg :: PandocMonad m
                 => [Inline]
                 -> Org m (Doc Text)
 inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst)
-  where fixMarkers [] = []  -- prevent note refs and list markers from wrapping, see #4171
+  where -- Prevent note refs and list markers from wrapping, see #4171
+        -- and #7132.
+        fixMarkers [] = []
         fixMarkers (Space : x : rest) | shouldFix x =
           Str " " : x : fixMarkers rest
         fixMarkers (SoftBreak : x : rest) | shouldFix x =
           Str " " : x : fixMarkers rest
         fixMarkers (x : rest) = x : fixMarkers rest
 
-        shouldFix Note{} = True -- Prevent footnotes
+        shouldFix Note{} = True    -- Prevent footnotes
         shouldFix (Str "-") = True -- Prevent bullet list items
-        -- TODO: prevent ordered list items
+        shouldFix (Str x)          -- Prevent ordered list items
+          | Just (cs, c) <- T.unsnoc x = T.all isDigit cs && c == '.' || c == ')'
         shouldFix _ = False
 
 -- | Convert Pandoc inline element to Org.
diff --git a/test/command/7132.md b/test/command/7132.md
new file mode 100644
index 000000000..30d4c0b3b
--- /dev/null
+++ b/test/command/7132.md
@@ -0,0 +1,10 @@
+```
+% pandoc -f markdown -t org --columns=72
+- This line has exactly the wrong number of characters before the number 5.
+- Long line ending with a number (this time it is in parentheses and a 23)
+^D
+- This line has exactly the wrong number of characters before the
+  number 5.
+- Long line ending with a number (this time it is in parentheses and
+  a 23)
+```
-- 
cgit v1.2.3


From d7f8fbf04b3d58b6b10d1a8db32c3c7e50b614c1 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 9 Mar 2021 21:15:16 +0100
Subject: Org writer: fix operator precedence mistake in previous commit

---
 src/Text/Pandoc/Writers/Org.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 1b525831e..29d58a161 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -359,7 +359,8 @@ inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst)
         shouldFix Note{} = True    -- Prevent footnotes
         shouldFix (Str "-") = True -- Prevent bullet list items
         shouldFix (Str x)          -- Prevent ordered list items
-          | Just (cs, c) <- T.unsnoc x = T.all isDigit cs && c == '.' || c == ')'
+          | Just (cs, c) <- T.unsnoc x = T.all isDigit cs &&
+                                         (c == '.' || c == ')')
         shouldFix _ = False
 
 -- | Convert Pandoc inline element to Org.
-- 
cgit v1.2.3


From 1c23e3a824bc4d850a908193443ad1915a3ebc61 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 9 Mar 2021 13:03:27 -0800
Subject: RST reader: fix logic for ending comments.

Previously comments sometimes got extended too far.  Closes #7134.
---
 src/Text/Pandoc/Readers/RST.hs |  3 ++-
 test/command/7134.md           | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7134.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 29f81b046..514e3b88d 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -613,8 +613,9 @@ comment = try $ do
   string ".."
   skipMany1 spaceChar <|> (() <$ lookAhead newline)
   -- notFollowedBy' directiveLabel -- comment comes after directive so unnec.
-  manyTill anyChar blanklines
+  _ <- anyLine
   optional indentedBlock
+  optional blanklines
   return mempty
 
 directiveLabel :: Monad m => RSTParser m Text
diff --git a/test/command/7134.md b/test/command/7134.md
new file mode 100644
index 000000000..c69ae4bd0
--- /dev/null
+++ b/test/command/7134.md
@@ -0,0 +1,16 @@
+```
+% pandoc -f rst -t native
+This is a paragraph.
+
+    This is a block quote.
+
+..
+
+    This should be a second block quote.
+^D
+[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph."]
+,BlockQuote
+ [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote."]]
+,BlockQuote
+ [Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "second",Space,Str "block",Space,Str "quote."]]]
+```
-- 
cgit v1.2.3


From 5608dc01e5342d367fba3377042dec3944f4d86f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 10 Mar 2021 10:19:40 -0800
Subject: HTML writer: Add warnings on duplicate attribute values.

This prevents emitting invalid HTML.

Ultimately it would be good to prevent this in the types
themselves, but this is better for now.

T.P.Logging: Add DuplicateAttribute constructor to LogMessage.
[API change]
---
 src/Text/Pandoc/Logging.hs           |  8 +++++++
 src/Text/Pandoc/Writers/HTML.hs      | 41 ++++++++++++++++++++----------------
 test/command/duplicate_attributes.md |  7 ++++++
 3 files changed, 38 insertions(+), 18 deletions(-)
 create mode 100644 test/command/duplicate_attributes.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 1d7bc9d66..efd2188f1 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -36,6 +36,7 @@ import Data.Typeable (Typeable)
 import GHC.Generics (Generic)
 import Text.Pandoc.Definition
 import Text.Parsec.Pos
+import Text.Pandoc.Shared (tshow)
 
 -- | Verbosity level.
 data Verbosity = ERROR | WARNING | INFO
@@ -101,6 +102,7 @@ data LogMessage =
   | CiteprocWarning Text
   | ATXHeadingInLHS Int Text
   | EnvironmentVariableUndefined Text
+  | DuplicateAttribute Text Text
   deriving (Show, Eq, Data, Ord, Typeable, Generic)
 
 instance ToJSON LogMessage where
@@ -232,6 +234,9 @@ instance ToJSON LogMessage where
            ,"contents" .= contents]
       EnvironmentVariableUndefined var ->
            ["variable" .= var ]
+      DuplicateAttribute attr val ->
+           ["attribute" .= attr
+           ,"value" .= val]
 
 showPos :: SourcePos -> Text
 showPos pos = Text.pack $ sn ++ "line " ++
@@ -350,6 +355,8 @@ showLogMessage msg =
             else ""
        EnvironmentVariableUndefined var ->
          "Undefined environment variable " <> var <> " in defaults file."
+       DuplicateAttribute attr val ->
+         "Ignoring duplicate attribute " <> attr <> "=" <> tshow val <> "."
 
 messageVerbosity :: LogMessage -> Verbosity
 messageVerbosity msg =
@@ -397,3 +404,4 @@ messageVerbosity msg =
        CiteprocWarning{}             -> WARNING
        ATXHeadingInLHS{}             -> WARNING
        EnvironmentVariableUndefined{}-> WARNING
+       DuplicateAttribute{}          -> WARNING
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 26df0325e..2f33cd467 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -32,7 +32,7 @@ import Control.Monad.State.Strict
 import Data.Char (ord)
 import Data.List (intercalate, intersperse, partition, delete, (\\))
 import Data.List.NonEmpty (NonEmpty((:|)))
-import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe)
+import Data.Maybe (fromMaybe, isJust, isNothing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -551,23 +551,28 @@ toAttrs :: PandocMonad m
 toAttrs kvs = do
   html5 <- gets stHtml5
   mbEpubVersion <- gets stEPUBVersion
-  return $ mapMaybe (\(x,y) ->
-            if html5
-               then
-                  if x `Set.member` (html5Attributes <> rdfaAttributes)
-                     || T.any (== ':') x -- e.g. epub: namespace
-                     || "data-" `T.isPrefixOf` x
-                     || "aria-" `T.isPrefixOf` x
-                     then Just $ customAttribute (textTag x) (toValue y)
-                     else Just $ customAttribute (textTag ("data-" <> x))
-                                  (toValue y)
-               else
-                 if mbEpubVersion == Just EPUB2 &&
-                    not (x `Set.member` (html4Attributes <> rdfaAttributes) ||
-                         "xml:" `T.isPrefixOf` x)
-                    then Nothing
-                    else Just $ customAttribute (textTag x) (toValue y))
-            kvs
+  reverse . snd <$> foldM (go html5 mbEpubVersion) (Set.empty, []) kvs
+ where
+  go html5 mbEpubVersion (keys, attrs) (k,v) = do
+    if k `Set.member` keys
+       then do
+         report $ DuplicateAttribute k v
+         return (keys, attrs)
+       else return (Set.insert k keys, addAttr html5 mbEpubVersion k v attrs)
+  addAttr html5 mbEpubVersion x y
+    | html5
+      = if x `Set.member` (html5Attributes <> rdfaAttributes)
+             || T.any (== ':') x -- e.g. epub: namespace
+             || "data-" `T.isPrefixOf` x
+             || "aria-" `T.isPrefixOf` x
+           then (customAttribute (textTag x) (toValue y) :)
+           else (customAttribute (textTag ("data-" <> x)) (toValue y) :)
+    | mbEpubVersion == Just EPUB2
+    , not (x `Set.member` (html4Attributes <> rdfaAttributes) ||
+      "xml:" `T.isPrefixOf` x)
+      = id
+    | otherwise
+      = (customAttribute (textTag x) (toValue y) :)
 
 attrsToHtml :: PandocMonad m
             => WriterOptions -> Attr -> StateT WriterState m [Attribute]
diff --git a/test/command/duplicate_attributes.md b/test/command/duplicate_attributes.md
new file mode 100644
index 000000000..b6e8a4c21
--- /dev/null
+++ b/test/command/duplicate_attributes.md
@@ -0,0 +1,7 @@
+```
+% pandoc
+[span]{.foobar style="color:blue" class="zip" style="color:red"}
+^D
+[WARNING] Ignoring duplicate attribute style="color:red".
+<p><span class="foobar zip" style="color:blue">span</span></p>
+```
-- 
cgit v1.2.3


From 92ffd374754e28939a855fe84fb5455cb91383fa Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 11 Mar 2021 15:58:37 -0800
Subject: Simplify compactDL.

---
 src/Text/Pandoc/Shared.hs | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 2aba9b2e1..d11ad13f5 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -464,22 +464,20 @@ plainToPara :: Block -> Block
 plainToPara (Plain ils) = Para ils
 plainToPara x = x
 
+
 -- | Like @compactify@, but acts on items of definition lists.
 compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
 compactifyDL items =
-  let defs = concatMap snd items
-  in  case reverse (concatMap B.toList defs) of
-           (Para x:xs)
-             | not (any isPara xs) ->
-                   let (t,ds) = last items
-                       lastDef = B.toList $ last ds
-                       ds' = init ds ++
-                             if null lastDef
-                                then [B.fromList lastDef]
-                                else [B.fromList $ init lastDef ++ [Plain x]]
-                    in init items ++ [(t, ds')]
-             | otherwise           -> items
-           _                       -> items
+  case reverse items of
+        ((t,ds):ys) ->
+           case reverse (map (reverse . B.toList) ds) of
+             ((Para x:xs) : zs) | not (any isPara xs) ->
+                  reverse ys ++
+                    [(t, reverse (map B.fromList zs) ++
+                         [B.fromList (reverse (Plain x:xs))])]
+             _     -> items
+        _          -> items
+
 
 -- | Combine a list of lines by adding hard linebreaks.
 combineLines :: [[Inline]] -> [Inline]
-- 
cgit v1.2.3


From 894ed8ebb01ce11cbcc6afc13d00d6ac1dd05ed0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 12 Mar 2021 11:57:31 -0800
Subject: Citeproc: apply fixLinks correctly.

This is code that incorporates a prefix like `https://doi.org/`
into a following link when appropriate.  But it didn't work because
we were walking with a `[Inline] -> [Inline]` function on an `Inlines`.
Changed the point of application of `fixLink` to resolve the issue.
Closes #7130.
---
 src/Text/Pandoc/Citeproc.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 38e992ba1..af302f782 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -49,7 +49,6 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (takeExtension)
 import Safe (lastMay, initSafe)
--- import Debug.Trace as Trace (trace, traceShowId)
 
 
 processCitations  :: PandocMonad m => Pandoc -> m Pandoc
@@ -91,11 +90,12 @@ processCitations (Pandoc meta bs) = do
                    _ -> id) $ []
   let bibs = mconcat $ map (\(ident, out) ->
                      B.divWith ("ref-" <> ident,["csl-entry"],[]) . B.para .
-                       walk (convertQuotes locale) .  insertSpace $ out)
+                         walk (convertQuotes locale) .
+                         insertSpace $ out)
                       (resultBibliography result)
   let moveNotes = maybe True truish $
                         lookupMeta "notes-after-punctuation" meta
-  let cits = map (walk fixLinks . walk (convertQuotes locale)) $
+  let cits = map (walk (convertQuotes locale)) $
                resultCitations result
 
   let fixQuotes = case localePunctuationInQuote locale of
@@ -294,7 +294,7 @@ insertResolvedCitations (Cite cs ils) = do
     [] -> return (Cite cs ils)
     (x:xs) -> do
       put xs
-      return $ Cite cs (B.toList x)
+      return $ Cite cs (walk fixLinks $ B.toList x)
 insertResolvedCitations x = return x
 
 getCitations :: Locale
@@ -431,7 +431,7 @@ mvPunct _ _ [] = []
 -- move https://doi.org etc. prefix inside link text (#6723):
 fixLinks :: [Inline] -> [Inline]
 fixLinks (Str t : Link attr [Str u1] (u2,tit) : xs)
-  | t <> u1 == u2
+  | u2 == t <> u1
   = Link attr [Str (t <> u1)] (u2,tit) : fixLinks xs
 fixLinks (x:xs) = x : fixLinks xs
 fixLinks [] = []
-- 
cgit v1.2.3


From a8aa301428752d96cdd58d7f4ecaa7d054f3505d Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 13 Mar 2021 12:10:02 +0100
Subject: Jira writer: improve div/panel handling

Include div attributes in panels, always render divs with class `panel`
as panels, and avoid nesting of panels.
---
 src/Text/Pandoc/Writers/Jira.hs | 39 ++++++++++++++++++++++++++++-----------
 test/Tests/Writers/Jira.hs      | 30 ++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index a714dac2e..aa78d9419 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -39,11 +39,17 @@ writeJira :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeJira opts = runDefaultConverter (writerWrapText opts) (pandocToJira opts)
 
 -- | State to keep track of footnotes.
-newtype ConverterState = ConverterState { stNotes :: [Text] }
+data ConverterState = ConverterState
+  { stNotes   :: [Text] -- ^ Footnotes to be appended to the end of the text
+  , stInPanel :: Bool   -- ^ whether we are in a @{panel}@ block
+  }
 
 -- | Initial converter state.
 startState :: ConverterState
-startState = ConverterState { stNotes = [] }
+startState = ConverterState
+  { stNotes = []
+  , stInPanel = False
+  }
 
 -- | Converter monad
 type JiraConverter m = ReaderT WrapOption (StateT ConverterState m)
@@ -126,14 +132,20 @@ toJiraCode :: PandocMonad m
            -> Text
            -> JiraConverter m [Jira.Block]
 toJiraCode (ident, classes, _attribs) code = do
-  let addAnchor b = if T.null ident
-                    then b
-                    else [Jira.Para (singleton (Jira.Anchor ident))] <> b
-  return . addAnchor . singleton $
+  return . addAnchor ident . singleton $
     case find (\c -> T.toLower c `elem` knownLanguages) classes of
       Nothing -> Jira.NoFormat mempty code
       Just l  -> Jira.Code (Jira.Language l) mempty code
 
+-- | Prepends an anchor with the given identifier.
+addAnchor :: Text -> [Jira.Block] -> [Jira.Block]
+addAnchor ident =
+  if T.null ident
+  then id
+  else \case
+    Jira.Para xs : bs -> (Jira.Para (Jira.Anchor ident : xs) : bs)
+    bs                -> (Jira.Para (singleton (Jira.Anchor ident)) : bs)
+
 -- | Creates a Jira definition list
 toJiraDefinitionList :: PandocMonad m
                      => [([Inline], [[Block]])]
@@ -149,11 +161,16 @@ toJiraDefinitionList defItems = do
 toJiraPanel :: PandocMonad m
             => Attr -> [Block]
             -> JiraConverter m [Jira.Block]
-toJiraPanel attr blocks = do
-  jiraBlocks <- toJiraBlocks blocks
-  return $ if attr == nullAttr
-           then jiraBlocks
-           else singleton (Jira.Panel [] jiraBlocks)
+toJiraPanel (ident, classes, attribs) blocks = do
+  inPanel <- gets stInPanel
+  if inPanel || ("panel" `notElem` classes && null attribs)
+    then addAnchor ident <$> toJiraBlocks blocks
+    else do
+      modify $ \st -> st{ stInPanel = True }
+      jiraBlocks <- toJiraBlocks blocks
+      modify $ \st -> st{ stInPanel = inPanel }
+      let params = map (uncurry Jira.Parameter) attribs
+      return $ singleton (Jira.Panel params $ addAnchor ident jiraBlocks)
 
 -- | Creates a Jira header
 toJiraHeader :: PandocMonad m
diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs
index b618c3970..0c6f48853 100644
--- a/test/Tests/Writers/Jira.hs
+++ b/test/Tests/Writers/Jira.hs
@@ -79,4 +79,34 @@ tests =
         "{noformat}\npreformatted\n  text.\n{noformat}"
       ]
     ]
+
+  , testGroup "blocks"
+    [ testGroup "div"
+      [ "empty attributes" =:
+        divWith nullAttr (para "interesting text") =?>
+        "interesting text"
+
+      , "just identifier" =:
+        divWith ("a", [], []) (para "interesting text") =?>
+        "{anchor:a}interesting text"
+
+      , "with class 'panel'" =:
+        divWith ("", ["panel"], []) (para "Contents!") =?>
+        "{panel}\nContents\\!\n{panel}\n"
+
+      , "panel with id" =:
+        divWith ("b", ["panel"], []) (para "text") =?>
+        "{panel}\n{anchor:b}text\n{panel}\n"
+
+      , "title attribute" =:
+        divWith ("", [], [("title", "Gimme!")]) (para "Contents!") =?>
+        "{panel:title=Gimme!}\nContents\\!\n{panel}\n"
+
+      , "nested panels" =:
+        let panelAttr = ("", ["panel"], [])
+        in divWith panelAttr (para "hi" <>
+                              divWith panelAttr (para "wassup?")) =?>
+        "{panel}\nhi\n\nwassup?\n{panel}\n"
+      ]
+    ]
   ]
-- 
cgit v1.2.3


From 00e8d0678edac34af7a412642fbf7d85442b15aa Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 13 Mar 2021 14:28:30 +0100
Subject: Jira reader: mark divs created from panels with class "panel".

Closes: tarleb/jira-wiki-markup#2
---
 src/Text/Pandoc/Readers/Jira.hs | 4 ++--
 test/Tests/Readers/Jira.hs      | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 5a5d0ee1e..89aecbf56 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -71,10 +71,10 @@ toPandocCodeBlocks langMay params txt =
                   Nothing                   -> []
   in codeBlockWith ("", classes, map paramToPair params) txt
 
--- | Create a pandoc @'Div'@
+-- | Create a pandoc @'Div'@ from a panel.
 toPandocDiv :: [Jira.Parameter] -> [Jira.Block] -> Blocks
 toPandocDiv params =
-  divWith ("", [], map paramToPair params) . foldMap jiraToPandocBlocks
+  divWith ("", ["panel"], map paramToPair params) . foldMap jiraToPandocBlocks
 
 paramToPair :: Jira.Parameter -> (Text, Text)
 paramToPair (Jira.Parameter key value) = (key, value)
diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs
index 189b504f7..cb7dde4ea 100644
--- a/test/Tests/Readers/Jira.hs
+++ b/test/Tests/Readers/Jira.hs
@@ -96,6 +96,12 @@ tests =
       simpleTable [para "Name"] [[para "Test"]]
     ]
 
+  , testGroup "panel"
+    [ "simple panel" =:
+      "{panel}\nInterviewer: Jane Doe{panel}\n" =?>
+      divWith ("", ["panel"], []) (para "Interviewer: Jane Doe")
+    ]
+
   , testGroup "inlines"
     [ "emphasis" =:
       "*quid pro quo*" =?>
-- 
cgit v1.2.3


From eed18d231cc706e27a1495d46e8c05dd18a0938f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Mar 2021 12:05:52 -0800
Subject: Use integral values for w:tblW in docx.

Cloess #7141.
---
 src/Text/Pandoc/Writers/Docx.hs            |   2 +-
 test/docx/golden/table_one_row.docx        | Bin 9908 -> 9906 bytes
 test/docx/golden/table_with_list_cell.docx | Bin 10227 -> 10225 bytes
 test/docx/golden/tables.docx               | Bin 10244 -> 10241 bytes
 4 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 89c71d773..c47bfb2ea 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1031,7 +1031,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
            map mkcell cells
   let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
   let fullrow = 5000 -- 100% specified in pct
-  let rowwidth = fullrow * sum widths
+  let (rowwidth :: Int) = round $ fullrow * sum widths
   let mkgridcol w = mknode "w:gridCol"
                        [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
   let hasHeader = not $ all null headers
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index a1d2323c2..1178f7c6e 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 2f3a831a7..84be2720f 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index af066107c..140366d8b 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
-- 
cgit v1.2.3


From 35b66a76718205c303f416bf0afc01c098e8a171 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Mar 2021 12:50:44 -0800
Subject: MediaWiki reader: Allow block-level content in notes (ref).

Closes #7145.
---
 src/Text/Pandoc/Readers/MediaWiki.hs | 10 +++++++++-
 test/command/7145.md                 | 12 ++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7145.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index cdb746c67..9f4d5e170 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -112,12 +112,14 @@ newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
 isBlockTag' :: Tag Text -> Bool
 isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
   t `notElem` eitherBlockOrInline
+isBlockTag' (TagClose "ref") = True -- needed so 'special' doesn't parse it
 isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
   t `notElem` eitherBlockOrInline
 isBlockTag' tag = isBlockTag tag
 
 isInlineTag' :: Tag Text -> Bool
 isInlineTag' (TagComment _) = True
+isInlineTag' (TagClose "ref") = False -- see below inlineTag
 isInlineTag' t              = not (isBlockTag' t)
 
 eitherBlockOrInline :: [Text]
@@ -554,11 +556,17 @@ variable = try $ do
   contents <- manyTillChar anyChar (try $ string "}}}")
   return $ "{{{" <> contents <> "}}}"
 
+singleParaToPlain :: Blocks -> Blocks
+singleParaToPlain bs =
+  case B.toList bs of
+    [Para ils] -> B.fromList [Plain ils]
+    _ -> bs
+
 inlineTag :: PandocMonad m => MWParser m Inlines
 inlineTag = do
   (tag, _) <- lookAhead $ htmlTag isInlineTag'
   case tag of
-       TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
+       TagOpen "ref" _ -> B.note . singleParaToPlain <$> blocksInTags "ref"
        TagOpen "nowiki" _ -> try $ do
           (_,raw) <- htmlTag (~== tag)
           if T.any (== '/') raw
diff --git a/test/command/7145.md b/test/command/7145.md
new file mode 100644
index 000000000..a04345890
--- /dev/null
+++ b/test/command/7145.md
@@ -0,0 +1,12 @@
+```
+% pandoc -f mediawiki -t native
+Maecenas at sapien tempor, pretium turpis ut, imperdiet augue.<ref>This is a multiline
+
+reference
+<i>with</i>
+empty
+
+linebreaks</ref>  Nulla ut massa eget ex venenatis lobortis id in eros.
+^D
+[Para [Str "Maecenas",Space,Str "at",Space,Str "sapien",Space,Str "tempor,",Space,Str "pretium",Space,Str "turpis",Space,Str "ut,",Space,Str "imperdiet",Space,Str "augue.",Note [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "multiline"],Para [Str "reference",SoftBreak,RawInline (Format "html") "<i>",Str "with",RawInline (Format "html") "</i>",SoftBreak,Str "empty"],Para [Str "linebreaks"]],Space,Str "Nulla",Space,Str "ut",Space,Str "massa",Space,Str "eget",Space,Str "ex",Space,Str "venenatis",Space,Str "lobortis",Space,Str "id",Space,Str "in",Space,Str "eros."]]
+```
-- 
cgit v1.2.3


From 35688c42627dce4641f4f61253e3d3786452b61a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 13 Mar 2021 22:03:36 +0100
Subject: T.P.App.FormatHeuristics: shorten code, improve docs.

---
 src/Text/Pandoc/App/FormatHeuristics.hs | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs
index 65a1a7b82..bdf8c6667 100644
--- a/src/Text/Pandoc/App/FormatHeuristics.hs
+++ b/src/Text/Pandoc/App/FormatHeuristics.hs
@@ -15,18 +15,24 @@ module Text.Pandoc.App.FormatHeuristics
   ) where
 
 import Data.Char (toLower)
+import Data.Foldable (asum)
 import Data.Text (Text)
 import System.FilePath (takeExtension)
 
--- Determine default format based on file extensions.
+-- | Determines default format based on file extensions; uses the format
+-- of the first extension that's associated with a format.
+--
+-- Examples:
+--
+-- > formatFromFilePaths ["text.unknown", "no-extension"]
+-- Nothing
+--
+-- > formatFromFilePaths ["my.md", "other.rst"]
+-- Just "markdown"
 formatFromFilePaths :: [FilePath] -> Maybe Text
-formatFromFilePaths [] = Nothing
-formatFromFilePaths (x:xs) =
-  case formatFromFilePath x of
-    Just f     -> Just f
-    Nothing    -> formatFromFilePaths xs
+formatFromFilePaths = asum . map formatFromFilePath
 
--- Determine format based on file extension
+-- | Determines format based on file extension.
 formatFromFilePath :: FilePath -> Maybe Text
 formatFromFilePath x =
   case takeExtension (map toLower x) of
-- 
cgit v1.2.3


From 3622097da360ab83112eb26b4f6231488d747a95 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 14 Mar 2021 00:09:36 -0800
Subject: Handle 'nocite' better with --biblatex and --natbib.

Previously the nocite metadata field was ignored with
these formats.  Now it populates a `nocite-ids` template
variable and causes a `\nocite` command to be issued.

Closes #4585.
---
 data/templates/default.latex     |  3 +++
 src/Text/Pandoc/Writers/LaTeX.hs | 12 ++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/data/templates/default.latex b/data/templates/default.latex
index 50fad2a49..830bf8012 100644
--- a/data/templates/default.latex
+++ b/data/templates/default.latex
@@ -372,6 +372,9 @@ $for(bibliography)$
 \addbibresource{$bibliography$}
 $endfor$
 $endif$
+$if(nocite-ids)$
+\nocite{$for(nocite-ids)$$it$$sep$, $endfor$}
+$endif$
 $if(csl-refs)$
 \newlength{\cslhangindent}
 \setlength{\cslhangindent}{1.5em}
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 180aaa44d..6a205a798 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE LambdaCase          #-}
 {-# LANGUAGE TupleSections       #-}
 {-# LANGUAGE PatternGuards       #-}
 {-# LANGUAGE ScopedTypeVariables #-}
@@ -145,6 +146,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do
 
   let dirs = query (extract "dir") blocks
 
+  let nociteIds = query (\case
+                           Cite cs _ -> map citationId cs
+                           _         -> [])
+                    $ lookupMetaInlines "nocite" meta
+
   let context  =  defField "toc" (writerTableOfContents options) $
                   defField "toc-depth" (tshow
                                         (writerTOCDepth options -
@@ -177,9 +183,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                       else id) $
                   (case writerCiteMethod options of
                          Natbib   -> defField "biblio-title" biblioTitle .
-                                     defField "natbib" True
+                                     defField "natbib" True .
+                                     defField "nocite-ids" nociteIds
                          Biblatex -> defField "biblio-title" biblioTitle .
-                                     defField "biblatex" True
+                                     defField "biblatex" True .
+                                     defField "nocite-ids" nociteIds
                          _        -> id) $
                   defField "colorlinks" (any hasStringValue
                            ["citecolor", "urlcolor", "linkcolor", "toccolor",
-- 
cgit v1.2.3


From 24191a2a278c0dec30bacd66b78cbb8cc8d91324 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 15 Mar 2021 10:37:35 -0700
Subject: Use foldl' instead of foldl everywhere.

---
 src/Text/Pandoc/App/CommandLineOptions.hs           | 4 ++--
 src/Text/Pandoc/Citeproc/Locator.hs                 | 3 ++-
 src/Text/Pandoc/Class/PandocMonad.hs                | 3 ++-
 src/Text/Pandoc/Extensions.hs                       | 3 ++-
 src/Text/Pandoc/Lua/Filter.hs                       | 3 ++-
 src/Text/Pandoc/Readers/Docx/Combine.hs             | 4 ++--
 src/Text/Pandoc/Readers/HTML/TagCategories.hs       | 1 +
 src/Text/Pandoc/Readers/Markdown.hs                 | 6 +++---
 src/Text/Pandoc/Readers/Odt/Arrows/State.hs         | 4 ++--
 src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs | 4 ++--
 src/Text/Pandoc/Readers/Odt/StyleReader.hs          | 4 ++--
 src/Text/Pandoc/Readers/Textile.hs                  | 4 ++--
 src/Text/Pandoc/Shared.hs                           | 4 ++--
 src/Text/Pandoc/Writers/HTML.hs                     | 8 ++++----
 src/Text/Pandoc/Writers/RST.hs                      | 4 ++--
 src/Text/Pandoc/Writers/Texinfo.hs                  | 4 ++--
 16 files changed, 34 insertions(+), 29 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index b4483f756..a6df12715 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -31,7 +31,7 @@ import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder,
          defConfig, Indent(..), NumberFormat(..))
 import Data.Bifunctor (second)
 import Data.Char (toLower)
-import Data.List (intercalate, sort)
+import Data.List (intercalate, sort, foldl')
 #ifdef _WINDOWS
 #if MIN_VERSION_base(4,12,0)
 import Data.List (isPrefixOf)
@@ -93,7 +93,7 @@ parseOptionsFromArgs options' defaults prg rawArgs = do
         ("Try " ++ prg ++ " --help for more information.")
 
   -- thread option data structure through all supplied option actions
-  opts <- foldl (>>=) (return defaults) actions
+  opts <- foldl' (>>=) (return defaults) actions
   let mbArgs = case args of
                  [] -> Nothing
                  xs -> Just xs
diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
index dba762c02..44416ca12 100644
--- a/src/Text/Pandoc/Citeproc/Locator.hs
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -7,6 +7,7 @@ where
 import Citeproc.Types
 import Data.Text (Text)
 import qualified Data.Text as T
+import Data.List (foldl')
 import Text.Parsec
 import Text.Pandoc.Definition
 import Text.Pandoc.Parsing (romanNumeral)
@@ -139,7 +140,7 @@ pBalancedBraces braces p = try $ do
   where
       except = notFollowedBy pBraces >> p
       -- outer and inner
-      surround = foldl (\a (open, close) -> sur open close except <|> a)
+      surround = foldl' (\a (open, close) -> sur open close except <|> a)
                        except
                        braces
 
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 86c8de79e..293a822a0 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -59,6 +59,7 @@ import Control.Monad.Except (MonadError (catchError, throwError),
                              MonadTrans, lift, when)
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Data.Maybe (fromMaybe)
+import Data.List (foldl')
 import Data.Time (UTCTime)
 import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds,
                              posixSecondsToUTCTime)
@@ -612,7 +613,7 @@ checkExistence fn = do
 -- | Canonicalizes a file path by removing redundant @.@ and @..@.
 makeCanonical :: FilePath -> FilePath
 makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
- where  transformPathParts = reverse . foldl go []
+ where  transformPathParts = reverse . foldl' go []
         go as     "."  = as
         go (_:as) ".." = as
         go as     x    = x : as
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 3b96f9e04..266a09e3c 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -34,6 +34,7 @@ module Text.Pandoc.Extensions ( Extension(..)
 where
 import Data.Bits (clearBit, setBit, testBit, (.|.))
 import Data.Data (Data)
+import Data.List (foldl')
 import qualified Data.Text as T
 import Data.Typeable (Typeable)
 import GHC.Generics (Generic)
@@ -593,7 +594,7 @@ parseFormatSpec :: T.Text
 parseFormatSpec = parse formatSpec ""
   where formatSpec = do
           name <- formatName
-          (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$>
+          (extsToEnable, extsToDisable) <- foldl' (flip ($)) ([],[]) <$>
                                              many extMod
           return (T.pack name, reverse extsToEnable, reverse extsToDisable)
         formatName = many1 $ noneOf "-+"
diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index bffe01a34..90967f295 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -22,6 +22,7 @@ import Control.Monad.Catch (finally, try)
 import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf,
                   showConstr, toConstr, tyconUQname)
 import Data.Foldable (foldrM)
+import Data.List (foldl')
 import Data.Map (Map)
 import Data.Maybe (fromMaybe)
 import Foreign.Lua (Lua, Peekable, Pushable, StackIndex)
@@ -204,7 +205,7 @@ walkMeta lf (Pandoc m bs) = do
 
 walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc
 walkPandoc (LuaFilter fnMap) =
-  case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
+  case foldl' mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of
     Just fn -> \x -> runFilterFunction fn x *> singleElement x
     Nothing -> return
 
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index bcf26c4a3..7c6d01769 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -182,7 +182,7 @@ isAttrModifier _                  = False
 
 smushInlines :: [Inlines] -> Inlines
 smushInlines xs = combineInlines xs' mempty
-  where xs' = foldl combineInlines mempty xs
+  where xs' = foldl' combineInlines mempty xs
 
 smushBlocks :: [Blocks] -> Blocks
-smushBlocks xs = foldl combineBlocks mempty xs
+smushBlocks xs = foldl' combineBlocks mempty xs
diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index b7bd40fee..67aba1cb1 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -23,6 +23,7 @@ where
 
 import Data.Set (Set, fromList, unions)
 import Data.Text (Text)
+import Data.List (foldl')
 
 eitherBlockOrInline :: Set Text
 eitherBlockOrInline = fromList
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 34edbcc17..dc94fc2d6 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -21,7 +21,7 @@ module Text.Pandoc.Readers.Markdown (
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
-import Data.List (transpose, elemIndex, sortOn)
+import Data.List (transpose, elemIndex, sortOn, foldl')
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
@@ -357,7 +357,7 @@ referenceKey = try $ do
   addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
                           >> many (try $ spnl >> keyValAttr)
   blanklines
-  let attr'  = extractIdClass $ foldl (\x f -> f x) attr addKvs
+  let attr'  = extractIdClass $ foldl' (\x f -> f x) attr addKvs
       target = (escapeURI $ trimr src, tit)
   st <- getState
   let oldkeys = stateKeys st
@@ -613,7 +613,7 @@ attributes = try $ do
   spnl
   attrs <- many (attribute <* spnl)
   char '}'
-  return $ foldl (\x f -> f x) nullAttr attrs
+  return $ foldl' (\x f -> f x) nullAttr attrs
 
 attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
 attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
index 93c6b5e79..96515bf56 100644
--- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
+++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs
@@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where
 import Control.Arrow
 import qualified Control.Category as Cat
 import Control.Monad
-
+import Data.List (foldl')
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 import Text.Pandoc.Readers.Odt.Generic.Fallible
 
@@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f
 iterateSL :: (Foldable f, MonadPlus m)
           => ArrowState s    x     y
           -> ArrowState s (f x) (m y)
-iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f
+iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f
   where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x)
 
 
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 0d921e23b..341903046 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -64,12 +64,12 @@ import qualified Data.Map             as M
 import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
+import           Data.List (foldl')
 
 import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
-
 import           Text.Pandoc.Readers.Odt.Generic.Namespaces
 import           Text.Pandoc.Readers.Odt.Generic.Utils
 import           Text.Pandoc.Readers.Odt.Generic.Fallible
@@ -293,7 +293,7 @@ readNSattributes         = fromState $ \state -> maybe (state, failEmpty     )
                          => XMLConverterState nsID extraState
                          -> Maybe (XMLConverterState nsID extraState)
     extractNSAttrs startState
-                         = foldl (\state d -> state >>= addNS d)
+                         = foldl' (\state d -> state >>= addNS d)
                                  (Just startState)
                                  nsAttribs
       where nsAttribs    = mapMaybe readNSattr (XML.elAttribs element)
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 5e10f896c..b722aa07d 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -44,7 +44,7 @@ import Control.Arrow
 
 import Data.Default
 import qualified Data.Foldable as F
-import Data.List (unfoldr)
+import Data.List (unfoldr, foldl')
 import qualified Data.Map as M
 import Data.Maybe
 import Data.Text (Text)
@@ -120,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" (
                               &&&
                               lookupDefaultingAttr NsStyle "font-pitch"
                             ))
-                    >>?^ ( M.fromList . foldl accumLegalPitches [] )
+                    >>?^ ( M.fromList . foldl' accumLegalPitches [] )
                   ) `ifFailedDo` returnV (Right M.empty)
   where accumLegalPitches ls (Nothing,_) = ls
         accumLegalPitches ls (Just n,p)  = (n,p):ls
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 860da2dc3..99238c7f0 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -38,7 +38,7 @@ module Text.Pandoc.Readers.Textile ( readTextile) where
 import Control.Monad (guard, liftM)
 import Control.Monad.Except (throwError)
 import Data.Char (digitToInt, isUpper)
-import Data.List (intersperse, transpose)
+import Data.List (intersperse, transpose, foldl')
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -627,7 +627,7 @@ code2 = do
 
 -- | Html / CSS attributes
 attributes :: PandocMonad m => ParserT Text ParserState m Attr
-attributes = foldl (flip ($)) ("",[],[]) <$>
+attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d11ad13f5..0ce9396b3 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -109,7 +109,7 @@ import qualified Data.Bifunctor as Bifunctor
 import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum,
                   generalCategory, GeneralCategory(NonSpacingMark,
                   SpacingCombiningMark, EnclosingMark, ConnectorPunctuation))
-import Data.List (find, intercalate, intersperse, sortOn)
+import Data.List (find, intercalate, intersperse, sortOn, foldl')
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Monoid (Any (..))
@@ -840,7 +840,7 @@ mapLeft = Bifunctor.first
 -- > collapseFilePath "parent/foo/.." ==  "parent"
 -- > collapseFilePath "/parent/foo/../../bar" ==  "/bar"
 collapseFilePath :: FilePath -> FilePath
-collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
+collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories
   where
     go rs "." = rs
     go r@(p:rs) ".." = case p of
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 2f33cd467..332de1545 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -30,7 +30,7 @@ module Text.Pandoc.Writers.HTML (
   ) where
 import Control.Monad.State.Strict
 import Data.Char (ord)
-import Data.List (intercalate, intersperse, partition, delete, (\\))
+import Data.List (intercalate, intersperse, partition, delete, (\\), foldl')
 import Data.List.NonEmpty (NonEmpty((:|)))
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import qualified Data.Set as Set
@@ -544,7 +544,7 @@ tagWithAttributes opts html5 selfClosing tagname attr =
 
 addAttrs :: PandocMonad m
          => WriterOptions -> Attr -> Html -> StateT WriterState m Html
-addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr
+addAttrs opts attr h = foldl' (!) h <$> attrsToHtml opts attr
 
 toAttrs :: PandocMonad m
         => [(Text, Text)] -> StateT WriterState m [Attribute]
@@ -926,7 +926,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
                                    numstyle']
                    else [])
   l <- ordList opts contents
-  return $ foldl (!) l attribs
+  return $ foldl' (!) l attribs
 blockToHtml opts (DefinitionList lst) = do
   contents <- mapM (\(term, defs) ->
                   do term' <- liftM H.dt $ inlineListToHtml opts term
@@ -1407,7 +1407,7 @@ inlineToHtml opts inline = do
                               Just "audio" -> mediaTag H5.audio "Audio"
                               Just _       -> (H5.embed, [])
                               _            -> imageTag
-                        return $ foldl (!) tag $ attributes ++ specAttrs
+                        return $ foldl' (!) tag $ attributes ++ specAttrs
                         -- note:  null title included, as in Markdown.pl
     (Note contents) -> do
                         notes <- gets stNotes
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index d01e13db4..54d042332 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -16,7 +16,7 @@ reStructuredText:  <http://docutils.sourceforge.net/rst.html>
 module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
 import Control.Monad.State.Strict
 import Data.Char (isSpace)
-import Data.List (transpose, intersperse)
+import Data.List (transpose, intersperse, foldl')
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -509,7 +509,7 @@ flatten outer
   | null contents = [outer]
   | otherwise     = combineAll contents
   where contents = dropInlineParent outer
-        combineAll = foldl combine []
+        combineAll = foldl' combine []
 
         combine :: [Inline] -> Inline -> [Inline]
         combine f i =
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 53da70f84..9d695563f 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -15,7 +15,7 @@ module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
 import Data.Char (chr, ord)
-import Data.List (maximumBy, transpose)
+import Data.List (maximumBy, transpose, foldl')
 import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
@@ -271,7 +271,7 @@ tableAnyRowToTexinfo :: PandocMonad m
                      -> [[Block]]
                      -> TI m (Doc Text)
 tableAnyRowToTexinfo itemtype aligns cols =
-  (literal itemtype $$) . foldl (\row item -> row $$
+  (literal itemtype $$) . foldl' (\row item -> row $$
   (if isEmpty row then empty else text " @tab ") <> item) empty <$> zipWithM alignedBlock aligns cols
 
 alignedBlock :: PandocMonad m
-- 
cgit v1.2.3


From 805d12ac9cc91913cbb6275e8ef94f49985df5e1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 15 Mar 2021 14:21:52 -0700
Subject: Remove an unneeded import

---
 src/Text/Pandoc/Readers/HTML/TagCategories.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
index 67aba1cb1..b7bd40fee 100644
--- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs
+++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs
@@ -23,7 +23,6 @@ where
 
 import Data.Set (Set, fromList, unions)
 import Data.Text (Text)
-import Data.List (foldl')
 
 eitherBlockOrInline :: Set Text
 eitherBlockOrInline = fromList
-- 
cgit v1.2.3


From 87538966a029358d42bc71ab0cd4c1ed9e125520 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 16 Mar 2021 13:05:29 -0700
Subject: Removed unused LANGUAGE pragmas.

---
 src/Text/Pandoc/Writers/LaTeX/Caption.hs | 1 -
 src/Text/Pandoc/XML/Light/Proc.hs        | 1 -
 2 files changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
index 7b9ce186f..ab4d365cc 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Caption.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Writers.LaTeX.Caption
    Copyright   : Copyright (C) 2006-2021 John MacFarlane
diff --git a/src/Text/Pandoc/XML/Light/Proc.hs b/src/Text/Pandoc/XML/Light/Proc.hs
index b53c4b545..a1fb200ff 100644
--- a/src/Text/Pandoc/XML/Light/Proc.hs
+++ b/src/Text/Pandoc/XML/Light/Proc.hs
@@ -1,5 +1,4 @@
 {-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.XML.Light.Proc
    Copyright   : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane
-- 
cgit v1.2.3


From 7bf4be04b0c04ccfd02a2a907071b8747380904f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Mar 2021 09:10:44 -0700
Subject: Fix regression with `tex_math_backslash` in Markdown reader.

Added regression test.  Closes #7155.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index dc94fc2d6..c836a896b 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1503,7 +1503,7 @@ inline = do
      '$'     -> math
      '~'     -> strikeout <|> subscript
      '<'     -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign
-     '\\'    -> escapedNewline <|> escapedChar <|> rawLaTeXInline'
+     '\\'    -> math <|> escapedNewline <|> escapedChar <|> rawLaTeXInline'
      '@'     -> cite <|> exampleRef
      '"'     -> smart
      '\''    -> smart
-- 
cgit v1.2.3


From c3f9e8c12256d19ed6c89d15470945855ee16a94 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Mar 2021 22:31:20 -0700
Subject: Docx writer: make nsid in abstractNum deterministic.

Previously we assigned a random number (though in a deterministic
way).  But changes in the random package mean we get different
results now on different architectures, even with the same random
seed. We don't need random values; so now we just assign a value
based on the list number id, which is guaranteed to be unique
to the list marker.
---
 src/Text/Pandoc/Writers/Docx.hs                     |  10 ++++------
 test/docx/golden/block_quotes.docx                  | Bin 10071 -> 10067 bytes
 test/docx/golden/codeblock.docx                     | Bin 9920 -> 9916 bytes
 test/docx/golden/comments.docx                      | Bin 10258 -> 10254 bytes
 test/docx/golden/custom_style_no_reference.docx     | Bin 10021 -> 10017 bytes
 test/docx/golden/custom_style_preserve.docx         | Bin 10650 -> 10646 bytes
 test/docx/golden/custom_style_reference.docx        | Bin 12403 -> 12400 bytes
 test/docx/golden/definition_list.docx               | Bin 9920 -> 9916 bytes
 .../docx/golden/document-properties-short-desc.docx | Bin 9925 -> 9922 bytes
 test/docx/golden/document-properties.docx           | Bin 10404 -> 10399 bytes
 test/docx/golden/headers.docx                       | Bin 10059 -> 10055 bytes
 test/docx/golden/image.docx                         | Bin 26736 -> 26733 bytes
 test/docx/golden/inline_code.docx                   | Bin 9859 -> 9855 bytes
 test/docx/golden/inline_formatting.docx             | Bin 10038 -> 10035 bytes
 test/docx/golden/inline_images.docx                 | Bin 26793 -> 26789 bytes
 test/docx/golden/link_in_notes.docx                 | Bin 10081 -> 10077 bytes
 test/docx/golden/links.docx                         | Bin 10251 -> 10248 bytes
 test/docx/golden/lists.docx                         | Bin 10332 -> 10314 bytes
 test/docx/golden/lists_continuing.docx              | Bin 10123 -> 10110 bytes
 test/docx/golden/lists_multiple_initial.docx        | Bin 10210 -> 10192 bytes
 test/docx/golden/lists_restarting.docx              | Bin 10122 -> 10108 bytes
 test/docx/golden/nested_anchors_in_header.docx      | Bin 10216 -> 10212 bytes
 test/docx/golden/notes.docx                         | Bin 10028 -> 10024 bytes
 test/docx/golden/raw-blocks.docx                    | Bin 9960 -> 9956 bytes
 test/docx/golden/raw-bookmarks.docx                 | Bin 10094 -> 10090 bytes
 test/docx/golden/table_one_row.docx                 | Bin 9906 -> 9903 bytes
 test/docx/golden/table_with_list_cell.docx          | Bin 10225 -> 10212 bytes
 test/docx/golden/tables.docx                        | Bin 10241 -> 10238 bytes
 test/docx/golden/track_changes_deletion.docx        | Bin 9903 -> 9899 bytes
 test/docx/golden/track_changes_insertion.docx       | Bin 9886 -> 9882 bytes
 test/docx/golden/track_changes_move.docx            | Bin 9920 -> 9916 bytes
 .../golden/track_changes_scrubbed_metadata.docx     | Bin 10032 -> 10028 bytes
 test/docx/golden/unicode.docx                       | Bin 9845 -> 9841 bytes
 test/docx/golden/verbatim_subsuper.docx             | Bin 9892 -> 9888 bytes
 34 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index c47bfb2ea..20bcd0324 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,6 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import System.Random (randomRs, mkStdGen)
 import Text.Pandoc.BCP47 (getLang, renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
@@ -695,8 +694,7 @@ baseListId = 1000
 mkNumbering :: [ListMarker] -> [Element]
 mkNumbering lists =
   elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
-    where elts = zipWith mkAbstractNum (ordNub lists) $
-                     randomRs (0x10000000, 0xFFFFFFFF) $ mkStdGen 1848
+    where elts = map mkAbstractNum (ordNub lists)
 
 maxListLevel :: Int
 maxListLevel = 8
@@ -713,10 +711,10 @@ mkNum marker numid =
               $ mknode "w:startOverride" [("w:val",tshow start)] ())
                 [0..maxListLevel]
 
-mkAbstractNum :: ListMarker -> Integer -> Element
-mkAbstractNum marker nsid =
+mkAbstractNum :: ListMarker -> Element
+mkAbstractNum marker =
   mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
-    $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] ()
+    $ mknode "w:nsid" [("w:val", "A" <> listMarkerToId marker)] ()
     : mknode "w:multiLevelType" [("w:val","multilevel")] ()
     : map (mkLvl marker)
       [0..maxListLevel]
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index ed7d1165c..af1b33ca2 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 07ae75676..f748f1f01 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index e5f034378..ac9d56680 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index 174942135..f27727edd 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index b5c31a851..1da499d6a 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index c42ca1b05..4d2fe245d 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 1cb4c1fd7..f386fcea3 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 7122456ea..debe9a3f6 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 616ba0f81..cd17400bf 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index c30dcdee9..d3af8a3dd 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 8a704b41e..1c4e738c0 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index b1906c8c4..35f43f19f 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 8adf1cf75..8de3f70f6 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 584117503..e76558be9 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 8859fe55c..88bae8142 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index b80f3b3ba..455adcfc7 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 35beed68a..081d9ddba 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 2c29fd674..fc9213fc5 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 10a948886..b636fd3f8 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 5b90e74a0..252623215 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index cc81b46d1..a8c3f5478 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 1394dc442..43e650ebd 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 0d1688694..fe4f7845b 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index be1caef2d..45e90608f 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 1178f7c6e..6eaea2ac2 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 84be2720f..45a97ccaa 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 140366d8b..115a16a48 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 9cc7a075f..247725aaa 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index f8b8dcfde..3863afef2 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 1c3baf0bf..5c848b63a 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 28686970d..e0c843713 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index 7051cefbd..78a773bdd 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index 9df631640..c66a45b74 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From fd76e605cde2639d6bb1b6891dd4b097a6eded01 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 21:02:05 -0700
Subject: T.P.Readers.Odt.StyleReader: rewrite foldr1 use as foldr.

This avoids a partial function.
---
 src/Text/Pandoc/Readers/Odt/StyleReader.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index b722aa07d..ca791ad1e 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -563,12 +563,13 @@ readListLevelStyle levelType =      readAttr NsText "level"
 
 --
 chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
-chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
-                                    | otherwise    = Just ( F.foldr1 select ls )
+chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls
   where
-   select ( ListLevelStyle       t1            p1          s1          f1          b1 )
-          ( ListLevelStyle       t2            p2          s2          f2          _ )
-        =   ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
+   select l Nothing = Just l
+   select ( ListLevelStyle t1 p1 s1 f1 b1 )
+          ( Just ( ListLevelStyle t2 p2 s2 f2 _ ))
+        =   Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2)
+                                  (selectLinf f1 f2) b1
    select' LltNumbered _           = LltNumbered
    select' _           LltNumbered = LltNumbered
    select' _           _           = LltBullet
-- 
cgit v1.2.3


From 67e173bda116ba9793dd625f782a12ab8e35839b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 21:10:22 -0700
Subject: Remove another foldr1 partial function use.

---
 src/Text/Pandoc/Readers/Txt2Tags.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 08083b177..0ce8e286f 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -261,7 +261,7 @@ table = try $ do
   rows <- many1 (many commentLine *> tableRow)
   let columns = transpose rows
   let ncolumns = length columns
-  let aligns = map (foldr1 findAlign . map fst) columns
+  let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns
   let rows' = map (map snd) rows
   let size = maximum (map length rows')
   let rowsPadded = map (pad size) rows'
@@ -278,10 +278,11 @@ pad :: (Monoid a) => Int -> [a] -> [a]
 pad n xs = xs ++ replicate (n - length xs) mempty
 
 
-findAlign :: Alignment -> Alignment -> Alignment
-findAlign x y
-  | x == y = x
-  | otherwise = AlignDefault
+findAlign :: (Alignment, a) -> Maybe Alignment -> Maybe Alignment
+findAlign (x,_) (Just y)
+  | x == y = Just x
+  | otherwise = Just AlignDefault
+findAlign (x,_) Nothing = Just x
 
 headerRow :: T2T [(Alignment, Blocks)]
 headerRow = genericRow (string "||")
-- 
cgit v1.2.3


From 1da62083156cf493771274164be67a2bead196dc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 21:30:59 -0700
Subject: Rewrite a foldl1 as a foldl'.

---
 src/Text/Pandoc/Readers/HTML.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b73c138ab..c3e68afd8 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -32,6 +32,7 @@ import Data.Char (isAlphaNum, isLetter)
 import Data.Default (Default (..), def)
 import Data.Foldable (for_)
 import Data.List.Split (splitWhen)
+import Data.List (foldl')
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Monoid (First (..))
@@ -360,7 +361,10 @@ pDefListItem = try $ do
   terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
   defs  <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
   skipMany nonItem
-  let term = foldl1 (\x y -> x <> B.linebreak <> y) $ map trimInlines terms
+  let term = foldl' (\x y -> if null x
+                                then trimInlines y
+                                else x <> B.linebreak <> trimInlines y)
+                    mempty terms
   return (term, map (fixPlains True) defs)
 
 fixPlains :: Bool -> Blocks -> Blocks
-- 
cgit v1.2.3


From f0e4b9cc3cde01d64e25d331c5b4f3d62d2129b5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 21:37:56 -0700
Subject: Require safe >= 0.3.18 and remove cpp.

---
 pandoc.cabal                     | 2 +-
 src/Text/Pandoc/Readers/LaTeX.hs | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 5936d113f..e46518e96 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -477,7 +477,7 @@ library
                  parsec                >= 3.1      && < 3.2,
                  process               >= 1.2.3    && < 1.7,
                  random                >= 1        && < 1.3,
-                 safe                  >= 0.3      && < 0.4,
+                 safe                  >= 0.3.18   && < 0.4,
                  scientific            >= 0.3      && < 0.4,
                  skylighting           >= 0.10.4.1 && < 0.11,
                  skylighting-core      >= 0.10.4.1 && < 0.11,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index ceac261d2..21a563bed 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE CPP                   #-}
 {-# LANGUAGE OverloadedStrings     #-}
 {-# LANGUAGE PatternGuards         #-}
 {-# LANGUAGE ScopedTypeVariables   #-}
@@ -97,11 +96,7 @@ parseLaTeX = do
   let doc' = doc bs
   let headerLevel (Header n _ _) = [n]
       headerLevel _              = []
-#if MIN_VERSION_safe(0,3,18)
   let bottomLevel = minimumBound 1 $ query headerLevel doc'
-#else
-  let bottomLevel = minimumDef 1 $ query headerLevel doc'
-#endif
   let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
       adjustHeaders _ x                   = x
   let (Pandoc _ bs') =
-- 
cgit v1.2.3


From 3428248debb14066e7fb1ef216927d3d62e3a43a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 18 Mar 2021 23:01:12 -0700
Subject: Use minimumDef instead of minimum (partial function).

---
 src/Text/Pandoc/Readers/LaTeX.hs             | 2 +-
 src/Text/Pandoc/Readers/Org/Blocks.hs        | 4 ++--
 src/Text/Pandoc/Readers/Textile.hs           | 4 +++-
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 7 ++-----
 4 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 21a563bed..32f7fc5af 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -96,7 +96,7 @@ parseLaTeX = do
   let doc' = doc bs
   let headerLevel (Header n _ _) = [n]
       headerLevel _              = []
-  let bottomLevel = minimumBound 1 $ query headerLevel doc'
+  let bottomLevel = minimumDef 1 $ query headerLevel doc'
   let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
       adjustHeaders _ x                   = x
   let (Pandoc _ bs') =
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index d1aff701e..c6f27118b 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -39,6 +39,7 @@ import Data.Functor (($>))
 import Data.List (foldl', intersperse)
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Text (Text)
+import Safe (minimumDef)
 
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
@@ -542,8 +543,7 @@ include = try $ do
     in case (minlvl >>= safeRead :: Maybe Int) of
          Nothing -> blks
          Just lvl -> let levels = Walk.query headerLevel blks
-                         -- CAVE: partial function in else
-                         curMin = if null levels then 0 else minimum levels
+                         curMin = minimumDef 0 levels
                      in Walk.walk (shiftHeader (curMin - lvl)) blks
 
   headerLevel :: Block -> [Int]
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 99238c7f0..50ffd36ec 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,6 +53,7 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Safe (minimumDef)
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: PandocMonad m
@@ -376,7 +377,8 @@ table = try $ do
                                 (toprow, rest)
                              _ -> (mempty, rawrows)
   let nbOfCols = maximum $ map length (headers:rows)
-  let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
+  let aligns = map (minimumDef AlignDefault) $
+                transpose $ map (map (snd . fst)) (headers:rows)
   let toRow = Row nullAttr . map B.simpleCell
       toHeaderRow l = [toRow l | not (null l)]
   return $ B.table (B.simpleCaption $ B.plain caption)
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 5caeb0753..b26a7ff3e 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -50,6 +50,7 @@ import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
 import Text.Pandoc.Shared (tshow)
 import Skylighting (fromColor)
+import Safe (minimumDef)
 
 -- |The 'EMU' type is used to specify sizes in English Metric Units.
 type EMU = Integer
@@ -1427,11 +1428,7 @@ presentationToRels pres@(Presentation _ slides) = do
   -- all relWithoutSlide rels (unless they're 1)
   -- 3. If we have a notesmaster slide, we make space for that as well.
 
-  let minRelNotOne = case filter (1<) $ map relId relsWeKeep of
-        [] -> 0                 -- doesn't matter in this case, since
-                                -- there will be nothing to map the
-                                -- function over
-        l  -> minimum l
+  let minRelNotOne = minimumDef 0 $ filter (1 <) $ map relId relsWeKeep
 
       modifyRelNum :: Int -> Int
       modifyRelNum 1 = 1
-- 
cgit v1.2.3


From a31731b8e2825c5bc8d7fcc9a61ce92b9d28d040 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 10:11:08 -0700
Subject: Docx reader: Don't reimplement NonEmpty.

---
 src/Text/Pandoc/Readers/Docx.hs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 00de6a0cd..22dd54193 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -85,6 +85,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
+import Data.List.NonEmpty (nonEmpty)
 
 readDocx :: PandocMonad m
          => ReaderOptions
@@ -648,11 +649,6 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
   cells <- mapM rowToBlocksList rows
 
   let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
-      -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out
-      -- our own, see
-      -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155
-      nonEmpty [] = Nothing
-      nonEmpty l  = Just l
       rowLength :: Docx.Row -> Int
       rowLength (Docx.Row c) = length c
 
-- 
cgit v1.2.3


From 8d5116381b20442bb3fa58dac1ef7d44db618823 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 10:30:32 -0700
Subject: Use NonEmpty instead of minimumDef.

---
 src/Text/Pandoc/Readers/LaTeX.hs             | 4 ++--
 src/Text/Pandoc/Readers/Org/Blocks.hs        | 4 ++--
 src/Text/Pandoc/Readers/Textile.hs           | 4 ++--
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 5 +++--
 4 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 32f7fc5af..851756065 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -68,7 +68,7 @@ import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands,
                                          listingsLanguage)
 import Text.Pandoc.Shared
 import Text.Pandoc.Walk
-import Safe
+import Data.List.NonEmpty (nonEmpty)
 
 -- for debugging:
 -- import Text.Pandoc.Extensions (getDefaultExtensions)
@@ -96,7 +96,7 @@ parseLaTeX = do
   let doc' = doc bs
   let headerLevel (Header n _ _) = [n]
       headerLevel _              = []
-  let bottomLevel = minimumDef 1 $ query headerLevel doc'
+  let bottomLevel = maybe 1 minimum $ nonEmpty $ query headerLevel doc'
   let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils
       adjustHeaders _ x                   = x
   let (Pandoc _ bs') =
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index c6f27118b..883434cdc 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -39,7 +39,7 @@ import Data.Functor (($>))
 import Data.List (foldl', intersperse)
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Text (Text)
-import Safe (minimumDef)
+import Data.List.NonEmpty (nonEmpty)
 
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
@@ -543,7 +543,7 @@ include = try $ do
     in case (minlvl >>= safeRead :: Maybe Int) of
          Nothing -> blks
          Just lvl -> let levels = Walk.query headerLevel blks
-                         curMin = minimumDef 0 levels
+                         curMin = maybe 0 minimum $ nonEmpty levels
                      in Walk.walk (shiftHeader (curMin - lvl)) blks
 
   headerLevel :: Block -> [Int]
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 50ffd36ec..e26b902f1 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,7 +53,7 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared (crFilter, trim, tshow)
-import Safe (minimumDef)
+import Data.List.NonEmpty (nonEmpty)
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: PandocMonad m
@@ -377,7 +377,7 @@ table = try $ do
                                 (toprow, rest)
                              _ -> (mempty, rawrows)
   let nbOfCols = maximum $ map length (headers:rows)
-  let aligns = map (minimumDef AlignDefault) $
+  let aligns = map (maybe AlignDefault minimum . nonEmpty) $
                 transpose $ map (map (snd . fst)) (headers:rows)
   let toRow = Row nullAttr . map B.simpleCell
       toHeaderRow l = [toRow l | not (null l)]
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index b26a7ff3e..4dbf32c4e 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -50,7 +50,7 @@ import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
 import Text.Pandoc.Shared (tshow)
 import Skylighting (fromColor)
-import Safe (minimumDef)
+import Data.List.NonEmpty (nonEmpty)
 
 -- |The 'EMU' type is used to specify sizes in English Metric Units.
 type EMU = Integer
@@ -1428,7 +1428,8 @@ presentationToRels pres@(Presentation _ slides) = do
   -- all relWithoutSlide rels (unless they're 1)
   -- 3. If we have a notesmaster slide, we make space for that as well.
 
-  let minRelNotOne = minimumDef 0 $ filter (1 <) $ map relId relsWeKeep
+  let minRelNotOne = maybe 0 minimum $ nonEmpty
+                                     $ filter (1 <) $ map relId relsWeKeep
 
       modifyRelNum :: Int -> Int
       modifyRelNum 1 = 1
-- 
cgit v1.2.3


From 4002c35a9184ecc1c9a6553e9ee28e283cb1fd0a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 11:55:59 -0700
Subject: Protect partial uses of maximum with NonEmpty.

---
 src/Text/Pandoc/Readers/DocBook.hs                 |  6 ++--
 src/Text/Pandoc/Readers/HTML/Table.hs              |  3 +-
 src/Text/Pandoc/Readers/Haddock.hs                 |  3 +-
 src/Text/Pandoc/Readers/Markdown.hs                |  3 +-
 src/Text/Pandoc/Readers/Textile.hs                 |  4 +--
 src/Text/Pandoc/Readers/Txt2Tags.hs                | 22 +++++++-----
 src/Text/Pandoc/Writers/AsciiDoc.hs                |  3 +-
 src/Text/Pandoc/Writers/ConTeXt.hs                 |  6 ++--
 src/Text/Pandoc/Writers/DokuWiki.hs                |  4 ++-
 src/Text/Pandoc/Writers/Man.hs                     |  4 +--
 src/Text/Pandoc/Writers/Markdown.hs                | 37 ++++++++++---------
 src/Text/Pandoc/Writers/Markdown/Inline.hs         |  9 ++---
 src/Text/Pandoc/Writers/Ms.hs                      |  4 +--
 src/Text/Pandoc/Writers/Muse.hs                    |  9 +++--
 src/Text/Pandoc/Writers/Org.hs                     |  5 +--
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       | 42 +++++++++++-----------
 src/Text/Pandoc/Writers/Powerpoint/Presentation.hs |  5 ++-
 src/Text/Pandoc/Writers/RST.hs                     |  6 ++--
 src/Text/Pandoc/Writers/Shared.hs                  |  6 ++--
 src/Text/Pandoc/Writers/Texinfo.hs                 |  9 +++--
 src/Text/Pandoc/Writers/ZimWiki.hs                 |  4 ++-
 21 files changed, 108 insertions(+), 86 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index d38b07864..6f5bb0ad4 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -18,6 +18,7 @@ import Data.Either (rights)
 import Data.Foldable (asum)
 import Data.Generics
 import Data.List (intersperse,elemIndex)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe,mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -949,9 +950,8 @@ parseBlock (Elem e) =
                                                      (x >= '0' && x <= '9')
                                                       || x == '.') w
                             if n > 0 then Just n else Nothing
-                      let numrows = case bodyrows of
-                                         [] -> 0
-                                         xs -> maximum $ map length xs
+                      let numrows = maybe 0 maximum $ nonEmpty
+                                                    $ map length bodyrows
                       let aligns = case colspecs of
                                      [] -> replicate numrows AlignDefault
                                      cs -> map toAlignment cs
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 6179ea8e7..ad0b51253 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where
 
 import Control.Applicative ((<|>))
 import Data.Maybe (fromMaybe)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import Text.HTML.TagSoup
 import Text.Pandoc.Builder (Blocks)
@@ -216,7 +217,7 @@ normalize widths head' bodies foot = do
   let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot
   let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs
   let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells
-  let ncols = maximum (map rowLength rows)
+  let ncols = maybe 0 maximum $ nonEmpty $ map rowLength rows
   let tblType = tableType (map rowCells rows)
   -- fail on empty table
   if null rows
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 25d69f040..48454e353 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Readers.Haddock
 
 import Control.Monad.Except (throwError)
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text, unpack)
 import qualified Data.Text as T
@@ -92,7 +93,7 @@ docHToBlocks d' =
                   then ([], map toCells bodyRows)
                   else (toCells (head headerRows),
                         map toCells (tail headerRows ++ bodyRows))
-             colspecs = replicate (maximum (map length body))
+             colspecs = replicate (maybe 0 maximum (nonEmpty (map length body)))
                              (AlignDefault, ColWidthDefault)
          in  B.table B.emptyCaption
                      colspecs
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index c836a896b..a86286b3a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -22,6 +22,7 @@ import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace)
 import Data.List (transpose, elemIndex, sortOn, foldl')
+import Data.List.NonEmpty (NonEmpty(..))
 import qualified Data.Map as M
 import Data.Maybe
 import qualified Data.Set as Set
@@ -1364,7 +1365,7 @@ pipeTable = try $ do
   lines' <- many pipeTableRow
   let lines'' = map (take (length aligns) <$>) lines'
   let maxlength = maximum $
-       map (\x -> T.length . stringify $ runF x def) (heads' : lines'')
+       fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'')
   numColumns <- getOption readerColumns
   let widths = if maxlength > numColumns
                   then map (\len ->
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index e26b902f1..8d7900de4 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -39,6 +39,7 @@ import Control.Monad (guard, liftM)
 import Control.Monad.Except (throwError)
 import Data.Char (digitToInt, isUpper)
 import Data.List (intersperse, transpose, foldl')
+import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -53,7 +54,6 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared (crFilter, trim, tshow)
-import Data.List.NonEmpty (nonEmpty)
 
 -- | Parse a Textile text and return a Pandoc document.
 readTextile :: PandocMonad m
@@ -376,7 +376,7 @@ table = try $ do
                              (toprow:rest) | any (fst . fst) toprow ->
                                 (toprow, rest)
                              _ -> (mempty, rawrows)
-  let nbOfCols = maximum $ map length (headers:rows)
+  let nbOfCols = maximum $ fmap length (headers :| rows)
   let aligns = map (maybe AlignDefault minimum . nonEmpty) $
                 transpose $ map (map (snd . fst)) (headers:rows)
   let toRow = Row nullAttr . map B.simpleCell
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 0ce8e286f..f27a3fc2c 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
 {- |
    Module      : Text.Pandoc.Readers.Txt2Tags
    Copyright   : Copyright (C) 2014 Matthew Pickering
@@ -19,6 +20,7 @@ import Control.Monad.Except (catchError, throwError)
 import Control.Monad.Reader (Reader, asks, runReader)
 import Data.Default
 import Data.List (intercalate, transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -53,14 +55,16 @@ getT2TMeta = do
     inps <- P.getInputFiles
     outp <- fromMaybe "" <$> P.getOutputFile
     curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
-    let getModTime = fmap (formatTime defaultTimeLocale "%T") .
-                       P.getModificationTime
-    curMtime <- case inps of
-                  [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime
-                  _ -> catchError
-                        (maximum <$> mapM getModTime inps)
-                        (const (return ""))
-    return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp
+    curMtime <- catchError
+                 ((nonEmpty <$> mapM P.getModificationTime inps) >>=
+                    \case
+                       Nothing ->
+                         formatTime defaultTimeLocale "%T" <$> P.getZonedTime
+                       Just ts -> return $
+                         formatTime defaultTimeLocale "%T" $ maximum ts)
+                (const (return ""))
+    return $ T2TMeta (T.pack curDate) (T.pack curMtime)
+                     (intercalate ", " inps) outp
 
 -- | Read Txt2Tags from an input string returning a Pandoc document
 readTxt2Tags :: PandocMonad m
@@ -263,7 +267,7 @@ table = try $ do
   let ncolumns = length columns
   let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns
   let rows' = map (map snd) rows
-  let size = maximum (map length rows')
+  let size = maybe 0 maximum $ nonEmpty $ map length rows'
   let rowsPadded = map (pad size) rows'
   let headerPadded = if null tableHeader then mempty else pad size tableHeader
   let toRow = Row nullAttr . map B.simpleCell
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index b4ef7c8b9..69e608ef9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -22,6 +22,7 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where
 import Control.Monad.State.Strict
 import Data.Char (isPunctuation, isSpace)
 import Data.List (intercalate, intersperse)
+import Data.List.NonEmpty (NonEmpty(..))
 import Data.Maybe (fromMaybe, isJust)
 import qualified Data.Set as Set
 import qualified Data.Text as T
@@ -274,7 +275,7 @@ blockToAsciiDoc opts block@(Table _ blkCapt specs thead tbody tfoot) = do
   let colwidth = if writerWrapText opts == WrapAuto
                     then writerColumns opts
                     else 100000
-  let maxwidth = maximum $ map offset (head':rows')
+  let maxwidth = maximum $ fmap offset (head' :| rows')
   let body = if maxwidth > colwidth then vsep rows' else vcat rows'
   let border = separator <> text "==="
   return $ 
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 4d44842e2..1c56388ed 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -16,6 +16,7 @@ module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
 import Control.Monad.State.Strict
 import Data.Char (ord, isDigit)
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -228,8 +229,9 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
                         Period       -> "stopper=."
                         OneParen     -> "stopper=)"
                         TwoParens    -> "left=(,stopper=)"
-    let width = maximum $ map T.length $ take (length contents)
-                          (orderedListMarkers (start, style', delim))
+    let width = maybe 0 maximum $ nonEmpty $ map T.length $
+                  take (length contents)
+                       (orderedListMarkers (start, style', delim))
     let width' = (toEnum width + 1) / 2
     let width'' = if width' > (1.5 :: Double)
                      then "width=" <> tshow width' <> "em"
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 7df47c912..602c70ebe 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -27,6 +27,7 @@ import Control.Monad.Reader (ReaderT, asks, local, runReaderT)
 import Control.Monad.State.Strict (StateT, evalStateT)
 import Data.Default (Default (..))
 import Data.List (transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -172,7 +173,8 @@ blockToDokuWiki opts (Table _ blkCapt specs thead tbody tfoot) = do
                  then return []
                  else zipWithM (tableItemToDokuWiki opts) aligns headers
   rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows
-  let widths = map (maximum . map T.length) $ transpose (headers':rows')
+  let widths = map (maybe 0 maximum . nonEmpty . map T.length)
+                   $ transpose (headers':rows')
   let padTo (width, al) s =
           case width - T.length s of
                x | x > 0 ->
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index edb70f53e..87b2d8d21 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -16,6 +16,7 @@ Conversion of 'Pandoc' documents to roff man page format.
 module Text.Pandoc.Writers.Man ( writeMan ) where
 import Control.Monad.State.Strict
 import Data.List (intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -175,8 +176,7 @@ blockToMan opts (BulletList items) = do
   return (vcat contents)
 blockToMan opts (OrderedList attribs items) = do
   let markers = take (length items) $ orderedListMarkers attribs
-  let indent = 1 +
-                     maximum (map T.length markers)
+  let indent = 1 + maybe 0 maximum (nonEmpty (map T.length markers))
   contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
               zip markers items
   return (vcat contents)
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 533bcc071..4d9f3d5b0 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -24,6 +24,7 @@ import Control.Monad.Reader
 import Control.Monad.State.Strict
 import Data.Default
 import Data.List (intersperse, sortOn, transpose)
+import Data.List.NonEmpty (nonEmpty, NonEmpty(..))
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, mapMaybe)
 import qualified Data.Set as Set
@@ -492,19 +493,20 @@ blockToMarkdown' opts (CodeBlock attribs str) = do
            | isEnabled Ext_fenced_code_blocks opts ->
           tildes <> attrs <> cr <> literal str <> cr <> tildes <> blankline
      _ -> nest (writerTabStop opts) (literal str) <> blankline
-   where endline c = literal $ case [T.length ln
-                                   | ln <- map trim (T.lines str)
-                                   , T.pack [c,c,c] `T.isPrefixOf` ln
-                                   , T.all (== c) ln] of
-                               [] -> T.replicate 3 $ T.singleton c
-                               xs -> T.replicate (maximum xs + 1) $ T.singleton c
-         backticks = endline '`'
-         tildes = endline '~'
-         attrs  = if isEnabled Ext_fenced_code_attributes opts
-                     then nowrap $ " " <> attrsToMarkdown attribs
-                     else case attribs of
-                                (_,cls:_,_) -> " " <> literal cls
-                                _             -> empty
+   where
+     endlineLen c = maybe 3 ((+1) . maximum) $ nonEmpty $
+                        [T.length ln
+                         | ln <- map trim (T.lines str)
+                         , T.pack [c,c,c] `T.isPrefixOf` ln
+                         , T.all (== c) ln]
+     endline c = literal $ T.replicate (endlineLen c) $ T.singleton c
+     backticks = endline '`'
+     tildes = endline '~'
+     attrs  = if isEnabled Ext_fenced_code_attributes opts
+                 then nowrap $ " " <> attrsToMarkdown attribs
+                 else case attribs of
+                            (_,cls:_,_) -> " " <> literal cls
+                            _             -> empty
 blockToMarkdown' opts (BlockQuote blocks) = do
   variant <- asks envVariant
   -- if we're writing literate haskell, put a space before the bird tracks
@@ -517,7 +519,7 @@ blockToMarkdown' opts (BlockQuote blocks) = do
   return $ prefixed leader contents <> blankline
 blockToMarkdown' opts t@(Table _ blkCapt specs thead tbody tfoot) = do
   let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
-  let numcols = maximum (length aligns : length widths :
+  let numcols = maximum (length aligns :| length widths :
                            map length (headers:rows))
   caption' <- inlineListToMarkdown opts caption
   let caption''
@@ -619,7 +621,8 @@ pipeTable headless aligns rawHeaders rawRows = do
       blockFor AlignCenter x y = cblock (x + 2) (sp <> y <> sp) <> lblock 0 empty
       blockFor AlignRight  x y = rblock (x + 2) (y <> sp) <> lblock 0 empty
       blockFor _           x y = lblock (x + 2) (sp <> y) <> lblock 0 empty
-  let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows)
+  let widths = map (max 3 . maybe 3 maximum . nonEmpty . map offset) $
+                     transpose (rawHeaders : rawRows)
   let torow cs = nowrap $ literal "|" <>
                     hcat (intersperse (literal "|") $
                           zipWith3 blockFor aligns widths (map chomp cs))
@@ -653,11 +656,11 @@ pandocTable opts multiline headless aligns widths rawHeaders rawRows = do
   -- Number of characters per column necessary to output every cell
   -- without requiring a line break.
   -- The @+2@ is needed for specifying the alignment.
-  let numChars    = (+ 2) . maximum . map offset
+  let numChars    = (+ 2) . maybe 0 maximum . nonEmpty . map offset
   -- Number of characters per column necessary to output every cell
   -- without requiring a line break *inside a word*.
   -- The @+2@ is needed for specifying the alignment.
-  let minNumChars = (+ 2) . maximum . map minOffset
+  let minNumChars = (+ 2) . maybe 0 maximum . nonEmpty . map minOffset
   let columns = transpose (rawHeaders : rawRows)
   -- minimal column width without wrapping a single word
   let relWidth w col =
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index 19157701e..e35e1a0b9 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -17,6 +17,7 @@ import Control.Monad.Reader
 import Control.Monad.State.Strict
 import Data.Char (isAlphaNum, isDigit)
 import Data.List (find, intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
@@ -383,9 +384,7 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do
                    else "“" <> contents <> "”"
 inlineToMarkdown opts (Code attr str) = do
   let tickGroups = filter (T.any (== '`')) $ T.group str
-  let longest    = if null tickGroups
-                     then 0
-                     else maximum $ map T.length tickGroups
+  let longest    = maybe 0 maximum $ nonEmpty $ map T.length tickGroups
   let marker     = T.replicate (longest + 1) "`"
   let spacer     = if longest == 0 then "" else " "
   let attrs      = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
@@ -438,9 +437,7 @@ inlineToMarkdown opts (Math DisplayMath str) =
             (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts)
 inlineToMarkdown opts il@(RawInline f str) = do
   let tickGroups = filter (T.any (== '`')) $ T.group str
-  let numticks   = if null tickGroups
-                     then 1
-                     else 1 + maximum (map T.length tickGroups)
+  let numticks   = 1 + maybe 0 maximum (nonEmpty (map T.length tickGroups))
   variant <- asks envVariant
   let Format fmt = f
   let rawAttribInline = return $
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 48395c420..0ed7a8a64 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -23,6 +23,7 @@ module Text.Pandoc.Writers.Ms ( writeMs ) where
 import Control.Monad.State.Strict
 import Data.Char (isLower, isUpper, ord)
 import Data.List (intercalate, intersperse)
+import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Map as Map
 import Data.Maybe (catMaybes)
 import Data.Text (Text)
@@ -274,8 +275,7 @@ blockToMs opts (BulletList items) = do
   return (vcat contents)
 blockToMs opts (OrderedList attribs items) = do
   let markers = take (length items) $ orderedListMarkers attribs
-  let indent = 2 +
-                     maximum (map T.length markers)
+  let indent = 2 + maybe 0 maximum (nonEmpty (map T.length markers))
   contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $
               zip markers items
   setFirstPara
diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs
index bf3265107..d5100f43f 100644
--- a/src/Text/Pandoc/Writers/Muse.hs
+++ b/src/Text/Pandoc/Writers/Muse.hs
@@ -31,6 +31,7 @@ import Control.Monad.State.Strict
 import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace)
 import Data.Default
 import Data.List (intersperse, transpose)
+import Data.List.NonEmpty (nonEmpty, NonEmpty(..))
 import qualified Data.Set as Set
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -158,7 +159,8 @@ simpleTable caption headers rows = do
   caption' <- inlineListToMuse caption
   headers' <- mapM blockListToMuse headers
   rows' <- mapM (mapM blockListToMuse) rows
-  let widthsInChars = maximum . map offset <$> transpose (headers' : rows')
+  let widthsInChars = maybe 0 maximum . nonEmpty . map offset <$>
+                       transpose (headers' : rows')
   let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks
         where sep' = lblock (T.length sep) $ literal sep
   let makeRow sep = hpipeBlocks sep . zipWith lblock widthsInChars
@@ -238,7 +240,7 @@ blockToMuse (DefinitionList items) = do
           label' <- local (\env -> env { envOneLine = True, envAfterSpace = True }) $ inlineListToMuse' label
           let ind = offset' label' -- using Text.DocLayout.offset results in round trip failures
           hang ind (nowrap label') . vcat <$> mapM descriptionToMuse defs
-          where offset' d = maximum (0: map T.length
+          where offset' d = maximum (0 :| map T.length
                                          (T.lines $ render Nothing d))
         descriptionToMuse :: PandocMonad m
                           => [Block]
@@ -269,7 +271,8 @@ blockToMuse (Table _ blkCapt specs thead tbody tfoot) =
     (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
     blocksToDoc opts blocks =
       local (\env -> env { envOptions = opts }) $ blockListToMuse blocks
-    numcols = maximum (length aligns : length widths : map length (headers:rows))
+    numcols = maximum
+              (length aligns :| length widths : map length (headers:rows))
     isSimple = onlySimpleTableCells (headers : rows) && all (== 0) widths
 blockToMuse (Div _ bs) = flatBlockListToMuse bs
 blockToMuse Null = return empty
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 29d58a161..bb645eaf9 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -19,6 +19,7 @@ module Text.Pandoc.Writers.Org (writeOrg) where
 import Control.Monad.State.Strict
 import Data.Char (isAlphaNum, isDigit)
 import Data.List (intersect, intersperse, partition, transpose)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
@@ -163,7 +164,7 @@ blockToOrg (Table _ blkCapt specs thead tbody tfoot) =  do
                    else "#+caption: " <> caption''
   headers' <- mapM blockListToOrg headers
   rawRows <- mapM (mapM blockListToOrg) rows
-  let numChars = maximum . map offset
+  let numChars = maybe 0 maximum . nonEmpty . map offset
   -- FIXME: width is not being used.
   let widthsInChars =
        map numChars $ transpose (headers' : rawRows)
@@ -198,7 +199,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do
                     x         -> x
   let markers = take (length items) $ orderedListMarkers
                                       (start, Decimal, delim')
-  let maxMarkerLength = maximum $ map T.length markers
+  let maxMarkerLength = maybe 0 maximum . nonEmpty $ map T.length markers
   let markers' = map (\m -> let s = maxMarkerLength - T.length m
                             in  m <> T.replicate s " ") markers
   contents <- zipWithM orderedListItemToOrg markers' items
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 4dbf32c4e..0e515b3c2 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -475,15 +475,16 @@ registerLink link = do
   linkReg <- gets stLinkIds
   mediaReg <- gets stMediaIds
   hasSpeakerNotes <- curSlideHasSpeakerNotes
-  let maxLinkId = case M.lookup curSlideId linkReg of
-        Just mp -> case M.keys mp of
-          [] -> if hasSpeakerNotes then 2 else 1
-          ks -> maximum ks
-        Nothing -> if hasSpeakerNotes then 2 else 1
-      maxMediaId = case M.lookup curSlideId mediaReg of
-        Just [] -> if hasSpeakerNotes then 2 else 1
-        Just mInfos -> maximum $ map mInfoLocalId mInfos
-        Nothing -> if hasSpeakerNotes then 2 else 1
+  let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of
+        Just xs -> maximum xs
+        Nothing
+          | hasSpeakerNotes -> 2
+          | otherwise       -> 1
+      maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of
+        Just mInfos -> maximum $ fmap mInfoLocalId mInfos
+        Nothing
+          | hasSpeakerNotes -> 2
+          | otherwise       -> 1
       maxId = max maxLinkId maxMediaId
       slideLinks = case M.lookup curSlideId linkReg of
         Just mp -> M.insert (maxId + 1) link mp
@@ -498,20 +499,19 @@ registerMedia fp caption = do
   mediaReg <- gets stMediaIds
   globalIds <- gets stMediaGlobalIds
   hasSpeakerNotes <- curSlideHasSpeakerNotes
-  let maxLinkId = case M.lookup curSlideId linkReg of
-        Just mp -> case M.keys mp of
-          [] -> if hasSpeakerNotes then 2 else 1
-          ks -> maximum ks
-        Nothing -> if hasSpeakerNotes then 2 else 1
-      maxMediaId = case M.lookup curSlideId mediaReg of
-        Just [] -> if hasSpeakerNotes then 2 else 1
-        Just mInfos -> maximum $ map mInfoLocalId mInfos
-        Nothing -> if hasSpeakerNotes then 2 else 1
+  let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of
+          Just ks -> maximum ks
+          Nothing
+            | hasSpeakerNotes -> 2
+            | otherwise       -> 1
+      maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of
+          Just mInfos -> maximum $ fmap mInfoLocalId mInfos
+          Nothing
+            | hasSpeakerNotes -> 2
+            | otherwise       -> 1
       maxLocalId = max maxLinkId maxMediaId
 
-      maxGlobalId = case M.elems globalIds of
-        [] -> 0
-        ids -> maximum ids
+      maxGlobalId = maybe 0 maximum $ nonEmpty $ M.elems globalIds
 
   (imgBytes, mbMt) <- P.fetchItem $ T.pack fp
   let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ "." <> x))
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
index affec38aa..9246a93e9 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs
@@ -45,6 +45,7 @@ module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation
 import Control.Monad.Reader
 import Control.Monad.State
 import Data.List (intercalate)
+import Data.List.NonEmpty (nonEmpty)
 import Data.Default
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -363,9 +364,7 @@ inlineToParElems (Note blks) = do
     then return []
     else do
     notes <- gets stNoteIds
-    let maxNoteId = case M.keys notes of
-          [] -> 0
-          lst -> maximum lst
+    let maxNoteId = maybe 0 maximum $ nonEmpty $ M.keys notes
         curNoteId = maxNoteId + 1
     modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes }
     local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 54d042332..0b9fc8331 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
 import Control.Monad.State.Strict
 import Data.Char (isSpace)
 import Data.List (transpose, intersperse, foldl')
+import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -335,7 +336,7 @@ blockToRST (OrderedList (start, style', delim) items) = do
                    then replicate (length items) "#."
                    else take (length items) $ orderedListMarkers
                                               (start, style', delim)
-  let maxMarkerLength = maximum $ map T.length markers
+  let maxMarkerLength = maybe 0 maximum $ nonEmpty $ map T.length markers
   let markers' = map (\m -> let s = maxMarkerLength - T.length m
                             in  m <> T.replicate s " ") markers
   contents <- zipWithM orderedListItemToRST markers' items
@@ -761,8 +762,7 @@ simpleTable opts blocksToDoc headers rows = do
                    then return []
                    else fixEmpties <$> mapM (blocksToDoc opts) headers
   rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows
-  let numChars [] = 0
-      numChars xs = maximum . map offset $ xs
+  let numChars = maybe 0 maximum . nonEmpty . map offset
   let colWidths = map numChars $ transpose (headerDocs : rowDocs)
   let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths
   let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths)
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index fc3f8ff3a..91ecb310b 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -44,6 +44,7 @@ import Control.Monad (zipWithM)
 import Data.Aeson (ToJSON (..), encode)
 import Data.Char (chr, ord, isSpace)
 import Data.List (groupBy, intersperse, transpose, foldl')
+import Data.List.NonEmpty (NonEmpty(..), nonEmpty)
 import Data.Text.Conversions (FromText(..))
 import qualified Data.Map as M
 import qualified Data.Text as T
@@ -224,7 +225,7 @@ gridTable :: (Monad m, HasChars a)
           -> m (Doc a)
 gridTable opts blocksToDoc headless aligns widths headers rows = do
   -- the number of columns will be used in case of even widths
-  let numcols = maximum (length aligns : length widths :
+  let numcols = maximum (length aligns :| length widths :
                            map length (headers:rows))
   let officialWidthsInChars widths' = map (
                         (\x -> if x < 1 then 1 else x) .
@@ -253,8 +254,7 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do
   let handleFullWidths widths' = do
         rawHeaders' <- mapM (blocksToDoc opts) headers
         rawRows' <- mapM (mapM (blocksToDoc opts)) rows
-        let numChars [] = 0
-            numChars xs = maximum . map offset $ xs
+        let numChars = maybe 0 maximum . nonEmpty . map offset
         let minWidthsInChars =
                 map numChars $ transpose (rawHeaders' : rawRows')
         let widthsInChars' = zipWith max
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 9d695563f..0146fdfd8 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -16,6 +16,7 @@ import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
 import Data.Char (chr, ord)
 import Data.List (maximumBy, transpose, foldl')
+import Data.List.NonEmpty (nonEmpty)
 import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
@@ -238,9 +239,13 @@ blockToTexinfo (Table _ blkCapt specs thead tbody tfoot) = do
   colDescriptors <-
     if all (== 0) widths
        then do -- use longest entry instead of column widths
-            cols <- mapM (mapM (liftM (T.unpack . render Nothing . hcat) . mapM blockToTexinfo)) $
+            cols <- mapM (mapM (fmap (T.unpack . render Nothing . hcat) .
+                           mapM blockToTexinfo)) $
                         transpose $ heads : rows
-            return $ concatMap ((\x -> "{"++x++"} ") .  maximumBy (comparing length)) cols
+            return $ concatMap
+                ((\x -> "{"++x++"} ") .
+                        maybe "" (maximumBy (comparing length)) . nonEmpty)
+                cols
        else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths
   let tableBody = text ("@multitable " ++ colDescriptors) $$
                   headers $$
diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index 9e45f0417..fcf9e000d 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -20,6 +20,7 @@ import Control.Monad (zipWithM)
 import Control.Monad.State.Strict (StateT, evalStateT, gets, modify)
 import Data.Default (Default (..))
 import Data.List (transpose)
+import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Map as Map
 import Text.DocLayout (render, literal)
 import Data.Maybe (fromMaybe)
@@ -143,7 +144,8 @@ blockToZimWiki opts (Table _ blkCapt specs thead tbody tfoot) = do
                  then zipWithM (tableItemToZimWiki opts) aligns (head rows)
                  else mapM (inlineListToZimWiki opts . removeFormatting)headers  -- emphasis, links etc. are not allowed in table headers
   rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows
-  let widths = map (maximum . map T.length) $ transpose (headers':rows')
+  let widths = map (maybe 0 maximum . nonEmpty . map T.length) $
+                  transpose (headers':rows')
   let padTo (width, al) s =
           case width - T.length s of
                x | x > 0 ->
-- 
cgit v1.2.3


From 005f0fbcd558636f0d5db1203427a4d7b341f36e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 12:34:20 -0700
Subject: T.P.Shared: Remove ToString, ToText typeclasses [API change].

T.P.Parsing: revise type of readWithM so that it takes a Text
rather than a polymorphic ToText value.

These typeclasses were there to ease the transition from String
to Text. They are no longer needed, and they may clash with
more useful versions under the same name.

This will require a bump to 2.13.
---
 src/Text/Pandoc/Parsing.hs |  8 ++++----
 src/Text/Pandoc/Shared.hs  | 20 --------------------
 2 files changed, 4 insertions(+), 24 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 8d3799c3e..10a08d410 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1128,13 +1128,13 @@ gridTableFooter = optional blanklines
 ---
 
 -- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: (Stream s m Char, ToText s)
-          => ParserT s st m a    -- ^ parser
+readWithM :: Monad m
+          => ParserT Text st m a -- ^ parser
           -> st                  -- ^ initial state
-          -> s                   -- ^ input
+          -> Text                -- ^ input
           -> m (Either PandocError a)
 readWithM parser state input =
-    mapLeft (PandocParsecError $ toText input) `liftM` runParserT parser state "source" input
+    mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
 
 -- | Parse a string with a given parser and state
 readWith :: Parser Text st a
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 0ce9396b3..46aea9c03 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -25,8 +25,6 @@ module Text.Pandoc.Shared (
                      ordNub,
                      findM,
                      -- * Text processing
-                     ToString (..),
-                     ToText (..),
                      tshow,
                      backslashEscapes,
                      escapeStringUsing,
@@ -183,24 +181,6 @@ findM p = foldr go (pure Nothing)
 -- Text processing
 --
 
-class ToString a where
-  toString :: a -> String
-
-instance ToString String where
-  toString = id
-
-instance ToString T.Text where
-  toString = T.unpack
-
-class ToText a where
-  toText :: a -> T.Text
-
-instance ToText String where
-  toText = T.pack
-
-instance ToText T.Text where
-  toText = id
-
 tshow :: Show a => a -> T.Text
 tshow = T.pack . show
 
-- 
cgit v1.2.3


From 7678c48122bb9bb7041b75ddc33061b93cbcdbb2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 14:43:42 -0700
Subject: Hlint suggestion.

---
 src/Text/Pandoc/Readers/Txt2Tags.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index f27a3fc2c..d355a4b55 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -56,12 +56,13 @@ getT2TMeta = do
     outp <- fromMaybe "" <$> P.getOutputFile
     curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime
     curMtime <- catchError
-                 ((nonEmpty <$> mapM P.getModificationTime inps) >>=
-                    \case
+                 (mapM P.getModificationTime inps >>=
+                   (\case
                        Nothing ->
                          formatTime defaultTimeLocale "%T" <$> P.getZonedTime
                        Just ts -> return $
                          formatTime defaultTimeLocale "%T" $ maximum ts)
+                    . nonEmpty)
                 (const (return ""))
     return $ T2TMeta (T.pack curDate) (T.pack curMtime)
                      (intercalate ", " inps) outp
-- 
cgit v1.2.3


From eacead3eb316dd8e2d75737589dc017de36326fb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 19 Mar 2021 22:57:48 -0700
Subject: Fix fallback to default partials on templates.

If the directory containing a template does not contain
the partial, it should be sought in the default data files.
Closes #7164.
---
 src/Text/Pandoc/Templates.hs | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index e83f26329..7fd896641 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -34,6 +34,7 @@ import Control.Monad.Except (catchError, throwError)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Error
+import System.IO.Error (isDoesNotExistError)
 
 -- | Wrap a Monad in this if you want partials to
 -- be taken only from the default data files.
@@ -70,6 +71,9 @@ getTemplate tp = UTF8.toText <$>
              PandocResourceNotFound _ ->
                 -- see #5987 on reason for takeFileName
                 readDataFile ("templates" </> takeFileName tp)
+             PandocIOError _ ioe | isDoesNotExistError ioe ->
+                -- see #5987 on reason for takeFileName
+                readDataFile ("templates" </> takeFileName tp)
              _ -> throwError e))
 
 -- | Get default template for the specified writer.
-- 
cgit v1.2.3


From a1a57bce4e32cc26b968bcc2847a8e8da30f725b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 00:02:24 -0700
Subject: T.P.Shared: remove `backslashEscapes`, `escapeStringUsing`.

[API change]

These are inefficient association list lookups.
Replace with more efficient functions in the writers that
used them (with 10-25% performance improvements in
haddock, org, rtf, texinfo writers).
---
 src/Text/Pandoc/Shared.hs           | 13 -------------
 src/Text/Pandoc/Writers/AsciiDoc.hs | 11 ++++++++---
 src/Text/Pandoc/Writers/ConTeXt.hs  |  6 +++++-
 src/Text/Pandoc/Writers/Haddock.hs  | 15 +++++++++++++--
 src/Text/Pandoc/Writers/LaTeX.hs    | 14 +++++++++++++-
 src/Text/Pandoc/Writers/Org.hs      | 15 +++++++++------
 src/Text/Pandoc/Writers/RTF.hs      | 26 ++++++++++++++++----------
 src/Text/Pandoc/Writers/Texinfo.hs  | 24 +++++++++++++-----------
 8 files changed, 77 insertions(+), 47 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 46aea9c03..23adff909 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -26,8 +26,6 @@ module Text.Pandoc.Shared (
                      findM,
                      -- * Text processing
                      tshow,
-                     backslashEscapes,
-                     escapeStringUsing,
                      elemText,
                      notElemText,
                      stripTrailingNewlines,
@@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing)
 tshow :: Show a => a -> T.Text
 tshow = T.pack . show
 
--- | Returns an association list of backslash escapes for the
--- designated characters.
-backslashEscapes :: [Char]    -- ^ list of special characters to escape
-                 -> [(Char, T.Text)]
-backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch]))
-
--- | Escape a string of characters, using an association list of
--- characters and strings.
-escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text
-escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl
-
 -- | @True@ exactly when the @Char@ appears in the @Text@.
 elemText :: Char -> T.Text -> Bool
 elemText c = T.any (== c)
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
index 69e608ef9..ab7e5f1a9 100644
--- a/src/Text/Pandoc/Writers/AsciiDoc.hs
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
 
 -- | Escape special characters for AsciiDoc.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing escs
-  where escs = backslashEscapes "{"
+escapeString t
+  | T.any (== '{') t = T.concatMap escChar t
+  | otherwise        = t
+  where escChar '{' = "\\{"
+        escChar c   = T.singleton c
 
 -- | Ordered list start parser for use in Para below.
 olMarker :: Parser Text ParserState Char
@@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do
         | otherwise     -> [Str "``"] ++ lst ++ [Str "''"]
 inlineToAsciiDoc _ (Code _ str) = do
   isAsciidoctor <- gets asciidoctorVariant
-  let contents = literal (escapeStringUsing (backslashEscapes "`") str)
+  let escChar '`' = "\\'"
+      escChar c   = T.singleton c
+  let contents = literal (T.concatMap escChar str)
   return $
     if isAsciidoctor
        then text "`+" <> contents <> "+`"
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 1c56388ed..3c9975be8 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do
   put $ st {stNextRef = next + 1}
   let ref = "url" <> tshow next
   contents <-  inlineListToConTeXt txt
+  let escChar '#' = "\\#"
+      escChar '%' = "\\%"
+      escChar c   = T.singleton c
+  let escContextURL = T.concatMap escChar
   return $ "\\useURL"
            <> brackets (literal ref)
-           <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src)
+           <> brackets (literal $ escContextURL src)
            <> (if isAutolink
                   then empty
                   else brackets empty <> brackets contents)
diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs
index aaa19ed07..75e14714b 100644
--- a/src/Text/Pandoc/Writers/Haddock.hs
+++ b/src/Text/Pandoc/Writers/Haddock.hs
@@ -15,6 +15,7 @@ Haddock:  <http://www.haskell.org/haddock/doc/html/>
 -}
 module Text.Pandoc.Writers.Haddock (writeHaddock) where
 import Control.Monad.State.Strict
+import Data.Char (isAlphaNum)
 import Data.Default
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -71,8 +72,18 @@ notesToHaddock opts notes =
 
 -- | Escape special characters for Haddock.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing haddockEscapes
-  where haddockEscapes = backslashEscapes "\\/'`\"@<"
+escapeString t
+  | T.all isAlphaNum t = t
+  | otherwise = T.concatMap escChar t
+ where
+  escChar '\\' = "\\\\"
+  escChar '/'  = "\\/"
+  escChar '\'' = "\\'"
+  escChar '`'  = "\\`"
+  escChar '"'  = "\\\""
+  escChar '@'  = "\\@"
+  escChar '<'  = "\\<"
+  escChar c    = T.singleton c
 
 -- | Convert Pandoc block element to haddock.
 blockToHaddock :: PandocMonad m
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 6a205a798..1c970e6ad 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do
         let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of
                        (c:_) -> c
                        []    -> '!'
-        let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str
+        let isEscapable '\\' = True
+            isEscapable '{'  = True
+            isEscapable '}'  = True
+            isEscapable '%'  = True
+            isEscapable '~'  = True
+            isEscapable '_'  = True
+            isEscapable '&'  = True
+            isEscapable '#'  = True
+            isEscapable '^'  = True
+            isEscapable _    = False
+        let escChar c | isEscapable c = T.pack ['\\',c]
+                      | otherwise     = T.singleton c
+        let str' = T.concatMap escChar str
         -- we always put lstinline in a dummy 'passthrough' command
         -- (defined in the default template) so that we don't have
         -- to change the way we escape characters depending on whether
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index bb645eaf9..88a2b8314 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -84,12 +84,15 @@ noteToOrg num note = do
 
 -- | Escape special characters for Org.
 escapeString :: Text -> Text
-escapeString = escapeStringUsing
-               [ ('\x2014',"---")
-               , ('\x2013',"--")
-               , ('\x2019',"'")
-               , ('\x2026',"...")
-               ]
+escapeString t
+  | T.all (\c -> c < '\x2013' || c > '\x2026') t = t
+  | otherwise = T.concatMap escChar t
+  where
+   escChar '\x2013' = "--"
+   escChar '\x2014' = "---"
+   escChar '\x2019' = "'"
+   escChar '\x2026' = "..."
+   escChar c        = T.singleton c
 
 isRawFormat :: Format -> Bool
 isRawFormat f =
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index cf27011c2..3527949b4 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad
 import qualified Data.ByteString as B
-import Data.Char (chr, isDigit, ord)
+import Data.Char (chr, isDigit, ord, isAlphaNum)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c ->
 
 -- | Escape special characters.
 escapeSpecial :: Text -> Text
-escapeSpecial = escapeStringUsing $
-  [ ('\t',"\\tab ")
-  , ('\8216',"\\u8216'")
-  , ('\8217',"\\u8217'")
-  , ('\8220',"\\u8220\"")
-  , ('\8221',"\\u8221\"")
-  , ('\8211',"\\u8211-")
-  , ('\8212',"\\u8212-")
-  ] <> backslashEscapes "{\\}"
+escapeSpecial t
+  | T.all isAlphaNum t = t
+  | otherwise          = T.concatMap escChar t
+ where
+  escChar '\t' = "\\tab "
+  escChar '\8216' = "\\u8216'"
+  escChar '\8217' = "\\u8217'"
+  escChar '\8220' = "\\u8220\""
+  escChar '\8221' = "\\u8221\""
+  escChar '\8211' = "\\u8211-"
+  escChar '\8212' = "\\u8212-"
+  escChar '{'     = "\\{"
+  escChar '}'     = "\\}"
+  escChar '\\'    = "\\\\"
+  escChar c       = T.singleton c
 
 -- | Escape strings as needed for rich text format.
 stringToRTF :: Text -> Text
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 0146fdfd8..6a33b4283 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo.
 module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
-import Data.Char (chr, ord)
+import Data.Char (chr, ord, isAlphaNum)
 import Data.List (maximumBy, transpose, foldl')
 import Data.List.NonEmpty (nonEmpty)
 import Data.Ord (comparing)
@@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do
 
 -- | Escape things as needed for Texinfo.
 stringToTexinfo :: Text -> Text
-stringToTexinfo = escapeStringUsing texinfoEscapes
-  where texinfoEscapes = [ ('{', "@{")
-                         , ('}', "@}")
-                         , ('@', "@@")
-                         , ('\160', "@ ")
-                         , ('\x2014', "---")
-                         , ('\x2013', "--")
-                         , ('\x2026', "@dots{}")
-                         , ('\x2019', "'")
-                         ]
+stringToTexinfo t
+  | T.all isAlphaNum t = t
+  | otherwise = T.concatMap escChar t
+  where escChar '{'      = "@{"
+        escChar '}'      = "@}"
+        escChar '@'      = "@@"
+        escChar '\160'   = "@ "
+        escChar '\x2014' = "---"
+        escChar '\x2013' = "--"
+        escChar '\x2026' = "@dots{}"
+        escChar '\x2019' = "'"
+        escChar c        = T.singleton c
 
 escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text)
 escapeCommas parser = do
-- 
cgit v1.2.3


From 82e8c29cb0a89d7129f459bef6696254ec56e0c6 Mon Sep 17 00:00:00 2001
From: Erik Rask <erik.rask@paligo.net>
Date: Fri, 19 Feb 2021 13:05:35 +0100
Subject: Include Header.Attr.attributes as XML attributes on section

Add key-value pairs found in the attributes list of Header.Attr as
XML attributes on the corresponding section element.

Any key name not allowed as an XML attribute name is dropped, as
are keys with invalid values where they are defined as enums in
DocBook, and xml:id (for DocBook 5)/id (for DocBook 4) to not
intervene with computed identifiers.
---
 src/Text/Pandoc/Writers/Docbook.hs | 47 ++++++++++++++++++++++++++++++++++++--
 test/Tests/Writers/Docbook.hs      | 37 ++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index a6776608d..1f10c9d04 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -168,7 +168,7 @@ blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m (Doc Text)
 blockToDocbook _ Null = return empty
 -- Add ids to paragraphs in divs with ids - this is needed for
 -- pandoc-citeproc to get link anchors in bibliographies:
-blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do
+blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) = do
   version <- ask
   -- Docbook doesn't allow sections with no content, so insert some if needed
   let bs = if null xs
@@ -188,7 +188,10 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do
       -- standalone documents will include them in the template.
                  then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")]
                  else []
-      attribs = nsAttr <> idAttr
+      
+      -- Populate miscAttr with Header.Attr.attributes, filtering out non-valid DocBook section attributes, id, and xml:id
+      miscAttr = filter (isSectionAttr version) attrs
+      attribs = nsAttr <> idAttr <> miscAttr
   title' <- inlinesToDocbook opts ils
   contents <- blocksToDocbook opts bs
   return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents
@@ -451,3 +454,43 @@ idAndRole (id',cls,_) = ident <> role
   where
     ident = [("id", id') | not (T.null id')]
     role  = [("role", T.unwords cls) | not (null cls)]
+
+isSectionAttr :: DocBookVersion -> (Text, Text) -> Bool
+isSectionAttr _ ("label",_) = True
+isSectionAttr _ ("status",_) = True
+isSectionAttr DocBook5 ("annotations",_) = True
+isSectionAttr DocBook5 ("dir","ltr") = True
+isSectionAttr DocBook5 ("dir","rtl") = True
+isSectionAttr DocBook5 ("dir","lro") = True
+isSectionAttr DocBook5 ("dir","rlo") = True
+isSectionAttr _ ("remap",_) = True
+isSectionAttr _ ("revisionflag","changed") = True
+isSectionAttr _ ("revisionflag","added") = True
+isSectionAttr _ ("revisionflag","deleted") = True
+isSectionAttr _ ("revisionflag","off") = True
+isSectionAttr _ ("role",_) = True
+isSectionAttr DocBook5 ("version",_) = True
+isSectionAttr DocBook5 ("xml:base",_) = True
+isSectionAttr DocBook5 ("xml:lang",_) = True
+isSectionAttr _ ("xreflabel",_) = True
+isSectionAttr DocBook5 ("linkend",_) = True
+isSectionAttr DocBook5 ("linkends",_) = True
+isSectionAttr DocBook5 ("xlink:actuate",_) = True
+isSectionAttr DocBook5 ("xlink:arcrole",_) = True
+isSectionAttr DocBook5 ("xlink:from",_) = True
+isSectionAttr DocBook5 ("xlink:href",_) = True
+isSectionAttr DocBook5 ("xlink:label",_) = True
+isSectionAttr DocBook5 ("xlink:role",_) = True
+isSectionAttr DocBook5 ("xlink:show",_) = True
+isSectionAttr DocBook5 ("xlink:title",_) = True
+isSectionAttr DocBook5 ("xlink:to",_) = True
+isSectionAttr DocBook5 ("xlink:type",_) = True
+isSectionAttr DocBook4 ("arch",_) = True
+isSectionAttr DocBook4 ("condition",_) = True
+isSectionAttr DocBook4 ("conformance",_) = True
+isSectionAttr DocBook4 ("lang",_) = True
+isSectionAttr DocBook4 ("os",_) = True
+isSectionAttr DocBook4 ("revision",_) = True
+isSectionAttr DocBook4 ("security",_) = True
+isSectionAttr DocBook4 ("vendor",_) = True
+isSectionAttr _ (_,_) = False
\ No newline at end of file
diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs
index 842aed7ae..46203eeae 100644
--- a/test/Tests/Writers/Docbook.hs
+++ b/test/Tests/Writers/Docbook.hs
@@ -11,9 +11,14 @@ import Text.Pandoc.Builder
 docbook :: (ToPandoc a) => a -> String
 docbook = docbookWithOpts def{ writerWrapText = WrapNone }
 
+docbook5 :: (ToPandoc a) => a -> String
+docbook5 = docbook5WithOpts def{ writerWrapText = WrapNone }
+
 docbookWithOpts :: ToPandoc a => WriterOptions -> a -> String
 docbookWithOpts opts = unpack . purely (writeDocbook4 opts) . toPandoc
 
+docbook5WithOpts :: ToPandoc a => WriterOptions -> a -> String
+docbook5WithOpts opts = unpack . purely (writeDocbook5 opts) . toPandoc
 {-
   "my test" =: X =?> Y
 
@@ -366,4 +371,36 @@ tests = [ testGroup "line blocks"
                       ]
             ]
           ]
+          , testGroup "section attributes" $
+            let
+              headers =  headerWith ("myid1",[],[("role","internal"),("xml:id","anotherid"),("dir","rtl")]) 1 "header1"
+                      <> headerWith ("myid2",[],[("invalidname","value"),("arch","linux"),("dir","invaliddir")]) 1 "header2"
+            in
+            [ test docbook5 "sections with attributes (db5)" $
+              headers =?>
+              unlines [ "<section xmlns=\"http://docbook.org/ns/docbook\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:id=\"myid1\" role=\"internal\" dir=\"rtl\">"
+                      , "  <title>header1</title>"
+                      , "  <para>"
+                      , "  </para>"
+                      , "</section>"
+                      , "<section xmlns=\"http://docbook.org/ns/docbook\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:id=\"myid2\">"
+                      , "  <title>header2</title>"
+                      , "  <para>"
+                      , "  </para>"
+                      , "</section>"
+                      ]
+            , test docbook "sections with attributes (db4)" $
+              headers =?>
+              unlines [ "<sect1 id=\"myid1\" role=\"internal\">"
+                      , "  <title>header1</title>"
+                      , "  <para>"
+                      , "  </para>"
+                      , "</sect1>"
+                      , "<sect1 id=\"myid2\" arch=\"linux\">"
+                      , "  <title>header2</title>"
+                      , "  <para>"
+                      , "  </para>"
+                      , "</sect1>"
+                      ]
+            ]
         ]
-- 
cgit v1.2.3


From 84d8f3efd8a45194752209b7d55b18a2291ed1ae Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 10:07:51 -0700
Subject: RST writer: use NonEmpty for init, last.

---
 src/Text/Pandoc/Writers/RST.hs | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 0b9fc8331..983ef412a 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -17,7 +17,7 @@ module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
 import Control.Monad.State.Strict
 import Data.Char (isSpace)
 import Data.List (transpose, intersperse, foldl')
-import Data.List.NonEmpty (nonEmpty)
+import qualified Data.List.NonEmpty as NE
 import Data.Maybe (fromMaybe)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -336,7 +336,7 @@ blockToRST (OrderedList (start, style', delim) items) = do
                    then replicate (length items) "#."
                    else take (length items) $ orderedListMarkers
                                               (start, style', delim)
-  let maxMarkerLength = maybe 0 maximum $ nonEmpty $ map T.length markers
+  let maxMarkerLength = maybe 0 maximum $ NE.nonEmpty $ map T.length markers
   let markers' = map (\m -> let s = maxMarkerLength - T.length m
                             in  m <> T.replicate s " ") markers
   contents <- zipWithM orderedListItemToRST markers' items
@@ -538,11 +538,15 @@ flatten outer
         collapse f i = appendToLast f $ dropInlineParent i
 
         appendToLast :: [Inline] -> [Inline] -> [Inline]
-        appendToLast [] toAppend = [setInlineChildren outer toAppend]
-        appendToLast flattened toAppend
-          | isOuter lastFlat = init flattened <> [appendTo lastFlat toAppend]
-          | otherwise =  flattened <> [setInlineChildren outer toAppend]
-          where lastFlat = last flattened
+        appendToLast flattened toAppend =
+          case NE.nonEmpty flattened of
+            Nothing -> [setInlineChildren outer toAppend]
+            Just xs ->
+              if isOuter lastFlat
+                 then NE.init xs <> [appendTo lastFlat toAppend]
+                 else flattened <> [setInlineChildren outer toAppend]
+               where
+                lastFlat = NE.last xs
                 appendTo o i = mapNested (<> i) o
                 isOuter i = emptyParent i == emptyParent outer
                 emptyParent i = setInlineChildren i []
@@ -762,7 +766,7 @@ simpleTable opts blocksToDoc headers rows = do
                    then return []
                    else fixEmpties <$> mapM (blocksToDoc opts) headers
   rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows
-  let numChars = maybe 0 maximum . nonEmpty . map offset
+  let numChars = maybe 0 maximum . NE.nonEmpty . map offset
   let colWidths = map numChars $ transpose (headerDocs : rowDocs)
   let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths
   let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths)
-- 
cgit v1.2.3


From 4d041953f56b85d4db241cea11c764856ccbeebe Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 10:41:12 -0700
Subject: T.P.Readers.Metadata: made `yamlBsToMeta`, `yamlBsToRefs`
 polymorphic...

on the parser state, instead of requiring ParserState.
[API change]
---
 src/Text/Pandoc/Readers/Metadata.hs | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index 927291776..f4a27496f 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -31,10 +31,10 @@ import Text.Pandoc.Error
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Shared
 
-yamlBsToMeta :: PandocMonad m
-             => ParserT Text ParserState m (F MetaValue)
+yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
+             => ParserT Text st m (Future st MetaValue)
              -> BL.ByteString
-             -> ParserT Text ParserState m (F Meta)
+             -> ParserT Text st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -63,11 +63,11 @@ lookupYAML t (YAML.Mapping _ _ m) =
 lookupYAML _ _ = Nothing
 
 -- Returns filtered list of references.
-yamlBsToRefs :: PandocMonad m
-             => ParserT Text ParserState m (F MetaValue)
+yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
+             => ParserT Text st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
              -> BL.ByteString
-             -> ParserT Text ParserState m (F [MetaValue])
+             -> ParserT Text st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -104,10 +104,10 @@ nodeToKey (YAML.Scalar _ (YAML.SStr t))       = Just t
 nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
 nodeToKey _                                   = Nothing
 
-normalizeMetaValue :: PandocMonad m
-                   => ParserT Text ParserState m (F MetaValue)
+normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
+                   => ParserT Text st m (Future st MetaValue)
                    -> Text
-                   -> ParserT Text ParserState m (F MetaValue)
+                   -> ParserT Text st m (Future st MetaValue)
 normalizeMetaValue pMetaValue x =
    -- Note: a standard quoted or unquoted YAML value will
    -- not end in a newline, but a "block" set off with
@@ -129,10 +129,10 @@ checkBoolean t
   | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False
   | otherwise = Nothing
 
-yamlToMetaValue :: PandocMonad m
-                => ParserT Text ParserState m (F MetaValue)
+yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
+                => ParserT Text st m (Future st MetaValue)
                 -> YAML.Node YE.Pos
-                -> ParserT Text ParserState m (F MetaValue)
+                -> ParserT Text st m (Future st MetaValue)
 yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
   case x of
        YAML.SStr t       -> normalizeMetaValue pMetaValue t
@@ -152,10 +152,10 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
   fmap MetaMap <$> yamlMap pMetaValue o
 yamlToMetaValue _ _ = return $ return $ MetaString ""
 
-yamlMap :: PandocMonad m
-        => ParserT Text ParserState m (F MetaValue)
+yamlMap :: (PandocMonad m, HasLastStrPosition st)
+        => ParserT Text st m (Future st MetaValue)
         -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-        -> ParserT Text ParserState m (F (M.Map Text MetaValue))
+        -> ParserT Text st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
     kvs <- forM (M.toList o) $ \(key, v) -> do
              k <- maybe (throwError $ PandocParseError
-- 
cgit v1.2.3


From ce418667ae8a3e6e5bbf2523eef43edf4f803bcf Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 10:41:55 -0700
Subject: Text.Pandoc.Parsing: remove F type synonym.

Muse and Org were defining their own F anyway, with their
own state.  We therefore move this definition to the Markdown
reader.
---
 src/Text/Pandoc/App/Opt.hs             | 3 ++-
 src/Text/Pandoc/Parsing.hs             | 7 ++-----
 src/Text/Pandoc/Readers/Markdown.hs    | 2 ++
 src/Text/Pandoc/Readers/Muse.hs        | 2 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index e5aaec9c5..c72f63464 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -686,7 +686,8 @@ yamlToMeta (Mapping _ _ m) =
     either (fail . show) return $ runEverything (yamlMap pMetaString m)
   where
     pMetaString = pure . MetaString <$> P.manyChar P.anyChar
-    runEverything p = runPure (P.readWithM p def "")
+    runEverything p =
+      runPure (P.readWithM p (def :: P.ParserState) "")
       >>= fmap (Meta . flip P.runF def)
 yamlToMeta _ = return mempty
 
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 10a08d410..44e6af59e 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -112,7 +112,6 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              citeKey,
                              Parser,
                              ParserT,
-                             F,
                              Future(..),
                              runF,
                              askF,
@@ -229,8 +228,6 @@ type ParserT = ParsecT
 newtype Future s a = Future { runDelayed :: Reader s a }
   deriving (Monad, Applicative, Functor)
 
-type F = Future ParserState
-
 runF :: Future s a -> s -> a
 runF = runReader . runDelayed
 
@@ -1169,7 +1166,7 @@ data ParserState = ParserState
       stateInNote            :: Bool,          -- ^ True if parsing note contents
       stateNoteNumber        :: Int,           -- ^ Last note number for citations
       stateMeta              :: Meta,          -- ^ Document metadata
-      stateMeta'             :: F Meta,        -- ^ Document metadata
+      stateMeta'             :: Future ParserState Meta, -- ^ Document metadata
       stateCitations         :: M.Map Text Text, -- ^ RST-style citations
       stateHeaderTable       :: [HeaderType],  -- ^ Ordered list of header types used
       stateIdentifiers       :: Set.Set Text, -- ^ Header identifiers used
@@ -1348,7 +1345,7 @@ data QuoteContext
 
 type NoteTable = [(Text, Text)]
 
-type NoteTable' = M.Map Text (SourcePos, F Blocks)
+type NoteTable' = M.Map Text (SourcePos, Future ParserState Blocks)
 -- used in markdown reader
 
 newtype Key = Key Text deriving (Show, Read, Eq, Ord)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index a86286b3a..7c557b5a7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -52,6 +52,8 @@ import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
 
 type MarkdownParser m = ParserT Text ParserState m
 
+type F = Future ParserState
+
 -- | Read markdown from an input string and return a Pandoc document.
 readMarkdown :: PandocMonad m
              => ReaderOptions -- ^ Reader options
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index b4eea9d3a..698bfd3d7 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -36,7 +36,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Parsing hiding (F)
+import Text.Pandoc.Parsing
 import Text.Pandoc.Shared (crFilter, trimr, tshow)
 
 -- | Read Muse from an input string and return a Pandoc document.
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index d33920d47..6ed24a602 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -106,8 +106,8 @@ module Text.Pandoc.Readers.Org.Parsing
 import Data.Text (Text)
 import Text.Pandoc.Readers.Org.ParserState
 
-import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
-                            parseFromString)
+import Text.Pandoc.Parsing hiding (anyLine, blanklines, newline,
+                                   parseFromString)
 import qualified Text.Pandoc.Parsing as P
 
 import Control.Monad (guard)
-- 
cgit v1.2.3


From bea86f394e6bc77d7441bb98f10a8b8ccfee04c9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 11:23:41 -0700
Subject: Markdown reader: export `yamlMetaBlock`.

[API change]

This will allow us to parse YAML metadata blocks in other
readers, potentially.
---
 src/Text/Pandoc/Readers/Markdown.hs | 40 +++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 7c557b5a7..8d68510c5 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -15,6 +15,7 @@ Conversion of markdown-formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Markdown (
   readMarkdown,
+  yamlMetaBlock,
   yamlToMeta,
   yamlToRefs ) where
 
@@ -274,24 +275,29 @@ pandocTitleBlock = do
                      $ nullMeta
     updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
-yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
-yamlMetaBlock = do
+yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
+              => ParserT Text st m (Future st Blocks)
+              -> ParserT Text st m (Future st Meta)
+yamlMetaBlock parser = try $ do
+  string "---"
+  blankline
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+  optional blanklines
+  yamlBsToMeta (fmap B.toMetaValue <$> parser)
+               $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+
+yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
+yamlMetaBlock' = do
   guardEnabled Ext_yaml_metadata_block
-  try $ do
-    string "---"
-    blankline
-    notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
-    rawYamlLines <- manyTill anyLine stopLine
-    -- by including --- and ..., we allow yaml blocks with just comments:
-    let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
-    optional blanklines
-    newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks)
-                $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-    -- Since `<>` is left-biased, existing values are not touched:
-    updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
-    return mempty
+  newMetaF <- yamlMetaBlock parseBlocks
+  -- Since `<>` is left-biased, existing values are not touched:
+  updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
+  return mempty
 
-stopLine :: PandocMonad m => MarkdownParser m ()
+stopLine :: PandocMonad m => ParserT Text st m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
 
 mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
@@ -456,7 +462,7 @@ block :: PandocMonad m => MarkdownParser m (F Blocks)
 block = do
   res <- choice [ mempty <$ blanklines
                , codeBlockFenced
-               , yamlMetaBlock
+               , yamlMetaBlock'
                -- note: bulletList needs to be before header because of
                -- the possibility of empty list items: -
                , bulletList
-- 
cgit v1.2.3


From 2274eb88a4dddf622d86bee94bb6f20db6e148b2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 11:37:09 -0700
Subject: Move yamlMetaBlock from Markdown reader to T.P.Readers.Metadata.

---
 src/Text/Pandoc/Readers/Markdown.hs | 24 ++----------------------
 src/Text/Pandoc/Readers/Metadata.hs | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 8d68510c5..6c3947a81 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -15,7 +15,6 @@ Conversion of markdown-formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Markdown (
   readMarkdown,
-  yamlMetaBlock,
   yamlToMeta,
   yamlToRefs ) where
 
@@ -29,7 +28,6 @@ import Data.Maybe
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
 import qualified Data.ByteString.Lazy as BL
 import System.FilePath (addExtension, takeExtension)
 import Text.HTML.TagSoup hiding (Row)
@@ -47,9 +45,8 @@ import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
                                  isCommentTag, isInlineTag, isTextTag)
 import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared
-import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.XML (fromEntities)
-import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
+import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
 
 type MarkdownParser m = ParserT Text ParserState m
 
@@ -275,31 +272,14 @@ pandocTitleBlock = do
                      $ nullMeta
     updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
 
-yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
-              => ParserT Text st m (Future st Blocks)
-              -> ParserT Text st m (Future st Meta)
-yamlMetaBlock parser = try $ do
-  string "---"
-  blankline
-  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
-  rawYamlLines <- manyTill anyLine stopLine
-  -- by including --- and ..., we allow yaml blocks with just comments:
-  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
-  optional blanklines
-  yamlBsToMeta (fmap B.toMetaValue <$> parser)
-               $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-
 yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
 yamlMetaBlock' = do
   guardEnabled Ext_yaml_metadata_block
-  newMetaF <- yamlMetaBlock parseBlocks
+  newMetaF <- yamlMetaBlock (fmap B.toMetaValue <$> parseBlocks)
   -- Since `<>` is left-biased, existing values are not touched:
   updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
   return mempty
 
-stopLine :: PandocMonad m => ParserT Text st m ()
-stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-
 mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
 mmdTitleBlock = do
   guardEnabled Ext_mmd_title_block
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index f4a27496f..cb141cba5 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -14,6 +14,7 @@ Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
 module Text.Pandoc.Readers.Metadata (
   yamlBsToMeta,
   yamlBsToRefs,
+  yamlMetaBlock,
   yamlMap ) where
 
 import Control.Monad
@@ -30,6 +31,8 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Parsing hiding (tableWith)
 import Text.Pandoc.Shared
+import qualified Data.Text.Lazy as TL
+import qualified Text.Pandoc.UTF8 as UTF8
 
 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
              => ParserT Text st m (Future st MetaValue)
@@ -171,3 +174,20 @@ yamlMap pMetaValue o = do
       return $ do
         v' <- fv
         return (k, v')
+
+-- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
+yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
+              => ParserT Text st m (Future st MetaValue)
+              -> ParserT Text st m (Future st Meta)
+yamlMetaBlock parser = try $ do
+  string "---"
+  blankline
+  notFollowedBy blankline  -- if --- is followed by a blank it's an HRULE
+  rawYamlLines <- manyTill anyLine stopLine
+  -- by including --- and ..., we allow yaml blocks with just comments:
+  let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
+  optional blanklines
+  yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
+
+stopLine :: Monad m => ParserT Text st m ()
+stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
-- 
cgit v1.2.3


From c389211e2f678d9327cd2c008d54a1e438f07a07 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 20 Mar 2021 15:45:09 -0700
Subject: Support `yaml_metadata_block` extension form commonmark, gfm.

This is a bit more limited than with markdown, as documented
in the manual:

- The YAML block must be the first thing in the input.
- The leaf notes are parsed in isolation from the rest of
  the document.  So, for example, you can't use reference
  links if the references are defined later in the document.

Closes #6537.
---
 MANUAL.txt                            | 16 ++++++++++++++++
 src/Text/Pandoc/Extensions.hs         |  3 +++
 src/Text/Pandoc/Parsing.hs            |  2 +-
 src/Text/Pandoc/Readers/CommonMark.hs | 30 ++++++++++++++++++++++++++++++
 4 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index bc9e226e9..845d1dbba 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -4240,6 +4240,22 @@ will be interpreted as markdown. For example:
       \renewcommand{\section}[1]{\clearpage\oldsection{#1}}
       ```
 
+Note:  the `yaml_metadata_block` extension works with
+`commonmark` as well as `markdown` (and it is enabled by default
+in `gfm` and `commonmark_x`).  However, in these formats the
+following restrictions apply:
+
+- The YAML metadata block must occur at the beginning of the
+  document (and there can be only one).  If multiple files are
+  given as arguments to pandoc, only the first can be a YAML
+  metadata block.
+
+- The leaf nodes of the YAML structure are parsed in isolation from
+  each other and from the rest of the document.  So, for
+  example,  you can't use a reference link in these contexts
+  if the link definition is somewhere else in the document.
+
+
 ## Backslash escapes
 
 #### Extension: `all_symbols_escapable` ####
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 266a09e3c..6423d5f56 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -354,6 +354,7 @@ getDefaultExtensions "gfm"             = extensionsFromList
   , Ext_strikeout
   , Ext_task_lists
   , Ext_emoji
+  , Ext_yaml_metadata_block
   ]
 getDefaultExtensions "commonmark"      = extensionsFromList
                                           [Ext_raw_html]
@@ -379,6 +380,7 @@ getDefaultExtensions "commonmark_x"    = extensionsFromList
   , Ext_raw_attribute
   , Ext_implicit_header_references
   , Ext_attributes
+  , Ext_yaml_metadata_block
   ]
 getDefaultExtensions "org"             = extensionsFromList
                                           [Ext_citations,
@@ -511,6 +513,7 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_implicit_header_references
     , Ext_attributes
     , Ext_sourcepos
+    , Ext_yaml_metadata_block
     ]
   getAll "commonmark_x"    = getAll "commonmark"
   getAll "org"             = autoIdExtensions <>
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 44e6af59e..0c2078721 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -517,7 +517,7 @@ parseFromString :: (Stream s m Char, IsString s)
                 -> ParserT s st m r
 parseFromString parser str = do
   oldPos <- getPosition
-  setPosition $ initialPos "chunk"
+  setPosition $ initialPos " chunk"
   oldInput <- getInput
   setInput $ fromString $ T.unpack str
   result <- parser
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 150a837e4..244f77940 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -26,13 +26,43 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Options
 import Text.Pandoc.Error
+import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
 import Data.Typeable
+import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
+                            runF, defaultParserState, take1WhileP, option)
+import qualified Data.Text as T
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
 readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readCommonMark opts s
+  | isEnabled Ext_yaml_metadata_block opts
+  , "---" `T.isPrefixOf` s = do
+       let metaValueParser = do
+             inp <- option "" $ take1WhileP (const True)
+             case runIdentity
+                    (commonmarkWith (specFor opts) "metadata value" inp) of
+                    Left _ -> mzero
+                    Right (Cm bls :: Cm () Blocks)
+                             -> return $ return $ B.toMetaValue bls
+       res <- runParserT (do meta <- yamlMetaBlock metaValueParser
+                             pos <- getPosition
+                             return (meta, pos))
+                         defaultParserState "YAML metadata" s
+       case res of
+         Left _ -> readCommonMarkBody opts s
+         Right (meta, pos) -> do
+           let dropLines 0 = id
+               dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
+           let metaLines = sourceLine pos - 1
+           let body = T.replicate metaLines "\n" <> dropLines metaLines s
+           Pandoc _ bs <- readCommonMarkBody opts body
+           return $ Pandoc (runF meta defaultParserState) bs
+  | otherwise = readCommonMarkBody opts s
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMarkBody opts s
   | isEnabled Ext_sourcepos opts =
     case runIdentity (commonmarkWith (specFor opts) "" s) of
       Left err -> throwError $ PandocParsecError s err
-- 
cgit v1.2.3


From 052056289fc6f884a2a8799dacca64a16248a5c2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 21 Mar 2021 23:38:47 -0700
Subject: Simplify T.P.Asciify and export toAsciiText [API change].

Instead of encoding a giant (and incomplete) map, we now
just use unicode-transforms to normalize the text to
a canonical decomposition, and manipulate the result.

The new `toAsciiText` is equivalent to the old
`T.pack . mapMaybe toAsciiChar . T.unpack` but should be faster.
---
 src/Text/Pandoc/Asciify.hs | 403 ++-------------------------------------------
 src/Text/Pandoc/Parsing.hs |   6 +-
 src/Text/Pandoc/Shared.hs  |   4 +-
 3 files changed, 18 insertions(+), 395 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 01a7b624a..620546c13 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -10,396 +10,19 @@
 Function to convert accented latin letters to their unaccented
 ascii equivalents (used in constructing HTML identifiers).
 -}
-module Text.Pandoc.Asciify (toAsciiChar)
+module Text.Pandoc.Asciify (toAsciiChar, toAsciiText)
 where
-import Data.Char (isAscii)
-import qualified Data.Map as M
+import Data.Char (isAscii, isMark)
+import qualified Data.Text.Normalize as TN
+import Data.Text (Text)
+import qualified Data.Text as T
 
-toAsciiChar :: Char -> Maybe Char
-toAsciiChar c | isAscii c = Just c
-              | otherwise = M.lookup c asciiMap
+toAsciiText :: Text -> Text
+toAsciiText = T.filter isAscii . TN.normalize (TN.NFD)
 
-asciiMap :: M.Map Char Char
-asciiMap = M.fromList
-  [('\192','A')
-  ,('\193','A')
-  ,('\194','A')
-  ,('\195','A')
-  ,('\196','A')
-  ,('\197','A')
-  ,('\199','C')
-  ,('\200','E')
-  ,('\201','E')
-  ,('\202','E')
-  ,('\203','E')
-  ,('\204','I')
-  ,('\205','I')
-  ,('\206','I')
-  ,('\207','I')
-  ,('\209','N')
-  ,('\210','O')
-  ,('\211','O')
-  ,('\212','O')
-  ,('\213','O')
-  ,('\214','O')
-  ,('\217','U')
-  ,('\218','U')
-  ,('\219','U')
-  ,('\220','U')
-  ,('\221','Y')
-  ,('\224','a')
-  ,('\225','a')
-  ,('\226','a')
-  ,('\227','a')
-  ,('\228','a')
-  ,('\229','a')
-  ,('\231','c')
-  ,('\232','e')
-  ,('\233','e')
-  ,('\234','e')
-  ,('\235','e')
-  ,('\236','i')
-  ,('\237','i')
-  ,('\238','i')
-  ,('\239','i')
-  ,('\241','n')
-  ,('\242','o')
-  ,('\243','o')
-  ,('\244','o')
-  ,('\245','o')
-  ,('\246','o')
-  ,('\249','u')
-  ,('\250','u')
-  ,('\251','u')
-  ,('\252','u')
-  ,('\253','y')
-  ,('\255','y')
-  ,('\256','A')
-  ,('\257','a')
-  ,('\258','A')
-  ,('\259','a')
-  ,('\260','A')
-  ,('\261','a')
-  ,('\262','C')
-  ,('\263','c')
-  ,('\264','C')
-  ,('\265','c')
-  ,('\266','C')
-  ,('\267','c')
-  ,('\268','C')
-  ,('\269','c')
-  ,('\270','D')
-  ,('\271','d')
-  ,('\274','E')
-  ,('\275','e')
-  ,('\276','E')
-  ,('\277','e')
-  ,('\278','E')
-  ,('\279','e')
-  ,('\280','E')
-  ,('\281','e')
-  ,('\282','E')
-  ,('\283','e')
-  ,('\284','G')
-  ,('\285','g')
-  ,('\286','G')
-  ,('\287','g')
-  ,('\288','G')
-  ,('\289','g')
-  ,('\290','G')
-  ,('\291','g')
-  ,('\292','H')
-  ,('\293','h')
-  ,('\296','I')
-  ,('\297','i')
-  ,('\298','I')
-  ,('\299','i')
-  ,('\300','I')
-  ,('\301','i')
-  ,('\302','I')
-  ,('\303','i')
-  ,('\304','I')
-  ,('\305','i')
-  ,('\308','J')
-  ,('\309','j')
-  ,('\310','K')
-  ,('\311','k')
-  ,('\313','L')
-  ,('\314','l')
-  ,('\315','L')
-  ,('\316','l')
-  ,('\317','L')
-  ,('\318','l')
-  ,('\323','N')
-  ,('\324','n')
-  ,('\325','N')
-  ,('\326','n')
-  ,('\327','N')
-  ,('\328','n')
-  ,('\332','O')
-  ,('\333','o')
-  ,('\334','O')
-  ,('\335','o')
-  ,('\336','O')
-  ,('\337','o')
-  ,('\340','R')
-  ,('\341','r')
-  ,('\342','R')
-  ,('\343','r')
-  ,('\344','R')
-  ,('\345','r')
-  ,('\346','S')
-  ,('\347','s')
-  ,('\348','S')
-  ,('\349','s')
-  ,('\350','S')
-  ,('\351','s')
-  ,('\352','S')
-  ,('\353','s')
-  ,('\354','T')
-  ,('\355','t')
-  ,('\356','T')
-  ,('\357','t')
-  ,('\360','U')
-  ,('\361','u')
-  ,('\362','U')
-  ,('\363','u')
-  ,('\364','U')
-  ,('\365','u')
-  ,('\366','U')
-  ,('\367','u')
-  ,('\368','U')
-  ,('\369','u')
-  ,('\370','U')
-  ,('\371','u')
-  ,('\372','W')
-  ,('\373','w')
-  ,('\374','Y')
-  ,('\375','y')
-  ,('\376','Y')
-  ,('\377','Z')
-  ,('\378','z')
-  ,('\379','Z')
-  ,('\380','z')
-  ,('\381','Z')
-  ,('\382','z')
-  ,('\416','O')
-  ,('\417','o')
-  ,('\431','U')
-  ,('\432','u')
-  ,('\461','A')
-  ,('\462','a')
-  ,('\463','I')
-  ,('\464','i')
-  ,('\465','O')
-  ,('\466','o')
-  ,('\467','U')
-  ,('\468','u')
-  ,('\486','G')
-  ,('\487','g')
-  ,('\488','K')
-  ,('\489','k')
-  ,('\490','O')
-  ,('\491','o')
-  ,('\496','j')
-  ,('\500','G')
-  ,('\501','g')
-  ,('\504','N')
-  ,('\505','n')
-  ,('\512','A')
-  ,('\513','a')
-  ,('\514','A')
-  ,('\515','a')
-  ,('\516','E')
-  ,('\517','e')
-  ,('\518','E')
-  ,('\519','e')
-  ,('\520','I')
-  ,('\521','i')
-  ,('\522','I')
-  ,('\523','i')
-  ,('\524','O')
-  ,('\525','o')
-  ,('\526','O')
-  ,('\527','o')
-  ,('\528','R')
-  ,('\529','r')
-  ,('\530','R')
-  ,('\531','r')
-  ,('\532','U')
-  ,('\533','u')
-  ,('\534','U')
-  ,('\535','u')
-  ,('\536','S')
-  ,('\537','s')
-  ,('\538','T')
-  ,('\539','t')
-  ,('\542','H')
-  ,('\543','h')
-  ,('\550','A')
-  ,('\551','a')
-  ,('\552','E')
-  ,('\553','e')
-  ,('\558','O')
-  ,('\559','o')
-  ,('\562','Y')
-  ,('\563','y')
-  ,('\894',';')
-  ,('\7680','A')
-  ,('\7681','a')
-  ,('\7682','B')
-  ,('\7683','b')
-  ,('\7684','B')
-  ,('\7685','b')
-  ,('\7686','B')
-  ,('\7687','b')
-  ,('\7690','D')
-  ,('\7691','d')
-  ,('\7692','D')
-  ,('\7693','d')
-  ,('\7694','D')
-  ,('\7695','d')
-  ,('\7696','D')
-  ,('\7697','d')
-  ,('\7698','D')
-  ,('\7699','d')
-  ,('\7704','E')
-  ,('\7705','e')
-  ,('\7706','E')
-  ,('\7707','e')
-  ,('\7710','F')
-  ,('\7711','f')
-  ,('\7712','G')
-  ,('\7713','g')
-  ,('\7714','H')
-  ,('\7715','h')
-  ,('\7716','H')
-  ,('\7717','h')
-  ,('\7718','H')
-  ,('\7719','h')
-  ,('\7720','H')
-  ,('\7721','h')
-  ,('\7722','H')
-  ,('\7723','h')
-  ,('\7724','I')
-  ,('\7725','i')
-  ,('\7728','K')
-  ,('\7729','k')
-  ,('\7730','K')
-  ,('\7731','k')
-  ,('\7732','K')
-  ,('\7733','k')
-  ,('\7734','L')
-  ,('\7735','l')
-  ,('\7738','L')
-  ,('\7739','l')
-  ,('\7740','L')
-  ,('\7741','l')
-  ,('\7742','M')
-  ,('\7743','m')
-  ,('\7744','M')
-  ,('\7745','m')
-  ,('\7746','M')
-  ,('\7747','m')
-  ,('\7748','N')
-  ,('\7749','n')
-  ,('\7750','N')
-  ,('\7751','n')
-  ,('\7752','N')
-  ,('\7753','n')
-  ,('\7754','N')
-  ,('\7755','n')
-  ,('\7764','P')
-  ,('\7765','p')
-  ,('\7766','P')
-  ,('\7767','p')
-  ,('\7768','R')
-  ,('\7769','r')
-  ,('\7770','R')
-  ,('\7771','r')
-  ,('\7774','R')
-  ,('\7775','r')
-  ,('\7776','S')
-  ,('\7777','s')
-  ,('\7778','S')
-  ,('\7779','s')
-  ,('\7786','T')
-  ,('\7787','t')
-  ,('\7788','T')
-  ,('\7789','t')
-  ,('\7790','T')
-  ,('\7791','t')
-  ,('\7792','T')
-  ,('\7793','t')
-  ,('\7794','U')
-  ,('\7795','u')
-  ,('\7796','U')
-  ,('\7797','u')
-  ,('\7798','U')
-  ,('\7799','u')
-  ,('\7804','V')
-  ,('\7805','v')
-  ,('\7806','V')
-  ,('\7807','v')
-  ,('\7808','W')
-  ,('\7809','w')
-  ,('\7810','W')
-  ,('\7811','w')
-  ,('\7812','W')
-  ,('\7813','w')
-  ,('\7814','W')
-  ,('\7815','w')
-  ,('\7816','W')
-  ,('\7817','w')
-  ,('\7818','X')
-  ,('\7819','x')
-  ,('\7820','X')
-  ,('\7821','x')
-  ,('\7822','Y')
-  ,('\7823','y')
-  ,('\7824','Z')
-  ,('\7825','z')
-  ,('\7826','Z')
-  ,('\7827','z')
-  ,('\7828','Z')
-  ,('\7829','z')
-  ,('\7830','h')
-  ,('\7831','t')
-  ,('\7832','w')
-  ,('\7833','y')
-  ,('\7840','A')
-  ,('\7841','a')
-  ,('\7842','A')
-  ,('\7843','a')
-  ,('\7864','E')
-  ,('\7865','e')
-  ,('\7866','E')
-  ,('\7867','e')
-  ,('\7868','E')
-  ,('\7869','e')
-  ,('\7880','I')
-  ,('\7881','i')
-  ,('\7882','I')
-  ,('\7883','i')
-  ,('\7884','O')
-  ,('\7885','o')
-  ,('\7886','O')
-  ,('\7887','o')
-  ,('\7908','U')
-  ,('\7909','u')
-  ,('\7910','U')
-  ,('\7911','u')
-  ,('\7922','Y')
-  ,('\7923','y')
-  ,('\7924','Y')
-  ,('\7925','y')
-  ,('\7926','Y')
-  ,('\7927','y')
-  ,('\7928','Y')
-  ,('\7929','y')
-  ,('\8175','`')
-  ,('\8490','K')
-  ,('\8800','=')
-  ,('\8814','<')
-  ,('\8815','>')
-  ]
+toAsciiChar :: Char -> Maybe Char
+toAsciiChar c = case T.unpack (TN.normalize TN.NFD (T.singleton c)) of
+                  (x:xs) | isAscii x
+                         , all isMark xs
+                         -> Just x
+                  _      -> Nothing
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 0c2078721..847fd2e05 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -195,13 +195,13 @@ import Data.Default
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
 import qualified Data.Map as M
-import Data.Maybe (mapMaybe, fromMaybe)
+import Data.Maybe (fromMaybe)
 import qualified Data.Set as Set
 import Data.String
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
-import Text.Pandoc.Asciify (toAsciiChar)
+import Text.Pandoc.Asciify (toAsciiText)
 import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
@@ -1380,7 +1380,7 @@ registerHeader (ident,classes,kvs) header' = do
      then do
        let id' = uniqueIdent exts (B.toList header') ids
        let id'' = if Ext_ascii_identifiers `extensionEnabled` exts
-                     then T.pack $ mapMaybe toAsciiChar $ T.unpack id'
+                     then toAsciiText id'
                      else id'
        updateState $ updateIdentifierList $ Set.insert id'
        updateState $ updateIdentifierList $ Set.insert id''
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 23adff909..3292b32f4 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -123,7 +123,7 @@ import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
 import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
 import qualified Text.Pandoc.Builder as B
 import Data.Time
-import Text.Pandoc.Asciify (toAsciiChar)
+import Text.Pandoc.Asciify (toAsciiText)
 import Text.Pandoc.Definition
 import Text.Pandoc.Extensions (Extensions, Extension(..), extensionEnabled)
 import Text.Pandoc.Generic (bottomUp)
@@ -478,7 +478,7 @@ inlineListToIdentifier exts =
       | otherwise = T.dropWhile (not . isAlpha)
     filterAscii
       | extensionEnabled Ext_ascii_identifiers exts
-        = T.pack . mapMaybe toAsciiChar . T.unpack
+        = toAsciiText
       | otherwise = id
     toIdent
       | extensionEnabled Ext_gfm_auto_identifiers exts =
-- 
cgit v1.2.3


From 56ce1fc12651e9cf60b882a18d72a410967d6540 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 24 Mar 2021 11:57:49 -0700
Subject: Fix DocBook reader mathml regression...

...caused by the switch in XML libraries.
Also fixed a similar issue in JATS.
Closes #7173.
---
 src/Text/Pandoc/Readers/DocBook.hs |   3 +-
 src/Text/Pandoc/Readers/JATS.hs    |   8 ++-
 test/command/7173.md               | 137 +++++++++++++++++++++++++++++++++++++
 test/docbook-reader.docbook        |   4 +-
 4 files changed, 147 insertions(+), 5 deletions(-)
 create mode 100644 test/command/7173.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6f5bb0ad4..4f525cfb1 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1239,7 +1239,8 @@ equation e constructor =
   where
     mathMLEquations :: [Text]
     mathMLEquations = map writeTeX $ rights $ readMath
-      (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
+      (\x -> qName (elName x) == "math" &&
+             qURI (elName x) == Just "http://www.w3.org/1998/Math/MathML")
       (readMathML . showElement)
 
     latexEquations :: [Text]
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 602f3b4f2..c068f3774 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -491,7 +491,8 @@ parseInline (Elem e) =
 
         "disp-formula" -> formula displayMath
         "inline-formula" -> formula math
-        "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
+        "math" | qURI (elName e) == Just "http://www.w3.org/1998/Math/MathML"
+                   -> return . math $ mathML e
         "tex-math" -> return . math $ textContent e
 
         "email" -> return $ link ("mailto:" <> textContent e) ""
@@ -514,8 +515,9 @@ parseInline (Elem e) =
                             filterChildren isMathML whereToLook
             return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs
 
-         isMathML x = qName   (elName x) == "math" &&
-                      qPrefix (elName x) == Just "mml"
+         isMathML x = qName (elName x) == "math" &&
+                      qURI  (elName x) ==
+                                      Just "http://www.w3.org/1998/Math/MathML"
          removePrefix elname = elname { qPrefix = Nothing }
          codeWithLang = do
            let classes' = case attrValue "language" e of
diff --git a/test/command/7173.md b/test/command/7173.md
new file mode 100644
index 000000000..2599dc19b
--- /dev/null
+++ b/test/command/7173.md
@@ -0,0 +1,137 @@
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <m:math xmlns:m="http://www.w3.org/1998/Math/MathML">
+            <m:mrow>
+                <m:mrow><m:mn>1</m:mn><m:mo>+</m:mo><m:mn>2</m:mn>
+                </m:mrow>
+            </m:mrow>
+        </m:math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML">
+            <mml:mrow>
+                <mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mn>2</mml:mn>
+                </mml:mrow>
+            </mml:mrow>
+        </mml:math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <math xmlns="http://www.w3.org/1998/Math/MathML">
+            <mrow>
+                <mrow><mn>1</mn><mo>+</mo><mn>2</mn>
+                </mrow>
+            </mrow>
+        </math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    xmlns:m="http://www.w3.org/1998/Math/MathML"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <m:math>
+            <m:mrow>
+                <m:mrow><m:mn>1</m:mn><m:mo>+</m:mo><m:mn>2</m:mn>
+                </m:mrow>
+            </m:mrow>
+        </m:math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
+```
+% pandoc -f docbook -t latex
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">
+<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?>
+<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
+<article xmlns="http://docbook.org/ns/docbook"
+    xmlns:xlink="http://www.w3.org/1999/xlink"
+    xmlns:mml="http://www.w3.org/1998/Math/MathML"
+    version="5.1">
+    <title>Untitled Document</title>
+    <para>Word</para>
+    <informalequation>
+        <alt role='tex'>1+2</alt>
+        <mml:math>
+            <mml:mrow>
+                <mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mn>2</mml:mn>
+                </mml:mrow>
+            </mml:mrow>
+        </mml:math>
+    </informalequation>
+</article>
+^D
+Word
+
+\[1 + 2\]
+```
+
diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook
index 5717d78d0..51e62942b 100644
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@@ -6,7 +6,9 @@
 <!ENTITY let "LET" >
 <!ENTITY case "CASE" >
 ]>
-<article>
+<article xmlns="http://docbook.org/ns/docbook"
+         xmlns:xlink="http://www.w3.org/1999/xlink"
+         xmlns:mml="http://www.w3.org/1998/Math/MathML">
   <articleinfo>
     <title>Pandoc Test Suite</title>
     <authorgroup>
-- 
cgit v1.2.3


From e22d1fbb144d63d595c5db9225dd912b09fd938f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 29 Mar 2021 14:56:44 -0700
Subject: Powerpoint writer: allow monofont to be specified in metadata...

...not just using `--variable` on the command line (as in
other writers).  Closes #7187.
---
 src/Text/Pandoc/Writers/Powerpoint.hs        |  2 +-
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 23 ++++++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Powerpoint.hs b/src/Text/Pandoc/Writers/Powerpoint.hs
index ca3b74a1d..e0573beca 100644
--- a/src/Text/Pandoc/Writers/Powerpoint.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint.hs
@@ -39,5 +39,5 @@ writePowerpoint opts (Pandoc meta blks) = do
   let blks' = walk fixDisplayMath blks
   let (pres, logMsgs) = documentToPresentation opts (Pandoc meta blks')
   mapM_ report logMsgs
-  archv <- presentationToArchive opts pres
+  archv <- presentationToArchive opts meta pres
   return $ fromArchive archv
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 0e515b3c2..f2f54a91c 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -38,17 +38,19 @@ import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Options
 import Text.Pandoc.MIME
 import qualified Data.ByteString.Lazy as BL
+import Text.Pandoc.Writers.Shared (metaToContext)
 import Text.Pandoc.Writers.OOXML
 import qualified Data.Map as M
 import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes, isJust)
 import Text.Pandoc.ImageSize
 import Control.Applicative ((<|>))
 import System.FilePath.Glob
-import Text.DocTemplates (FromContext(lookupContext))
+import Text.DocTemplates (FromContext(lookupContext), Context)
+import Text.DocLayout (literal)
 import Text.TeXMath
 import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
-import Text.Pandoc.Shared (tshow)
+import Text.Pandoc.Shared (tshow, stringify)
 import Skylighting (fromColor)
 import Data.List.NonEmpty (nonEmpty)
 
@@ -97,6 +99,7 @@ data WriterEnv = WriterEnv { envRefArchive :: Archive
                            , envDistArchive :: Archive
                            , envUTCTime :: UTCTime
                            , envOpts :: WriterOptions
+                           , envContext :: Context Text
                            , envPresentationSize :: (Integer, Integer)
                            , envSlideHasHeader :: Bool
                            , envInList :: Bool
@@ -122,6 +125,7 @@ instance Default WriterEnv where
                   , envDistArchive = emptyArchive
                   , envUTCTime = posixSecondsToUTCTime 0
                   , envOpts = def
+                  , envContext = mempty
                   , envPresentationSize = (720, 540)
                   , envSlideHasHeader = False
                   , envInList = False
@@ -168,7 +172,7 @@ runP env st p = evalStateT (runReaderT p env) st
 
 monospaceFont :: Monad m => P m T.Text
 monospaceFont = do
-  vars <- writerVariables <$> asks envOpts
+  vars <- asks envContext
   case lookupContext "monofont" vars of
     Just s -> return s
     Nothing -> return "Courier"
@@ -304,8 +308,9 @@ makeSpeakerNotesMap (Presentation _ slides) =
                                  then Nothing
                                  else Just n
 
-presentationToArchive :: PandocMonad m => WriterOptions -> Presentation -> m Archive
-presentationToArchive opts pres = do
+presentationToArchive :: PandocMonad m
+                      => WriterOptions -> Meta -> Presentation -> m Archive
+presentationToArchive opts meta pres = do
   distArchive <- toArchive . BL.fromStrict <$>
                       P.readDefaultDataFile "reference.pptx"
   refArchive <- case writerReferenceDoc opts of
@@ -321,10 +326,18 @@ presentationToArchive opts pres = do
                            PandocSomeError
                            "Could not determine presentation size"
 
+  -- note, we need writerTemplate to be Just _ or metaToContext does
+  -- nothing
+  context <- metaToContext opts{ writerTemplate =
+                                  writerTemplate opts <|> Just mempty }
+                (return . literal . stringify)
+                (return . literal . stringify) meta
+
   let env = def { envRefArchive = refArchive
                 , envDistArchive = distArchive
                 , envUTCTime = utctime
                 , envOpts = opts
+                , envContext = context
                 , envPresentationSize = presSize
                 , envSlideIdMap = makeSlideIdMap pres
                 , envSpeakerNotesIdMap = makeSpeakerNotesMap pres
-- 
cgit v1.2.3


From 40da6c402beb64e47838254c04228098129165df Mon Sep 17 00:00:00 2001
From: niszet <niszet0016@gmail.com>
Date: Thu, 1 Apr 2021 08:44:34 +0900
Subject: Treat tabs as spaces in ODT Reader. (#7185)

---
 src/Text/Pandoc/Readers/Odt/ContentReader.hs |   8 +++++++-
 test/Tests/Readers/Odt.hs                    |   1 +
 test/odt/native/tab.native                   |   1 +
 test/odt/odt/tab.odt                         | Bin 0 -> 10170 bytes
 4 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 test/odt/native/tab.native
 create mode 100644 test/odt/odt/tab.odt

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index df90880fa..c4220b0db 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -577,7 +577,10 @@ read_spaces       = matchingElement NsText "s" (
 read_line_break  :: InlineMatcher
 read_line_break   = matchingElement NsText "line-break"
                     $ returnV linebreak
-
+--
+read_tab         :: InlineMatcher
+read_tab          = matchingElement NsText "tab"
+                    $ returnV space
 --
 read_span        :: InlineMatcher
 read_span         = matchingElement NsText "span"
@@ -585,6 +588,7 @@ read_span         = matchingElement NsText "span"
                     $ matchChildContent [ read_span
                                         , read_spaces
                                         , read_line_break
+                                        , read_tab
                                         , read_link
                                         , read_note
                                         , read_citation
@@ -604,6 +608,7 @@ read_paragraph    = matchingElement NsText "p"
                     $ matchChildContent [ read_span
                                         , read_spaces
                                         , read_line_break
+                                        , read_tab
                                         , read_link
                                         , read_note
                                         , read_citation
@@ -630,6 +635,7 @@ read_header       = matchingElement NsText "h"
   children <- ( matchChildContent [ read_span
                                   , read_spaces
                                   , read_line_break
+                                  , read_tab
                                   , read_link
                                   , read_note
                                   , read_citation
diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs
index 9b5ec6b9e..edff4fe2c 100644
--- a/test/Tests/Readers/Odt.hs
+++ b/test/Tests/Readers/Odt.hs
@@ -176,6 +176,7 @@ namesOfTestsComparingToNative   = [ "blockquote"
                                   , "referenceToText"
                                   , "simpleTable"
                                   , "simpleTableWithCaption"
+                                  , "tab"
 --                                , "table"
                                   , "textMixedStyles"
                                   , "tableWithContents"
diff --git a/test/odt/native/tab.native b/test/odt/native/tab.native
new file mode 100644
index 000000000..948e81cd5
--- /dev/null
+++ b/test/odt/native/tab.native
@@ -0,0 +1 @@
+[Para [Str "Three",Space,Str "tabs",Space,Str "between",Space,Str "A",Space,Str "and",Space,Str "B",Space,Str "will",Space,Str "be",Space,Str "converted",Space,Str "to",Space,Str "one",Space,Str "Space:",Space,Str "A",Space,Str "B."]]
\ No newline at end of file
diff --git a/test/odt/odt/tab.odt b/test/odt/odt/tab.odt
new file mode 100644
index 000000000..a2c80f45c
Binary files /dev/null and b/test/odt/odt/tab.odt differ
-- 
cgit v1.2.3


From 4371223d132ef822cff0ddee6099a5fe295385cb Mon Sep 17 00:00:00 2001
From: tecosaur <tec@tecosaur.com>
Date: Fri, 2 Apr 2021 05:36:02 +0800
Subject: Org writer: Use LaTeX style maths deliminators (#7196)

Org works better with LaTeX-style delimiters.
---
 src/Text/Pandoc/Writers/Org.hs |  4 ++--
 test/writer.org                | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index 88a2b8314..d0c9813da 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -404,8 +404,8 @@ inlineToOrg (Str str) = return . literal $ escapeString str
 inlineToOrg (Math t str) = do
   modify $ \st -> st{ stHasMath = True }
   return $ if t == InlineMath
-              then "$" <> literal str <> "$"
-              else "$$" <> literal str <> "$$"
+              then "\\(" <> literal str <> "\\)"
+              else "\\[" <> literal str <> "\\]"
 inlineToOrg il@(RawInline f str)
   | isRawFormat f = return $ literal str
   | otherwise     = do
diff --git a/test/writer.org b/test/writer.org
index 34dd51f43..df9d2b3a5 100644
--- a/test/writer.org
+++ b/test/writer.org
@@ -600,14 +600,14 @@ Ellipses...and...and....
   :END:
 
 - \cite[22-23]{smith.1899}
-- $2+2=4$
-- $x \in y$
-- $\alpha \wedge \omega$
-- $223$
-- $p$-Tree
+- \(2+2=4\)
+- \(x \in y\)
+- \(\alpha \wedge \omega\)
+- \(223\)
+- \(p\)-Tree
 - Here's some display math:
-  $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$
-- Here's one that has a line break in it: $\alpha + \omega \times x^2$.
+  \[\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}\]
+- Here's one that has a line break in it: \(\alpha + \omega \times x^2\).
 
 These shouldn't be math:
 
-- 
cgit v1.2.3


From 935d10769df682fa08d947a873ca7665136e0b79 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 2 Apr 2021 17:04:55 -0700
Subject: Fix "phrase" in DocBook: take classes from "role" not "class".

Closes #7195.  Revises #6438.
---
 src/Text/Pandoc/Readers/DocBook.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 4f525cfb1..ac3caa2c0 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1075,7 +1075,7 @@ parseInline (Elem e) =
            return $ spanWith (attrValue "id" e, [], []) mempty
         "phrase" -> do
           let ident = attrValue "id" e
-          let classes = T.words $ attrValue "class" e
+          let classes = T.words $ attrValue "role" e
           if ident /= "" || classes /= []
             then innerInlines (spanWith (ident,classes,[]))
             else innerInlines id
-- 
cgit v1.2.3


From 65a9d3a8786c23f79de9dcdf56ab7efb789726ff Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 5 Apr 2021 08:49:03 -0700
Subject: SelfContained: use application/octet-stream for unknown mime types...

instead of halting with an error.
Closes #7202.
---
 src/Text/Pandoc/SelfContained.hs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index c9e20cad0..17c1e18c9 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -244,11 +244,10 @@ getData mimetype src
       let raw' = if ext `elem` [".gz", ".svgz"]
                  then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw]
                  else raw
-      mime <- case (mimetype, respMime) of
-                ("",Nothing) -> throwError $ PandocSomeError
-                  $ "Could not determine mime type for `" <> src <> "'"
-                (x, Nothing) -> return x
-                (_, Just x ) -> return x
+      let mime = case (mimetype, respMime) of
+                  ("",Nothing) -> "application/octet-stream"
+                  (x, Nothing) -> x
+                  (_, Just x ) -> x
       result <- if "text/css" `T.isPrefixOf` mime
                 then do
                   oldInputs <- getInputFiles
-- 
cgit v1.2.3


From 038261ea529bc4516d7cee501db70020938dbf2b Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 5 Apr 2021 21:45:52 +0200
Subject: JATS writer: escape disallows chars in identifiers

XML identifiers must start with an underscore or letter, and can contain
only a limited set of punctuation characters. Any IDs not adhering to
these rules are rewritten by writing the offending characters as Uxxxx,
where `xxxx` is the character's hex code.
---
 src/Text/Pandoc/Writers/JATS.hs            |  27 ++--
 src/Text/Pandoc/Writers/JATS/References.hs |   5 +-
 src/Text/Pandoc/Writers/JATS/Table.hs      |   4 +-
 src/Text/Pandoc/XML.hs                     |  30 ++++-
 test/Tests/Writers/JATS.hs                 | 205 ++++++++++++++++-------------
 5 files changed, 162 insertions(+), 109 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index a9369db7a..26f94cb03 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -239,7 +239,7 @@ languageFor classes =
 codeAttr :: Attr -> (Text, [(Text, Text)])
 codeAttr (ident,classes,kvs) = (lang, attr)
     where
-       attr = [("id",ident) | not (T.null ident)] ++
+       attr = [("id", escapeNCName ident) | not (T.null ident)] ++
               [("language",lang) | not (T.null lang)] ++
               [(k,v) | (k,v) <- kvs, k `elem` ["code-type",
                 "code-version", "executable",
@@ -251,7 +251,8 @@ codeAttr (ident,classes,kvs) = (lang, attr)
 blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text)
 blockToJATS _ Null = return empty
 blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do
-  let idAttr = [("id", writerIdentifierPrefix opts <> id') | not (T.null id')]
+  let idAttr = [ ("id", writerIdentifierPrefix opts <> escapeNCName id')
+               | not (T.null id')]
   let otherAttrs = ["sec-type", "specific-use"]
   let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs]
   title' <- inlinesToJATS opts ils
@@ -260,7 +261,7 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do
       inTagsSimple "title" title' $$ contents
 -- Bibliography reference:
 blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident =
-  inTags True "ref" [("id", ident)] .
+  inTags True "ref" [("id", escapeNCName ident)] .
   inTagsSimple "mixed-citation" <$>
   inlinesToJATS opts lst
 blockToJATS opts (Div ("refs",_,_) xs) = do
@@ -271,14 +272,14 @@ blockToJATS opts (Div ("refs",_,_) xs) = do
   return $ inTagsIndented "ref-list" contents
 blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do
   contents <- blocksToJATS opts bs
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("xml:lang",l) | ("lang",l) <- kvs] ++
              [(k,v) | (k,v) <- kvs, k `elem` ["specific-use",
                  "content-type", "orientation", "position"]]
   return $ inTags True cls attr contents
 blockToJATS opts (Div (ident,_,kvs) bs) = do
   contents <- blocksToJATS opts bs
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("xml:lang",l) | ("lang",l) <- kvs] ++
              [(k,v) | (k,v) <- kvs, k `elem` ["specific-use",
                  "content-type", "orientation", "position"]]
@@ -296,7 +297,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt
   let capt = if null txt
                 then empty
                 else inTagsSimple "caption" $ inTagsSimple "p" alt
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation",
                                               "position", "specific-use"]]
   let graphicattr = [("mimetype",maintype),
@@ -307,7 +308,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt
               capt $$ selfClosingTag "graphic" graphicattr
 blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do
   let (maintype, subtype) = imageMimeType src kvs
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("mimetype", maintype),
               ("mime-subtype", subtype),
               ("xlink:href", src)] ++
@@ -434,7 +435,7 @@ inlineToJATS opts (Note contents) = do
       let notenum = case notes of
                       (n, _):_ -> n + 1
                       []       -> 1
-      thenote <- inTags True "fn" [("id","fn" <> tshow notenum)]
+      thenote <- inTags True "fn" [("id", "fn" <> tshow notenum)]
                     <$> wrappedBlocksToJATS (not . isPara) opts
                          (walk demoteHeaderAndRefs contents)
       modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes }
@@ -447,7 +448,7 @@ inlineToJATS opts (Cite _ lst) =
 inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils
 inlineToJATS opts (Span (ident,_,kvs) ils) = do
   contents <- inlinesToJATS opts ils
-  let attr = [("id",ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("xml:lang",l) | ("lang",l) <- kvs] ++
              [(k,v) | (k,v) <- kvs
                     ,  k `elem` ["content-type", "rationale",
@@ -488,9 +489,9 @@ inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _))
   return $ inTagsSimple "email" $ literal (escapeStringForXML email)
 inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do
   let attr = mconcat
-             [ [("id", ident) | not (T.null ident)]
+             [ [("id", escapeNCName ident) | not (T.null ident)]
              , [("alt", stringify txt) | not (null txt)]
-             , [("rid", src)]
+             , [("rid", escapeNCName src)]
              , [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]]
              , [("ref-type", "bibr") | "ref-" `T.isPrefixOf` src]
              ]
@@ -500,7 +501,7 @@ inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do
         contents <- inlinesToJATS opts txt
         return $ inTags False "xref" attr contents
 inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("ext-link-type", "uri"),
               ("xlink:href", src)] ++
              [("xlink:title", tit) | not (T.null tit)] ++
@@ -518,7 +519,7 @@ inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do
   let subtype = fromMaybe "" $
                   lookup "mime-subtype" kvs `mplus`
                   (T.drop 1 . T.dropWhile (/='/') <$> mbMT)
-  let attr = [("id", ident) | not (T.null ident)] ++
+  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("mimetype", maintype),
               ("mime-subtype", subtype),
               ("xlink:href", src)] ++
diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs
index 903144128..5b19fd034 100644
--- a/src/Text/Pandoc/Writers/JATS/References.hs
+++ b/src/Text/Pandoc/Writers/JATS/References.hs
@@ -29,7 +29,7 @@ import Text.Pandoc.Builder (Inlines)
 import Text.Pandoc.Options (WriterOptions)
 import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Writers.JATS.Types
-import Text.Pandoc.XML (escapeStringForXML, inTags)
+import Text.Pandoc.XML (escapeNCName, escapeStringForXML, inTags)
 import qualified Data.Text as T
 
 referencesToJATS :: PandocMonad m
@@ -46,7 +46,8 @@ referenceToJATS :: PandocMonad m
 referenceToJATS _opts ref = do
   let refType = referenceType ref
   let pubType = [("publication-type", refType) | not (T.null refType)]
-  let wrap = inTags True "ref" [("id", "ref-" <> unItemId (referenceId ref))]
+  let ident = escapeNCName $ "ref-" <> unItemId (referenceId ref)
+  let wrap = inTags True "ref" [("id", ident)]
            . inTags True "element-citation" pubType
   return . wrap . vcat $
     [ authors
diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs
index 465480f59..2e34900d2 100644
--- a/src/Text/Pandoc/Writers/JATS/Table.hs
+++ b/src/Text/Pandoc/Writers/JATS/Table.hs
@@ -24,7 +24,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options (WriterOptions)
 import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Writers.JATS.Types
-import Text.Pandoc.XML (inTags, inTagsIndented, selfClosingTag)
+import Text.Pandoc.XML (escapeNCName, inTags, inTagsIndented, selfClosingTag)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
 
@@ -216,7 +216,7 @@ cellToJats opts celltype (Ann.Cell (colspec :| _) _colNum cell) =
 
 toAttribs :: Attr -> [Text] -> [(Text, Text)]
 toAttribs (ident, _classes, kvs) knownAttribs =
-  (if T.null ident then id else (("id", ident) :)) $
+  (if T.null ident then id else (("id", escapeNCName ident) :)) $
   filter ((`elem` knownAttribs) . fst) kvs
 
 tableCellToJats :: PandocMonad m
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 6dbbce1d2..79b4768ec 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -13,6 +13,7 @@ Functions for escaping and formatting XML.
 -}
 module Text.Pandoc.XML ( escapeCharForXML,
                          escapeStringForXML,
+                         escapeNCName,
                          inTags,
                          selfClosingTag,
                          inTagsSimple,
@@ -24,7 +25,7 @@ module Text.Pandoc.XML ( escapeCharForXML,
                          html5Attributes,
                          rdfaAttributes ) where
 
-import Data.Char (isAscii, isSpace, ord)
+import Data.Char (isAscii, isSpace, ord, isLetter, isDigit)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities)
@@ -119,8 +120,33 @@ html5EntityMap = foldr go mempty htmlEntities
              where ent' = T.takeWhile (/=';') (T.pack ent)
            _   -> entmap
 
+-- | Converts a string into an NCName, i.e., an XML name without colons.
+-- Disallowed characters are escaped using @ux%x@, where @%x@ is the
+-- hexadecimal unicode identifier of the escaped character.
+escapeNCName :: Text -> Text
+escapeNCName t = case T.uncons t of
+  Nothing -> T.empty
+  Just (c, cs) -> escapeStartChar c <> T.concatMap escapeNCNameChar cs
+  where
+    escapeStartChar :: Char -> Text
+    escapeStartChar c = if isLetter c || c == '_'
+                        then T.singleton c
+                        else escapeChar c
 
--- Unescapes XML entities
+    escapeNCNameChar :: Char -> Text
+    escapeNCNameChar c = if isNCNameChar c
+                         then T.singleton c
+                         else escapeChar c
+
+    isNCNameChar :: Char -> Bool
+    isNCNameChar c = isLetter c || c `elem` ("_-.·" :: String) || isDigit c
+      || '\x0300' <= c && c <= '\x036f'
+      || '\x203f' <= c && c <= '\x2040'
+
+    escapeChar :: Char -> Text
+    escapeChar = T.pack . printf "U%04X" . ord
+
+-- | Unescapes XML entities
 fromEntities :: Text -> Text
 fromEntities t
   = let (x, y) = T.break (== '&') t
diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs
index 2f501c890..23c1686dc 100644
--- a/test/Tests/Writers/JATS.hs
+++ b/test/Tests/Writers/JATS.hs
@@ -1,21 +1,21 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Writers.JATS (tests) where
 
-import Data.Text (unpack)
+import Data.Text (Text)
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
 import Text.Pandoc.Arbitrary ()
 import Text.Pandoc.Builder
+import qualified Data.Text as T
 
-jats :: (ToPandoc a) => a -> String
-jats = unpack
-  . purely (writeJATS def{ writerWrapText = WrapNone })
-  . toPandoc
+jats :: (ToPandoc a) => a -> Text
+jats = purely (writeJATS def{ writerWrapText = WrapNone })
+     . toPandoc
 
-jatsArticleAuthoring :: (ToPandoc a) => a -> String
-jatsArticleAuthoring = unpack
-  . purely (writeJatsArticleAuthoring def{ writerWrapText = WrapNone })
+jatsArticleAuthoring :: (ToPandoc a) => a -> Text
+jatsArticleAuthoring =
+    purely (writeJatsArticleAuthoring def{ writerWrapText = WrapNone })
   . toPandoc
 
 {-
@@ -32,89 +32,114 @@ which is in turn shorthand for
 
 infix 4 =:
 (=:) :: (ToString a, ToPandoc a)
-     => String -> (a, String) -> TestTree
+     => String -> (a, Text) -> TestTree
 (=:) = test jats
 
 tests :: [TestTree]
-tests = [ testGroup "inline code"
-          [ "basic" =: code "@&" =?> "<p><monospace>@&amp;</monospace></p>"
-          , "lang" =: codeWith ("", ["c"], []) "@&" =?> "<p><code language=\"c\">@&amp;</code></p>"
-          ]
-        , testGroup "block code"
-          [ "basic" =: codeBlock "@&" =?> "<preformat>@&amp;</preformat>"
-          , "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "<code language=\"c\">@&amp;</code>"
-          ]
-        , testGroup "images"
-          [ "basic" =:
-            image "/url" "title" mempty
-            =?> "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />"
-          ]
-        , testGroup "inlines"
-          [ "Emphasis" =: emph "emphasized"
-            =?> "<p><italic>emphasized</italic></p>"
+tests =
+  [ testGroup "inline code"
+    [ "basic" =: code "@&" =?> "<p><monospace>@&amp;</monospace></p>"
+    , "lang" =: codeWith ("", ["c"], []) "@&" =?> "<p><code language=\"c\">@&amp;</code></p>"
+    ]
+  , testGroup "block code"
+    [ "basic" =: codeBlock "@&" =?> "<preformat>@&amp;</preformat>"
+    , "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "<code language=\"c\">@&amp;</code>"
+    ]
+  , testGroup "images"
+    [ "basic" =:
+      image "/url" "title" mempty
+      =?> "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />"
+    ]
+  , testGroup "inlines"
+    [ "Emphasis" =: emph "emphasized"
+      =?> "<p><italic>emphasized</italic></p>"
+
+    , test jatsArticleAuthoring "footnote in articleauthoring tag set"
+      ("test" <> note (para "footnote") =?>
+        unlines [ "<p>test<fn>"
+                , "  <p>footnote</p>"
+                , "</fn></p>"
+                ])
+    ]
+  , "bullet list" =: bulletList [ plain $ text "first"
+                                , plain $ text "second"
+                                , plain $ text "third"
+                                ]
+    =?> "<list list-type=\"bullet\">\n\
+        \  <list-item>\n\
+        \    <p>first</p>\n\
+        \  </list-item>\n\
+        \  <list-item>\n\
+        \    <p>second</p>\n\
+        \  </list-item>\n\
+        \  <list-item>\n\
+        \    <p>third</p>\n\
+        \  </list-item>\n\
+        \</list>"
+  , testGroup "definition lists"
+    [ "with internal link" =: definitionList [(link "#go" "" (str "testing"),
+                                               [plain (text "hi there")])] =?>
+      "<def-list>\n\
+      \  <def-item>\n\
+      \    <term><xref alt=\"testing\" rid=\"go\">testing</xref></term>\n\
+      \    <def>\n\
+      \      <p>hi there</p>\n\
+      \    </def>\n\
+      \  </def-item>\n\
+      \</def-list>"
+    ]
+  , testGroup "math"
+    [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?>
+      "<p><inline-formula><alternatives>\n\
+      \<tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
+      \<mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula></p>"
+    ]
+  , testGroup "headers"
+    [ "unnumbered header" =:
+      headerWith ("foo",["unnumbered"],[]) 1
+      (text "Header 1" <> note (plain $ text "note")) =?>
+      "<sec id=\"foo\">\n\
+      \  <title>Header 1<xref ref-type=\"fn\" rid=\"fn1\">1</xref></title>\n\
+      \</sec>"
+    , "unnumbered sub header" =:
+      headerWith ("foo",["unnumbered"],[]) 1
+      (text "Header")
+      <> headerWith ("foo",["unnumbered"],[]) 2
+      (text "Sub-Header") =?>
+      "<sec id=\"foo\">\n\
+      \  <title>Header</title>\n\
+      \  <sec id=\"foo\">\n\
+      \    <title>Sub-Header</title>\n\
+      \  </sec>\n\
+      \</sec>"
+    , "containing image" =:
+      header 1 (image "imgs/foo.jpg" "" (text "Alt text")) =?>
+      "<sec>\n\
+      \  <title><inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" /></title>\n\
+      \</sec>"
+    ]
+
+  , testGroup "ids"
+    [ "non-ASCII in header ID" =:
+      headerWith ("smørbrød",[],[]) 1 (text "smørbrød") =?>
+      T.unlines [ "<sec id=\"smørbrød\">"
+                , "  <title>smørbrød</title>"
+                , "</sec>"
+                ]
+
+    , "disallowed symbol in header id" =:
+      headerWith ("i/o",[],[]) 1 (text "I/O") =?>
+      T.unlines [ "<sec id=\"iU002Fo\">"
+                , "  <title>I/O</title>"
+                , "</sec>"
+                ]
+
+    , "disallowed symbols in internal link target" =:
+      link "#foo:bar" "" "baz" =?>
+      "<p><xref alt=\"baz\" rid=\"fooU003Abar\">baz</xref></p>"
 
-          , test jatsArticleAuthoring "footnote in articleauthoring tag set"
-            ("test" <> note (para "footnote") =?>
-             unlines [ "<p>test<fn>"
-                     , "  <p>footnote</p>"
-                     , "</fn></p>"
-                     ])
-          ]
-        , "bullet list" =: bulletList [ plain $ text "first"
-                                      , plain $ text "second"
-                                      , plain $ text "third"
-                                      ]
-            =?> "<list list-type=\"bullet\">\n\
-                \  <list-item>\n\
-                \    <p>first</p>\n\
-                \  </list-item>\n\
-                \  <list-item>\n\
-                \    <p>second</p>\n\
-                \  </list-item>\n\
-                \  <list-item>\n\
-                \    <p>third</p>\n\
-                \  </list-item>\n\
-                \</list>"
-        , testGroup "definition lists"
-          [ "with internal link" =: definitionList [(link "#go" "" (str "testing"),
-             [plain (text "hi there")])] =?>
-            "<def-list>\n\
-            \  <def-item>\n\
-            \    <term><xref alt=\"testing\" rid=\"go\">testing</xref></term>\n\
-            \    <def>\n\
-            \      <p>hi there</p>\n\
-            \    </def>\n\
-            \  </def-item>\n\
-            \</def-list>"
-          ]
-        , testGroup "math"
-          [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?>
-            "<p><inline-formula><alternatives>\n\
-            \<tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
-            \<mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula></p>"
-          ]
-        , testGroup "headers"
-          [ "unnumbered header" =:
-            headerWith ("foo",["unnumbered"],[]) 1
-              (text "Header 1" <> note (plain $ text "note")) =?>
-            "<sec id=\"foo\">\n\
-            \  <title>Header 1<xref ref-type=\"fn\" rid=\"fn1\">1</xref></title>\n\
-            \</sec>"
-          , "unnumbered sub header" =:
-            headerWith ("foo",["unnumbered"],[]) 1
-              (text "Header")
-            <> headerWith ("foo",["unnumbered"],[]) 2
-              (text "Sub-Header") =?>
-            "<sec id=\"foo\">\n\
-            \  <title>Header</title>\n\
-            \  <sec id=\"foo\">\n\
-            \    <title>Sub-Header</title>\n\
-            \  </sec>\n\
-            \</sec>"
-          , "containing image" =:
-            header 1 (image "imgs/foo.jpg" "" (text "Alt text")) =?>
-            "<sec>\n\
-            \  <title><inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" /></title>\n\
-            \</sec>"
-          ]
-        ]
+    , "code id starting with a number" =:
+      codeWith ("7y",[],[]) "print 5" =?>
+      "<p><monospace id=\"U0037y\">print 5</monospace></p>"
+    ]
+  ]
-- 
cgit v1.2.3


From 21fed4a9c2c1c4bd2aedb619c3c76e0211b77a25 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 5 Apr 2021 23:26:54 -0700
Subject: SelfContained: remove unneeded imports.

---
 src/Text/Pandoc/SelfContained.hs | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 17c1e18c9..3bbab4bbe 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -16,7 +16,6 @@ the HTML using data URIs.
 module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where
 import Codec.Compression.GZip as Gzip
 import Control.Applicative ((<|>))
-import Control.Monad.Except (throwError)
 import Control.Monad.Trans (lift)
 import Data.ByteString (ByteString)
 import Data.ByteString.Base64
@@ -29,7 +28,6 @@ import System.FilePath (takeDirectory, takeExtension, (</>))
 import Text.HTML.TagSoup
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), fetchItem,
                                       getInputFiles, report, setInputFiles)
-import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Shared (isURI, renderTags', trim)
-- 
cgit v1.2.3


From 60974538b25657c9aa37e72cc66ca3957912ddec Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 5 Apr 2021 23:29:22 -0700
Subject: Commonmark writer: Use backslash escapes for `<` and `|`...

instead of entities.  Closes #7208.
---
 src/Text/Pandoc/Writers/Markdown.hs | 12 +++++++++++-
 test/command/7208.md                |  6 ++++++
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7208.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 4d9f3d5b0..daf45ed53 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -67,7 +67,17 @@ writePlain opts document =
 -- | Convert Pandoc to Commonmark.
 writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeCommonMark opts document =
-  evalMD (pandocToMarkdown opts document) def{ envVariant = Commonmark } def
+  evalMD (pandocToMarkdown opts' document) def{ envVariant = Commonmark } def
+ where
+  opts' = opts{ writerExtensions =
+                   -- These extensions can't be enabled or disabled
+                   -- for commonmark because they're part of the core;
+                   -- we set them here so that escapeText will behave
+                   -- properly.
+                   enableExtension Ext_all_symbols_escapable $
+                   enableExtension Ext_pipe_tables $
+                   enableExtension Ext_intraword_underscores $
+                     writerExtensions opts }
 
 pandocTitleBlock :: Doc Text -> [Doc Text] -> Doc Text -> Doc Text
 pandocTitleBlock tit auths dat =
diff --git a/test/command/7208.md b/test/command/7208.md
new file mode 100644
index 000000000..e65943ade
--- /dev/null
+++ b/test/command/7208.md
@@ -0,0 +1,6 @@
+```
+% pandoc -t gfm
+\<hi\>
+^D
+\<hi\>
+```
-- 
cgit v1.2.3


From e227496d3a5d07df8183b8d986ea2aa36c90612c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Thu, 8 Apr 2021 22:14:47 +0200
Subject: Lua filter: respect Inlines/Blocks filter functions in pandoc.walk_*

---
 src/Text/Pandoc/Lua/Filter.hs        |  2 ++
 src/Text/Pandoc/Lua/Module/Pandoc.hs | 11 ++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs
index 90967f295..01bf90efa 100644
--- a/src/Text/Pandoc/Lua/Filter.hs
+++ b/src/Text/Pandoc/Lua/Filter.hs
@@ -13,7 +13,9 @@ module Text.Pandoc.Lua.Filter ( LuaFilterFunction
                               , LuaFilter
                               , runFilterFile
                               , walkInlines
+                              , walkInlineLists
                               , walkBlocks
+                              , walkBlockLists
                               , module Text.Pandoc.Lua.Walk
                               ) where
 import Control.Applicative ((<|>))
diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs
index 8d30f9a0c..5c14b3a30 100644
--- a/src/Text/Pandoc/Lua/Module/Pandoc.hs
+++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs
@@ -23,8 +23,10 @@ import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable)
 import System.Exit (ExitCode (..))
 import Text.Pandoc.Class.PandocIO (runIO)
 import Text.Pandoc.Definition (Block, Inline)
-import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter, SingletonsList (..))
+import Text.Pandoc.Lua.Filter (LuaFilter, SingletonsList (..), walkInlines,
+                               walkInlineLists, walkBlocks, walkBlockLists)
 import Text.Pandoc.Lua.Marshaling ()
+import Text.Pandoc.Lua.Marshaling.List (List (..))
 import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua,
                                   loadDefaultModule)
 import Text.Pandoc.Walk (Walkable)
@@ -51,9 +53,12 @@ pushModule = do
   return 1
 
 walkElement :: (Walkable (SingletonsList Inline) a,
-                Walkable (SingletonsList Block) a)
+                Walkable (SingletonsList Block) a,
+                Walkable (List Inline) a,
+                Walkable (List Block) a)
             => a -> LuaFilter -> PandocLua a
-walkElement x f = liftPandocLua $ walkInlines f x >>= walkBlocks f
+walkElement x f = liftPandocLua $
+  walkInlines f x >>= walkInlineLists f >>= walkBlocks f >>= walkBlockLists f
 
 walk_inline :: Inline -> LuaFilter -> PandocLua Inline
 walk_inline = walkElement
-- 
cgit v1.2.3


From 20cd33e5a44810b68fed74da00f4f51eb2282147 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 8 Apr 2021 14:47:11 -0700
Subject: Fix regression in grid tables for wide characters.

In the translation from String to Text, a char-width-sensitive
splitAt' was dropped.  This commit reinstates it.
Closes #7214.
---
 src/Text/Pandoc/Shared.hs | 18 +++++++++++++-----
 test/command/7214.md      | 28 ++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 5 deletions(-)
 create mode 100644 test/command/7214.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 3292b32f4..95cbdc8b8 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -153,12 +153,20 @@ splitTextBy isSep t
                 in  first : splitTextBy isSep (T.dropWhile isSep rest)
 
 splitTextByIndices :: [Int] -> T.Text -> [T.Text]
-splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns))
+splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack
  where
-  splitTextByRelIndices [] t = [t]
-  splitTextByRelIndices (x:xs) t =
-    let (first, rest) = T.splitAt x t
-     in first : splitTextByRelIndices xs rest
+  splitTextByRelIndices [] cs = [T.pack cs]
+  splitTextByRelIndices (x:xs) cs =
+    let (first, rest) = splitAt' x cs
+     in T.pack first : splitTextByRelIndices xs rest
+
+-- Note: don't replace this with T.splitAt, which is not sensitive
+-- to character widths!
+splitAt' :: Int -> [Char] -> ([Char],[Char])
+splitAt' _ []          = ([],[])
+splitAt' n xs | n <= 0 = ([],xs)
+splitAt' n (x:xs)      = (x:ys,zs)
+  where (ys,zs) = splitAt' (n - charWidth x) xs
 
 ordNub :: (Ord a) => [a] -> [a]
 ordNub l = go Set.empty l
diff --git a/test/command/7214.md b/test/command/7214.md
new file mode 100644
index 000000000..43bf9e4ca
--- /dev/null
+++ b/test/command/7214.md
@@ -0,0 +1,28 @@
+```
+% pandoc
++------------+----------+------------------+
+|日本語      | の文字列 | words in english |
++------------+----------+------------------+
+|abc defghij | def      | xyz              |
++------------+----------+------------------+
+^D
+<table style="width:60%;">
+<colgroup>
+<col style="width: 18%" />
+<col style="width: 15%" />
+<col style="width: 26%" />
+</colgroup>
+<tbody>
+<tr class="odd">
+<td>日本語</td>
+<td>の文字列</td>
+<td>words in english</td>
+</tr>
+<tr class="even">
+<td>abc defghij</td>
+<td>def</td>
+<td>xyz</td>
+</tr>
+</tbody>
+</table>
+```
-- 
cgit v1.2.3


From 051b7ffeaffdaf34ed1384a239cf0179aa59b932 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 10 Apr 2021 10:47:10 +0200
Subject: JATS writer: add footnote number as label in backmatter

Footnotes in the backmatter are given the footnote's number as a label.
The articleauthoring output is unaffected from this change, as footnotes
are placed inline there.

Closes: #7210
---
 src/Text/Pandoc/Writers/JATS.hs |  1 +
 test/writer.jats_archiving      | 14 ++++++++------
 test/writer.jats_publishing     | 14 ++++++++------
 3 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index 26f94cb03..e78c6dc8f 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -436,6 +436,7 @@ inlineToJATS opts (Note contents) = do
                       (n, _):_ -> n + 1
                       []       -> 1
       thenote <- inTags True "fn" [("id", "fn" <> tshow notenum)]
+                     . (inTagsSimple "label" (literal $ tshow notenum) <>)
                     <$> wrappedBlocksToJATS (not . isPara) opts
                          (walk demoteHeaderAndRefs contents)
       modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes }
diff --git a/test/writer.jats_archiving b/test/writer.jats_archiving
index 1a9537a21..332b5d3fd 100644
--- a/test/writer.jats_archiving
+++ b/test/writer.jats_archiving
@@ -882,11 +882,12 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
 <back>
 <fn-group>
   <fn id="fn1">
-    <p>Here is the footnote. It can go anywhere after the footnote reference.
-    It need not be placed at the end of the document.</p>
+    <label>1</label><p>Here is the footnote. It can go anywhere after the
+    footnote reference. It need not be placed at the end of the document.</p>
   </fn>
   <fn id="fn2">
-    <p>Here’s the long note. This one contains multiple blocks.</p>
+    <label>2</label><p>Here’s the long note. This one contains multiple
+    blocks.</p>
     <p>Subsequent blocks are indented to show that they belong to the footnote
     (as with list items).</p>
     <p specific-use="wrapper">
@@ -896,16 +897,17 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
     just indent the first line of each block.</p>
   </fn>
   <fn id="fn3">
-    <p>This is <italic>easier</italic> to type. Inline notes may contain
+    <label>3</label><p>This is <italic>easier</italic> to type. Inline notes
+    may contain
     <ext-link ext-link-type="uri" xlink:href="http://google.com">links</ext-link>
     and <monospace>]</monospace> verbatim characters, as well as [bracketed
     text].</p>
   </fn>
   <fn id="fn4">
-    <p>In quote.</p>
+    <label>4</label><p>In quote.</p>
   </fn>
   <fn id="fn5">
-    <p>In list.</p>
+    <label>5</label><p>In list.</p>
   </fn>
 </fn-group>
 </back>
diff --git a/test/writer.jats_publishing b/test/writer.jats_publishing
index b85816848..f53fd554d 100644
--- a/test/writer.jats_publishing
+++ b/test/writer.jats_publishing
@@ -882,11 +882,12 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
 <back>
 <fn-group>
   <fn id="fn1">
-    <p>Here is the footnote. It can go anywhere after the footnote reference.
-    It need not be placed at the end of the document.</p>
+    <label>1</label><p>Here is the footnote. It can go anywhere after the
+    footnote reference. It need not be placed at the end of the document.</p>
   </fn>
   <fn id="fn2">
-    <p>Here’s the long note. This one contains multiple blocks.</p>
+    <label>2</label><p>Here’s the long note. This one contains multiple
+    blocks.</p>
     <p>Subsequent blocks are indented to show that they belong to the footnote
     (as with list items).</p>
     <p specific-use="wrapper">
@@ -896,16 +897,17 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
     just indent the first line of each block.</p>
   </fn>
   <fn id="fn3">
-    <p>This is <italic>easier</italic> to type. Inline notes may contain
+    <label>3</label><p>This is <italic>easier</italic> to type. Inline notes
+    may contain
     <ext-link ext-link-type="uri" xlink:href="http://google.com">links</ext-link>
     and <monospace>]</monospace> verbatim characters, as well as [bracketed
     text].</p>
   </fn>
   <fn id="fn4">
-    <p>In quote.</p>
+    <label>4</label><p>In quote.</p>
   </fn>
   <fn id="fn5">
-    <p>In list.</p>
+    <label>5</label><p>In list.</p>
   </fn>
 </fn-group>
 </back>
-- 
cgit v1.2.3


From 2d60524de43d59ffb1763a33a15cc2ecce613ecf Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 10 Apr 2021 11:38:02 +0200
Subject: JATS writer: convert spans to <named-content> elements

Spans with attributes are converted to `<named-content>` elements
instead of being wrapped with `<milestone-start/>` and `<milestone-end>`
elements. Milestone elements are not allowed in documents using the
articleauthoring tag set, so this change ensures the creation of valid
documents.

Closes: #7211
---
 src/Text/Pandoc/Writers/JATS.hs | 13 +++++++------
 test/Tests/Writers/JATS.hs      | 15 +++++++++++++++
 2 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index e78c6dc8f..5b3e439d4 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -444,18 +444,19 @@ inlineToJATS opts (Note contents) = do
                                     ("rid", "fn" <> tshow notenum)]
              $ text (show notenum)
 inlineToJATS opts (Cite _ lst) =
-  -- TODO revisit this after examining the jats.csl pipeline
   inlinesToJATS opts lst
-inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils
 inlineToJATS opts (Span (ident,_,kvs) ils) = do
   contents <- inlinesToJATS opts ils
   let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
              [("xml:lang",l) | ("lang",l) <- kvs] ++
              [(k,v) | (k,v) <- kvs
-                    ,  k `elem` ["content-type", "rationale",
-                                 "rid", "specific-use"]]
-  return $ selfClosingTag "milestone-start" attr <> contents <>
-           selfClosingTag "milestone-end" []
+                    ,  k `elem` ["alt", "content-type", "rid", "specific-use",
+                                 "vocab", "vocab-identifier", "vocab-term",
+                                 "vocab-term-identifier"]]
+  return $
+    if null attr
+    then contents  -- unwrap if no relevant attributes are given
+    else inTags False "named-content" attr contents
 inlineToJATS _ (Math t str) = do
   let addPref (Xml.Attr q v)
          | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v
diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs
index 23c1686dc..e90438176 100644
--- a/test/Tests/Writers/JATS.hs
+++ b/test/Tests/Writers/JATS.hs
@@ -142,4 +142,19 @@ tests =
       codeWith ("7y",[],[]) "print 5" =?>
       "<p><monospace id=\"U0037y\">print 5</monospace></p>"
     ]
+
+  , testGroup "spans"
+    [ "unwrapped if no attributes given" =:
+      spanWith nullAttr "text in span" =?>
+      "<p>text in span</p>"
+
+    , "converted to named-content element" =:
+      spanWith ("a", ["ignored"], [("alt", "aa")]) "text" =?>
+      "<p><named-content id=\"a\" alt=\"aa\">text</named-content></p>"
+
+    , "unwrapped if named-content element would have no attributes" =:
+      spanWith ("", ["ignored"], [("hidden", "true")]) "text in span" =?>
+      "<p>text in span</p>"
+
+    ]
   ]
-- 
cgit v1.2.3


From 5f79a66ed64e9b0cc326e467dcb17239f1596fcc Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 16 Apr 2021 22:13:29 +0200
Subject: JATS writer: reduce unnecessary use of <p> elements for wrapping

The `<p>` element is used for wrapping in cases were the contents would
otherwise not be allowed in a certain context. Unnecessary wrapping is
avoided, especially around quotes (`<disp-quote>` elements).

Closes: #7227
---
 src/Text/Pandoc/Writers/JATS.hs       | 22 +++++----
 src/Text/Pandoc/Writers/JATS/Table.hs | 26 +++++++++--
 src/Text/Pandoc/Writers/JATS/Types.hs | 15 ++++--
 test/command/7041.md                  | 23 +++++++++
 test/writer.jats_archiving            | 68 +++++++++++----------------
 test/writer.jats_articleauthoring     | 88 ++++++++++++++++-------------------
 test/writer.jats_publishing           | 68 +++++++++++----------------
 7 files changed, 166 insertions(+), 144 deletions(-)
 create mode 100644 test/command/7041.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index 5b3e439d4..b58ff8aef 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -1,6 +1,7 @@
+{-# LANGUAGE LambdaCase          #-}
+{-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE ViewPatterns        #-}
 {- |
    Module      : Text.Pandoc.Writers.JATS
    Copyright   : 2017-2021 John MacFarlane
@@ -80,7 +81,7 @@ writeJats tagSet opts d = do
   let environment = JATSEnv
           { jatsTagSet = tagSet
           , jatsInlinesWriter = inlinesToJATS
-          , jatsBlockWriter = blockToJATS
+          , jatsBlockWriter = wrappedBlocksToJATS
           , jatsReferences = refs
           }
   let initialState = JATSState { jatsNotes = [] }
@@ -162,11 +163,9 @@ wrappedBlocksToJATS needsWrap opts =
     wrappedBlockToJATS b = do
       inner <- blockToJATS opts b
       return $
-        if needsWrap b || isBlockQuote b -- see #7041
+        if needsWrap b
            then inTags True "p" [("specific-use","wrapper")] inner
            else inner
-    isBlockQuote (BlockQuote _) = True
-    isBlockQuote _ = False
 
 -- | Auxiliary function to convert Plain block to Para.
 plainToPara :: Block -> Block
@@ -324,10 +323,13 @@ blockToJATS opts (LineBlock lns) =
   blockToJATS opts $ linesToPara lns
 blockToJATS opts (BlockQuote blocks) = do
   tagSet <- asks jatsTagSet
-  let blocksToJats' = if tagSet == TagSetArticleAuthoring
-                      then wrappedBlocksToJATS (not . isPara)
-                      else blocksToJATS
-  inTagsIndented "disp-quote" <$> blocksToJats' opts blocks
+  let needsWrap = if tagSet == TagSetArticleAuthoring
+                  then not . isPara
+                  else \case
+                    Header{}       -> True
+                    HorizontalRule -> True
+                    _              -> False
+  inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks
 blockToJATS _ (CodeBlock a str) = return $
   inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str)))
     where (lang, attr) = codeAttr a
diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs
index 2e34900d2..70569bdcd 100644
--- a/src/Text/Pandoc/Writers/JATS/Table.hs
+++ b/src/Text/Pandoc/Writers/JATS/Table.hs
@@ -34,13 +34,19 @@ tableToJATS :: PandocMonad m
             -> JATS m (Doc Text)
 tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do
   let (Caption _maybeShortCaption captionBlocks) = caption
+  -- Only paragraphs are allowed in captions, all other blocks must be
+  -- wrapped in @<p>@ elements.
+  let needsWrapping = \case
+        Plain{} -> False
+        Para{}  -> False
+        _       -> True
   tbl <- captionlessTable opts attr colspecs thead tbodies tfoot
   captionDoc <- if null captionBlocks
                 then return empty
                 else do
                   blockToJATS <- asks jatsBlockWriter
-                  inTagsIndented "caption" . vcat <$>
-                    mapM (blockToJATS opts) captionBlocks
+                  inTagsIndented "caption" <$>
+                    blockToJATS needsWrapping opts captionBlocks
   return $ inTags True "table-wrap" [] $ captionDoc $$ tbl
 
 captionlessTable :: PandocMonad m
@@ -230,7 +236,7 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
   inlinesToJats <- asks jatsInlinesWriter
   let cellContents = \case
         [Plain inlines] -> inlinesToJats opts inlines
-        blocks          -> vcat <$> mapM (blockToJats opts) blocks
+        blocks          -> blockToJats needsWrapInCell opts blocks
   let tag' = case ctype of
         BodyCell   -> "td"
         HeaderCell -> "th"
@@ -246,3 +252,17 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
               . maybeCons (colspanAttrib colspan)
               $ toAttribs attr validAttribs
   inTags False tag' attribs <$> cellContents item
+
+-- | Whether the JATS produced from this block should be wrapped in a
+-- @<p>@ element when put directly below a @<td>@ element.
+needsWrapInCell :: Block -> Bool
+needsWrapInCell = \case
+  Plain{}          -> False  -- should be unwrapped anyway
+  Para{}           -> False
+  BulletList{}     -> False
+  OrderedList{}    -> False
+  DefinitionList{} -> False
+  HorizontalRule   -> False
+  CodeBlock{}      -> False
+  RawBlock{}       -> False  -- responsibility of the user
+  _                -> True
diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs
index 6fdddc0b5..8d8673cf6 100644
--- a/src/Text/Pandoc/Writers/JATS/Types.hs
+++ b/src/Text/Pandoc/Writers/JATS/Types.hs
@@ -37,11 +37,20 @@ newtype JATSState = JATSState
   { jatsNotes :: [(Int, Doc Text)]
   }
 
+-- | Environment containing all information relevant for rendering.
 data JATSEnv m = JATSEnv
-  { jatsTagSet :: JATSTagSet
+  { jatsTagSet :: JATSTagSet  -- ^ The tag set that's being ouput
+
+  , jatsBlockWriter   :: (Block -> Bool)
+                      -> WriterOptions -> [Block]  -> JATS m (Doc Text)
+    -- ^ Converts a block list to JATS, wrapping top-level blocks into a
+    -- @<p>@ element if the property evaluates to @True@.
+    -- See #7227.
+
   , jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text)
-  , jatsBlockWriter   :: WriterOptions -> Block    -> JATS m (Doc Text)
-  , jatsReferences    :: [Reference Inlines]
+    -- ^ Converts an inline list to JATS.
+
+  , jatsReferences    :: [Reference Inlines] -- ^ List of references
   }
 
 -- | JATS writer type
diff --git a/test/command/7041.md b/test/command/7041.md
new file mode 100644
index 000000000..1773963b8
--- /dev/null
+++ b/test/command/7041.md
@@ -0,0 +1,23 @@
+```
+% pandoc -f html -t jats
+<table>
+  <tr><td><blockquote>Fly, you fools!</blockquote></td></tr>
+</table>
+^D
+<table-wrap>
+  <table>
+    <colgroup>
+      <col width="100%" />
+    </colgroup>
+    <tbody>
+      <tr>
+        <td><p specific-use="wrapper">
+          <disp-quote>
+            <p>Fly, you fools!</p>
+          </disp-quote>
+        </p></td>
+      </tr>
+    </tbody>
+  </table>
+</table-wrap>
+```
diff --git a/test/writer.jats_archiving b/test/writer.jats_archiving
index 332b5d3fd..70e15b6b8 100644
--- a/test/writer.jats_archiving
+++ b/test/writer.jats_archiving
@@ -78,39 +78,31 @@ Gruber’s markdown test suite.</p>
 <sec id="block-quotes">
   <title>Block Quotes</title>
   <p>E-mail style:</p>
-  <p specific-use="wrapper">
+  <disp-quote>
+    <p>This is a block quote. It is pretty short.</p>
+  </disp-quote>
+  <disp-quote>
+    <p>Code in a block quote:</p>
+    <preformat>sub status {
+    print &quot;working&quot;;
+}</preformat>
+    <p>A list:</p>
+    <list list-type="order">
+      <list-item>
+        <p>item one</p>
+      </list-item>
+      <list-item>
+        <p>item two</p>
+      </list-item>
+    </list>
+    <p>Nested block quotes:</p>
     <disp-quote>
-      <p>This is a block quote. It is pretty short.</p>
+      <p>nested</p>
     </disp-quote>
-  </p>
-  <p specific-use="wrapper">
     <disp-quote>
-      <p>Code in a block quote:</p>
-      <preformat>sub status {
-    print &quot;working&quot;;
-}</preformat>
-      <p>A list:</p>
-      <list list-type="order">
-        <list-item>
-          <p>item one</p>
-        </list-item>
-        <list-item>
-          <p>item two</p>
-        </list-item>
-      </list>
-      <p>Nested block quotes:</p>
-      <p specific-use="wrapper">
-        <disp-quote>
-          <p>nested</p>
-        </disp-quote>
-      </p>
-      <p specific-use="wrapper">
-        <disp-quote>
-          <p>nested</p>
-        </disp-quote>
-      </p>
+      <p>nested</p>
     </disp-quote>
-  </p>
+  </disp-quote>
   <p>This should not be a block quote: 2 &gt; 1.</p>
   <p>And a following paragraph.</p>
 </sec>
@@ -837,12 +829,10 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
       </list-item>
     </list>
     <p>An e-mail address: <email>nobody@nowhere.net</email></p>
-    <p specific-use="wrapper">
-      <disp-quote>
-        <p>Blockquoted:
-        <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
-      </disp-quote>
-    </p>
+    <disp-quote>
+      <p>Blockquoted:
+      <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
+    </disp-quote>
     <p>Auto-links should not occur here:
     <monospace>&lt;http://example.com/&gt;</monospace></p>
     <preformat>or here: &lt;http://example.com/&gt;</preformat>
@@ -866,11 +856,9 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
   <italic>not</italic> be a footnote reference, because it contains a
   space.[^my note] Here is an inline
   note.<xref ref-type="fn" rid="fn3">3</xref></p>
-  <p specific-use="wrapper">
-    <disp-quote>
-      <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
-    </disp-quote>
-  </p>
+  <disp-quote>
+    <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
+  </disp-quote>
   <list list-type="order">
     <list-item>
       <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>
diff --git a/test/writer.jats_articleauthoring b/test/writer.jats_articleauthoring
index 956a30faa..59485a114 100644
--- a/test/writer.jats_articleauthoring
+++ b/test/writer.jats_articleauthoring
@@ -67,43 +67,39 @@ Gruber’s markdown test suite.</p>
 <sec id="block-quotes">
   <title>Block Quotes</title>
   <p>E-mail style:</p>
-  <p specific-use="wrapper">
-    <disp-quote>
-      <p>This is a block quote. It is pretty short.</p>
-    </disp-quote>
-  </p>
-  <p specific-use="wrapper">
-    <disp-quote>
-      <p>Code in a block quote:</p>
-      <p specific-use="wrapper">
-        <preformat>sub status {
+  <disp-quote>
+    <p>This is a block quote. It is pretty short.</p>
+  </disp-quote>
+  <disp-quote>
+    <p>Code in a block quote:</p>
+    <p specific-use="wrapper">
+      <preformat>sub status {
     print &quot;working&quot;;
 }</preformat>
-      </p>
-      <p>A list:</p>
-      <p specific-use="wrapper">
-        <list list-type="order">
-          <list-item>
-            <p>item one</p>
-          </list-item>
-          <list-item>
-            <p>item two</p>
-          </list-item>
-        </list>
-      </p>
-      <p>Nested block quotes:</p>
-      <p specific-use="wrapper">
-        <disp-quote>
-          <p>nested</p>
-        </disp-quote>
-      </p>
-      <p specific-use="wrapper">
-        <disp-quote>
-          <p>nested</p>
-        </disp-quote>
-      </p>
-    </disp-quote>
-  </p>
+    </p>
+    <p>A list:</p>
+    <p specific-use="wrapper">
+      <list list-type="order">
+        <list-item>
+          <p>item one</p>
+        </list-item>
+        <list-item>
+          <p>item two</p>
+        </list-item>
+      </list>
+    </p>
+    <p>Nested block quotes:</p>
+    <p specific-use="wrapper">
+      <disp-quote>
+        <p>nested</p>
+      </disp-quote>
+    </p>
+    <p specific-use="wrapper">
+      <disp-quote>
+        <p>nested</p>
+      </disp-quote>
+    </p>
+  </disp-quote>
   <p>This should not be a block quote: 2 &gt; 1.</p>
   <p>And a following paragraph.</p>
 </sec>
@@ -817,12 +813,10 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
       </list-item>
     </list>
     <p>An e-mail address: <email>nobody@nowhere.net</email></p>
-    <p specific-use="wrapper">
-      <disp-quote>
-        <p>Blockquoted:
-        <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
-      </disp-quote>
-    </p>
+    <disp-quote>
+      <p>Blockquoted:
+      <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
+    </disp-quote>
     <p>Auto-links should not occur here:
     <monospace>&lt;http://example.com/&gt;</monospace></p>
     <preformat>or here: &lt;http://example.com/&gt;</preformat>
@@ -860,13 +854,11 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
     and <monospace>]</monospace> verbatim characters, as well as [bracketed
     text].</p>
   </fn></p>
-  <p specific-use="wrapper">
-    <disp-quote>
-      <p>Notes can go in quotes.<fn>
-        <p>In quote.</p>
-      </fn></p>
-    </disp-quote>
-  </p>
+  <disp-quote>
+    <p>Notes can go in quotes.<fn>
+      <p>In quote.</p>
+    </fn></p>
+  </disp-quote>
   <list list-type="order">
     <list-item>
       <p>And in list items.<fn>
diff --git a/test/writer.jats_publishing b/test/writer.jats_publishing
index f53fd554d..e6db4172a 100644
--- a/test/writer.jats_publishing
+++ b/test/writer.jats_publishing
@@ -78,39 +78,31 @@ Gruber’s markdown test suite.</p>
 <sec id="block-quotes">
   <title>Block Quotes</title>
   <p>E-mail style:</p>
-  <p specific-use="wrapper">
+  <disp-quote>
+    <p>This is a block quote. It is pretty short.</p>
+  </disp-quote>
+  <disp-quote>
+    <p>Code in a block quote:</p>
+    <preformat>sub status {
+    print &quot;working&quot;;
+}</preformat>
+    <p>A list:</p>
+    <list list-type="order">
+      <list-item>
+        <p>item one</p>
+      </list-item>
+      <list-item>
+        <p>item two</p>
+      </list-item>
+    </list>
+    <p>Nested block quotes:</p>
     <disp-quote>
-      <p>This is a block quote. It is pretty short.</p>
+      <p>nested</p>
     </disp-quote>
-  </p>
-  <p specific-use="wrapper">
     <disp-quote>
-      <p>Code in a block quote:</p>
-      <preformat>sub status {
-    print &quot;working&quot;;
-}</preformat>
-      <p>A list:</p>
-      <list list-type="order">
-        <list-item>
-          <p>item one</p>
-        </list-item>
-        <list-item>
-          <p>item two</p>
-        </list-item>
-      </list>
-      <p>Nested block quotes:</p>
-      <p specific-use="wrapper">
-        <disp-quote>
-          <p>nested</p>
-        </disp-quote>
-      </p>
-      <p specific-use="wrapper">
-        <disp-quote>
-          <p>nested</p>
-        </disp-quote>
-      </p>
+      <p>nested</p>
     </disp-quote>
-  </p>
+  </disp-quote>
   <p>This should not be a block quote: 2 &gt; 1.</p>
   <p>And a following paragraph.</p>
 </sec>
@@ -837,12 +829,10 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
       </list-item>
     </list>
     <p>An e-mail address: <email>nobody@nowhere.net</email></p>
-    <p specific-use="wrapper">
-      <disp-quote>
-        <p>Blockquoted:
-        <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
-      </disp-quote>
-    </p>
+    <disp-quote>
+      <p>Blockquoted:
+      <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p>
+    </disp-quote>
     <p>Auto-links should not occur here:
     <monospace>&lt;http://example.com/&gt;</monospace></p>
     <preformat>or here: &lt;http://example.com/&gt;</preformat>
@@ -866,11 +856,9 @@ These should not be escaped:  \$ \\ \&gt; \[ \{</preformat>
   <italic>not</italic> be a footnote reference, because it contains a
   space.[^my note] Here is an inline
   note.<xref ref-type="fn" rid="fn3">3</xref></p>
-  <p specific-use="wrapper">
-    <disp-quote>
-      <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
-    </disp-quote>
-  </p>
+  <disp-quote>
+    <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p>
+  </disp-quote>
   <list list-type="order">
     <list-item>
       <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p>
-- 
cgit v1.2.3


From 7ba8c0d2a5e2b89ae1547759510b2ee21de88cb1 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 Apr 2021 21:28:19 -0700
Subject: Move getLang from BCP47 -> T.P.Writers.Shared.

[API change]
---
 pandoc.cabal                          |   1 +
 src/Text/Pandoc/BCP47.hs              |  13 ----
 src/Text/Pandoc/Citeproc/Data.hs      |   5 +-
 src/Text/Pandoc/Shared.hs             |   1 -
 src/Text/Pandoc/Writers/LaTeX/Lang.hs | 117 +++++++++++++++++-----------------
 src/Text/Pandoc/Writers/Shared.hs     |  14 ++++
 6 files changed, 77 insertions(+), 74 deletions(-)

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index c8ef3cfb9..b6cbb0d7a 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -493,6 +493,7 @@ library
                  unordered-containers  >= 0.2      && < 0.3,
                  xml                   >= 1.3.12   && < 1.4,
                  xml-conduit           >= 1.9.1.1  && < 1.10,
+                 unicode-collation     >= 0.1      && < 0.2,
                  zip-archive           >= 0.2.3.4  && < 0.5,
                  zlib                  >= 0.5      && < 0.7
   if os(windows) && arch(i386)
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
index 69824aa57..1ecf0bf73 100644
--- a/src/Text/Pandoc/BCP47.hs
+++ b/src/Text/Pandoc/BCP47.hs
@@ -37,19 +37,6 @@ renderLang :: Lang -> T.Text
 renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
                     ([langScript lang, langRegion lang] ++ langVariants lang))
 
--- | Get the contents of the `lang` metadata field or variable.
-getLang :: WriterOptions -> Meta -> Maybe T.Text
-getLang opts meta =
-  case lookupContext "lang" (writerVariables opts) of
-        Just s -> Just s
-        _      ->
-          case lookupMeta "lang" meta of
-               Just (MetaBlocks [Para [Str s]])  -> Just s
-               Just (MetaBlocks [Plain [Str s]]) -> Just s
-               Just (MetaInlines [Str s])        -> Just s
-               Just (MetaString s)               -> Just s
-               _                                 -> Nothing
-
 -- | Parse a BCP 47 string as a Lang.  Currently we parse
 -- extensions and private-use fields as "variants," even
 -- though officially they aren't.
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index dfdaf2598..40430b0f5 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE
 import qualified Data.Text as T
 import Data.Text (Text)
 import Text.Pandoc.Citeproc.Util (toIETF)
-import Citeproc (Lang(..), parseLang)
+import UnicodeCollation.Lang (Lang(..), parseLang)
 
 biblatexLocalizations :: [(FilePath, ByteString)]
 biblatexLocalizations = $(embedDir "citeproc/biblatex-localization")
@@ -21,7 +21,8 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text))
 biblatexStringMap = foldr go mempty biblatexLocalizations
  where
   go (fp, bs) =
-    let Lang lang _ = parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
+    let Lang lang _ _ _ _ _ = parseLang
+                                (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
         ls = T.lines $ TE.decodeUtf8 bs
      in if length ls > 4
            then M.insert lang (toStringMap $ map (T.splitOn "|") ls)
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 95cbdc8b8..e389c1727 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -852,7 +852,6 @@ filteredFilesFromArchive zf f =
     fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
     fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
 
-
 --
 -- IANA URIs
 --
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 41aafee48..871b2692a 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang
     toBabel
   ) where
 import Data.Text (Text)
-import Text.Pandoc.BCP47 (Lang (..))
+import UnicodeCollation.Lang (Lang(..))
 
 
 -- In environments \Arabic instead of \arabic is used
@@ -25,88 +25,89 @@ toPolyglossiaEnv l =
     ("arabic", o) -> ("Arabic", o)
     x             -> x
 
--- Takes a list of the constituents of a BCP 47 language code and
+-- Takes a list of the constituents of a BCP47 language code and
 -- converts it to a Polyglossia (language, options) tuple
 -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf
 toPolyglossia :: Lang -> (Text, Text)
-toPolyglossia (Lang "ar" _ "DZ" _)        = ("arabic", "locale=algeria")
-toPolyglossia (Lang "ar" _ "IQ" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "JO" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "LB" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "LY" _)        = ("arabic", "locale=libya")
-toPolyglossia (Lang "ar" _ "MA" _)        = ("arabic", "locale=morocco")
-toPolyglossia (Lang "ar" _ "MR" _)        = ("arabic", "locale=mauritania")
-toPolyglossia (Lang "ar" _ "PS" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "SY" _)        = ("arabic", "locale=mashriq")
-toPolyglossia (Lang "ar" _ "TN" _)        = ("arabic", "locale=tunisia")
-toPolyglossia (Lang "de" _ _ vars)
-  | "1901" `elem` vars                    = ("german", "spelling=old")
-toPolyglossia (Lang "de" _ "AT" vars)
-  | "1901" `elem` vars                    = ("german", "variant=austrian, spelling=old")
-toPolyglossia (Lang "de" _ "AT" _)        = ("german", "variant=austrian")
-toPolyglossia (Lang "de" _ "CH" vars)
-  | "1901" `elem` vars                    = ("german", "variant=swiss, spelling=old")
-toPolyglossia (Lang "de" _ "CH" _)        = ("german", "variant=swiss")
-toPolyglossia (Lang "de" _ _ _)           = ("german", "")
-toPolyglossia (Lang "dsb" _ _ _)          = ("lsorbian", "")
-toPolyglossia (Lang "el" _ "polyton" _)   = ("greek",   "variant=poly")
-toPolyglossia (Lang "en" _ "AU" _)        = ("english", "variant=australian")
-toPolyglossia (Lang "en" _ "CA" _)        = ("english", "variant=canadian")
-toPolyglossia (Lang "en" _ "GB" _)        = ("english", "variant=british")
-toPolyglossia (Lang "en" _ "NZ" _)        = ("english", "variant=newzealand")
-toPolyglossia (Lang "en" _ "UK" _)        = ("english", "variant=british")
-toPolyglossia (Lang "en" _ "US" _)        = ("english", "variant=american")
-toPolyglossia (Lang "grc" _ _ _)          = ("greek",   "variant=ancient")
-toPolyglossia (Lang "hsb" _ _  _)         = ("usorbian", "")
-toPolyglossia (Lang "la" _ _ vars)
-  | "x-classic" `elem` vars               = ("latin", "variant=classic")
-toPolyglossia (Lang "pt" _ "BR" _)        = ("portuguese", "variant=brazilian")
-toPolyglossia (Lang "sl" _ _ _)           = ("slovenian", "")
+toPolyglossia (Lang "ar" _ (Just "DZ") _ _ _)  = ("arabic", "locale=algeria")
+toPolyglossia (Lang "ar" _ (Just "IQ") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "JO") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "LB") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "LY") _ _ _)  = ("arabic", "locale=libya")
+toPolyglossia (Lang "ar" _ (Just "MA") _ _ _)  = ("arabic", "locale=morocco")
+toPolyglossia (Lang "ar" _ (Just "MR") _ _ _)  = ("arabic", "locale=mauritania")
+toPolyglossia (Lang "ar" _ (Just "PS") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "SY") _ _ _)  = ("arabic", "locale=mashriq")
+toPolyglossia (Lang "ar" _ (Just "TN") _ _ _)  = ("arabic", "locale=tunisia")
+toPolyglossia (Lang "de" _ _ vars _ _)
+  | "1901" `elem` vars                         = ("german", "spelling=old")
+toPolyglossia (Lang "de" _ (Just "AT") vars _ _)
+  | "1901" `elem` vars         = ("german", "variant=austrian, spelling=old")
+toPolyglossia (Lang "de" _ (Just "AT") _ _ _)  = ("german", "variant=austrian")
+toPolyglossia (Lang "de" _ (Just "CH") vars _ _)
+  | "1901" `elem` vars        = ("german", "variant=swiss, spelling=old")
+toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ _ _ _ _)           = ("german", "")
+toPolyglossia (Lang "dsb" _ _ _ _ _)          = ("lsorbian", "")
+toPolyglossia (Lang "el" _ _ vars _ _)
+  | "polyton" `elem` vars                     = ("greek",   "variant=poly")
+toPolyglossia (Lang "en" _ (Just "AU") _ _ _) = ("english", "variant=australian")
+toPolyglossia (Lang "en" _ (Just "CA") _ _ _) = ("english", "variant=canadian")
+toPolyglossia (Lang "en" _ (Just "GB") _ _ _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ (Just "NZ") _ _ _) = ("english", "variant=newzealand")
+toPolyglossia (Lang "en" _ (Just "UK") _ _ _) = ("english", "variant=british")
+toPolyglossia (Lang "en" _ (Just "US") _ _ _) = ("english", "variant=american")
+toPolyglossia (Lang "grc" _ _ _ _ _)          = ("greek",   "variant=ancient")
+toPolyglossia (Lang "hsb" _ _ _ _ _)          = ("usorbian", "")
+toPolyglossia (Lang "la" _ _ vars _ _)
+  | "x-classic" `elem` vars                   = ("latin", "variant=classic")
+toPolyglossia (Lang "pt" _ "BR" _ _ _)        = ("portuguese", "variant=brazilian")
+toPolyglossia (Lang "sl" _ _ _ _ _)           = ("slovenian", "")
 toPolyglossia x                           = (commonFromBcp47 x, "")
 
--- Takes a list of the constituents of a BCP 47 language code and
+-- Takes a list of the constituents of a BCP47 language code and
 -- converts it to a Babel language string.
 -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf
 -- List of supported languages (slightly outdated):
 -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf
 toBabel :: Lang -> Text
-toBabel (Lang "de" _ "AT" vars)
+toBabel (Lang "de" _ (Just "AT") vars _ _)
   | "1901" `elem` vars                  = "austrian"
   | otherwise                           = "naustrian"
-toBabel (Lang "de" _ "CH" vars)
+toBabel (Lang "de" _ (Just "CH") vars _ _)
   | "1901" `elem` vars                  = "swissgerman"
   | otherwise                           = "nswissgerman"
-toBabel (Lang "de" _ _ vars)
+toBabel (Lang "de" _ _ vars _ _)
   | "1901" `elem` vars                  = "german"
   | otherwise                           = "ngerman"
-toBabel (Lang "dsb" _ _ _)              = "lowersorbian"
+toBabel (Lang "dsb" _ _ _ _ _)          = "lowersorbian"
 toBabel (Lang "el" _ _ vars)
   | "polyton" `elem` vars               = "polutonikogreek"
-toBabel (Lang "en" _ "AU" _)            = "australian"
-toBabel (Lang "en" _ "CA" _)            = "canadian"
-toBabel (Lang "en" _ "GB" _)            = "british"
-toBabel (Lang "en" _ "NZ" _)            = "newzealand"
-toBabel (Lang "en" _ "UK" _)            = "british"
-toBabel (Lang "en" _ "US" _)            = "american"
-toBabel (Lang "fr" _ "CA" _)            = "canadien"
-toBabel (Lang "fra" _ _ vars)
+toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian"
+toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian"
+toBabel (Lang "en" _ (Just "GB") _ _ _) = "british"
+toBabel (Lang "en" _ (Just "NZ") _ _ _) = "newzealand"
+toBabel (Lang "en" _ (Just "UK") _ _ _) = "british"
+toBabel (Lang "en" _ (Just "US") _ _ _) = "american"
+toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien"
+toBabel (Lang "fra" _ _ vars _ _)
   | "aca" `elem` vars                   = "acadian"
-toBabel (Lang "grc" _ _ _)              = "polutonikogreek"
-toBabel (Lang "hsb" _ _ _)              = "uppersorbian"
-toBabel (Lang "la" _ _ vars)
+toBabel (Lang "grc" _ _ _ _ _)          = "polutonikogreek"
+toBabel (Lang "hsb" _ _ _ _ _)          = "uppersorbian"
+toBabel (Lang "la" _ _ vars _ _)
   | "x-classic" `elem` vars             = "classiclatin"
-toBabel (Lang "pt" _ "BR" _)            = "brazilian"
-toBabel (Lang "sl" _ _ _)               = "slovene"
+toBabel (Lang "pt" _ (Just "BR") _ _ _) = "brazilian"
+toBabel (Lang "sl" _ _ _ _ _)           = "slovene"
 toBabel x                               = commonFromBcp47 x
 
--- Takes a list of the constituents of a BCP 47 language code
+-- Takes a list of the constituents of a BCP47 language code
 -- and converts it to a string shared by Babel and Polyglossia.
 -- https://tools.ietf.org/html/bcp47#section-2.1
 commonFromBcp47 :: Lang -> Text
-commonFromBcp47 (Lang "sr" "Cyrl" _ _)          = "serbianc"
-commonFromBcp47 (Lang "zh" "Latn" _ vars)
-  | "pinyin" `elem` vars                        = "pinyin"
-commonFromBcp47 (Lang l _ _ _) = fromIso l
+commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _)      = "serbianc"
+commonFromBcp47 (Lang "zh" (Just "Latn") _ vars _ _)
+  | "pinyin" `elem` vars                               = "pinyin"
+commonFromBcp47 (Lang l _ _ _ _ _) = fromIso l
   where
     fromIso "af"  = "afrikaans"
     fromIso "am"  = "amharic"
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index 91ecb310b..fcb47bd5a 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -20,6 +20,7 @@ module Text.Pandoc.Writers.Shared (
                      , setField
                      , resetField
                      , defField
+                     , getLang
                      , tagWithAttrs
                      , isDisplayMath
                      , fixDisplayMath
@@ -147,6 +148,19 @@ defField field val (Context m) =
   where
     f _newval oldval = oldval
 
+-- | Get the contents of the `lang` metadata field or variable.
+getLang :: WriterOptions -> Meta -> Maybe Text
+getLang opts meta =
+  case lookupContext "lang" (writerVariables opts) of
+        Just s -> Just s
+        _      ->
+          case lookupMeta "lang" meta of
+               Just (MetaBlocks [Para [Str s]])  -> Just s
+               Just (MetaBlocks [Plain [Str s]]) -> Just s
+               Just (MetaInlines [Str s])        -> Just s
+               Just (MetaString s)               -> Just s
+               _                                 -> Nothing
+
 -- | Produce an HTML tag with the given pandoc attributes.
 tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a
 tagWithAttrs tag (ident,classes,kvs) = hsep
-- 
cgit v1.2.3


From aecbf8156eb7c36c4b41de27797e262c23728db5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 Apr 2021 21:28:48 -0700
Subject: Remove Text.Pandoc.BCP47 module.

[API change]

Use Lang from UnicodeCollation.Lang instead.
This is a richer implementation of BCP 47.
---
 pandoc.cabal                            |   1 -
 src/Text/Pandoc/App.hs                  |   6 +-
 src/Text/Pandoc/BCP47.hs                |  99 -------------
 src/Text/Pandoc/Citeproc.hs             |  10 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs      |  20 +--
 src/Text/Pandoc/Citeproc/Data.hs        |  12 +-
 src/Text/Pandoc/Class/CommonState.hs    |   2 +-
 src/Text/Pandoc/Class/PandocMonad.hs    |   4 +-
 src/Text/Pandoc/Readers/BibTeX.hs       |   9 +-
 src/Text/Pandoc/Readers/LaTeX.hs        |   2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   | 241 ++++++++++++++++----------------
 src/Text/Pandoc/Writers/BibTeX.hs       |   2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs      |  48 +++----
 src/Text/Pandoc/Writers/CslJson.hs      |   7 +-
 src/Text/Pandoc/Writers/Docx.hs         |   2 +-
 src/Text/Pandoc/Writers/LaTeX.hs        |   2 +-
 src/Text/Pandoc/Writers/LaTeX/Lang.hs   |   8 +-
 src/Text/Pandoc/Writers/ODT.hs          |   7 +-
 src/Text/Pandoc/Writers/OpenDocument.hs |   8 +-
 src/Text/Pandoc/Writers/Shared.hs       |   2 +-
 20 files changed, 198 insertions(+), 294 deletions(-)
 delete mode 100644 src/Text/Pandoc/BCP47.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index b6cbb0d7a..8816767e9 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -601,7 +601,6 @@ library
                    Text.Pandoc.Asciify,
                    Text.Pandoc.Emoji,
                    Text.Pandoc.ImageSize,
-                   Text.Pandoc.BCP47,
                    Text.Pandoc.Class,
                    Text.Pandoc.Citeproc
   other-modules:   Text.Pandoc.App.CommandLineOptions,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 6b45e5418..67d3cce7d 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
 import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
                                            options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import UnicodeCollation.Lang (Lang (..), parseLang)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.SelfContained (makeSelfContained)
@@ -200,8 +200,8 @@ convertWithOpts opts = do
                     Just f  -> readFileStrict f
 
     case lookupMetaString "lang" (optMetadata opts) of
-           ""      -> setTranslations $ Lang "en" "" "US" []
-           l       -> case parseBCP47 l of
+           ""      -> setTranslations $ Lang "en" Nothing (Just "US") [] [] []
+           l       -> case parseLang l of
                            Left _   -> report $ InvalidLang l
                            Right l' -> setTranslations l'
 
diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs
deleted file mode 100644
index 1ecf0bf73..000000000
--- a/src/Text/Pandoc/BCP47.hs
+++ /dev/null
@@ -1,99 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.BCP47
-   Copyright   : Copyright (C) 2017-2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for parsing and rendering BCP47 language identifiers.
--}
-module Text.Pandoc.BCP47 (
-                       getLang
-                     , parseBCP47
-                     , Lang(..)
-                     , renderLang
-                     )
-where
-import Control.Monad (guard)
-import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper)
-import Text.Pandoc.Definition
-import Text.Pandoc.Options
-import Text.DocTemplates (FromContext(..))
-import qualified Data.Text as T
-import qualified Text.Parsec as P
-
--- | Represents BCP 47 language/country code.
-data Lang = Lang{ langLanguage :: T.Text
-                , langScript   :: T.Text
-                , langRegion   :: T.Text
-                , langVariants :: [T.Text] }
-                deriving (Eq, Ord, Show)
-
--- | Render a Lang as BCP 47.
-renderLang :: Lang -> T.Text
-renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null)
-                    ([langScript lang, langRegion lang] ++ langVariants lang))
-
--- | Parse a BCP 47 string as a Lang.  Currently we parse
--- extensions and private-use fields as "variants," even
--- though officially they aren't.
-parseBCP47 :: T.Text -> Either T.Text Lang
-parseBCP47 lang =
-  case P.parse bcp47 "lang" lang of
-       Right r -> Right r
-       Left e  -> Left $ T.pack $ show e
-  where bcp47 = do
-          language <- pLanguage
-          script <- P.option "" pScript
-          region <- P.option "" pRegion
-          variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse)
-          P.eof
-          return Lang{   langLanguage = language
-                       , langScript = script
-                       , langRegion = region
-                       , langVariants = variants }
-        asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
-        pLanguage = do
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toLower $ T.pack cs
-        pScript = P.try $ do
-          P.char '-'
-          x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
-          xs <- P.count 3
-                 (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
-          return $ T.toLower $ T.pack (x:xs)
-        pRegion = P.try $ do
-          P.char '-'
-          cs <- P.many1 asciiLetter
-          let lcs = length cs
-          guard $ lcs == 2 || lcs == 3
-          return $ T.toUpper $ T.pack cs
-        pVariant = P.try $ do
-          P.char '-'
-          ds <- P.option "" (P.count 1 P.digit)
-          cs <- P.many1 asciiLetter
-          let var = ds ++ cs
-              lv = length var
-          guard $ if null ds
-                     then lv >= 5 && lv <= 8
-                     else lv == 4
-          return $ T.toLower $ T.pack var
-        pExtension = P.try $ do
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          let lcs = length cs
-          guard $ lcs >= 2 && lcs <= 8
-          return $ T.toLower $ T.pack cs
-        pPrivateUse = P.try $ do
-          P.char '-'
-          P.char 'x'
-          P.char '-'
-          cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c)
-          guard $ not (null cs) && length cs <= 8
-          let var = "x-" ++ cs
-          return $ T.toLower $ T.pack var
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index af302f782..c9f1806e4 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -18,7 +18,6 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
 import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
 import Text.Pandoc.Readers.Markdown (yamlToRefs)
-import qualified Text.Pandoc.BCP47 as BCP47
 import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Definition as Pandoc
@@ -630,13 +629,8 @@ removeFinalPeriod ils =
 
 bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang)
 bcp47LangToIETF bcplang =
-  case BCP47.parseBCP47 bcplang of
+  case parseLang bcplang of
     Left _ -> do
       report $ InvalidLang bcplang
       return Nothing
-    Right lang ->
-      return $ Just
-             $ Lang (BCP47.langLanguage lang)
-                    (if T.null (BCP47.langRegion lang)
-                        then Nothing
-                        else Just (BCP47.langRegion lang))
+    Right lang -> return $ Just lang
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index c0752dadc..510e56f9c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -205,10 +205,13 @@ writeBibtexString opts variant mblang ref =
                    [ (", " <>) <$> nameGiven name,
                      nameDroppingParticle name ]
 
-  mblang' = (parseLang <$> getVariableAsText "language") <|> mblang
+  mblang' = case getVariableAsText "language" of
+              Just l  -> either (const Nothing) Just $ parseLang l
+              Nothing -> mblang
 
   titlecase = case mblang' of
-                Just (Lang "en" _) -> titlecase'
+                Just lang | langLanguage lang == "en"
+                                   -> titlecase'
                 Nothing            -> titlecase'
                 _                  ->
                   case variant of
@@ -331,7 +334,7 @@ writeBibtexString opts variant mblang ref =
   renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField
 
 defaultLang :: Lang
-defaultLang = Lang "en" (Just "US")
+defaultLang = Lang "en" Nothing (Just "US") [] [] []
 
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
@@ -351,9 +354,7 @@ itemToReference locale variant item = do
   bib item $ do
     let lang = fromMaybe defaultLang $ localeLanguage locale
     modify $ \st -> st{ localeLang = lang,
-                        untitlecase = case lang of
-                                           (Lang "en" _) -> True
-                                           _             -> False }
+                        untitlecase = langLanguage lang == "en" }
 
     id' <- asks identifier
     otherIds <- (Just <$> getRawField "ids")
@@ -711,7 +712,7 @@ itemToReference locale variant item = do
 
 
 bib :: Item -> Bib a -> BibParser a
-bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
+bib entry m = fst <$> evalRWST m entry (BibState True defaultLang)
 
 resolveCrossRefs :: Variant -> [Item] -> [Item]
 resolveCrossRefs variant entries =
@@ -1456,8 +1457,9 @@ resolveKey lang ils = Walk.walk go ils
         go x       = x
 
 resolveKey' :: Lang -> Text -> Text
-resolveKey' lang@(Lang l _) k =
-  case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
+resolveKey' lang k =
+  case Map.lookup (langLanguage lang) biblatexStringMap >>=
+        Map.lookup (T.toLower k) of
     Nothing     -> k
     Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
 
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index 40430b0f5..388b9ba62 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -21,12 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text))
 biblatexStringMap = foldr go mempty biblatexLocalizations
  where
   go (fp, bs) =
-    let Lang lang _ _ _ _ _ = parseLang
-                                (toIETF $ T.takeWhile (/= '.') $ T.pack fp)
-        ls = T.lines $ TE.decodeUtf8 bs
-     in if length ls > 4
-           then M.insert lang (toStringMap $ map (T.splitOn "|") ls)
-           else id
+    let ls = T.lines $ TE.decodeUtf8 bs
+     in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of
+          Right lang | length ls > 4
+            -> M.insert (langLanguage lang)
+                        (toStringMap $ map (T.splitOn "|") ls)
+          _ -> id
   toStringMap = foldr go' mempty
   go' [term, x, y] = M.insert term (x, y)
   go' _ = id
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 7e1735c2b..0fd094d99 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -19,7 +19,7 @@ where
 
 import Data.Default (Default (def))
 import Data.Text (Text)
-import Text.Pandoc.BCP47 (Lang)
+import UnicodeCollation.Lang (Lang)
 import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
 import Text.Pandoc.Translations (Translations)
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 293a822a0..76f1fa32b 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
 import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
                         isRelative, splitDirectories)
 import System.Random (StdGen)
-import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang)
+import UnicodeCollation.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -285,7 +285,7 @@ readFileFromDirs (d:ds) f = catchError
 toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang)
 toLang Nothing = return Nothing
 toLang (Just s) =
-  case parseBCP47 s of
+  case parseLang s of
        Left _ -> do
          report $ InvalidLang s
          return Nothing
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index 956b9f1f7..b82a81350 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -48,11 +48,14 @@ readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
 readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
 readBibTeX' variant _opts t = do
-  lang <- maybe (Lang "en" (Just "US")) parseLang
-             <$> lookupEnv "LANG"
+  mblangEnv <- lookupEnv "LANG"
+  let defaultLang = Lang "en" Nothing (Just "US") [] [] []
+  let lang = case mblangEnv of
+              Nothing  -> defaultLang
+              Just l   -> either (const defaultLang) id $ parseLang l
   locale <- case getLocale lang of
                Left e  ->
-                 case getLocale (Lang "en" (Just "US")) of
+                 case getLocale (Lang "en" Nothing (Just "US") [] [] []) of
                    Right l -> return l
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 851756065..83caf742a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,7 +33,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import Text.Pandoc.BCP47 (renderLang)
+import UnicodeCollation.Lang (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index 08e217bdb..b92e6ab57 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -23,7 +23,7 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Shared (extractSpaces)
-import Text.Pandoc.BCP47 (Lang(..), renderLang)
+import UnicodeCollation.Lang (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
@@ -99,133 +99,136 @@ setDefaultLanguage = do
 polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
 polyglossiaLangToBCP47 = M.fromList
   [ ("arabic", \o -> case T.filter (/=' ') o of
-       "locale=algeria"    -> Lang "ar" "" "DZ" []
-       "locale=mashriq"    -> Lang "ar" "" "SY" []
-       "locale=libya"      -> Lang "ar" "" "LY" []
-       "locale=morocco"    -> Lang "ar" "" "MA" []
-       "locale=mauritania" -> Lang "ar" "" "MR" []
-       "locale=tunisia"    -> Lang "ar" "" "TN" []
-       _                   -> Lang "ar" "" "" [])
+       "locale=algeria"    -> Lang "ar" Nothing (Just "DZ") [] [] []
+       "locale=mashriq"    -> Lang "ar" Nothing (Just "SY") [] [] []
+       "locale=libya"      -> Lang "ar" Nothing (Just "LY") [] [] []
+       "locale=morocco"    -> Lang "ar" Nothing (Just "MA") [] [] []
+       "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] []
+       "locale=tunisia"    -> Lang "ar" Nothing (Just "TN") [] [] []
+       _                   -> Lang "ar" Nothing (Just "")   [] [] [])
   , ("german", \o -> case T.filter (/=' ') o of
-       "spelling=old" -> Lang "de" "" "DE" ["1901"]
+       "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] []
        "variant=austrian,spelling=old"
-                       -> Lang "de" "" "AT" ["1901"]
-       "variant=austrian" -> Lang "de" "" "AT" []
+                       -> Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] []
        "variant=swiss,spelling=old"
-                       -> Lang "de" "" "CH" ["1901"]
-       "variant=swiss" -> Lang "de" "" "CH" []
-       _ -> Lang "de" "" "" [])
-  , ("lsorbian", \_ -> Lang "dsb" "" "" [])
+                       -> Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] []
+       _ -> Lang "de" Nothing Nothing [] [] [])
+  , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] [])
   , ("greek", \o -> case T.filter (/=' ') o of
-       "variant=poly"    -> Lang "el" "" "polyton" []
-       "variant=ancient" -> Lang "grc" "" "" []
-       _                 -> Lang "el" "" "" [])
+       "variant=poly"    -> Lang "el" Nothing (Just "polyton") [] [] []
+       "variant=ancient" -> Lang "grc" Nothing Nothing [] [] []
+       _                 -> Lang "el" Nothing Nothing [] [] [])
   , ("english", \o -> case T.filter (/=' ') o of
-       "variant=australian" -> Lang "en" "" "AU" []
-       "variant=canadian"   -> Lang "en" "" "CA" []
-       "variant=british"    -> Lang "en" "" "GB" []
-       "variant=newzealand" -> Lang "en" "" "NZ" []
-       "variant=american"   -> Lang "en" "" "US" []
-       _                    -> Lang "en" "" "" [])
-  , ("usorbian", \_ -> Lang "hsb" "" "" [])
+       "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] []
+       "variant=canadian"   -> Lang "en" Nothing (Just "CA") [] [] []
+       "variant=british"    -> Lang "en" Nothing (Just "GB") [] [] []
+       "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] []
+       "variant=american"   -> Lang "en" Nothing (Just "US") [] [] []
+       _                    -> Lang "en" Nothing (Just "")   [] [] [])
+  , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] [])
   , ("latin", \o -> case T.filter (/=' ') o of
-       "variant=classic" -> Lang "la" "" "" ["x-classic"]
-       _                 -> Lang "la" "" "" [])
-  , ("slovenian", \_ -> Lang "sl" "" "" [])
-  , ("serbianc", \_ -> Lang "sr" "cyrl" "" [])
-  , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"])
-  , ("afrikaans", \_ -> Lang "af" "" "" [])
-  , ("amharic", \_ -> Lang "am" "" "" [])
-  , ("assamese", \_ -> Lang "as" "" "" [])
-  , ("asturian", \_ -> Lang "ast" "" "" [])
-  , ("bulgarian", \_ -> Lang "bg" "" "" [])
-  , ("bengali", \_ -> Lang "bn" "" "" [])
-  , ("tibetan", \_ -> Lang "bo" "" "" [])
-  , ("breton", \_ -> Lang "br" "" "" [])
-  , ("catalan", \_ -> Lang "ca" "" "" [])
-  , ("welsh", \_ -> Lang "cy" "" "" [])
-  , ("czech", \_ -> Lang "cs" "" "" [])
-  , ("coptic", \_ -> Lang "cop" "" "" [])
-  , ("danish", \_ -> Lang "da" "" "" [])
-  , ("divehi", \_ -> Lang "dv" "" "" [])
-  , ("esperanto", \_ -> Lang "eo" "" "" [])
-  , ("spanish", \_ -> Lang "es" "" "" [])
-  , ("estonian", \_ -> Lang "et" "" "" [])
-  , ("basque", \_ -> Lang "eu" "" "" [])
-  , ("farsi", \_ -> Lang "fa" "" "" [])
-  , ("finnish", \_ -> Lang "fi" "" "" [])
-  , ("french", \_ -> Lang "fr" "" "" [])
-  , ("friulan", \_ -> Lang "fur" "" "" [])
-  , ("irish", \_ -> Lang "ga" "" "" [])
-  , ("scottish", \_ -> Lang "gd" "" "" [])
-  , ("ethiopic", \_ -> Lang "gez" "" "" [])
-  , ("galician", \_ -> Lang "gl" "" "" [])
-  , ("hebrew", \_ -> Lang "he" "" "" [])
-  , ("hindi", \_ -> Lang "hi" "" "" [])
-  , ("croatian", \_ -> Lang "hr" "" "" [])
-  , ("magyar", \_ -> Lang "hu" "" "" [])
-  , ("armenian", \_ -> Lang "hy" "" "" [])
-  , ("interlingua", \_ -> Lang "ia" "" "" [])
-  , ("indonesian", \_ -> Lang "id" "" "" [])
-  , ("icelandic", \_ -> Lang "is" "" "" [])
-  , ("italian", \_ -> Lang "it" "" "" [])
-  , ("japanese", \_ -> Lang "jp" "" "" [])
-  , ("khmer", \_ -> Lang "km" "" "" [])
-  , ("kurmanji", \_ -> Lang "kmr" "" "" [])
-  , ("kannada", \_ -> Lang "kn" "" "" [])
-  , ("korean", \_ -> Lang "ko" "" "" [])
-  , ("lao", \_ -> Lang "lo" "" "" [])
-  , ("lithuanian", \_ -> Lang "lt" "" "" [])
-  , ("latvian", \_ -> Lang "lv" "" "" [])
-  , ("malayalam", \_ -> Lang "ml" "" "" [])
-  , ("mongolian", \_ -> Lang "mn" "" "" [])
-  , ("marathi", \_ -> Lang "mr" "" "" [])
-  , ("dutch", \_ -> Lang "nl" "" "" [])
-  , ("nynorsk", \_ -> Lang "nn" "" "" [])
-  , ("norsk", \_ -> Lang "no" "" "" [])
-  , ("nko", \_ -> Lang "nqo" "" "" [])
-  , ("occitan", \_ -> Lang "oc" "" "" [])
-  , ("panjabi", \_ -> Lang "pa" "" "" [])
-  , ("polish", \_ -> Lang "pl" "" "" [])
-  , ("piedmontese", \_ -> Lang "pms" "" "" [])
-  , ("portuguese", \_ -> Lang "pt" "" "" [])
-  , ("romansh", \_ -> Lang "rm" "" "" [])
-  , ("romanian", \_ -> Lang "ro" "" "" [])
-  , ("russian", \_ -> Lang "ru" "" "" [])
-  , ("sanskrit", \_ -> Lang "sa" "" "" [])
-  , ("samin", \_ -> Lang "se" "" "" [])
-  , ("slovak", \_ -> Lang "sk" "" "" [])
-  , ("albanian", \_ -> Lang "sq" "" "" [])
-  , ("serbian", \_ -> Lang "sr" "" "" [])
-  , ("swedish", \_ -> Lang "sv" "" "" [])
-  , ("syriac", \_ -> Lang "syr" "" "" [])
-  , ("tamil", \_ -> Lang "ta" "" "" [])
-  , ("telugu", \_ -> Lang "te" "" "" [])
-  , ("thai", \_ -> Lang "th" "" "" [])
-  , ("turkmen", \_ -> Lang "tk" "" "" [])
-  , ("turkish", \_ -> Lang "tr" "" "" [])
-  , ("ukrainian", \_ -> Lang "uk" "" "" [])
-  , ("urdu", \_ -> Lang "ur" "" "" [])
-  , ("vietnamese", \_ -> Lang "vi" "" "" [])
+       "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] []
+       _                 -> Lang "la" Nothing Nothing [] [] [])
+  , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] [])
+  , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] [])
+  , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] [])
+  , ("afrikaans", \_ -> simpleLang "af")
+  , ("amharic", \_ -> simpleLang "am")
+  , ("assamese", \_ -> simpleLang "as")
+  , ("asturian", \_ -> simpleLang "ast")
+  , ("bulgarian", \_ -> simpleLang "bg")
+  , ("bengali", \_ -> simpleLang "bn")
+  , ("tibetan", \_ -> simpleLang "bo")
+  , ("breton", \_ -> simpleLang "br")
+  , ("catalan", \_ -> simpleLang "ca")
+  , ("welsh", \_ -> simpleLang "cy")
+  , ("czech", \_ -> simpleLang "cs")
+  , ("coptic", \_ -> simpleLang "cop")
+  , ("danish", \_ -> simpleLang "da")
+  , ("divehi", \_ -> simpleLang "dv")
+  , ("esperanto", \_ -> simpleLang "eo")
+  , ("spanish", \_ -> simpleLang "es")
+  , ("estonian", \_ -> simpleLang "et")
+  , ("basque", \_ -> simpleLang "eu")
+  , ("farsi", \_ -> simpleLang "fa")
+  , ("finnish", \_ -> simpleLang "fi")
+  , ("french", \_ -> simpleLang "fr")
+  , ("friulan", \_ -> simpleLang "fur")
+  , ("irish", \_ -> simpleLang "ga")
+  , ("scottish", \_ -> simpleLang "gd")
+  , ("ethiopic", \_ -> simpleLang "gez")
+  , ("galician", \_ -> simpleLang "gl")
+  , ("hebrew", \_ -> simpleLang "he")
+  , ("hindi", \_ -> simpleLang "hi")
+  , ("croatian", \_ -> simpleLang "hr")
+  , ("magyar", \_ -> simpleLang "hu")
+  , ("armenian", \_ -> simpleLang "hy")
+  , ("interlingua", \_ -> simpleLang "ia")
+  , ("indonesian", \_ -> simpleLang "id")
+  , ("icelandic", \_ -> simpleLang "is")
+  , ("italian", \_ -> simpleLang "it")
+  , ("japanese", \_ -> simpleLang "jp")
+  , ("khmer", \_ -> simpleLang "km")
+  , ("kurmanji", \_ -> simpleLang "kmr")
+  , ("kannada", \_ -> simpleLang "kn")
+  , ("korean", \_ -> simpleLang "ko")
+  , ("lao", \_ -> simpleLang "lo")
+  , ("lithuanian", \_ -> simpleLang "lt")
+  , ("latvian", \_ -> simpleLang "lv")
+  , ("malayalam", \_ -> simpleLang "ml")
+  , ("mongolian", \_ -> simpleLang "mn")
+  , ("marathi", \_ -> simpleLang "mr")
+  , ("dutch", \_ -> simpleLang "nl")
+  , ("nynorsk", \_ -> simpleLang "nn")
+  , ("norsk", \_ -> simpleLang "no")
+  , ("nko", \_ -> simpleLang "nqo")
+  , ("occitan", \_ -> simpleLang "oc")
+  , ("panjabi", \_ -> simpleLang "pa")
+  , ("polish", \_ -> simpleLang "pl")
+  , ("piedmontese", \_ -> simpleLang "pms")
+  , ("portuguese", \_ -> simpleLang "pt")
+  , ("romansh", \_ -> simpleLang "rm")
+  , ("romanian", \_ -> simpleLang "ro")
+  , ("russian", \_ -> simpleLang "ru")
+  , ("sanskrit", \_ -> simpleLang "sa")
+  , ("samin", \_ -> simpleLang "se")
+  , ("slovak", \_ -> simpleLang "sk")
+  , ("albanian", \_ -> simpleLang "sq")
+  , ("serbian", \_ -> simpleLang "sr")
+  , ("swedish", \_ -> simpleLang "sv")
+  , ("syriac", \_ -> simpleLang "syr")
+  , ("tamil", \_ -> simpleLang "ta")
+  , ("telugu", \_ -> simpleLang "te")
+  , ("thai", \_ -> simpleLang "th")
+  , ("turkmen", \_ -> simpleLang "tk")
+  , ("turkish", \_ -> simpleLang "tr")
+  , ("ukrainian", \_ -> simpleLang "uk")
+  , ("urdu", \_ -> simpleLang "ur")
+  , ("vietnamese", \_ -> simpleLang "vi")
   ]
 
+simpleLang :: Text -> Lang
+simpleLang l = Lang l Nothing Nothing [] [] []
+
 babelLangToBCP47 :: T.Text -> Maybe Lang
 babelLangToBCP47 s =
   case s of
-       "austrian" -> Just $ Lang "de" "" "AT" ["1901"]
-       "naustrian" -> Just $ Lang "de" "" "AT" []
-       "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"]
-       "nswissgerman" -> Just $ Lang "de" "" "CH" []
-       "german" -> Just $ Lang "de" "" "DE" ["1901"]
-       "ngerman" -> Just $ Lang "de" "" "DE" []
-       "lowersorbian" -> Just $ Lang "dsb" "" "" []
-       "uppersorbian" -> Just $ Lang "hsb" "" "" []
-       "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"]
-       "slovene" -> Just $ Lang "sl" "" "" []
-       "australian" -> Just $ Lang "en" "" "AU" []
-       "canadian" -> Just $ Lang "en" "" "CA" []
-       "british" -> Just $ Lang "en" "" "GB" []
-       "newzealand" -> Just $ Lang "en" "" "NZ" []
-       "american" -> Just $ Lang "en" "" "US" []
-       "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"]
+       "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] []
+       "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] []
+       "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] []
+       "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] []
+       "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] []
+       "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] []
+       "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] []
+       "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] []
+       "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] []
+       "slovene" -> Just $ simpleLang "sl"
+       "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] []
+       "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] []
+       "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] []
+       "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] []
+       "american" -> Just $ Lang "en" Nothing (Just "US") [] [] []
+       "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] []
        _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47
diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs
index b9ae0c13a..95de6b71f 100644
--- a/src/Text/Pandoc/Writers/BibTeX.hs
+++ b/src/Text/Pandoc/Writers/BibTeX.hs
@@ -43,7 +43,7 @@ writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text
 writeBibTeX' variant opts (Pandoc meta _) = do
   let mblang = case lookupMetaString "lang" meta of
                  "" -> Nothing
-                 t  -> Just $ parseLang t
+                 t  -> either (const Nothing) Just $ parseLang t
   let refs = case lookupMeta "references" meta of
                Just (MetaList xs) -> mapMaybe metaValueToReference xs
                _ -> []
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 3c9975be8..f352c84bc 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import Text.Pandoc.BCP47
+import UnicodeCollation.Lang (Lang(..))
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
@@ -555,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs
 -- https://tools.ietf.org/html/bcp47#section-2.1
 -- http://wiki.contextgarden.net/Language_Codes
 fromBCP47' :: Maybe Lang -> Maybe Text
-fromBCP47' (Just (Lang "ar" _ "SY" _)     ) = Just "ar-sy"
-fromBCP47' (Just (Lang "ar" _ "IQ" _)     ) = Just "ar-iq"
-fromBCP47' (Just (Lang "ar" _ "JO" _)     ) = Just "ar-jo"
-fromBCP47' (Just (Lang "ar" _ "LB" _)     ) = Just "ar-lb"
-fromBCP47' (Just (Lang "ar" _ "DZ" _)     ) = Just "ar-dz"
-fromBCP47' (Just (Lang "ar" _ "MA" _)     ) = Just "ar-ma"
-fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo"
-fromBCP47' (Just (Lang "de" _ "DE" _)     ) = Just "de-de"
-fromBCP47' (Just (Lang "de" _ "AT" _)     ) = Just "de-at"
-fromBCP47' (Just (Lang "de" _ "CH" _)     ) = Just "de-ch"
-fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr"
-fromBCP47' (Just (Lang "en" _ "US" _)     ) = Just "en-us"
-fromBCP47' (Just (Lang "en" _ "GB" _)     ) = Just "en-gb"
-fromBCP47' (Just (Lang "grc"_  _ _)       ) = Just "agr"
-fromBCP47' (Just (Lang "el" _ _ _)        ) = Just "gr"
-fromBCP47' (Just (Lang "eu" _ _ _)        ) = Just "ba"
-fromBCP47' (Just (Lang "he" _ _ _)        ) = Just "il"
-fromBCP47' (Just (Lang "jp" _ _ _)        ) = Just "ja"
-fromBCP47' (Just (Lang "uk" _ _ _)        ) = Just "ua"
-fromBCP47' (Just (Lang "vi" _ _ _)        ) = Just "vn"
-fromBCP47' (Just (Lang "zh" _ _ _)        ) = Just "cn"
-fromBCP47' (Just (Lang l _ _ _)           ) = Just l
-fromBCP47' Nothing                          = Nothing
+fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy"
+fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq"
+fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo"
+fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb"
+fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz"
+fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma"
+fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _))    = Just "deo"
+fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de"
+fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at"
+fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch"
+fromBCP47' (Just (Lang "el" _ _ ["poly"] _ _))    = Just "agr"
+fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us"
+fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb"
+fromBCP47' (Just (Lang "grc"_ _ _ _ _))           = Just "agr"
+fromBCP47' (Just (Lang "el" _ _ _ _ _))           = Just "gr"
+fromBCP47' (Just (Lang "eu" _ _ _ _ _))           = Just "ba"
+fromBCP47' (Just (Lang "he" _ _ _ _ _))           = Just "il"
+fromBCP47' (Just (Lang "jp" _ _ _ _ _))           = Just "ja"
+fromBCP47' (Just (Lang "uk" _ _ _ _ _))           = Just "ua"
+fromBCP47' (Just (Lang "vi" _ _ _ _ _))           = Just "vn"
+fromBCP47' (Just (Lang "zh" _ _ _ _ _))           = Just "cn"
+fromBCP47' (Just (Lang l _ _ _ _ _))              = Just l
+fromBCP47' Nothing                                = Nothing
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
index a10def95e..395335667 100644
--- a/src/Text/Pandoc/Writers/CslJson.hs
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -34,15 +34,16 @@ import Control.Monad.Identity
 import Citeproc.Locale (getLocale)
 import Citeproc.CslJson
 import Text.Pandoc.Options (WriterOptions)
-import Data.Maybe (mapMaybe)
+import Data.Maybe (mapMaybe, fromMaybe)
 import Data.Aeson.Encode.Pretty         (Config (..), Indent (Spaces),
                                          NumberFormat (Generic),
                                          defConfig, encodePretty')
 
 writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeCslJson _opts (Pandoc meta _) = do
-  let lang = maybe (Lang "en" (Just "US")) parseLang
-               (lookupMeta "lang" meta >>= metaValueToText)
+  let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] [])
+               (lookupMeta "lang" meta >>= metaValueToText >>=
+                  either (const Nothing) Just . parseLang)
   locale <- case getLocale lang of
                Left e  -> throwError $ PandocCiteprocError e
                Right l -> return l
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 20bcd0324..7781df8e7 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import Text.Pandoc.BCP47 (getLang, renderLang)
+import UnicodeCollation.Lang (renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 1c970e6ad..e99bad738 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -29,7 +29,7 @@ import qualified Data.Text as T
 import Network.URI (unEscapeString)
 import Text.DocTemplates (FromContext(lookupContext), renderTemplate,
                           Val(..), Context(..))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import UnicodeCollation.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 871b2692a..437b84120 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -46,7 +46,7 @@ toPolyglossia (Lang "de" _ (Just "AT") vars _ _)
 toPolyglossia (Lang "de" _ (Just "AT") _ _ _)  = ("german", "variant=austrian")
 toPolyglossia (Lang "de" _ (Just "CH") vars _ _)
   | "1901" `elem` vars        = ("german", "variant=swiss, spelling=old")
-toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss")
+toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss")
 toPolyglossia (Lang "de" _ _ _ _ _)           = ("german", "")
 toPolyglossia (Lang "dsb" _ _ _ _ _)          = ("lsorbian", "")
 toPolyglossia (Lang "el" _ _ vars _ _)
@@ -61,9 +61,9 @@ toPolyglossia (Lang "grc" _ _ _ _ _)          = ("greek",   "variant=ancient")
 toPolyglossia (Lang "hsb" _ _ _ _ _)          = ("usorbian", "")
 toPolyglossia (Lang "la" _ _ vars _ _)
   | "x-classic" `elem` vars                   = ("latin", "variant=classic")
-toPolyglossia (Lang "pt" _ "BR" _ _ _)        = ("portuguese", "variant=brazilian")
+toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian")
 toPolyglossia (Lang "sl" _ _ _ _ _)           = ("slovenian", "")
-toPolyglossia x                           = (commonFromBcp47 x, "")
+toPolyglossia x                               = (commonFromBcp47 x, "")
 
 -- Takes a list of the constituents of a BCP47 language code and
 -- converts it to a Babel language string.
@@ -81,7 +81,7 @@ toBabel (Lang "de" _ _ vars _ _)
   | "1901" `elem` vars                  = "german"
   | otherwise                           = "ngerman"
 toBabel (Lang "dsb" _ _ _ _ _)          = "lowersorbian"
-toBabel (Lang "el" _ _ vars)
+toBabel (Lang "el" _ _ vars _ _)
   | "polyton" `elem` vars               = "polutonikogreek"
 toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian"
 toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian"
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 101b236aa..6fd4cdeb4 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -16,6 +16,7 @@ import Codec.Archive.Zip
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
+import Data.Maybe (fromMaybe)
 import Data.Generics (everywhere', mkT)
 import Data.List (isPrefixOf)
 import qualified Data.Map as Map
@@ -23,7 +24,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
+import UnicodeCollation.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
@@ -35,7 +36,7 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..))
 import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
-                                   fixDisplayMath)
+                                   fixDisplayMath, getLang)
 import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
@@ -194,7 +195,7 @@ addLang lang = everywhere' (mkT updateLangAttr)
     where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
                            = Attr n (langLanguage lang)
           updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
-                           = Attr n (langRegion lang)
+                           = Attr n (fromMaybe "" $ langRegion lang)
           updateLangAttr x = x
 
 -- | transform both Image and Math elements
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index cf42f2228..6c265090c 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -25,7 +25,7 @@ import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
+import UnicodeCollation.Lang (Lang (..), parseLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm,
                                       setTranslations, toLang)
 import Text.Pandoc.Definition
@@ -236,7 +236,7 @@ handleSpaces s = case T.uncons s of
 -- | Convert Pandoc document to string in OpenDocument format.
 writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text
 writeOpenDocument opts (Pandoc meta blocks) = do
-  let defLang = Lang "en" "US" "" []
+  let defLang = Lang "en" (Just "US") Nothing [] [] []
   lang <- case lookupMetaString "lang" meta of
             "" -> pure defLang
             s  -> fromMaybe defLang <$> toLang (Just s)
@@ -893,7 +893,7 @@ textStyleAttr m s
                     Map.insert "style:font-name-complex" "Courier New" $ m
     | Language lang <- s
                   = Map.insert "fo:language" (langLanguage lang) .
-                    Map.insert "fo:country" (langRegion lang) $ m
+                    maybe id (Map.insert "fo:country") (langRegion lang) $ m
     | otherwise   = m
 
 withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
@@ -901,7 +901,7 @@ withLangFromAttr (_,_,kvs) action =
   case lookup "lang" kvs of
        Nothing -> action
        Just l  ->
-         case parseBCP47 l of
+         case parseLang l of
               Right lang -> withTextStyle (Language lang) action
               Left _ -> do
                 report $ InvalidLang l
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index fcb47bd5a..a09d18571 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -149,7 +149,7 @@ defField field val (Context m) =
     f _newval oldval = oldval
 
 -- | Get the contents of the `lang` metadata field or variable.
-getLang :: WriterOptions -> Meta -> Maybe Text
+getLang :: WriterOptions -> Meta -> Maybe T.Text
 getLang opts meta =
   case lookupContext "lang" (writerVariables opts) of
         Just s -> Just s
-- 
cgit v1.2.3


From 7a7fefce5ef395f1c88db8b984618a4bd3c7d916 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 15 Apr 2021 17:45:28 -0700
Subject: Use document's lang for the lang parameter of citeproc...

even if it differs from localeLanguage.  (It is designed
to be possible to override the locale language, and this
is especially useful when one wants to use the unicode
extension syntx, e.g. fr-u-kb.)
---
 src/Text/Pandoc/Citeproc.hs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index c9f1806e4..2f4936fa6 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -74,8 +74,7 @@ processCitations (Pandoc meta bs) = do
 
   let linkCites = maybe False truish $ lookupMeta "link-citations" meta
   let opts = defaultCiteprocOptions{ linkCitations = linkCites }
-  let result = Citeproc.citeproc opts style (localeLanguage locale)
-                  refs citations
+  let result = Citeproc.citeproc opts style mblang refs citations
   mapM_ (report . CiteprocWarning) (resultWarnings result)
   let sopts = styleOptions style
   let classes = "references" : -- TODO remove this or keep for compatibility?
-- 
cgit v1.2.3


From a478a5c4c8753fd0bf272cd540ca197ae146a196 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 17 Apr 2021 11:47:54 -0700
Subject: Update to released unicode-collation, latest citeproc dev version.

Update citeproc test.
---
 cabal.project                           | 11 +----------
 src/Text/Pandoc/App.hs                  |  2 +-
 src/Text/Pandoc/Citeproc/Data.hs        |  2 +-
 src/Text/Pandoc/Class/CommonState.hs    |  2 +-
 src/Text/Pandoc/Class/PandocMonad.hs    |  2 +-
 src/Text/Pandoc/Readers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Readers/LaTeX/Lang.hs   |  2 +-
 src/Text/Pandoc/Writers/ConTeXt.hs      |  2 +-
 src/Text/Pandoc/Writers/Docbook.hs      |  2 +-
 src/Text/Pandoc/Writers/Docx.hs         |  2 +-
 src/Text/Pandoc/Writers/LaTeX.hs        |  2 +-
 src/Text/Pandoc/Writers/LaTeX/Lang.hs   |  2 +-
 src/Text/Pandoc/Writers/ODT.hs          |  2 +-
 src/Text/Pandoc/Writers/OpenDocument.hs |  2 +-
 stack.yaml                              |  5 ++---
 test/command/pandoc-citeproc-320a.md    |  8 ++++----
 16 files changed, 20 insertions(+), 30 deletions(-)

(limited to 'src')

diff --git a/cabal.project b/cabal.project
index fa17a20a6..77bc8ef88 100644
--- a/cabal.project
+++ b/cabal.project
@@ -5,14 +5,5 @@ flags: +embed_data_files
 source-repository-package
   type: git
   location: https://github.com/jgm/citeproc
-  tag: b42857be658b8f2649e989e061978e304986f853
+  tag: f9439e07e9271c7c2674a51efcad2fb8c663b2c8
 
-source-repository-package
-  type: git
-  location: https://github.com/jgm/unicode-collation
-  tag: 9d229a5c6bcbaf53d7022575234eb223cfa90d55
-
--- source-repository-package
---  type: git
---  location: https://github.com/jgm/citeproc
---  tag: d44e24696ab444090d0e63e321c3a573f68b2e74
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 67d3cce7d..4e8c9f2ab 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
 import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs,
                                            options)
 import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings)
-import UnicodeCollation.Lang (Lang (..), parseLang)
+import Text.Collate.Lang (Lang (..), parseLang)
 import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
 import Text.Pandoc.PDF (makePDF)
 import Text.Pandoc.SelfContained (makeSelfContained)
diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs
index 388b9ba62..848a83a1e 100644
--- a/src/Text/Pandoc/Citeproc/Data.hs
+++ b/src/Text/Pandoc/Citeproc/Data.hs
@@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE
 import qualified Data.Text as T
 import Data.Text (Text)
 import Text.Pandoc.Citeproc.Util (toIETF)
-import UnicodeCollation.Lang (Lang(..), parseLang)
+import Text.Collate.Lang (Lang(..), parseLang)
 
 biblatexLocalizations :: [(FilePath, ByteString)]
 biblatexLocalizations = $(embedDir "citeproc/biblatex-localization")
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 0fd094d99..796a4afd5 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -19,7 +19,7 @@ where
 
 import Data.Default (Default (def))
 import Data.Text (Text)
-import UnicodeCollation.Lang (Lang)
+import Text.Collate.Lang (Lang)
 import Text.Pandoc.MediaBag (MediaBag)
 import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
 import Text.Pandoc.Translations (Translations)
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 76f1fa32b..7559cd7cd 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
 import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
                         isRelative, splitDirectories)
 import System.Random (StdGen)
-import UnicodeCollation.Lang (Lang(..), parseLang, renderLang)
+import Text.Collate.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 83caf742a..203dab83c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -33,7 +33,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import System.FilePath (addExtension, replaceExtension, takeExtension)
-import UnicodeCollation.Lang (renderLang)
+import Text.Collate.Lang (renderLang)
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocPure (PandocPure)
 import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath,
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index b92e6ab57..6a8327904 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -23,7 +23,7 @@ import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Shared (extractSpaces)
-import UnicodeCollation.Lang (Lang(..), renderLang)
+import Text.Collate.Lang (Lang(..), renderLang)
 import Text.Pandoc.Class (PandocMonad(..), setTranslations)
 import Text.Pandoc.Readers.LaTeX.Parsing
 import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..),
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index f352c84bc..f14b1d894 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Network.URI (unEscapeString)
-import UnicodeCollation.Lang (Lang(..))
+import Text.Collate.Lang (Lang(..))
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 1f10c9d04..02b141250 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -493,4 +493,4 @@ isSectionAttr DocBook4 ("os",_) = True
 isSectionAttr DocBook4 ("revision",_) = True
 isSectionAttr DocBook4 ("security",_) = True
 isSectionAttr DocBook4 ("vendor",_) = True
-isSectionAttr _ (_,_) = False
\ No newline at end of file
+isSectionAttr _ (_,_) = False
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 7781df8e7..749ad9a21 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import UnicodeCollation.Lang (renderLang)
+import Text.Collate.Lang (renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e99bad738..8b1f3df1d 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -29,7 +29,7 @@ import qualified Data.Text as T
 import Network.URI (unEscapeString)
 import Text.DocTemplates (FromContext(lookupContext), renderTemplate,
                           Val(..), Context(..))
-import UnicodeCollation.Lang (Lang (..), renderLang)
+import Text.Collate.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import Text.Pandoc.Definition
 import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
index 437b84120..0ba68b74e 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs
@@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang
     toBabel
   ) where
 import Data.Text (Text)
-import UnicodeCollation.Lang (Lang(..))
+import Text.Collate.Lang (Lang(..))
 
 
 -- In environments \Arabic instead of \arabic is used
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 6fd4cdeb4..e4eb4fd25 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -24,7 +24,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import Data.Time
 import System.FilePath (takeDirectory, takeExtension, (<.>))
-import UnicodeCollation.Lang (Lang (..), renderLang)
+import Text.Collate.Lang (Lang (..), renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 6c265090c..34a3a4aa5 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -25,7 +25,7 @@ import Data.Ord (comparing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import UnicodeCollation.Lang (Lang (..), parseLang)
+import Text.Collate.Lang (Lang (..), parseLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm,
                                       setTranslations, toLang)
 import Text.Pandoc.Definition
diff --git a/stack.yaml b/stack.yaml
index 159bf74b9..7bc33fa43 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -16,11 +16,10 @@ extra-deps:
 - texmath-0.12.2
 - random-1.2.0
 - xml-conduit-1.9.1.1
+- unicode-collation-0.1
 # - citeproc-0.3.0.9
 - git: https://github.com/jgm/citeproc
-  commit: b42857be658b8f2649e989e061978e304986f853
-- git: https://github.com/jgm/unicode-collation
-  commit: 9d229a5c6bcbaf53d7022575234eb223cfa90d55
+  commit: f9439e07e9271c7c2674a51efcad2fb8c663b2c8
 ghc-options:
    "$locals": -fhide-source-paths -Wno-missing-home-modules
 resolver: lts-17.5
diff --git a/test/command/pandoc-citeproc-320a.md b/test/command/pandoc-citeproc-320a.md
index 1c3b47de0..79dacfa10 100644
--- a/test/command/pandoc-citeproc-320a.md
+++ b/test/command/pandoc-citeproc-320a.md
@@ -56,6 +56,10 @@ n.d.; al-'Udhrī, n.d.; Uch, n.d.; Uebel, n.d.; Zzz, n.d.).
 Uch, Ann. n.d.
 :::
 
+::: {#ref-item4 .csl-entry}
+'Udhrī, Jamīl al-. n.d.
+:::
+
 ::: {#ref-item1 .csl-entry}
 ʾUdhrī, Jamīl al-. n.d.
 :::
@@ -68,10 +72,6 @@ Uch, Ann. n.d.
 \'Udhrī, Jamīl al-. n.d.
 :::
 
-::: {#ref-item4 .csl-entry}
-'Udhrī, Jamīl al-. n.d.
-:::
-
 ::: {#ref-item5 .csl-entry}
 'Udhrī, Jamīl al-. n.d.
 :::
-- 
cgit v1.2.3


From 73d394ca2adff31a384404ae25665b36c7d0bba0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 18 Apr 2021 22:01:12 -0700
Subject: Use MetaInlines not MetaBlocks for multimarkdown metadata fields.

This gives better results in converting to e.g. pandoc markdown.

Ref: <https://groups.google.com/d/msgid/pandoc-discuss/9728d1f4-040e-4392-aa04-148f648a8dfdn%40googlegroups.com>
---
 src/Text/Pandoc/Readers/Markdown.hs |  2 +-
 test/command/mmd-metadata.md        | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 test/command/mmd-metadata.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 6c3947a81..4b20e3a8b 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -298,7 +298,7 @@ kvPair allowEmpty = try $ do
           (try $ newline >> lookAhead (blankline <|> nonspaceChar))
   guard $ allowEmpty || not (T.null val)
   let key' = T.concat $ T.words $ T.toLower key
-  let val' = MetaBlocks $ B.toList $ B.plain $ B.text val
+  let val' = MetaInlines $ B.toList $ B.text val
   return (key',val')
 
 parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
diff --git a/test/command/mmd-metadata.md b/test/command/mmd-metadata.md
new file mode 100644
index 000000000..3cda34873
--- /dev/null
+++ b/test/command/mmd-metadata.md
@@ -0,0 +1,20 @@
+```
+% pandoc -f markdown_mmd -t markdown -s
+Title: Blah blah blah
+Author: Doo de Doo
+Base Header Level: 1
+Bibliography: Pubs.bib
+Lang: en-GB
+
+body
+^D
+---
+author: Doo de Doo
+baseheaderlevel: 1
+bibliography: Pubs.bib
+lang: en-GB
+title: Blah blah blah
+---
+
+body
+```
-- 
cgit v1.2.3


From 16d372abcb991caf26077c7f1cd0be4680894170 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 19 Apr 2021 08:38:31 -0700
Subject: Issue error message when  reader or writer format is malformed.

Previously we exited with an error status but (due to a bug)
no message.

Closes #7231.
---
 src/Text/Pandoc/Readers.hs | 6 +++---
 src/Text/Pandoc/Writers.hs | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index ac70f7d4c..7ae9db34f 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -65,6 +65,7 @@ import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
 import Data.Text (Text)
 import qualified Data.Text as T
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Class
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -101,7 +102,6 @@ import Text.Pandoc.Readers.CSV
 import Text.Pandoc.Readers.CslJson
 import Text.Pandoc.Readers.BibTeX
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Parsec.Error
 
 data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
               | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
@@ -152,8 +152,8 @@ readers = [ ("native"       , TextReader readNative)
 getReader :: PandocMonad m => Text -> m (Reader m, Extensions)
 getReader s =
   case parseFormatSpec s of
-       Left e  -> throwError $ PandocAppError
-                    $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e]
+       Left e  -> throwError $ PandocAppError $
+                    "Error parsing reader format " <> tshow s <> ": " <> tshow e
        Right (readerName, extsToEnable, extsToDisable) ->
            case lookup readerName readers of
                    Nothing  -> throwError $ PandocUnknownReaderError
diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs
index 95d6270b5..c348477c2 100644
--- a/src/Text/Pandoc/Writers.hs
+++ b/src/Text/Pandoc/Writers.hs
@@ -81,6 +81,7 @@ import Data.Aeson
 import qualified Data.ByteString.Lazy as BL
 import Data.Text (Text)
 import qualified Data.Text as T
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.Class
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
@@ -122,7 +123,6 @@ import Text.Pandoc.Writers.Texinfo
 import Text.Pandoc.Writers.Textile
 import Text.Pandoc.Writers.XWiki
 import Text.Pandoc.Writers.ZimWiki
-import Text.Parsec.Error
 
 data Writer m = TextWriter (WriterOptions -> Pandoc -> m Text)
               | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString)
@@ -196,8 +196,8 @@ writers = [
 getWriter :: PandocMonad m => Text -> m (Writer m, Extensions)
 getWriter s =
   case parseFormatSpec s of
-        Left e  -> throwError $ PandocAppError
-                    $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e]
+        Left e  -> throwError $ PandocAppError $
+                    "Error parsing writer format " <> tshow s <> ": " <> tshow e
         Right (writerName, extsToEnable, extsToDisable) ->
            case lookup writerName writers of
                    Nothing  -> throwError $
-- 
cgit v1.2.3


From 0b74bbbdaa643a473e26ea14d0a94efac6078d8c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 20 Apr 2021 10:54:46 +0200
Subject: Docx writer: extract Table handling into separate module

---
 pandoc.cabal                          |   2 +
 src/Text/Pandoc/Writers/Docx.hs       | 226 +---------------------------------
 src/Text/Pandoc/Writers/Docx/Table.hs | 114 +++++++++++++++++
 3 files changed, 121 insertions(+), 221 deletions(-)
 create mode 100644 src/Text/Pandoc/Writers/Docx/Table.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 744f3f095..14d2da0ae 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -657,6 +657,8 @@ library
                    Text.Pandoc.Readers.Metadata,
                    Text.Pandoc.Readers.Roff,
                    Text.Pandoc.Writers.Docx.StyleMap,
+                   Text.Pandoc.Writers.Docx.Table,
+                   Text.Pandoc.Writers.Docx.Types,
                    Text.Pandoc.Writers.JATS.References,
                    Text.Pandoc.Writers.JATS.Table,
                    Text.Pandoc.Writers.JATS.Types,
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 749ad9a21..7064ded09 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -22,7 +22,6 @@ import Control.Applicative ((<|>))
 import Control.Monad.Except (catchError, throwError)
 import Control.Monad.Reader
 import Control.Monad.State.Strict
-import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
 import Data.Char (isSpace, isLetter)
 import Data.List (intercalate, isPrefixOf, isSuffixOf)
@@ -47,123 +46,24 @@ import Text.Pandoc.Highlighting (highlight)
 import Text.Pandoc.Error
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
-import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType,
-                         getMimeTypeDef)
+import Text.Pandoc.MIME (extensionFromMimeType, getMimeType, getMimeTypeDef)
 import Text.Pandoc.Options
 import Text.Pandoc.Writers.Docx.StyleMap
+import Text.Pandoc.Writers.Docx.Table
+import Text.Pandoc.Writers.Docx.Types
 import Text.Pandoc.Shared
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
-import Text.Printf (printf)
 import Text.TeXMath
 import Text.Pandoc.Writers.OOXML
 import Text.Pandoc.XML.Light as XML
 import Data.Generics (mkT, everywhere)
 
-data ListMarker = NoMarker
-                | BulletMarker
-                | NumberMarker ListNumberStyle ListNumberDelim Int
-                deriving (Show, Read, Eq, Ord)
-
-listMarkerToId :: ListMarker -> Text
-listMarkerToId NoMarker = "990"
-listMarkerToId BulletMarker = "991"
-listMarkerToId (NumberMarker sty delim n) = T.pack $
-  '9' : '9' : styNum : delimNum : show n
-  where styNum = case sty of
-                      DefaultStyle -> '2'
-                      Example      -> '3'
-                      Decimal      -> '4'
-                      LowerRoman   -> '5'
-                      UpperRoman   -> '6'
-                      LowerAlpha   -> '7'
-                      UpperAlpha   -> '8'
-        delimNum = case delim of
-                      DefaultDelim -> '0'
-                      Period       -> '1'
-                      OneParen     -> '2'
-                      TwoParens    -> '3'
-
-data EnvProps = EnvProps{ styleElement  :: Maybe Element
-                        , otherElements :: [Element]
-                        }
-
-instance Semigroup EnvProps where
-  EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es')
-
-instance Monoid EnvProps where
-  mempty = EnvProps Nothing []
-  mappend = (<>)
-
 squashProps :: EnvProps -> [Element]
 squashProps (EnvProps Nothing es) = es
 squashProps (EnvProps (Just e) es) = e : es
 
-data WriterEnv = WriterEnv{ envTextProperties :: EnvProps
-                          , envParaProperties :: EnvProps
-                          , envRTL            :: Bool
-                          , envListLevel      :: Int
-                          , envListNumId      :: Int
-                          , envInDel          :: Bool
-                          , envChangesAuthor  :: Text
-                          , envChangesDate    :: Text
-                          , envPrintWidth     :: Integer
-                          }
-
-defaultWriterEnv :: WriterEnv
-defaultWriterEnv = WriterEnv{ envTextProperties = mempty
-                            , envParaProperties = mempty
-                            , envRTL = False
-                            , envListLevel = -1
-                            , envListNumId = 1
-                            , envInDel = False
-                            , envChangesAuthor  = "unknown"
-                            , envChangesDate    = "1969-12-31T19:00:00Z"
-                            , envPrintWidth     = 1
-                            }
-
-data WriterState = WriterState{
-         stFootnotes      :: [Element]
-       , stComments       :: [([(Text, Text)], [Inline])]
-       , stSectionIds     :: Set.Set Text
-       , stExternalLinks  :: M.Map Text Text
-       , stImages         :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
-       , stLists          :: [ListMarker]
-       , stInsId          :: Int
-       , stDelId          :: Int
-       , stStyleMaps      :: StyleMaps
-       , stFirstPara      :: Bool
-       , stInTable        :: Bool
-       , stInList         :: Bool
-       , stTocTitle       :: [Inline]
-       , stDynamicParaProps :: Set.Set ParaStyleName
-       , stDynamicTextProps :: Set.Set CharStyleName
-       , stCurId          :: Int
-       }
-
-defaultWriterState :: WriterState
-defaultWriterState = WriterState{
-        stFootnotes      = defaultFootnotes
-      , stComments       = []
-      , stSectionIds     = Set.empty
-      , stExternalLinks  = M.empty
-      , stImages         = M.empty
-      , stLists          = [NoMarker]
-      , stInsId          = 1
-      , stDelId          = 1
-      , stStyleMaps      = StyleMaps M.empty M.empty
-      , stFirstPara      = False
-      , stInTable        = False
-      , stInList         = False
-      , stTocTitle       = [Str "Table of Contents"]
-      , stDynamicParaProps = Set.empty
-      , stDynamicTextProps = Set.empty
-      , stCurId          = 20
-      }
-
-type WS m = ReaderT WriterEnv (StateT WriterState m)
-
 renumIdMap :: Int -> [Element] -> M.Map Text Text
 renumIdMap _ [] = M.empty
 renumIdMap n (e:es)
@@ -858,12 +758,6 @@ separateTables (x@Table{}:xs@(Table{}:_)) =
   x : RawBlock (Format "openxml") "<w:p />" : separateTables xs
 separateTables (x:xs) = x : separateTables xs
 
-pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
-pStyleM styleName = do
-  pStyleMap <- gets (smParaStyle . stStyleMaps)
-  let sty' = getStyleIdFromName styleName pStyleMap
-  return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
-
 rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element
 rStyleM styleName = do
   cStyleMap <- gets (smCharStyle . stStyleMaps)
@@ -995,78 +889,8 @@ blockToOpenXML' _ HorizontalRule = do
     $ mknode "v:rect" [("style","width:0;height:1.5pt"),
                        ("o:hralign","center"),
                        ("o:hrstd","t"),("o:hr","t")] () ]
-blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
-  let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
-  setFirstPara
-  modify $ \s -> s { stInTable = True }
-  let captionStr = stringify caption
-  caption' <- if null caption
-                 then return []
-                 else withParaPropM (pStyleM "Table Caption")
-                      $ blockToOpenXML opts (Para caption)
-  let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
-  -- Table cells require a <w:p> element, even an empty one!
-  -- Not in the spec but in Word 2007, 2010. See #4953. And
-  -- apparently the last element must be a <w:p>, see #6983.
-  let cellToOpenXML (al, cell) = do
-        es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell
-        return $
-          case reverse (onlyElems es) of
-            b:e:_ | qName (elName b) == "bookmarkEnd"
-                  , qName (elName e) == "p" -> es
-            e:_   | qName (elName e) == "p" -> es
-            _ -> es ++ [Elem $ mknode "w:p" [] ()]
-  headers' <- mapM cellToOpenXML $ zip aligns headers
-  rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
-  compactStyle <- pStyleM "Compact"
-  let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
-  let mkcell contents = mknode "w:tc" []
-                            $ if null contents
-                                 then emptyCell'
-                                 else contents
-  let mkrow cells =
-         mknode "w:tr" [] $
-           map mkcell cells
-  let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
-  let fullrow = 5000 -- 100% specified in pct
-  let (rowwidth :: Int) = round $ fullrow * sum widths
-  let mkgridcol w = mknode "w:gridCol"
-                       [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
-  let hasHeader = not $ all null headers
-  modify $ \s -> s { stInTable = False }
-  -- for compatibility with Word <= 2007, we include a val with a bitmask
-  -- 0×0020  Apply first row conditional formatting
-  -- 0×0040  Apply last row conditional formatting
-  -- 0×0080  Apply first column conditional formatting
-  -- 0×0100  Apply last column conditional formatting
-  -- 0×0200  Do not apply row banding conditional formatting
-  -- 0×0400  Do not apply column banding conditional formattin
-  let tblLookVal :: Int
-      tblLookVal = if hasHeader then 0x20 else 0
-  return $
-    caption' ++
-    [Elem $
-     mknode "w:tbl" []
-      ( mknode "w:tblPr" []
-        (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
-            mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
-                               ,("w:lastRow","0")
-                               ,("w:firstColumn","0")
-                               ,("w:lastColumn","0")
-                               ,("w:noHBand","0")
-                               ,("w:noVBand","0")
-                               ,("w:val", T.pack $ printf "%04x" tblLookVal)
-                               ] () :
-          [ mknode "w:tblCaption" [("w:val", captionStr)] ()
-          | not (null caption) ] )
-      : mknode "w:tblGrid" []
-        (if all (==0) widths
-            then []
-            else map mkgridcol widths)
-      : [ mkrow headers' | hasHeader ] ++
-      map mkrow rows'
-      )]
+blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) =
+  tableToOpenXML (blocksToOpenXML opts) blkCapt specs thead tbody tfoot
 blockToOpenXML' opts el
   | BulletList lst <- el = addOpenXMLList BulletMarker lst
   | OrderedList (start, numstyle, numdelim) lst <- el
@@ -1121,13 +945,6 @@ listItemToOpenXML opts numid (first:rest) = do
   modify $ \st -> st{ stInList = oldInList }
   return $ first'' ++ rest''
 
-alignmentToString :: Alignment -> Text
-alignmentToString alignment = case alignment of
-                                 AlignLeft    -> "left"
-                                 AlignRight   -> "right"
-                                 AlignCenter  -> "center"
-                                 AlignDefault -> "left"
-
 -- | Convert a list of inline elements to OpenXML.
 inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content]
 inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
@@ -1138,10 +955,6 @@ withNumId numid = local $ \env -> env{ envListNumId = numid }
 asList :: (PandocMonad m) => WS m a -> WS m a
 asList = local $ \env -> env{ envListLevel = envListLevel env + 1 }
 
-isStyle :: Element -> Bool
-isStyle e = isElem [] "w" "rStyle" e ||
-            isElem [] "w" "pStyle" e
-
 getTextProps :: (PandocMonad m) => WS m [Element]
 getTextProps = do
   props <- asks envTextProperties
@@ -1170,16 +983,6 @@ getParaProps displayMathPara = do
                 [] -> []
                 ps -> [mknode "w:pPr" [] ps]
 
-withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
-withParaProp d p =
-  local (\env -> env {envParaProperties = ep <> envParaProperties env}) p
-  where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d]
-
-withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
-withParaPropM md p = do
-  d <- md
-  withParaProp d p
-
 formattedString :: PandocMonad m => Text -> WS m [Element]
 formattedString str =
   -- properly handle soft hyphens
@@ -1200,9 +1003,6 @@ formattedRun els = do
   props <- getTextProps
   return [ mknode "w:r" [] $ props ++ els ]
 
-setFirstPara :: PandocMonad m => WS m ()
-setFirstPara =  modify $ \s -> s { stFirstPara = True }
-
 -- | Convert an inline element to OpenXML.
 inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content]
 inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il
@@ -1494,22 +1294,6 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
 br :: Element
 br = mknode "w:r" [] [mknode "w:br" [] ()]
 
--- Word will insert these footnotes into the settings.xml file
--- (whether or not they're visible in the document). If they're in the
--- file, but not in the footnotes.xml file, it will produce
--- problems. So we want to make sure we insert them into our document.
-defaultFootnotes :: [Element]
-defaultFootnotes = [ mknode "w:footnote"
-                     [("w:type", "separator"), ("w:id", "-1")]
-                     [ mknode "w:p" []
-                       [mknode "w:r" []
-                        [ mknode "w:separator" [] ()]]]
-                   , mknode "w:footnote"
-                     [("w:type", "continuationSeparator"), ("w:id", "0")]
-                     [ mknode "w:p" []
-                       [ mknode "w:r" []
-                         [ mknode "w:continuationSeparator" [] ()]]]]
-
 
 withDirection :: PandocMonad m => WS m a -> WS m a
 withDirection x = do
diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
new file mode 100644
index 000000000..a6b137fc4
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -0,0 +1,114 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{- |
+Module      : Text.Pandoc.Writers.Docx
+Copyright   : Copyright (C) 2012-2021 John MacFarlane
+License     : GNU GPL, version 2 or above
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Conversion of table blocks to docx.
+-}
+module Text.Pandoc.Writers.Docx.Table
+  ( tableToOpenXML
+  ) where
+
+import Control.Monad.State.Strict
+import Data.Text (Text)
+import Text.Pandoc.Definition
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Writers.Docx.Types
+import Text.Pandoc.Shared
+import Text.Pandoc.Writers.Shared
+import Text.Printf (printf)
+import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML
+import qualified Data.Text as T
+
+tableToOpenXML :: PandocMonad m
+               => ([Block] -> WS m [Content])
+               -> Caption
+               -> [ColSpec]
+               -> TableHead
+               -> [TableBody]
+               -> TableFoot
+               -> WS m [Content]
+tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
+  let (caption, aligns, widths, headers, rows) =
+        toLegacyTable blkCapt specs thead tbody tfoot
+  setFirstPara
+  modify $ \s -> s { stInTable = True }
+  let captionStr = stringify caption
+  caption' <- if null caption
+                 then return []
+                 else withParaPropM (pStyleM "Table Caption")
+                      $ blocksToOpenXML [Para caption]
+  let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
+  -- Table cells require a <w:p> element, even an empty one!
+  -- Not in the spec but in Word 2007, 2010. See #4953. And
+  -- apparently the last element must be a <w:p>, see #6983.
+  let cellToOpenXML (al, cell) = do
+        es <- withParaProp (alignmentFor al) $ blocksToOpenXML cell
+        return $
+          case reverse (onlyElems es) of
+            b:e:_ | qName (elName b) == "bookmarkEnd"
+                  , qName (elName e) == "p" -> es
+            e:_   | qName (elName e) == "p" -> es
+            _ -> es ++ [Elem $ mknode "w:p" [] ()]
+  headers' <- mapM cellToOpenXML $ zip aligns headers
+  rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
+  compactStyle <- pStyleM "Compact"
+  let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
+  let mkcell contents = mknode "w:tc" []
+                            $ if null contents
+                                 then emptyCell'
+                                 else contents
+  let mkrow cells =
+         mknode "w:tr" [] $
+           map mkcell cells
+  let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
+  let fullrow = 5000 -- 100% specified in pct
+  let (rowwidth :: Int) = round $ fullrow * sum widths
+  let mkgridcol w = mknode "w:gridCol"
+                       [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
+  let hasHeader = not $ all null headers
+  modify $ \s -> s { stInTable = False }
+  -- for compatibility with Word <= 2007, we include a val with a bitmask
+  -- 0×0020  Apply first row conditional formatting
+  -- 0×0040  Apply last row conditional formatting
+  -- 0×0080  Apply first column conditional formatting
+  -- 0×0100  Apply last column conditional formatting
+  -- 0×0200  Do not apply row banding conditional formatting
+  -- 0×0400  Do not apply column banding conditional formattin
+  let tblLookVal :: Int
+      tblLookVal = if hasHeader then 0x20 else 0
+  return $
+    caption' ++
+    [Elem $
+     mknode "w:tbl" []
+      ( mknode "w:tblPr" []
+        (   mknode "w:tblStyle" [("w:val","Table")] () :
+            mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
+            mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
+                               ,("w:lastRow","0")
+                               ,("w:firstColumn","0")
+                               ,("w:lastColumn","0")
+                               ,("w:noHBand","0")
+                               ,("w:noVBand","0")
+                               ,("w:val", T.pack $ printf "%04x" tblLookVal)
+                               ] () :
+          [ mknode "w:tblCaption" [("w:val", captionStr)] ()
+          | not (null caption) ] )
+      : mknode "w:tblGrid" []
+        (if all (==0) widths
+            then []
+            else map mkgridcol widths)
+      : [ mkrow headers' | hasHeader ] ++
+      map mkrow rows'
+      )]
+
+alignmentToString :: Alignment -> Text
+alignmentToString alignment = case alignment of
+                                 AlignLeft    -> "left"
+                                 AlignRight   -> "right"
+                                 AlignCenter  -> "center"
+                                 AlignDefault -> "left"
-- 
cgit v1.2.3


From dc0ba7294d9c40987b016ac886440a85ade8ae44 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 20 Apr 2021 13:38:16 +0200
Subject: Docx writer: add missing file

---
 src/Text/Pandoc/Writers/Docx/Types.hs | 181 ++++++++++++++++++++++++++++++++++
 1 file changed, 181 insertions(+)
 create mode 100644 src/Text/Pandoc/Writers/Docx/Types.hs

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs
new file mode 100644
index 000000000..006584c30
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx/Types.hs
@@ -0,0 +1,181 @@
+{-# LANGUAGE OverloadedStrings   #-}
+{- |
+Module      : Text.Pandoc.Writers.Docx
+Copyright   : Copyright (C) 2012-2021 John MacFarlane
+License     : GNU GPL, version 2 or above
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+
+Conversion of table blocks to docx.
+-}
+module Text.Pandoc.Writers.Docx.Types
+  ( EnvProps (..)
+  , WriterEnv (..)
+  , defaultWriterEnv
+  , WriterState (..)
+  , defaultWriterState
+  , WS
+  , ListMarker (..)
+  , listMarkerToId
+  , pStyleM
+  , isStyle
+  , setFirstPara
+  , withParaProp
+  , withParaPropM
+  ) where
+
+import Control.Applicative ((<|>))
+import Control.Monad.Reader
+import Control.Monad.State.Strict
+import Data.Text (Text)
+import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Definition
+import Text.Pandoc.MIME (MimeType)
+import Text.Pandoc.Writers.Docx.StyleMap
+import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML
+import qualified Data.ByteString as B
+import qualified Data.Map as M
+import qualified Data.Set as Set
+import qualified Data.Text as T
+
+data ListMarker = NoMarker
+                | BulletMarker
+                | NumberMarker ListNumberStyle ListNumberDelim Int
+                deriving (Show, Read, Eq, Ord)
+
+listMarkerToId :: ListMarker -> Text
+listMarkerToId NoMarker = "990"
+listMarkerToId BulletMarker = "991"
+listMarkerToId (NumberMarker sty delim n) = T.pack $
+  '9' : '9' : styNum : delimNum : show n
+  where styNum = case sty of
+                      DefaultStyle -> '2'
+                      Example      -> '3'
+                      Decimal      -> '4'
+                      LowerRoman   -> '5'
+                      UpperRoman   -> '6'
+                      LowerAlpha   -> '7'
+                      UpperAlpha   -> '8'
+        delimNum = case delim of
+                      DefaultDelim -> '0'
+                      Period       -> '1'
+                      OneParen     -> '2'
+                      TwoParens    -> '3'
+
+
+data EnvProps = EnvProps{ styleElement  :: Maybe Element
+                        , otherElements :: [Element]
+                        }
+
+instance Semigroup EnvProps where
+  EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es')
+
+instance Monoid EnvProps where
+  mempty = EnvProps Nothing []
+  mappend = (<>)
+
+data WriterEnv = WriterEnv
+  { envTextProperties :: EnvProps
+  , envParaProperties :: EnvProps
+  , envRTL            :: Bool
+  , envListLevel      :: Int
+  , envListNumId      :: Int
+  , envInDel          :: Bool
+  , envChangesAuthor  :: Text
+  , envChangesDate    :: Text
+  , envPrintWidth     :: Integer
+  }
+
+defaultWriterEnv :: WriterEnv
+defaultWriterEnv = WriterEnv
+  { envTextProperties = mempty
+  , envParaProperties = mempty
+  , envRTL = False
+  , envListLevel = -1
+  , envListNumId = 1
+  , envInDel = False
+  , envChangesAuthor  = "unknown"
+  , envChangesDate    = "1969-12-31T19:00:00Z"
+  , envPrintWidth     = 1
+  }
+
+
+data WriterState = WriterState{
+         stFootnotes      :: [Element]
+       , stComments       :: [([(Text, Text)], [Inline])]
+       , stSectionIds     :: Set.Set Text
+       , stExternalLinks  :: M.Map Text Text
+       , stImages         :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
+       , stLists          :: [ListMarker]
+       , stInsId          :: Int
+       , stDelId          :: Int
+       , stStyleMaps      :: StyleMaps
+       , stFirstPara      :: Bool
+       , stInTable        :: Bool
+       , stInList         :: Bool
+       , stTocTitle       :: [Inline]
+       , stDynamicParaProps :: Set.Set ParaStyleName
+       , stDynamicTextProps :: Set.Set CharStyleName
+       , stCurId          :: Int
+       }
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+        stFootnotes      = defaultFootnotes
+      , stComments       = []
+      , stSectionIds     = Set.empty
+      , stExternalLinks  = M.empty
+      , stImages         = M.empty
+      , stLists          = [NoMarker]
+      , stInsId          = 1
+      , stDelId          = 1
+      , stStyleMaps      = StyleMaps M.empty M.empty
+      , stFirstPara      = False
+      , stInTable        = False
+      , stInList         = False
+      , stTocTitle       = [Str "Table of Contents"]
+      , stDynamicParaProps = Set.empty
+      , stDynamicTextProps = Set.empty
+      , stCurId          = 20
+      }
+
+setFirstPara :: PandocMonad m => WS m ()
+setFirstPara =  modify $ \s -> s { stFirstPara = True }
+
+type WS m = ReaderT WriterEnv (StateT WriterState m)
+
+-- Word will insert these footnotes into the settings.xml file
+-- (whether or not they're visible in the document). If they're in the
+-- file, but not in the footnotes.xml file, it will produce
+-- problems. So we want to make sure we insert them into our document.
+defaultFootnotes :: [Element]
+defaultFootnotes = [ mknode "w:footnote"
+                     [("w:type", "separator"), ("w:id", "-1")]
+                     [ mknode "w:p" []
+                       [mknode "w:r" []
+                        [ mknode "w:separator" [] ()]]]
+                   , mknode "w:footnote"
+                     [("w:type", "continuationSeparator"), ("w:id", "0")]
+                     [ mknode "w:p" []
+                       [ mknode "w:r" []
+                         [ mknode "w:continuationSeparator" [] ()]]]]
+
+pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
+pStyleM styleName = do
+  pStyleMap <- gets (smParaStyle . stStyleMaps)
+  let sty' = getStyleIdFromName styleName pStyleMap
+  return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
+
+withParaProp :: PandocMonad m => Element -> WS m a -> WS m a
+withParaProp d p =
+  local (\env -> env {envParaProperties = ep <> envParaProperties env}) p
+  where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d]
+
+withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a
+withParaPropM md p = do
+  d <- md
+  withParaProp d p
+
+isStyle :: Element -> Bool
+isStyle e = isElem [] "w" "rStyle" e ||
+            isElem [] "w" "pStyle" e
-- 
cgit v1.2.3


From 547bc2cdf83b8be926de55521674c0e8fab12db5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 25 Apr 2021 10:31:33 -0700
Subject: Add quotes properly in markdown YAML metadata fields.

This fixes a bug, which caused the writer to look at the LAST
rather than the FIRST character in determining whether quotes
were needed.  So we got spurious quotes in some cases and
didn't get necessary quotes in others.

Closes #7245.  Updated a number of test cases accordingly.
---
 src/Text/Pandoc/Writers/Markdown.hs           | 11 +++++------
 test/command/1279.md                          |  2 +-
 test/command/biblatex-inproceedings.md        |  2 +-
 test/command/biblatex-jaffe.md                |  4 ++--
 test/command/biblatex-moraux.md               |  2 +-
 test/command/biblatex-quotes.md               |  2 +-
 test/command/biblatex-spiegelberg.md          |  4 ++--
 test/command/biblatex-test-case-conversion.md |  4 ++--
 test/command/biblatex-textnormal.md           |  2 +-
 9 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index daf45ed53..3295d9e6c 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -26,7 +26,7 @@ import Data.Default
 import Data.List (intersperse, sortOn, transpose)
 import Data.List.NonEmpty (nonEmpty, NonEmpty(..))
 import qualified Data.Map as M
-import Data.Maybe (fromMaybe, mapMaybe)
+import Data.Maybe (fromMaybe, mapMaybe, isNothing)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -142,18 +142,17 @@ valToYaml (SimpleVal x)
   | otherwise =
       if hasNewlines x
          then hang 0 ("|" <> cr) x
-         else if fst $ foldr needsDoubleQuotes (False, True) x
+         else if isNothing $ foldM needsDoubleQuotes True x
            then "\"" <> fmap escapeInDoubleQuotes x <> "\""
            else x
     where
-      needsDoubleQuotes t (positive, isFirst)
+      needsDoubleQuotes isFirst t
         = if T.any isBadAnywhere t ||
              (isFirst && T.any isYamlPunct (T.take 1 t))
-              then (True, False)
-              else (positive, False)
+              then Nothing
+              else Just False
       isBadAnywhere '#' = True
       isBadAnywhere ':' = True
-      isBadAnywhere '`' = False
       isBadAnywhere _   = False
       hasNewlines NewLine = True
       hasNewlines BlankLines{} = True
diff --git a/test/command/1279.md b/test/command/1279.md
index acad6859e..d3572f245 100644
--- a/test/command/1279.md
+++ b/test/command/1279.md
@@ -9,7 +9,7 @@ title: My Article
 [^1]: Dept. of This and That
 ^D
 ---
-author: "John Doe[^1]"
+author: John Doe[^1]
 date: 2014
 title: My Article
 ---
diff --git a/test/command/biblatex-inproceedings.md b/test/command/biblatex-inproceedings.md
index 29a1e0298..76b584800 100644
--- a/test/command/biblatex-inproceedings.md
+++ b/test/command/biblatex-inproceedings.md
@@ -63,7 +63,7 @@ references:
   publisher-place: Cambridge
   title: "Le *De Anima* dans la tradition grècque: Quelques aspects de
     l'interpretation du traité, de Theophraste à Themistius"
-  title-short: *De Anima* dans la tradition grècque
+  title-short: "*De Anima* dans la tradition grècque"
   type: paper-conference
 - author:
   - family: Salam
diff --git a/test/command/biblatex-jaffe.md b/test/command/biblatex-jaffe.md
index f172c6ae7..e0381772b 100644
--- a/test/command/biblatex-jaffe.md
+++ b/test/command/biblatex-jaffe.md
@@ -72,8 +72,8 @@ references:
   language: la
   number-of-volumes: 2
   publisher-place: Leipzig
-  title: "Regesta Pontificum Romanorum ab condita ecclesia ad annum post
-    Christum natum [mcxcviii]{.smallcaps}"
+  title: Regesta Pontificum Romanorum ab condita ecclesia ad annum post
+    Christum natum [mcxcviii]{.smallcaps}
   title-short: Regesta Pontificum Romanorum
   type: book
 ---
diff --git a/test/command/biblatex-moraux.md b/test/command/biblatex-moraux.md
index 8a1b084f8..1624ad960 100644
--- a/test/command/biblatex-moraux.md
+++ b/test/command/biblatex-moraux.md
@@ -86,7 +86,7 @@ references:
   publisher-place: Cambridge
   title: "Le *De Anima* dans la tradition grècque: Quelques aspects de
     l'interpretation du traité, de Theophraste à Themistius"
-  title-short: *De Anima* dans la tradition grècque
+  title-short: "*De Anima* dans la tradition grècque"
   type: paper-conference
 ---
 
diff --git a/test/command/biblatex-quotes.md b/test/command/biblatex-quotes.md
index 9d868cb76..50055316f 100644
--- a/test/command/biblatex-quotes.md
+++ b/test/command/biblatex-quotes.md
@@ -36,7 +36,7 @@ references:
   language: en-US
   publisher: Princeton University Press
   publisher-place: Princeton
-  title: "Aristotle's \"De Motu Animalium\""
+  title: Aristotle's "De Motu Animalium"
   type: book
 ---
 
diff --git a/test/command/biblatex-spiegelberg.md b/test/command/biblatex-spiegelberg.md
index 59d651958..59338ae88 100644
--- a/test/command/biblatex-spiegelberg.md
+++ b/test/command/biblatex-spiegelberg.md
@@ -63,8 +63,8 @@ references:
   issued: 1969
   language: de-DE
   page: 189-216
-  title: "Intention" und "Intentionalität" in der Scholastik, bei
-    Brentano und Husserl
+  title: "\"Intention\" und \"Intentionalität\" in der Scholastik, bei
+    Brentano und Husserl"
   title-short: Intention und Intentionalität
   type: article-journal
   volume: 29
diff --git a/test/command/biblatex-test-case-conversion.md b/test/command/biblatex-test-case-conversion.md
index aa30767ec..e69cb9336 100644
--- a/test/command/biblatex-test-case-conversion.md
+++ b/test/command/biblatex-test-case-conversion.md
@@ -63,9 +63,9 @@ references:
   id: item1
   issued: 2013
   language: en-US
-  title: "A title, in English, with a Proper Name and an ACRONYM and a
+  title: A title, in English, with a Proper Name and an ACRONYM and a
     [camelCase]{.nocase} word and some units, 400 [nm]{.nocase},
-    3 [cm]{.nocase}, and a quote, *Alea [iacta est]{.nocase}*"
+    3 [cm]{.nocase}, and a quote, *Alea [iacta est]{.nocase}*
   type: article-journal
 ---
 
diff --git a/test/command/biblatex-textnormal.md b/test/command/biblatex-textnormal.md
index 0aa5550c3..a4e6107ec 100644
--- a/test/command/biblatex-textnormal.md
+++ b/test/command/biblatex-textnormal.md
@@ -9,7 +9,7 @@
 nocite: "[@*]"
 references:
 - id: item1
-  title: "The title [of this book]{.nodecor}"
+  title: The title [of this book]{.nodecor}
   type: book
 ---
 
-- 
cgit v1.2.3


From e9c0f9f97ba6459530c7bb2ffb55d432a1ba7884 Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sun, 25 Apr 2021 19:36:06 +0200
Subject: Markdown writer: Cleaner (code)blocks with single class (#7242)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a block only has a single class and no other attributes,
it is not necessary to wrap the class attribute in curly braces –
the class name can be placed after the opening mark as is.

This will result in bit cleaner output when pandoc is used
as a markdown pretty-printer.
---
 src/Text/Pandoc/Writers/Markdown.hs | 10 ++++++++--
 test/command/5304.md                |  2 +-
 test/command/5519.md                |  2 +-
 test/command/6925.md                |  4 ++--
 test/command/toc.md                 |  4 ++--
 5 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 3295d9e6c..2ad9eabd9 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -278,6 +278,12 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
               escAttrChar '\\' = literal "\\\\"
               escAttrChar c    = literal $ T.singleton c
 
+-- | (Code) blocks with a single class can just use it standalone,
+-- no need to bother with curly braces.
+classOrAttrsToMarkdown :: Attr -> Doc Text
+classOrAttrsToMarkdown ("",[cls],_) = literal cls
+classOrAttrsToMarkdown attrs = attrsToMarkdown attrs
+
 linkAttributes :: WriterOptions -> Attr -> Doc Text
 linkAttributes opts attr =
   if isEnabled Ext_link_attributes opts && attr /= nullAttr
@@ -343,7 +349,7 @@ blockToMarkdown' opts (Div attrs ils) = do
     case () of
          _ | isEnabled Ext_fenced_divs opts &&
              attrs /= nullAttr ->
-                nowrap (literal ":::" <+> attrsToMarkdown attrs) $$
+                nowrap (literal ":::" <+> classOrAttrsToMarkdown attrs) $$
                 chomp contents $$
                 literal ":::" <> blankline
            | isEnabled Ext_native_divs opts ||
@@ -512,7 +518,7 @@ blockToMarkdown' opts (CodeBlock attribs str) = do
      backticks = endline '`'
      tildes = endline '~'
      attrs  = if isEnabled Ext_fenced_code_attributes opts
-                 then nowrap $ " " <> attrsToMarkdown attribs
+                 then nowrap $ " " <> classOrAttrsToMarkdown attribs
                  else case attribs of
                             (_,cls:_,_) -> " " <> literal cls
                             _             -> empty
diff --git a/test/command/5304.md b/test/command/5304.md
index 62b2b9ddd..70f32a96a 100644
--- a/test/command/5304.md
+++ b/test/command/5304.md
@@ -11,7 +11,7 @@
 ... 
 ```
 ^D
-``` {.markdown}
+``` markdown
 `«sträng»`
 
 `` «sträng» ``
diff --git a/test/command/5519.md b/test/command/5519.md
index a175ce9f9..ecde184f1 100644
--- a/test/command/5519.md
+++ b/test/command/5519.md
@@ -6,7 +6,7 @@
  ```
 ``````
 ^D
-```` {.attr}
+```` attr
  ```
  code
  ```
diff --git a/test/command/6925.md b/test/command/6925.md
index 458a0b91d..e0d8e6870 100644
--- a/test/command/6925.md
+++ b/test/command/6925.md
@@ -20,13 +20,13 @@ a
 \end{thm2}
 \end{document}
 ^D
-::: {.thm}
+::: thm
 **Theorem 1**. *a*
 
 ![image](1.png)
 :::
 
-::: {.thm2}
+::: thm2
 **Theorem 1**. a
 
 ![image](1.png)
diff --git a/test/command/toc.md b/test/command/toc.md
index 794af7690..543f97ba9 100644
--- a/test/command/toc.md
+++ b/test/command/toc.md
@@ -31,7 +31,7 @@
 
 ## b
 
-::: {.interior}
+::: interior
 # C
 
 ## cc
@@ -39,7 +39,7 @@
 # D
 :::
 
-::: {.blue}
+::: blue
 # E
 
 ## e
-- 
cgit v1.2.3


From c56d080a253c1364ca55ae97b8d43ed638acf723 Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sun, 25 Apr 2021 21:19:07 +0200
Subject: Writers: Recognize custom syntax definitions (#7241)

Languages defined using `--syntax-definition` were not recognized by `languagesByExtension`.
This patch corrects that, allowing the writers to see all custom definitions.

The LaTeX still uses the default syntax map, but that's okay in that context, since
`--syntax-definition` won't create new listings styles.
---
 src/Text/Pandoc/Highlighting.hs    | 10 +++++-----
 src/Text/Pandoc/Readers/LaTeX.hs   |  3 ++-
 src/Text/Pandoc/Writers/Docbook.hs |  7 ++++---
 src/Text/Pandoc/Writers/JATS.hs    | 24 +++++++++++++-----------
 src/Text/Pandoc/Writers/TEI.hs     |  7 ++++---
 5 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 0bb6ed319..62a261e50 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -52,12 +52,12 @@ highlightingStyles =
    ("breezedark", breezeDark),
    ("haddock", haddock)]
 
-languages :: [T.Text]
-languages = [T.toLower (sName s) | s <- M.elems defaultSyntaxMap]
+languages :: SyntaxMap -> [T.Text]
+languages syntaxmap = [T.toLower (sName s) | s <- M.elems syntaxmap]
 
-languagesByExtension :: T.Text -> [T.Text]
-languagesByExtension ext =
-  [T.toLower (sName s) | s <- syntaxesByExtension defaultSyntaxMap (T.unpack ext)]
+languagesByExtension :: SyntaxMap -> T.Text -> [T.Text]
+languagesByExtension syntaxmap ext =
+  [T.toLower (sName s) | s <- syntaxesByExtension syntaxmap (T.unpack ext)]
 
 highlight :: SyntaxMap
           -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 203dab83c..19c257a48 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -32,6 +32,7 @@ import Data.Maybe (fromMaybe, maybeToList)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
+import Skylighting (defaultSyntaxMap)
 import System.FilePath (addExtension, replaceExtension, takeExtension)
 import Text.Collate.Lang (renderLang)
 import Text.Pandoc.Builder as B
@@ -1170,7 +1171,7 @@ inputListing = do
   let (ident,classes,kvs) = parseListingsOptions options
   let classes' =
         (case listingsLanguage options of
-           Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>)
+           Nothing -> (take 1 (languagesByExtension defaultSyntaxMap (T.pack $ takeExtension $ T.unpack f)) <>)
            Just _  -> id) classes
   let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
   let lastline = fromMaybe (length codeLines) $
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 02b141250..25bd308bf 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -253,17 +253,18 @@ blockToDocbook opts (LineBlock lns) =
   blockToDocbook opts $ linesToPara lns
 blockToDocbook opts (BlockQuote blocks) =
   inTagsIndented "blockquote" <$> blocksToDocbook opts blocks
-blockToDocbook _ (CodeBlock (_,classes,_) str) = return $
+blockToDocbook opts (CodeBlock (_,classes,_) str) = return $
   literal ("<programlisting" <> lang <> ">") <> cr <>
      flush (literal (escapeStringForXML str) <> cr <> literal "</programlisting>")
     where lang  = if null langs
                      then ""
                      else " language=\"" <> escapeStringForXML (head langs) <>
                           "\""
-          isLang l    = T.toLower l `elem` map T.toLower languages
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 blockToDocbook opts (BulletList lst) = do
   let attribs = [("spacing", "compact") | isTightList lst]
diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index b58ff8aef..0bcfa0df4 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -224,19 +224,21 @@ imageMimeType src kvs =
                   (T.drop 1 . T.dropWhile (/='/') <$> mbMT)
   in (maintype, subtype)
 
-languageFor :: [Text] -> Text
-languageFor classes =
+languageFor :: WriterOptions -> [Text] -> Text
+languageFor opts classes =
   case langs of
      (l:_) -> escapeStringForXML l
      []    -> ""
-    where isLang l    = T.toLower l `elem` map T.toLower languages
+    where
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 
-codeAttr :: Attr -> (Text, [(Text, Text)])
-codeAttr (ident,classes,kvs) = (lang, attr)
+codeAttr :: WriterOptions -> Attr -> (Text, [(Text, Text)])
+codeAttr opts (ident,classes,kvs) = (lang, attr)
     where
        attr = [("id", escapeNCName ident) | not (T.null ident)] ++
               [("language",lang) | not (T.null lang)] ++
@@ -244,7 +246,7 @@ codeAttr (ident,classes,kvs) = (lang, attr)
                 "code-version", "executable",
                 "language-version", "orientation",
                     "platforms", "position", "specific-use"]]
-       lang  = languageFor classes
+       lang  = languageFor opts classes
 
 -- | Convert a Pandoc block element to JATS.
 blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text)
@@ -330,9 +332,9 @@ blockToJATS opts (BlockQuote blocks) = do
                     HorizontalRule -> True
                     _              -> False
   inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks
-blockToJATS _ (CodeBlock a str) = return $
+blockToJATS opts (CodeBlock a str) = return $
   inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str)))
-    where (lang, attr) = codeAttr a
+    where (lang, attr) = codeAttr opts a
           tag          = if T.null lang then "preformat" else "code"
 blockToJATS _ (BulletList []) = return empty
 blockToJATS opts (BulletList lst) =
@@ -412,9 +414,9 @@ inlineToJATS opts (Quoted SingleQuote lst) = do
 inlineToJATS opts (Quoted DoubleQuote lst) = do
   contents <- inlinesToJATS opts lst
   return $ char '“' <> contents <> char '”'
-inlineToJATS _ (Code a str) =
+inlineToJATS opts (Code a str) =
   return $ inTags False tag attr $ literal (escapeStringForXML str)
-    where (lang, attr) = codeAttr a
+    where (lang, attr) = codeAttr opts a
           tag          = if T.null lang then "monospace" else "code"
 inlineToJATS _ il@(RawInline f x)
   | f == "jats" = return $ literal x
diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs
index b926c48a1..18015259d 100644
--- a/src/Text/Pandoc/Writers/TEI.hs
+++ b/src/Text/Pandoc/Writers/TEI.hs
@@ -146,16 +146,17 @@ blockToTEI opts (LineBlock lns) =
   blockToTEI opts $ linesToPara lns
 blockToTEI opts (BlockQuote blocks) =
   inTagsIndented "quote" <$> blocksToTEI opts blocks
-blockToTEI _ (CodeBlock (_,classes,_) str) =
+blockToTEI opts (CodeBlock (_,classes,_) str) =
   return $ literal ("<ab type='codeblock " <> lang <> "'>") <> cr <>
      flush (literal (escapeStringForXML str) <> cr <> text "</ab>")
     where lang  = if null langs
                      then ""
                      else escapeStringForXML (head langs)
-          isLang l    = T.toLower l `elem` map T.toLower languages
+          syntaxMap = writerSyntaxMap opts
+          isLang l    = T.toLower l `elem` map T.toLower (languages syntaxMap)
           langsFrom s = if isLang s
                            then [s]
-                           else languagesByExtension . T.toLower $ s
+                           else (languagesByExtension syntaxMap) . T.toLower $ s
           langs       = concatMap langsFrom classes
 blockToTEI opts (BulletList lst) = do
   let attribs = [("type", "unordered")]
-- 
cgit v1.2.3


From 3a98f7a0c767c3b13c7b358ca75d2bddab7cd4d2 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 25 Apr 2021 12:21:00 -0700
Subject: Minor code reformatting.

Also taking this opportunity to note, for the record, that
the commit for #7241 should be marked [API change].
It changes the type of `languagesByExtension` in Highlighting,
adding a parameter for a `SyntaxMap`.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 19c257a48..9ad168293 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1171,7 +1171,8 @@ inputListing = do
   let (ident,classes,kvs) = parseListingsOptions options
   let classes' =
         (case listingsLanguage options of
-           Nothing -> (take 1 (languagesByExtension defaultSyntaxMap (T.pack $ takeExtension $ T.unpack f)) <>)
+           Nothing -> (take 1 (languagesByExtension defaultSyntaxMap
+                                (T.pack $ takeExtension $ T.unpack f)) <>)
            Just _  -> id) classes
   let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
   let lastline = fromMaybe (length codeLines) $
-- 
cgit v1.2.3


From 0921b82d98b6ec7fa80ffd522c129b3828b9c00b Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 26 Apr 2021 18:07:01 +0200
Subject: Docx writer: autoset table width if no column has an explicit width.

---
 src/Text/Pandoc/Writers/Docx/Table.hs      |  18 +++++++++++-------
 test/docx/golden/table_one_row.docx        | Bin 9903 -> 9904 bytes
 test/docx/golden/table_with_list_cell.docx | Bin 10212 -> 10212 bytes
 test/docx/golden/tables.docx               | Bin 10238 -> 10239 bytes
 4 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index a6b137fc4..349f3a4ce 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -1,7 +1,8 @@
+{-# LANGUAGE LambdaCase          #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
-Module      : Text.Pandoc.Writers.Docx
+Module      : Text.Pandoc.Writers.Docx.Table
 Copyright   : Copyright (C) 2012-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -87,7 +88,10 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
      mknode "w:tbl" []
       ( mknode "w:tblPr" []
         (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
+            mknode "w:tblW" (if all (== 0) widths
+                              then [("w:type", "auto"), ("w:w", "0")]
+                              else [("w:type", "pct"), ("w:w", tshow rowwidth)])
+                            () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
                                ,("w:lastRow","0")
                                ,("w:firstColumn","0")
@@ -107,8 +111,8 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
       )]
 
 alignmentToString :: Alignment -> Text
-alignmentToString alignment = case alignment of
-                                 AlignLeft    -> "left"
-                                 AlignRight   -> "right"
-                                 AlignCenter  -> "center"
-                                 AlignDefault -> "left"
+alignmentToString = \case
+  AlignLeft    -> "left"
+  AlignRight   -> "right"
+  AlignCenter  -> "center"
+  AlignDefault -> "left"
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 6eaea2ac2..cab3fc31c 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 45a97ccaa..9238c7e20 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 115a16a48..6f0379def 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
-- 
cgit v1.2.3


From 85f379e474be72ae0f7a53ebc5efe2ad4f8165b4 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Wed, 28 Apr 2021 12:46:52 +0200
Subject: JATS writer: use either styled-content or named-content for spans.

If the element has a content-type attribute, or at least one class, then
that value is used as `content-type` and the span is put inside a
`<named-content>` element. Otherwise a `<styled-content>` element is
used instead.

Closes: #7211
---
 src/Text/Pandoc/Writers/JATS.hs | 36 ++++++++++++++++++++++++++----------
 test/Tests/Writers/JATS.hs      | 14 +++++++++-----
 2 files changed, 35 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs
index 0bcfa0df4..9db8723d1 100644
--- a/src/Text/Pandoc/Writers/JATS.hs
+++ b/src/Text/Pandoc/Writers/JATS.hs
@@ -21,12 +21,13 @@ module Text.Pandoc.Writers.JATS
   , writeJatsPublishing
   , writeJatsArticleAuthoring
   ) where
+import Control.Applicative ((<|>))
 import Control.Monad.Reader
 import Control.Monad.State
 import Data.Generics (everywhere, mkT)
 import Data.List (partition)
 import qualified Data.Map as M
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, listToMaybe)
 import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime)
 import qualified Data.Text as T
 import Data.Text (Text)
@@ -449,18 +450,33 @@ inlineToJATS opts (Note contents) = do
              $ text (show notenum)
 inlineToJATS opts (Cite _ lst) =
   inlinesToJATS opts lst
-inlineToJATS opts (Span (ident,_,kvs) ils) = do
+inlineToJATS opts (Span (ident,classes,kvs) ils) = do
   contents <- inlinesToJATS opts ils
-  let attr = [("id", escapeNCName ident) | not (T.null ident)] ++
-             [("xml:lang",l) | ("lang",l) <- kvs] ++
-             [(k,v) | (k,v) <- kvs
-                    ,  k `elem` ["alt", "content-type", "rid", "specific-use",
-                                 "vocab", "vocab-identifier", "vocab-term",
-                                 "vocab-term-identifier"]]
+  let commonAttr = [("id", escapeNCName ident) | not (T.null ident)] ++
+                   [("xml:lang",l) | ("lang",l) <- kvs] ++
+                   [(k,v) | (k,v) <- kvs,  k `elem` ["alt", "specific-use"]]
+  -- A named-content element is a good fit for spans, but requires a
+  -- content-type attribute to be present. We use either the explicit
+  -- attribute or the first class as content type. If neither is
+  -- available, then we fall back to using a @styled-content@ element.
+  let (tag, specificAttr) =
+        case lookup "content-type" kvs <|> listToMaybe classes of
+          Just ct -> ( "named-content"
+                     , ("content-type", ct) :
+                       [(k, v) | (k, v) <- kvs
+                       , k `elem` ["rid", "vocab", "vocab-identifier",
+                                   "vocab-term", "vocab-term-identifier"]])
+          -- Fall back to styled-content
+          Nothing -> ("styled-content"
+                     , [(k, v) | (k,v) <- kvs
+                       , k `elem` ["style", "style-type", "style-detail",
+                                   "toggle"]])
+  let attr = commonAttr ++ specificAttr
+  -- unwrap if wrapping element would have no attributes
   return $
     if null attr
-    then contents  -- unwrap if no relevant attributes are given
-    else inTags False "named-content" attr contents
+    then contents
+    else inTags False tag attr contents
 inlineToJATS _ (Math t str) = do
   let addPref (Xml.Attr q v)
          | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v
diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs
index e90438176..5b96ed2ed 100644
--- a/test/Tests/Writers/JATS.hs
+++ b/test/Tests/Writers/JATS.hs
@@ -148,13 +148,17 @@ tests =
       spanWith nullAttr "text in span" =?>
       "<p>text in span</p>"
 
-    , "converted to named-content element" =:
-      spanWith ("a", ["ignored"], [("alt", "aa")]) "text" =?>
-      "<p><named-content id=\"a\" alt=\"aa\">text</named-content></p>"
+    , "converted to named-content element if class given" =:
+      spanWith ("a", ["genus-species"], [("alt", "aa")]) "C. elegans" =?>
+      ("<p><named-content id=\"a\" alt=\"aa\" content-type=\"genus-species\">"
+       <> "C. elegans</named-content></p>")
 
-    , "unwrapped if named-content element would have no attributes" =:
-      spanWith ("", ["ignored"], [("hidden", "true")]) "text in span" =?>
+    , "unwrapped if styled-content element would have no attributes" =:
+      spanWith ("", [], [("hidden", "true")]) "text in span" =?>
       "<p>text in span</p>"
 
+    , "use content-type attribute if present" =:
+      spanWith ("", [], [("content-type", "species")]) "E. coli" =?>
+      "<p><named-content content-type=\"species\">E. coli</named-content></p>"
     ]
   ]
-- 
cgit v1.2.3


From 80e2e88287f43d88ea92a77779b25e161c81f67b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 28 Apr 2021 23:30:16 -0700
Subject: Smarter smart quotes.

Treat a leading " with no closing " as a left curly quote.
This supports the practice, in fiction, of continuing
paragraphs quoting the same speaker without an end quote.
It also helps with quotes that break over lines in line
blocks.

Closes #7216.
---
 src/Text/Pandoc/Parsing.hs           | 45 +++++++++++++++++++++++-------------
 src/Text/Pandoc/Readers/Markdown.hs  | 18 ++++++++-------
 src/Text/Pandoc/Readers/RST.hs       | 19 +--------------
 src/Text/Pandoc/Readers/TWiki.hs     | 22 +-----------------
 test/Tests/Readers/Markdown.hs       |  2 +-
 test/command/7216.md                 | 19 +++++++++++++++
 test/command/pandoc-citeproc-320a.md | 12 +++++-----
 7 files changed, 67 insertions(+), 70 deletions(-)
 create mode 100644 test/command/7216.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 847fd2e05..2f6189104 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -105,8 +105,9 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              singleQuoteEnd,
                              doubleQuoteStart,
                              doubleQuoteEnd,
-                             ellipses,
                              apostrophe,
+                             doubleCloseQuote,
+                             ellipses,
                              dash,
                              nested,
                              citeKey,
@@ -1398,10 +1399,7 @@ smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext
                  -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   guardEnabled Ext_smart
-  choice [ quoted inlineParser, apostrophe, dash, ellipses ]
-
-apostrophe :: Stream s m Char => ParserT s st m Inlines
-apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019")
+  choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
 
 quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
        => ParserT s st m Inlines
@@ -1411,16 +1409,22 @@ quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
-singleQuoted inlineParser = try $ B.singleQuoted . mconcat
-  <$  singleQuoteStart
-  <*> withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)
-
-doubleQuoted :: (HasQuoteContext st m, Stream s m Char)
+singleQuoted inlineParser = do
+  singleQuoteStart
+  (B.singleQuoted . mconcat <$>
+    try
+     (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
+   <|> pure "\8217"
+
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
-doubleQuoted inlineParser = try $ B.doubleQuoted . mconcat
-  <$  doubleQuoteStart
-  <*> withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)
+doubleQuoted inlineParser = do
+  doubleQuoteStart
+  (B.doubleQuoted . mconcat <$>
+    try
+     (withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd)))
+   <|> pure (B.str "\8220")
 
 failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t)
                      => QuoteContext
@@ -1443,7 +1447,7 @@ singleQuoteStart = do
   guard =<< notAfterString
   try $ do
     charOrRef "'\8216\145"
-    notFollowedBy (oneOf [' ', '\t', '\n'])
+    notFollowedBy (satisfy isSpaceChar)
 
 singleQuoteEnd :: Stream s m Char
                => ParserT s st m ()
@@ -1451,17 +1455,26 @@ singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
   notFollowedBy alphaNum
 
-doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char)
+doubleQuoteStart :: (HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char)
                  => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
+  guard =<< notAfterString
   try $ do charOrRef "\"\8220\147"
-           notFollowedBy (oneOf [' ', '\t', '\n'])
+           notFollowedBy (satisfy isSpaceChar)
 
 doubleQuoteEnd :: Stream s m Char
                => ParserT s st m ()
 doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
+apostrophe :: Stream s m Char => ParserT s st m Inlines
+apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
+
+doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
+doubleCloseQuote = B.str "\8221" <$ char '"'
+
 ellipses :: Stream s m Char
          => ParserT s st m Inlines
 ellipses = try (string "..." >> return (B.str "\8230"))
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4b20e3a8b..ba8ed147e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -2199,25 +2199,27 @@ citation = try $ do
 smart :: PandocMonad m => MarkdownParser m (F Inlines)
 smart = do
   guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice (map (return <$>) [apostrophe, dash, ellipses])
+  doubleQuoted <|> singleQuoted <|> (return <$> doubleCloseQuote) <|>
+    (return <$> apostrophe) <|> (return <$> dash) <|> (return <$> ellipses)
 
 singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-singleQuoted = try $ do
+singleQuoted = do
   singleQuoteStart
-  withQuoteContext InSingleQuote $
+  (try (withQuoteContext InSingleQuote $
     fmap B.singleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline singleQuoteEnd
+      many1Till inline singleQuoteEnd))
+    <|> (return (return (B.str "\8217")))
 
 -- doubleQuoted will handle regular double-quoted sections, as well
 -- as dialogues with an open double-quote without a close double-quote
 -- in the same paragraph.
 doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
-doubleQuoted = try $ do
+doubleQuoted = do
   doubleQuoteStart
-  withQuoteContext InDoubleQuote $
+  (try (withQuoteContext InDoubleQuote $
     fmap B.doubleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline doubleQuoteEnd
+      many1Till inline doubleQuoteEnd))
+    <|> (return (return (B.str "\8220")))
 
 toRow :: [Blocks] -> Row
 toRow = Row nullAttr . map B.simpleCell
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 514e3b88d..ac4c0b6cb 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1658,21 +1658,4 @@ note = try $ do
       return $ B.note contents
 
 smart :: PandocMonad m => RSTParser m Inlines
-smart = do
-  guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice [apostrophe, dash, ellipses]
-
-singleQuoted :: PandocMonad m => RSTParser m Inlines
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote $
-    B.singleQuoted . trimInlines . mconcat <$>
-      many1Till inline singleQuoteEnd
-
-doubleQuoted :: PandocMonad m => RSTParser m Inlines
-doubleQuoted = try $ do
-  doubleQuoteStart
-  withQuoteContext InDoubleQuote $
-    B.doubleQuoted . trimInlines . mconcat <$>
-      many1Till inline doubleQuoteEnd
+smart = smartPunctuation inline
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 484a6c923..c4d7bcc93 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -469,27 +469,7 @@ symbol :: PandocMonad m => TWParser m B.Inlines
 symbol = B.str <$> countChar 1 nonspaceChar
 
 smart :: PandocMonad m => TWParser m B.Inlines
-smart = do
-  guardEnabled Ext_smart
-  doubleQuoted <|> singleQuoted <|>
-    choice [ apostrophe
-           , dash
-           , ellipses
-           ]
-
-singleQuoted :: PandocMonad m => TWParser m B.Inlines
-singleQuoted = try $ do
-  singleQuoteStart
-  withQuoteContext InSingleQuote
-    (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd)
-
-doubleQuoted :: PandocMonad m => TWParser m B.Inlines
-doubleQuoted = try $ do
-  doubleQuoteStart
-  contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
-  withQuoteContext InDoubleQuote (doubleQuoteEnd >>
-   return (B.doubleQuoted $ B.trimInlines contents))
-   <|> return (B.str "\8220" B.<> contents)
+smart = smartPunctuation inline
 
 link :: PandocMonad m => TWParser m B.Inlines
 link = try $ do
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 0930deae6..6e38da21a 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -358,7 +358,7 @@ tests = [ testGroup "inline code"
               para (text "The value of the " <> math "x" <> text "\8217s and the systems\8217 condition.")
           , test markdownSmart "unclosed double quote"
             ("**this should \"be bold**"
-            =?> para (strong "this should \"be bold"))
+            =?> para (strong "this should \8220be bold"))
           ]
         , testGroup "footnotes"
           [ "indent followed by newline and flush-left text" =:
diff --git a/test/command/7216.md b/test/command/7216.md
new file mode 100644
index 000000000..cab3b9689
--- /dev/null
+++ b/test/command/7216.md
@@ -0,0 +1,19 @@
+```
+pandoc -t latex
+"This is some text in quotes. Another paragraph by the same speaker follows. The first paragraph should have no close quote.
+
+"The second paragraph should have open and close quotes."
+
+| "Open quote on this line,
+| Close quote on the next line."
+| "Quotes on the same line."
+^D
+``This is some text in quotes. Another paragraph by the same speaker
+follows. The first paragraph should have no close quote.
+
+``The second paragraph should have open and close quotes.''
+
+``Open quote on this line,\\
+Close quote on the next line.''\\
+``Quotes on the same line.''
+```
diff --git a/test/command/pandoc-citeproc-320a.md b/test/command/pandoc-citeproc-320a.md
index 79dacfa10..e894a2250 100644
--- a/test/command/pandoc-citeproc-320a.md
+++ b/test/command/pandoc-citeproc-320a.md
@@ -49,17 +49,13 @@ references:
 Foo [@item1; @item2; @item3; @item4; @item5; @item6; @item7; @item8].
 ^D
 Foo (al-ʾUdhrī, n.d.; al-ʿUdhrī, n.d.; al-\'Udhrī, n.d.; al-'Udhrī,
-n.d.; al-'Udhrī, n.d.; Uch, n.d.; Uebel, n.d.; Zzz, n.d.).
+n.d.a, n.d.b; Uch, n.d.; Uebel, n.d.; Zzz, n.d.).
 
 ::: {#refs .references .csl-bib-body .hanging-indent}
 ::: {#ref-item6 .csl-entry}
 Uch, Ann. n.d.
 :::
 
-::: {#ref-item4 .csl-entry}
-'Udhrī, Jamīl al-. n.d.
-:::
-
 ::: {#ref-item1 .csl-entry}
 ʾUdhrī, Jamīl al-. n.d.
 :::
@@ -72,8 +68,12 @@ Uch, Ann. n.d.
 \'Udhrī, Jamīl al-. n.d.
 :::
 
+::: {#ref-item4 .csl-entry}
+'Udhrī, Jamīl al-. n.d.a.
+:::
+
 ::: {#ref-item5 .csl-entry}
-'Udhrī, Jamīl al-. n.d.
+---------. n.d.b.
 :::
 
 ::: {#ref-item7 .csl-entry}
-- 
cgit v1.2.3


From d14c5f94df490996d12f8a56e29f927b9f9549e8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 29 Apr 2021 08:48:49 -0700
Subject: Further improvements in smart quotes.

Improves heuristic for detection of an "open double quote."
Closes #2103.
---
 src/Text/Pandoc/Parsing.hs | 4 ++--
 test/command/2103.md       | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 test/command/2103.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 2f6189104..37ab0adaa 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1447,7 +1447,7 @@ singleQuoteStart = do
   guard =<< notAfterString
   try $ do
     charOrRef "'\8216\145"
-    notFollowedBy (satisfy isSpaceChar)
+    void $ lookAhead (satisfy (not . isSpaceChar))
 
 singleQuoteEnd :: Stream s m Char
                => ParserT s st m ()
@@ -1463,7 +1463,7 @@ doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
   guard =<< notAfterString
   try $ do charOrRef "\"\8220\147"
-           notFollowedBy (satisfy isSpaceChar)
+           void $ lookAhead (satisfy (not . isSpaceChar))
 
 doubleQuoteEnd :: Stream s m Char
                => ParserT s st m ()
diff --git a/test/command/2103.md b/test/command/2103.md
new file mode 100644
index 000000000..14a522a5e
--- /dev/null
+++ b/test/command/2103.md
@@ -0,0 +1,8 @@
+```
+pandoc -t latex
+| A happy pandoc user said "fix this bug please
+| or I'll go crazy!"
+^D
+A happy pandoc user said ``fix this bug please\\
+or I'll go crazy!''
+```
-- 
cgit v1.2.3


From b6a65445e10d74dfe384763c37438338bd395372 Mon Sep 17 00:00:00 2001
From: mbrackeantidot <65160241+mbrackeantidot@users.noreply.github.com>
Date: Thu, 29 Apr 2021 18:11:44 +0200
Subject: Docx reader: add handling of vml image objects (jgm#4735) (#7257)

They represent images, the same way as other images in vml format.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs |  11 +++++++++--
 test/Tests/Readers/Docx.hs            |   4 ++++
 test/docx/image_vml_as_object.docx    | Bin 0 -> 74199 bytes
 test/docx/image_vml_as_object.native  |   2 ++
 4 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 test/docx/image_vml_as_object.docx
 create mode 100644 test/docx/image_vml_as_object.native

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index f8ed248d7..7325ff300 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -709,7 +709,8 @@ elemToParPart ns element
      case drawing of
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
        Nothing -> throwError WrongElem
--- The below is an attempt to deal with images in deprecated vml format.
+-- The two cases below are an attempt to deal with images in deprecated vml format.
+-- Todo: check out title and attr for deprecated format.
 elemToParPart ns element
   | isElem ns "w" "r" element
   , Just _ <- findChildByName ns "w" "pict" element =
@@ -717,9 +718,15 @@ elemToParPart ns element
                   >>= findAttrByName ns "r" "id"
     in
      case drawing of
-       -- Todo: check out title and attr for deprecated format.
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
        Nothing -> throwError WrongElem
+elemToParPart ns element
+  | isElem ns "w" "r" element
+  , Just objectElem <- findChildByName ns "w" "object" element
+  , Just shapeElem <- findChildByName ns "v" "shape" objectElem
+  , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
+  , Just drawingId <- findAttrByName ns "r" "id" imagedataElem
+  = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
 -- Chart
 elemToParPart ns element
   | isElem ns "w" "r" element
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
index 263e04173..2cce70cc5 100644
--- a/test/Tests/Readers/Docx.hs
+++ b/test/Tests/Readers/Docx.hs
@@ -155,6 +155,10 @@ tests = [ testGroup "document"
             "VML image"
             "docx/image_vml.docx"
             "docx/image_vml.native"
+          , testCompare
+            "VML image as object"
+            "docx/image_vml_as_object.docx"
+            "docx/image_vml_as_object.native"
           , testCompare
             "inline image in links"
             "docx/inline_images.docx"
diff --git a/test/docx/image_vml_as_object.docx b/test/docx/image_vml_as_object.docx
new file mode 100644
index 000000000..7e1e4d2ca
Binary files /dev/null and b/test/docx/image_vml_as_object.docx differ
diff --git a/test/docx/image_vml_as_object.native b/test/docx/image_vml_as_object.native
new file mode 100644
index 000000000..6e689486a
--- /dev/null
+++ b/test/docx/image_vml_as_object.native
@@ -0,0 +1,2 @@
+[Para [Str "Test",Space,Str "with",Space,Str "object",Space,Str "as",Space,Str "image:"]
+,Para [Image ("",[],[]) [] ("media/image1.emf","")]]
-- 
cgit v1.2.3


From 6b16f3bb0d4c3a0ddc051ff9948c67d5cac72bcb Mon Sep 17 00:00:00 2001
From: tecosaur <tec@tecosaur.com>
Date: Fri, 30 Apr 2021 16:23:28 +0800
Subject: Org writer: inline latex envs need newlines (#7259)

Closes #7252

As specified in https://orgmode.org/manual/LaTeX-fragments.html, an
inline \begin{}...\end{} LaTeX block must start on a new line.
---
 src/Text/Pandoc/Writers/Org.hs | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index d0c9813da..d404f1c8d 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -407,6 +407,8 @@ inlineToOrg (Math t str) = do
               then "\\(" <> literal str <> "\\)"
               else "\\[" <> literal str <> "\\]"
 inlineToOrg il@(RawInline f str)
+  | elem f ["tex", "latex"] && T.isPrefixOf "\\begin" str =
+    return $ cr <> literal str <> cr
   | isRawFormat f = return $ literal str
   | otherwise     = do
       report $ InlineNotRendered il
-- 
cgit v1.2.3


From 3da919e35d02ec1a7e3719e2fdfd699a69d74921 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 1 May 2021 18:52:24 +0200
Subject: Add new internal module Text.Pandoc.Writers.GridTable

---
 pandoc.cabal                         |   2 +
 src/Text/Pandoc/Writers/GridTable.hs | 157 +++++++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+)
 create mode 100644 src/Text/Pandoc/Writers/GridTable.hs

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 29a5bfc7a..8ea3aa681 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -438,6 +438,7 @@ library
                  SHA                   >= 1.6      && < 1.7,
                  aeson                 >= 0.7      && < 1.6,
                  aeson-pretty          >= 0.8.5    && < 0.9,
+                 array                 >= 0.5      && < 0.6,
                  attoparsec            >= 0.12     && < 0.15,
                  base64-bytestring     >= 0.1      && < 1.3,
                  binary                >= 0.7      && < 0.11,
@@ -659,6 +660,7 @@ library
                    Text.Pandoc.Writers.Docx.StyleMap,
                    Text.Pandoc.Writers.Docx.Table,
                    Text.Pandoc.Writers.Docx.Types,
+                   Text.Pandoc.Writers.GridTable
                    Text.Pandoc.Writers.JATS.References,
                    Text.Pandoc.Writers.JATS.Table,
                    Text.Pandoc.Writers.JATS.Types,
diff --git a/src/Text/Pandoc/Writers/GridTable.hs b/src/Text/Pandoc/Writers/GridTable.hs
new file mode 100644
index 000000000..c6f4cf456
--- /dev/null
+++ b/src/Text/Pandoc/Writers/GridTable.hs
@@ -0,0 +1,157 @@
+{-# LANGUAGE FlexibleContexts           #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE LambdaCase                 #-}
+{-# LANGUAGE TupleSections              #-}
+
+{- |
+Module      : Text.Pandoc.Writers.GridTable
+Copyright   : © 2020-2021 Albert Krewinkel
+License     : GNU GPL, version 2 or above
+
+Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
+
+Grid representation of pandoc tables.
+-}
+module Text.Pandoc.Writers.GridTable
+  ( Table (..)
+  , GridCell (..)
+  , RowIndex (..)
+  , ColIndex (..)
+  , CellIndex
+  , Part (..)
+  , toTable
+  , rowArray
+  ) where
+
+import Control.Monad (forM_)
+import Control.Monad.ST
+import Data.Array
+import Data.Array.MArray
+import Data.Array.ST
+import Data.Maybe (listToMaybe)
+import Data.STRef
+import Text.Pandoc.Definition hiding (Table)
+import qualified Text.Pandoc.Builder as B
+
+-- | A grid cell contains either a real table cell, or is the
+-- continuation of a column or row-spanning cell. In the latter case,
+-- the index of the continued cell is provided.
+data GridCell
+  = ContentCell Attr Alignment RowSpan ColSpan [Block]
+  | ContinuationCell CellIndex
+  deriving (Show)
+
+-- | Row index in a table part.
+newtype RowIndex = RowIndex Int deriving (Enum, Eq, Ix, Ord, Show)
+-- | Column index in a table part.
+newtype ColIndex = ColIndex Int deriving (Enum, Eq, Ix, Ord, Show)
+
+-- | Index to a cell in a table part.
+type CellIndex = (RowIndex, ColIndex)
+
+-- | Cells are placed on a grid. Row attributes are stored in a separate
+-- array.
+data Part = Part
+  { partAttr :: Attr
+  , partCellArray :: Array (RowIndex,ColIndex) GridCell
+  , partRowAttrs  :: Array RowIndex Attr
+  }
+
+data Table = Table
+  { tableAttr     :: Attr
+  , tableCaption  :: Caption
+  , tableColSpecs :: Array ColIndex ColSpec
+  , tableRowHeads :: RowHeadColumns
+  , tableHead     :: Part
+  , tableBodies   :: [Part]
+  , tableFoot     :: Part
+  }
+
+toTable
+  :: B.Attr
+  -> B.Caption
+  -> [B.ColSpec]
+  -> B.TableHead
+  -> [B.TableBody]
+  -> B.TableFoot
+  -> Table
+toTable attr caption colSpecs  thead tbodies tfoot =
+  Table attr caption colSpecs' rowHeads thGrid tbGrids tfGrid
+  where
+    colSpecs' = listArray (ColIndex 1, ColIndex $ length colSpecs) colSpecs
+    rowHeads = case listToMaybe tbodies of
+      Nothing -> RowHeadColumns 0
+      Just (TableBody _attr rowHeadCols _headerRows _rows) -> rowHeadCols
+    thGrid = let (TableHead headAttr rows) = thead
+             in rowsToPart headAttr rows
+    tbGrids = map bodyToGrid tbodies
+    tfGrid = let (TableFoot footAttr rows) = tfoot
+             in rowsToPart footAttr rows
+    bodyToGrid (TableBody bodyAttr _rowHeadCols _headRows rows) =
+      rowsToPart bodyAttr rows
+
+data BuilderCell
+  = FilledCell GridCell
+  | FreeCell
+
+fromBuilderCell :: BuilderCell -> GridCell
+fromBuilderCell = \case
+  FilledCell c -> c
+  FreeCell -> error "Found an unassigned cell."
+
+rowsToPart :: Attr -> [B.Row] -> Part
+rowsToPart attr = \case
+  [] -> Part
+        attr
+        (listArray ((RowIndex 1, ColIndex 1), (RowIndex 0, ColIndex 0)) [])
+        (listArray (RowIndex 1, RowIndex 0) [])
+  rows@(Row _attr firstRow:_) ->
+    let nrows = length rows
+        ncols = sum $ map (\(Cell _ _ _ (ColSpan cs) _) -> cs) firstRow
+        gbounds = ((RowIndex 1, ColIndex 1), (RowIndex nrows, ColIndex ncols))
+        mutableGrid :: ST s (STArray s CellIndex GridCell)
+        mutableGrid = do
+          grid <- newArray gbounds FreeCell
+          ridx <- newSTRef (RowIndex 1)
+          forM_ rows $ \(Row _attr cells) -> do
+            cidx <- newSTRef (ColIndex 1)
+            forM_ cells $ \(Cell cellAttr align rs cs blks) -> do
+              ridx' <- readSTRef ridx
+              let nextFreeInRow colindex@(ColIndex c) = do
+                    readArray grid (ridx', colindex) >>= \case
+                      FreeCell -> pure colindex
+                      _ -> nextFreeInRow $ ColIndex (c + 1)
+              cidx' <- readSTRef cidx >>= nextFreeInRow
+              writeArray grid (ridx', cidx') . FilledCell $
+                ContentCell cellAttr align rs cs blks
+              forM_ (continuationIndices ridx' cidx' rs cs) $ \idx -> do
+                writeArray grid idx . FilledCell $
+                  ContinuationCell (ridx', cidx')
+              -- go to new column
+              writeSTRef cidx cidx'
+            -- go to next row
+            modifySTRef ridx (incrRowIndex 1)
+          -- Swap BuilderCells with normal GridCells.
+          mapArray fromBuilderCell grid
+    in Part
+       { partCellArray = runSTArray mutableGrid
+       , partRowAttrs = listArray (RowIndex 1, RowIndex nrows) $
+                        map (\(Row rowAttr _) -> rowAttr) rows
+       , partAttr = attr
+       }
+
+continuationIndices :: RowIndex -> ColIndex -> RowSpan -> ColSpan -> [CellIndex]
+continuationIndices (RowIndex ridx) (ColIndex cidx) rowspan colspan =
+  let (RowSpan rs) = rowspan
+      (ColSpan cs) = colspan
+  in [ (RowIndex r, ColIndex c) | r <- [ridx..(ridx + rs - 1)]
+                                , c <- [cidx..(cidx + cs - 1)]
+                                , (r, c) /= (ridx, cidx)]
+
+rowArray :: RowIndex -> Array CellIndex GridCell -> Array ColIndex GridCell
+rowArray ridx grid =
+  let ((_minRidx, minCidx), (_maxRidx, maxCidx)) = bounds grid
+  in ixmap (minCidx, maxCidx) (ridx,) grid
+
+incrRowIndex :: RowSpan -> RowIndex -> RowIndex
+incrRowIndex (RowSpan n) (RowIndex r) = RowIndex $ r + n
-- 
cgit v1.2.3


From ddbf83f62c8bb6516203c99acd894c404351b5ae Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 1 May 2021 18:52:24 +0200
Subject: Docx writer: support colspans and rowspans in tables

See: #6315
---
 src/Text/Pandoc/Writers/Docx.hs            |   6 +-
 src/Text/Pandoc/Writers/Docx/Table.hs      | 200 +++++++++++++++++++----------
 src/Text/Pandoc/Writers/GridTable.hs       |   4 +-
 test/docx/golden/table_one_row.docx        | Bin 9904 -> 9925 bytes
 test/docx/golden/table_with_list_cell.docx | Bin 10212 -> 10230 bytes
 test/docx/golden/tables.docx               | Bin 10239 -> 10271 bytes
 6 files changed, 140 insertions(+), 70 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 7064ded09..e11961bfd 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -53,6 +53,7 @@ import Text.Pandoc.Writers.Docx.Table
 import Text.Pandoc.Writers.Docx.Types
 import Text.Pandoc.Shared
 import Text.Pandoc.Walk
+import qualified Text.Pandoc.Writers.GridTable as Grid
 import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
 import Text.TeXMath
@@ -889,8 +890,9 @@ blockToOpenXML' _ HorizontalRule = do
     $ mknode "v:rect" [("style","width:0;height:1.5pt"),
                        ("o:hralign","center"),
                        ("o:hrstd","t"),("o:hr","t")] () ]
-blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) =
-  tableToOpenXML (blocksToOpenXML opts) blkCapt specs thead tbody tfoot
+blockToOpenXML' opts (Table attr caption colspecs thead tbodies tfoot) =
+  tableToOpenXML (blocksToOpenXML opts)
+                 (Grid.toTable attr caption colspecs thead tbodies tfoot)
 blockToOpenXML' opts el
   | BulletList lst <- el = addOpenXMLList BulletMarker lst
   | OrderedList (start, numstyle, numdelim) lst <- el
diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index 349f3a4ce..bb931bf08 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -14,65 +14,39 @@ module Text.Pandoc.Writers.Docx.Table
   ) where
 
 import Control.Monad.State.Strict
+import Data.Array
 import Data.Text (Text)
 import Text.Pandoc.Definition
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Writers.Docx.Types
 import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
 import Text.Printf (printf)
+import Text.Pandoc.Writers.GridTable hiding (Table)
 import Text.Pandoc.Writers.OOXML
-import Text.Pandoc.XML.Light as XML
+import Text.Pandoc.XML.Light as XML hiding (Attr)
 import qualified Data.Text as T
+import qualified Text.Pandoc.Writers.GridTable as Grid
 
 tableToOpenXML :: PandocMonad m
                => ([Block] -> WS m [Content])
-               -> Caption
-               -> [ColSpec]
-               -> TableHead
-               -> [TableBody]
-               -> TableFoot
+               -> Grid.Table
                -> WS m [Content]
-tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
-  let (caption, aligns, widths, headers, rows) =
-        toLegacyTable blkCapt specs thead tbody tfoot
+tableToOpenXML blocksToOpenXML gridTable = do
   setFirstPara
   modify $ \s -> s { stInTable = True }
-  let captionStr = stringify caption
-  caption' <- if null caption
-                 then return []
-                 else withParaPropM (pStyleM "Table Caption")
-                      $ blocksToOpenXML [Para caption]
-  let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
-  -- Table cells require a <w:p> element, even an empty one!
-  -- Not in the spec but in Word 2007, 2010. See #4953. And
-  -- apparently the last element must be a <w:p>, see #6983.
-  let cellToOpenXML (al, cell) = do
-        es <- withParaProp (alignmentFor al) $ blocksToOpenXML cell
-        return $
-          case reverse (onlyElems es) of
-            b:e:_ | qName (elName b) == "bookmarkEnd"
-                  , qName (elName e) == "p" -> es
-            e:_   | qName (elName e) == "p" -> es
-            _ -> es ++ [Elem $ mknode "w:p" [] ()]
-  headers' <- mapM cellToOpenXML $ zip aligns headers
-  rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
-  compactStyle <- pStyleM "Compact"
-  let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
-  let mkcell contents = mknode "w:tc" []
-                            $ if null contents
-                                 then emptyCell'
-                                 else contents
-  let mkrow cells =
-         mknode "w:tr" [] $
-           map mkcell cells
-  let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
-  let fullrow = 5000 -- 100% specified in pct
-  let (rowwidth :: Int) = round $ fullrow * sum widths
-  let mkgridcol w = mknode "w:gridCol"
-                       [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
-  let hasHeader = not $ all null headers
-  modify $ \s -> s { stInTable = False }
+  let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) =
+        gridTable
+  let (Caption _maybeShortCaption captionBlocks) = caption
+  let captionStr = stringify captionBlocks
+  captionXml <- if null captionBlocks
+                then return []
+                else withParaPropM (pStyleM "Table Caption")
+                     $ blocksToOpenXML captionBlocks
+  head' <- cellGridToOpenXML blocksToOpenXML thead
+  bodies <- mapM (cellGridToOpenXML blocksToOpenXML) tbodies
+  foot' <- cellGridToOpenXML blocksToOpenXML tfoot
+
+  let hasHeader = not . null . indices . partRowAttrs $ thead
   -- for compatibility with Word <= 2007, we include a val with a bitmask
   -- 0×0020  Apply first row conditional formatting
   -- 0×0040  Apply last row conditional formatting
@@ -80,18 +54,12 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
   -- 0×0100  Apply last column conditional formatting
   -- 0×0200  Do not apply row banding conditional formatting
   -- 0×0400  Do not apply column banding conditional formattin
-  let tblLookVal :: Int
-      tblLookVal = if hasHeader then 0x20 else 0
-  return $
-    caption' ++
-    [Elem $
-     mknode "w:tbl" []
-      ( mknode "w:tblPr" []
-        (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" (if all (== 0) widths
-                              then [("w:type", "auto"), ("w:w", "0")]
-                              else [("w:type", "pct"), ("w:w", tshow rowwidth)])
-                            () :
+  let tblLookVal = if hasHeader then (0x20 :: Int) else 0
+  let (gridCols, tblWattr) = tableLayout (elems colspecs)
+  let tbl = mknode "w:tbl" []
+        ( mknode "w:tblPr" []
+          ( mknode "w:tblStyle" [("w:val","Table")] () :
+            mknode "w:tblW" tblWattr () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
                                ,("w:lastRow","0")
                                ,("w:firstColumn","0")
@@ -100,15 +68,14 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
                                ,("w:noVBand","0")
                                ,("w:val", T.pack $ printf "%04x" tblLookVal)
                                ] () :
-          [ mknode "w:tblCaption" [("w:val", captionStr)] ()
-          | not (null caption) ] )
-      : mknode "w:tblGrid" []
-        (if all (==0) widths
-            then []
-            else map mkgridcol widths)
-      : [ mkrow headers' | hasHeader ] ++
-      map mkrow rows'
-      )]
+            [ mknode "w:tblCaption" [("w:val", captionStr)] ()
+            | not (T.null captionStr) ]
+          )
+          : mknode "w:tblGrid" [] gridCols
+          : head' ++ mconcat bodies ++ foot'
+        )
+  modify $ \s -> s { stInTable = False }
+  return $ captionXml ++ [Elem tbl]
 
 alignmentToString :: Alignment -> Text
 alignmentToString = \case
@@ -116,3 +83,104 @@ alignmentToString = \case
   AlignRight   -> "right"
   AlignCenter  -> "center"
   AlignDefault -> "left"
+
+tableLayout :: [ColSpec] -> ([Element], [(Text, Text)])
+tableLayout specs =
+  let
+    textwidth = 7920  -- 5.5 in in twips       (1 twip == 1/20 pt)
+    fullrow   = 5000  -- 100% specified in pct (1 pct  == 1/50th of a percent)
+    ncols = length specs
+    getWidth = \case
+      ColWidth n -> n
+      _          -> 0
+    widths = map (getWidth . snd) specs
+    rowwidth  = round (fullrow * sum widths) :: Int
+    widthToTwips w = floor (textwidth * w)   :: Int
+    mkGridCol w = mknode "w:gridCol" [("w:w", tshow (widthToTwips w))] ()
+  in if all (== 0) widths
+     then ( replicate ncols $ mkGridCol (1.0 / fromIntegral ncols)
+          , [ ("w:type", "auto"), ("w:w", "0")])
+     else ( map mkGridCol widths
+          , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ])
+
+cellGridToOpenXML :: PandocMonad m
+                   => ([Block] -> WS m [Content])
+                   -> Part
+                   -> WS m [Element]
+cellGridToOpenXML blocksToOpenXML part@(Part _ _ rowAttrs) =
+  if null (indices rowAttrs)
+  then return mempty
+  else mapM (rowToOpenXML blocksToOpenXML) $ partToRows part
+
+data OOXMLCell
+  = OOXMLCell Attr Alignment RowSpan ColSpan [Block]
+  | OOXMLCellMerge ColSpan
+
+data OOXMLRow = OOXMLRow Attr [OOXMLCell]
+
+partToRows :: Part -> [OOXMLRow]
+partToRows part =
+  let
+    toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell]
+    toOOXMLCell ridx cidx = \case
+      ContentCell attr align rowspan colspan blocks ->
+        [OOXMLCell attr align rowspan colspan blocks]
+      ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' ->
+        case (partCellArray part)!idx' of
+          (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan]
+          x -> error $ "Content cell expected, got, " ++ show x ++
+                       " at index " ++ show idx'
+      _ -> mempty
+    mkRow :: (RowIndex, Attr) -> OOXMLRow
+    mkRow (ridx, attr) = OOXMLRow attr
+                       . concatMap (uncurry $ toOOXMLCell ridx)
+                       . assocs
+                       . rowArray ridx
+                       $ partCellArray part
+  in map mkRow $ assocs (partRowAttrs part)
+
+rowToOpenXML :: PandocMonad m
+             => ([Block] -> WS m [Content])
+             -> OOXMLRow
+             -> WS m Element
+rowToOpenXML blocksToOpenXML (OOXMLRow _attr cells) = do
+  xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells
+  -- let align' = case align of
+  --       AlignDefault -> colAlign
+  --       _            -> align
+  return $ mknode "w:tr" [] xmlcells
+
+ooxmlCellToOpenXML :: PandocMonad m
+                   => ([Block] -> WS m [Content])
+                   -> OOXMLCell
+                   -> WS m Element
+ooxmlCellToOpenXML blocksToOpenXML = \case
+  OOXMLCellMerge (ColSpan colspan) -> do
+    return $ mknode "w:tc" []
+      [ mknode "w:tcPr" [] [ mknode "w:gridSpan" [("w:val", tshow colspan)] ()
+                           , mknode "w:vMerge"   [("w:val", "continue")] () ]
+      , mknode "w:p" [] [mknode "w:pPr" [] ()]]
+  OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do
+    -- we handle rowspans via 'leftpad', so we can ignore those here
+
+    compactStyle <- pStyleM "Compact"
+    es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents
+    -- Table cells require a <w:p> element, even an empty one!
+    -- Not in the spec but in Word 2007, 2010. See #4953. And
+    -- apparently the last element must be a <w:p>, see #6983.
+    return . mknode "w:tc" [] $
+      Elem
+       (mknode "w:tcPr" [] ([ mknode "w:gridSpan" [("w:val", tshow colspan)] ()
+                            | colspan > 1] ++
+                            [ mknode "w:vMerge" [("w:val", "restart")] ()
+                            | rowspan > RowSpan 1 ])) :
+      if null contents
+      then [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
+      else case reverse (onlyElems es) of
+             b:e:_ | qName (elName b) == "bookmarkEnd"  -- y tho?
+                   , qName (elName e) == "p" -> es
+             e:_   | qName (elName e) == "p" -> es
+             _ -> es ++ [Elem $ mknode "w:p" [] ()]
+
+alignmentFor :: Alignment -> Element
+alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
diff --git a/src/Text/Pandoc/Writers/GridTable.hs b/src/Text/Pandoc/Writers/GridTable.hs
index c6f4cf456..bc468febc 100644
--- a/src/Text/Pandoc/Writers/GridTable.hs
+++ b/src/Text/Pandoc/Writers/GridTable.hs
@@ -87,8 +87,8 @@ toTable attr caption colSpecs  thead tbodies tfoot =
     tbGrids = map bodyToGrid tbodies
     tfGrid = let (TableFoot footAttr rows) = tfoot
              in rowsToPart footAttr rows
-    bodyToGrid (TableBody bodyAttr _rowHeadCols _headRows rows) =
-      rowsToPart bodyAttr rows
+    bodyToGrid (TableBody bodyAttr _rowHeadCols headRows rows) =
+      rowsToPart bodyAttr (headRows ++ rows)
 
 data BuilderCell
   = FilledCell GridCell
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index cab3fc31c..e60bb303f 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 9238c7e20..a4037cf32 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 6f0379def..bc1bc27f8 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
-- 
cgit v1.2.3


From 8357b835d9c6d17f32bded56aa24059c2f6e0678 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Wed, 5 May 2021 19:00:53 +0200
Subject: App: allow tabs expansion even if file-scope is used

Tabs in plain-text inputs are now handled correctly, even if the
`--file-scope` flag is used.

Closes: #6709
---
 src/Text/Pandoc/App.hs | 18 +++++++++++-------
 test/command/6709.md   | 11 +++++++++++
 2 files changed, 22 insertions(+), 7 deletions(-)
 create mode 100644 test/command/6709.md

(limited to 'src')

diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 4e8c9f2ab..96e4b5f47 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -255,13 +255,17 @@ convertWithOpts opts = do
     let sourceToDoc :: [FilePath] -> PandocIO Pandoc
         sourceToDoc sources' =
            case reader of
-                TextReader r
-                  | optFileScope opts || readerNameBase == "json" ->
-                      mconcat <$> mapM (readSource >=> r readerOpts) sources'
-                  | otherwise ->
-                      readSources sources' >>= r readerOpts
-                ByteStringReader r ->
-                  mconcat <$> mapM (readFile' >=> r readerOpts) sources'
+             TextReader r
+               | readerNameBase == "json" ->
+                   mconcat <$> mapM (readSource >=> r readerOpts) sources'
+               | optFileScope opts  ->
+                   -- Read source and convert tabs (see #6709)
+                   let readSource' = fmap convertTabs . readSource
+                   in mconcat <$> mapM (readSource' >=> r readerOpts) sources'
+               | otherwise ->
+                   readSources sources' >>= r readerOpts
+             ByteStringReader r ->
+               mconcat <$> mapM (readFile' >=> r readerOpts) sources'
 
 
     when (readerNameBase == "markdown_github" ||
diff --git a/test/command/6709.md b/test/command/6709.md
new file mode 100644
index 000000000..d6d514552
--- /dev/null
+++ b/test/command/6709.md
@@ -0,0 +1,11 @@
+Tabs must be expanded even if --file-scope is used
+````
+% pandoc -t native --file-scope --tab-stop=2
+```
+if true; then
+	echo "yup"
+fi
+```
+^D
+[CodeBlock ("",[],[]) "if true; then\n  echo \"yup\"\nfi"]
+````
-- 
cgit v1.2.3


From 295d93e96b1853c2ff4658aa7206ea1329024fab Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 7 May 2021 17:10:56 +0200
Subject: ConTeXt writer: support blank lines in line blocks.

Fixes: #6564

Thanks to @denismaier.
---
 src/Text/Pandoc/Writers/ConTeXt.hs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index f14b1d894..b694437d8 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -178,8 +178,12 @@ blockToConTeXt (Para lst) = do
   contents <- inlineListToConTeXt lst
   return $ contents <> blankline
 blockToConTeXt (LineBlock lns) = do
-  doclines <- nowrap . vcat <$> mapM inlineListToConTeXt lns
-  return $ "\\startlines" $$ doclines $$ "\\stoplines" <> blankline
+  let emptyToBlankline doc = if isEmpty doc
+                             then blankline
+                             else doc
+  doclines <- mapM inlineListToConTeXt lns
+  let contextLines = vcat . map emptyToBlankline $ doclines
+  return $ "\\startlines" $$ contextLines $$ "\\stoplines" <> blankline
 blockToConTeXt (BlockQuote lst) = do
   contents <- blockListToConTeXt lst
   return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline
-- 
cgit v1.2.3


From 6e45607f9948f45b2e94f54b4825b667ca0d5441 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 1 May 2021 13:17:45 -0700
Subject: Change reader types, allowing better tracking of source positions.

Previously, when multiple file arguments were provided, pandoc
simply concatenated them and passed the contents to the readers,
which took a Text argument.

As a result, the readers had no way of knowing which file
was the source of any particular bit of text.  This meant that
we couldn't report accurate source positions on errors or
include accurate source positions as attributes in the AST.
More seriously, it meant that we couldn't resolve resource
paths relative to the files containing them
(see e.g. #5501, #6632, #6384, #3752).

Add Text.Pandoc.Sources (exported module), with a `Sources` type
and a `ToSources` class.  A `Sources` wraps a list of `(SourcePos,
Text)` pairs. [API change] A parsec `Stream` instance is provided for
`Sources`.  The module also exports versions of parsec's `satisfy` and
other Char parsers that track source positions accurately from a
`Sources` stream (or any instance of the new `UpdateSourcePos` class).

Text.Pandoc.Parsing now exports these modified Char parsers instead of
the ones parsec provides.  Modified parsers to use a `Sources` as stream
[API change].

The readers that previously took a `Text` argument have been
modified to take any instance of `ToSources`. So, they may still
be used with a `Text`, but they can also be used with a `Sources`
object.

In Text.Pandoc.Error, modified the constructor PandocParsecError
to take a `Sources` rather than a `Text` as first argument,
so parse error locations can be accurately reported.

T.P.Error: showPos, do not print "-" as source name.
---
 .hlint.yaml                               |   1 +
 pandoc.cabal                              |   1 +
 src/Text/Pandoc/App.hs                    |   8 +-
 src/Text/Pandoc/App/Opt.hs                |   2 +-
 src/Text/Pandoc/Citeproc/BibTeX.hs        |  14 +-
 src/Text/Pandoc/Error.hs                  |  39 ++-
 src/Text/Pandoc/Logging.hs                |   8 +-
 src/Text/Pandoc/Parsing.hs                | 474 ++++++++++++++++++------------
 src/Text/Pandoc/Readers.hs                |  97 +++---
 src/Text/Pandoc/Readers/BibTeX.hs         |  13 +-
 src/Text/Pandoc/Readers/CSV.hs            |  14 +-
 src/Text/Pandoc/Readers/CommonMark.hs     |  70 +++--
 src/Text/Pandoc/Readers/Creole.hs         |  11 +-
 src/Text/Pandoc/Readers/CslJson.hs        |   9 +-
 src/Text/Pandoc/Readers/DocBook.hs        |  11 +-
 src/Text/Pandoc/Readers/DokuWiki.hs       |  17 +-
 src/Text/Pandoc/Readers/FB2.hs            |   9 +-
 src/Text/Pandoc/Readers/HTML.hs           |  33 ++-
 src/Text/Pandoc/Readers/Haddock.hs        |  12 +-
 src/Text/Pandoc/Readers/Ipynb.hs          |   8 +-
 src/Text/Pandoc/Readers/JATS.hs           |  11 +-
 src/Text/Pandoc/Readers/Jira.hs           |  16 +-
 src/Text/Pandoc/Readers/LaTeX.hs          |  23 +-
 src/Text/Pandoc/Readers/LaTeX/Citation.hs |   2 +-
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs  |  12 +-
 src/Text/Pandoc/Readers/LaTeX/Types.hs    |  15 +-
 src/Text/Pandoc/Readers/Man.hs            |  29 +-
 src/Text/Pandoc/Readers/Markdown.hs       |  49 +--
 src/Text/Pandoc/Readers/MediaWiki.hs      |  13 +-
 src/Text/Pandoc/Readers/Metadata.hs       |  26 +-
 src/Text/Pandoc/Readers/Muse.hs           |  17 +-
 src/Text/Pandoc/Readers/Native.hs         |  12 +-
 src/Text/Pandoc/Readers/OPML.hs           |  13 +-
 src/Text/Pandoc/Readers/Org.hs            |  11 +-
 src/Text/Pandoc/Readers/Org/Inlines.hs    |   3 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs    |   2 +-
 src/Text/Pandoc/Readers/RST.hs            |  76 ++---
 src/Text/Pandoc/Readers/Roff.hs           |  34 +--
 src/Text/Pandoc/Readers/TWiki.hs          |  12 +-
 src/Text/Pandoc/Readers/Textile.hs        | 172 +++++------
 src/Text/Pandoc/Readers/TikiWiki.hs       |  12 +-
 src/Text/Pandoc/Readers/Txt2Tags.hs       |  12 +-
 src/Text/Pandoc/Readers/Vimwiki.hs        |  19 +-
 src/Text/Pandoc/Shared.hs                 |   1 +
 src/Text/Pandoc/Sources.hs                | 195 ++++++++++++
 test/Tests/Readers/Markdown.hs            |   4 +-
 46 files changed, 1025 insertions(+), 617 deletions(-)
 create mode 100644 src/Text/Pandoc/Sources.hs

(limited to 'src')

diff --git a/.hlint.yaml b/.hlint.yaml
index 350794803..ad0f7ddb9 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -9,6 +9,7 @@
 # Ignore some builtin hints
 #
 - ignore: {name: "Avoid lambda"}
+- ignore: {name: "Use bimap"}
 - ignore: {name: "Eta reduce"}
 - ignore: {name: "Evaluate"}
 - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained
diff --git a/pandoc.cabal b/pandoc.cabal
index 8ea3aa681..de7951c54 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -513,6 +513,7 @@ library
                    Text.Pandoc.Options,
                    Text.Pandoc.Extensions,
                    Text.Pandoc.Shared,
+                   Text.Pandoc.Sources,
                    Text.Pandoc.MediaBag,
                    Text.Pandoc.Error,
                    Text.Pandoc.Filter,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 96e4b5f47..98b072ffb 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -160,9 +160,11 @@ convertWithOpts opts = do
                                     else optTabStop opts)
 
 
-    let readSources :: [FilePath] -> PandocIO Text
-        readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$>
-                              mapM readSource srcs
+    let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
+        readSources srcs =
+          mapM (\fp -> do
+                   t <- readSource fp
+                   return (if fp == "-" then "" else fp, convertTabs t)) srcs
 
 
     outputSettings <- optToOutputSettings opts
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index c72f63464..d54d932b7 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -687,7 +687,7 @@ yamlToMeta (Mapping _ _ m) =
   where
     pMetaString = pure . MetaString <$> P.manyChar P.anyChar
     runEverything p =
-      runPure (P.readWithM p (def :: P.ParserState) "")
+      runPure (P.readWithM p (def :: P.ParserState) ("" :: Text))
       >>= fmap (Meta . flip P.runF def)
 yamlToMeta _ = return mempty
 
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 510e56f9c..f6833000c 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -59,10 +59,11 @@ data Variant = Bibtex | Biblatex
   deriving (Show, Eq, Ord)
 
 -- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
-readBibtexString :: Variant           -- ^ bibtex or biblatex
+readBibtexString :: ToSources a
+                 => Variant           -- ^ bibtex or biblatex
                  -> Locale            -- ^ Locale
                  -> (Text -> Bool)    -- ^ Filter on citation ids
-                 -> Text              -- ^ bibtex/biblatex text
+                 -> a                 -- ^ bibtex/biblatex text
                  -> Either ParseError [Reference Inlines]
 readBibtexString variant locale idpred contents = do
   case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
@@ -70,7 +71,7 @@ readBibtexString variant locale idpred contents = do
                       filter (\item -> idpred (identifier item) &&
                                         entryType item /= "xdata"))
            (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
-           "" contents of
+           "" (toSources contents) of
           Left err -> Left err
           Right xs -> return xs
 
@@ -339,7 +340,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] []
 -- a map of bibtex "string" macros
 type StringMap = Map.Map Text Text
 
-type BibParser = Parser Text (Lang, StringMap)
+type BibParser = Parser Sources (Lang, StringMap)
 
 data Item = Item{ identifier :: Text
                 , sourcePos  :: SourcePos
@@ -804,7 +805,7 @@ bibEntries = do
                        (bibComment <|> bibPreamble <|> bibString))
 
 bibSkip :: BibParser ()
-bibSkip = () <$ take1WhileP (/='@')
+bibSkip = skipMany1 (satisfy (/='@'))
 
 bibComment :: BibParser ()
 bibComment = do
@@ -829,6 +830,9 @@ bibString = do
   updateState (\(l,m) -> (l, Map.insert k v m))
   return ()
 
+take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text
+take1WhileP f = T.pack <$> many1 (satisfy f)
+
 inBraces :: BibParser Text
 inBraces = do
   char '{'
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 8102f04cc..81eb41f85 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -23,26 +23,27 @@ import Control.Exception (Exception, displayException)
 import Data.Typeable (Typeable)
 import Data.Word (Word8)
 import Data.Text (Text)
+import Data.List (sortOn)
 import qualified Data.Text as T
+import Data.Ord (Down(..))
 import GHC.Generics (Generic)
 import Network.HTTP.Client (HttpException)
 import System.Exit (ExitCode (..), exitWith)
 import System.IO (stderr)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (Sources(..))
 import Text.Printf (printf)
 import Text.Parsec.Error
 import Text.Parsec.Pos hiding (Line)
 import Text.Pandoc.Shared (tshow)
 import Citeproc (CiteprocError, prettyCiteprocError)
 
-type Input = Text
-
 data PandocError = PandocIOError Text IOError
                  | PandocHttpError Text HttpException
                  | PandocShouldNeverHappenError Text
                  | PandocSomeError Text
                  | PandocParseError Text
-                 | PandocParsecError Input ParseError
+                 | PandocParsecError Sources ParseError
                  | PandocMakePDFError Text
                  | PandocOptionError Text
                  | PandocSyntaxMapError Text
@@ -81,22 +82,28 @@ renderError e =
       "Please report this to pandoc's developers: " <> s
     PandocSomeError s -> s
     PandocParseError s -> s
-    PandocParsecError input err' ->
+    PandocParsecError (Sources inputs) err' ->
         let errPos = errorPos err'
             errLine = sourceLine errPos
             errColumn = sourceColumn errPos
-            ls = T.lines input <> [""]
-            errorInFile = if length ls > errLine - 1
-                            then T.concat ["\n", ls !! (errLine - 1)
-                                          ,"\n", T.replicate (errColumn - 1) " "
-                                          ,"^"]
-                        else ""
-        in  "\nError at " <> tshow  err' <>
-                     -- if error comes from a chunk or included file,
-                     -- then we won't get the right text this way:
-                     if sourceName errPos == "source"
-                        then errorInFile
-                        else ""
+            errFile = sourceName errPos
+            errorInFile =
+              case sortOn (Down . sourceLine . fst)
+                      [ (pos,t)
+                        | (pos,t) <- inputs
+                        , sourceName pos == errFile
+                        , sourceLine pos <= errLine
+                      ] of
+                []  -> ""
+                ((pos,txt):_) ->
+                  let ls = T.lines txt <> [""]
+                      ln = errLine - sourceLine pos
+                   in if length ls > ln - 1
+                         then T.concat ["\n", ls !! (ln - 1)
+                                       ,"\n", T.replicate (errColumn - 1) " "
+                                       ,"^"]
+                         else ""
+        in  "\nError at " <> tshow  err' <> errorInFile
     PandocMakePDFError s -> s
     PandocOptionError s -> s
     PandocSyntaxMapError s -> s
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index efd2188f1..8c7292b69 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -241,9 +241,11 @@ instance ToJSON LogMessage where
 showPos :: SourcePos -> Text
 showPos pos = Text.pack $ sn ++ "line " ++
      show (sourceLine pos) ++ " column " ++ show (sourceColumn pos)
-  where sn = if sourceName pos == "source" || sourceName pos == ""
-                then ""
-                else sourceName pos ++ " "
+  where
+    sn' = sourceName pos
+    sn = if sn' == "source" || sn' == "" || sn' == "-"
+            then ""
+            else sn' ++ " "
 
 encodeLogMessages :: [LogMessage] -> BL.ByteString
 encodeLogMessages ms =
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 37ab0adaa..11c4c7a62 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -5,7 +5,6 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE IncoherentInstances        #-}
 {-# LANGUAGE MultiParamTypeClasses      #-}
-{-# LANGUAGE ViewPatterns               #-}
 {-# LANGUAGE OverloadedStrings          #-}
 {- |
    Module      : Text.Pandoc.Parsing
@@ -19,8 +18,7 @@
 A utility library with parsers used in pandoc readers.
 -}
 
-module Text.Pandoc.Parsing ( take1WhileP,
-                             takeP,
+module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              countChar,
                              textStr,
                              anyLine,
@@ -134,22 +132,10 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              getInput,
                              setInput,
                              unexpected,
-                             char,
-                             letter,
-                             digit,
-                             alphaNum,
                              skipMany,
                              skipMany1,
-                             spaces,
-                             space,
-                             anyChar,
-                             satisfy,
-                             newline,
-                             string,
                              count,
                              eof,
-                             noneOf,
-                             oneOf,
                              lookAhead,
                              notFollowedBy,
                              many,
@@ -174,6 +160,8 @@ module Text.Pandoc.Parsing ( take1WhileP,
                              SourcePos,
                              getPosition,
                              setPosition,
+                             sourceName,
+                             setSourceName,
                              sourceColumn,
                              sourceLine,
                              setSourceColumn,
@@ -189,16 +177,25 @@ module Text.Pandoc.Parsing ( take1WhileP,
 where
 
 import Control.Monad.Identity
+    ( guard,
+      join,
+      unless,
+      when,
+      void,
+      liftM2,
+      liftM,
+      Identity(..),
+      MonadPlus(mzero) )
 import Control.Monad.Reader
+    ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
 import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
                   isPunctuation, isSpace, ord, toLower, toUpper)
-import Data.Default
+import Data.Default ( Default(..) )
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
 import qualified Data.Set as Set
-import Data.String
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -207,22 +204,108 @@ import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report)
 import Text.Pandoc.Definition
+    ( Target,
+      nullMeta,
+      nullAttr,
+      Meta,
+      ColWidth(ColWidthDefault, ColWidth),
+      TableFoot(TableFoot),
+      TableBody(TableBody),
+      Attr,
+      TableHead(TableHead),
+      Row(..),
+      Alignment(..),
+      Inline(Str),
+      ListNumberDelim(..),
+      ListAttributes,
+      ListNumberStyle(..) )
 import Text.Pandoc.Logging
+    ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) )
 import Text.Pandoc.Options
+    ( extensionEnabled,
+      Extension(Ext_old_dashes, Ext_tex_math_dollars,
+                Ext_tex_math_single_backslash, Ext_tex_math_double_backslash,
+                Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart),
+      ReaderOptions(readerTabStop, readerColumns, readerExtensions) )
 import Text.Pandoc.Readers.LaTeX.Types (Macro)
 import Text.Pandoc.Shared
+    ( uniqueIdent,
+      tshow,
+      mapLeft,
+      compactify,
+      trim,
+      trimr,
+      splitTextByIndices,
+      safeRead,
+      trimMath,
+      schemes,
+      escapeURI )
+import Text.Pandoc.Sources
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Pandoc.XML (fromEntities)
-import Text.Parsec hiding (token)
-import Text.Parsec.Pos (initialPos, newPos, updatePosString)
-
-import Control.Monad.Except
+import Text.Parsec
+    ( between,
+      setSourceName,
+      Parsec,
+      Column,
+      Line,
+      incSourceLine,
+      incSourceColumn,
+      setSourceLine,
+      setSourceColumn,
+      sourceLine,
+      sourceColumn,
+      sourceName,
+      setSourceName,
+      setPosition,
+      getPosition,
+      updateState,
+      setState,
+      getState,
+      optionMaybe,
+      optional,
+      option,
+      endBy1,
+      endBy,
+      sepEndBy1,
+      sepEndBy,
+      sepBy1,
+      sepBy,
+      try,
+      choice,
+      (<?>),
+      (<|>),
+      manyTill,
+      many1,
+      many,
+      notFollowedBy,
+      lookAhead,
+      eof,
+      count,
+      skipMany1,
+      skipMany,
+      unexpected,
+      setInput,
+      getInput,
+      anyToken,
+      tokenPrim,
+      parse,
+      runParserT,
+      runParser,
+      ParseError,
+      ParsecT,
+      SourcePos,
+      Stream(..) )
+import Text.Parsec.Pos (initialPos, newPos)
+import Control.Monad.Except ( MonadError(throwError) )
 import Text.Pandoc.Error
+    ( PandocError(PandocParseError, PandocParsecError) )
 
 type Parser t s = Parsec t s
 
 type ParserT = ParsecT
 
+
 -- | Reader monad wrapping the parser state. This is used to possibly delay
 -- evaluation until all relevant information has been parsed and made available
 -- in the parser state.
@@ -251,70 +334,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where
   mappend = (<>)
 
 -- | Like @count@, but packs its result
-countChar :: (Stream s m Char, Monad m)
+countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m)
           => Int
           -> ParsecT s st m Char
           -> ParsecT s st m Text
 countChar n = fmap T.pack . count n
 
 -- | Like @string@, but uses @Text@.
-textStr :: Stream s m Char => Text -> ParsecT s u m Text
+textStr :: (Stream s m Char, UpdateSourcePos s Char)
+        => Text -> ParsecT s u m Text
 textStr t = string (T.unpack t) $> t
 
--- | Parse characters while a predicate is true.
-take1WhileP :: Monad m
-           => (Char -> Bool)
-           -> ParserT Text st m Text
-take1WhileP f = do
-  -- needed to persuade parsec that this won't match an empty string:
-  c <- satisfy f
-  inp <- getInput
-  pos <- getPosition
-  let (t, rest) = T.span f inp
-  setInput rest
-  setPosition $
-    if f '\t' || f '\n'
-       then updatePosString pos $ T.unpack t
-       else incSourceColumn pos (T.length t)
-  return $ T.singleton c <> t
-
--- Parse n characters of input (or the rest of the input if
--- there aren't n characters).
-takeP :: Monad m => Int -> ParserT Text st m Text
-takeP n = do
-  guard (n > 0)
-  -- faster than 'count n anyChar'
-  inp <- getInput
-  pos <- getPosition
-  let (xs, rest) = T.splitAt n inp
-  -- needed to persuade parsec that this won't match an empty string:
-  anyChar
-  setInput rest
-  setPosition $ updatePosString pos $ T.unpack xs
-  return xs
-
--- | Parse any line of text
-anyLine :: Monad m => ParserT Text st m Text
+
+-- | Parse any line of text, returning the contents without the
+-- final newline.
+anyLine :: Monad m => ParserT Sources st m Text
 anyLine = do
   -- This is much faster than:
   -- manyTill anyChar newline
   inp <- getInput
-  pos <- getPosition
-  case T.break (=='\n') inp of
-       (this, T.uncons -> Just ('\n', rest)) -> do
-         -- needed to persuade parsec that this won't match an empty string:
-         anyChar
-         setInput rest
-         setPosition $ incSourceLine (setSourceColumn pos 1) 1
-         return this
-       _ -> mzero
+  case inp of
+    Sources [] -> mzero
+    Sources ((fp,t):inps) ->
+      -- we assume that lines don't span different input files
+      case T.break (=='\n') t of
+           (this, rest)
+             | T.null rest
+             , not (null inps) ->
+                -- line may span different input files, so do it
+                 -- character by character
+                 T.pack <$> manyTill anyChar newline
+             | otherwise -> do --  either end of inputs or newline in rest
+                 setInput $ Sources ((fp, rest):inps)
+                 char '\n' -- needed so parsec knows we won't match empty string
+                           -- and so source pos is updated
+                 return this
 
 -- | Parse any line, include the final newline in the output
-anyLineNewline :: Monad m => ParserT Text st m Text
+anyLineNewline :: Monad m => ParserT Sources st m Text
 anyLineNewline = (<> "\n") <$> anyLine
 
 -- | Parse indent by specified number of spaces (or equiv. tabs)
-indentWith :: Stream s m Char
+indentWith :: (Stream s m Char, UpdateSourcePos s Char)
            => HasReaderOptions st
            => Int -> ParserT s st m Text
 indentWith num = do
@@ -399,11 +460,13 @@ notFollowedBy' p  = try $ join $  do  a <- try p
                                   return (return ())
 -- (This version due to Andrew Pimlott on the Haskell mailing list.)
 
-oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
+oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char)
+              => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text
 oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack
 
 -- TODO: This should be re-implemented in a Text-aware way
-oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
+oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char)
+               => (Char -> Char -> Bool) -> [String] -> ParserT s st m String
 oneOfStrings'' _ []   = Prelude.fail "no strings"
 oneOfStrings'' matches strs = try $ do
   c <- anyChar
@@ -418,14 +481,16 @@ oneOfStrings'' matches strs = try $ do
 -- | Parses one of a list of strings.  If the list contains
 -- two strings one of which is a prefix of the other, the longer
 -- string will be matched if possible.
-oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char)
+             => [Text] -> ParserT s st m Text
 oneOfStrings = oneOfStrings' (==)
 
 -- | Parses one of a list of strings (tried in order), case insensitive.
 
 -- TODO: This will not be accurate with general Unicode (neither
 -- Text.toLower nor Text.toCaseFold can be implemented with a map)
-oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text
+oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char)
+               => [Text] -> ParserT s st m Text
 oneOfStringsCI = oneOfStrings' ciMatch
   where ciMatch x y = toLower' x == toLower' y
         -- this optimizes toLower by checking common ASCII case
@@ -436,11 +501,13 @@ oneOfStringsCI = oneOfStrings' ciMatch
                    | otherwise = toLower c
 
 -- | Parses a space or tab.
-spaceChar :: Stream s m Char => ParserT s st m Char
+spaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
 
 -- | Parses a nonspace, nonnewline character.
-nonspaceChar :: Stream s m Char => ParserT s st m Char
+nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char)
+             => ParserT s st m Char
 nonspaceChar = satisfy (not . isSpaceChar)
 
 isSpaceChar :: Char -> Bool
@@ -451,21 +518,24 @@ isSpaceChar '\r' = True
 isSpaceChar _    = False
 
 -- | Skips zero or more spaces or tabs.
-skipSpaces :: Stream s m Char => ParserT s st m ()
+skipSpaces :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m ()
 skipSpaces = skipMany spaceChar
 
 -- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: Stream s m Char => ParserT s st m Char
+blankline :: (Stream s m Char, UpdateSourcePos s Char)
+          => ParserT s st m Char
 blankline = try $ skipSpaces >> newline
 
 -- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: Stream s m Char => ParserT s st m Text
+blanklines :: (Stream s m Char, UpdateSourcePos s Char)
+           => ParserT s st m Text
 blanklines = T.pack <$> many1 blankline
 
 -- | Gobble n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleSpaces :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m ()
+             => Int -> ParserT Sources st m ()
 gobbleSpaces 0 = return ()
 gobbleSpaces n
   | n < 0     = error "gobbleSpaces called with negative number"
@@ -473,18 +543,26 @@ gobbleSpaces n
       char ' ' <|> eatOneSpaceOfTab
       gobbleSpaces (n - 1)
 
-eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char
+eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char
 eatOneSpaceOfTab = do
-  char '\t'
+  lookAhead (char '\t')
+  pos <- getPosition
   tabstop <- getOption readerTabStop
+  -- replace the tab on the input stream with spaces
+  let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop)
   inp <- getInput
-  setInput $ T.replicate (tabstop - 1) " " <> inp
-  return ' '
+  setInput $
+    case inp of
+      Sources [] -> error "eatOneSpaceOfTab - empty Sources list"
+      Sources ((fp,t):rest) ->
+        -- drop the tab and add spaces
+        Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest)
+  char ' '
 
 -- | Gobble up to n spaces; if tabs are encountered, expand them
 -- and gobble some or all of their spaces, leaving the rest.
 gobbleAtMostSpaces :: (HasReaderOptions st, Monad m)
-                   => Int -> ParserT Text st m Int
+                   => Int -> ParserT Sources st m Int
 gobbleAtMostSpaces 0 = return 0
 gobbleAtMostSpaces n
   | n < 0     = error "gobbleAtMostSpaces called with negative number"
@@ -493,7 +571,8 @@ gobbleAtMostSpaces n
       (+ 1) <$> gobbleAtMostSpaces (n - 1)
 
 -- | Parses material enclosed between start and end parsers.
-enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser
+enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char)
+         => ParserT s st m t   -- ^ start parser
          -> ParserT s st m end  -- ^ end parser
          -> ParserT s st m a    -- ^ content parser (to be used repeatedly)
          -> ParserT s st m [a]
@@ -501,39 +580,41 @@ enclosed start end parser = try $
   start >> notFollowedBy space >> many1Till parser end
 
 -- | Parse string, case insensitive.
-stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text
+stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char)
+              => Text -> ParserT s st m Text
 stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack
 
-stringAnyCase' :: Stream s m Char => String -> ParserT s st m String
+stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char)
+               => String -> ParserT s st m String
 stringAnyCase' [] = string ""
 stringAnyCase' (x:xs) = do
   firstChar <- char (toUpper x) <|> char (toLower x)
   rest <- stringAnyCase' xs
   return (firstChar:rest)
 
+-- TODO rewrite by just adding to Sources stream?
 -- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: (Stream s m Char, IsString s)
-                => ParserT s st m r
+parseFromString :: Monad m
+                => ParserT Sources st m r
                 -> Text
-                -> ParserT s st m r
+                -> ParserT Sources st m r
 parseFromString parser str = do
   oldPos <- getPosition
-  setPosition $ initialPos " chunk"
+  setPosition $ initialPos "chunk"
   oldInput <- getInput
-  setInput $ fromString $ T.unpack str
+  setInput $ toSources str
   result <- parser
   spaces
-  eof
   setInput oldInput
   setPosition oldPos
   return result
 
 -- | Like 'parseFromString' but specialized for 'ParserState'.
 -- This resets 'stateLastStrPos', which is almost always what we want.
-parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u)
-                 => ParserT s u m a
+parseFromString' :: (Monad m, HasLastStrPosition u)
+                 => ParserT Sources u m a
                  -> Text
-                 -> ParserT s u m a
+                 -> ParserT Sources u m a
 parseFromString' parser str = do
   oldLastStrPos <- getLastStrPos <$> getState
   updateState $ setLastStrPos Nothing
@@ -542,7 +623,7 @@ parseFromString' parser str = do
   return res
 
 -- | Parse raw line block up to and including blank lines.
-lineClump :: Monad m => ParserT Text st m Text
+lineClump :: Monad m => ParserT Sources st m Text
 lineClump = blanklines
           <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine))
 
@@ -551,7 +632,7 @@ lineClump = blanklines
 -- pairs of open and close, which must be different. For example,
 -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
 -- and return "hello (there)".
-charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char
+charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char
                 -> ParserT s st m Text
 charsInBalanced open close parser = try $ do
   char open
@@ -570,7 +651,7 @@ charsInBalanced open close parser = try $ do
 -- Auxiliary functions for romanNumeral:
 
 -- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Stream s m Char => Bool                  -- ^ Uppercase if true
+romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool                  -- ^ Uppercase if true
              -> ParserT s st m Int
 romanNumeral upperCase = do
     let rchar uc = char $ if upperCase then uc else toLower uc
@@ -606,7 +687,7 @@ romanNumeral upperCase = do
 
 -- | Parses an email address; returns original and corresponding
 -- escaped mailto: URI.
-emailAddress :: Stream s m Char => ParserT s st m (Text, Text)
+emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
  where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom
                            in  (full, escapeURI $ "mailto:" <> full)
@@ -630,11 +711,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        isEmailPunct c    = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;"
 
 
-uriScheme :: Stream s m Char => ParserT s st m Text
+uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text
 uriScheme = oneOfStringsCI (Set.toList schemes)
 
 -- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: Stream s m Char => ParserT s st m (Text, Text)
+uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text)
 uri = try $ do
   scheme <- uriScheme
   char ':'
@@ -677,7 +758,7 @@ uri = try $ do
     uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk
                                    return (T.pack $ [l] ++ chunk ++ [r])
 
-mathInlineWith :: Stream s m Char  => Text -> Text -> ParserT s st m Text
+mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char)  => Text -> Text -> ParserT s st m Text
 mathInlineWith op cl = try $ do
   textStr op
   when (op == "$") $ notFollowedBy space
@@ -698,10 +779,10 @@ mathInlineWith op cl = try $ do
   notFollowedBy digit  -- to prevent capture of $5
   return $ trimMath $ T.concat words'
  where
-  inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text
+  inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text
   inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack
 
-  inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String
+  inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String
   inBalancedBraces' 0 "" = do
     c <- anyChar
     if c == '{'
@@ -718,13 +799,13 @@ mathInlineWith op cl = try $ do
          '{' -> inBalancedBraces' (numOpen + 1) (c:xs)
          _   -> inBalancedBraces' numOpen (c:xs)
 
-mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text
+mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text
 mathDisplayWith op cl = try $ fmap T.pack $ do
   textStr op
   many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline))
             (try $ textStr cl)
 
-mathDisplay :: (HasReaderOptions st, Stream s m Char)
+mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m Text
 mathDisplay =
       (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$")
@@ -733,7 +814,7 @@ mathDisplay =
   <|> (guardEnabled Ext_tex_math_double_backslash >>
        mathDisplayWith "\\\\[" "\\\\]")
 
-mathInline :: (HasReaderOptions st , Stream s m Char)
+mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m Text
 mathInline =
       (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$")
@@ -746,7 +827,7 @@ mathInline =
 -- displacement (the difference between the source column at the end
 -- and the source column at the beginning). Vertical displacement
 -- (source row) is ignored.
-withHorizDisplacement :: Stream s m Char
+withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char)
                       => ParserT s st m a  -- ^ Parser to apply
                       -> ParserT s st m (a, Int) -- ^ (result, displacement)
 withHorizDisplacement parser = do
@@ -758,30 +839,37 @@ withHorizDisplacement parser = do
 -- | Applies a parser and returns the raw string that was parsed,
 -- along with the value produced by the parser.
 withRaw :: Monad m
-        => ParsecT Text st m a
-        -> ParsecT Text st m (a, Text)
+        => ParsecT Sources st m a
+        -> ParsecT Sources st m (a, Text)
 withRaw parser = do
-  pos1 <- getPosition
-  inp <- getInput
+  inps1 <- getInput
   result <- parser
-  pos2 <- getPosition
-  let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
-  let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
-  let inplines = take ((l2 - l1) + 1) $ T.lines inp
-  let raw = case inplines of
-                []  -> ""
-                [l] -> T.take (c2 - c1) l
-                ls  -> T.unlines (init ls) <> T.take (c2 - 1) (last ls)
-  return (result, raw)
+  inps2 <- getInput
+  -- 'raw' is the difference between inps1 and inps2
+  return (result, sourcesDifference inps1 inps2)
+
+sourcesDifference :: Sources -> Sources -> Text
+sourcesDifference (Sources is1) (Sources is2) = go is1 is2
+ where
+   go inps1 inps2 =
+    case (inps1, inps2) of
+      ([], _) -> mempty
+      (_, []) -> mconcat $ map snd inps1
+      ((p1,t1):rest1, (p2, t2):rest2)
+        | p1 == p2
+        , t1 == t2  -> go rest1 rest2
+        | p1 == p2
+        , t1 /= t2  -> fromMaybe mempty $ T.stripSuffix t2 t1
+        | otherwise -> t1 <> go rest1 inps2
 
 -- | Parses backslash, then applies character parser.
-escaped :: Stream s m Char
+escaped :: (Stream s m Char, UpdateSourcePos s Char)
         => ParserT s st m Char  -- ^ Parser for character to escape
         -> ParserT s st m Char
 escaped parser = try $ char '\\' >> parser
 
 -- | Parse character entity.
-characterReference :: Stream s m Char => ParserT s st m Char
+characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 characterReference = try $ do
   char '&'
   ent <- many1Till nonspaceChar (char ';')
@@ -794,19 +882,19 @@ characterReference = try $ do
        _            -> Prelude.fail "entity not found"
 
 -- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperRoman = do
   num <- romanNumeral True
   return (UpperRoman, num)
 
 -- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerRoman = do
   num <- romanNumeral False
   return (LowerRoman, num)
 
 -- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 decimal = do
   num <- many1 digit
   return (Decimal, fromMaybe 1 $ safeRead $ T.pack num)
@@ -815,7 +903,7 @@ decimal = do
 -- returns (DefaultStyle, [next example number]).  The next
 -- example number is incremented in parser state, and the label
 -- (if present) is added to the label table.
-exampleNum :: Stream s m Char
+exampleNum :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s ParserState m (ListNumberStyle, Int)
 exampleNum = do
   char '@'
@@ -834,37 +922,37 @@ exampleNum = do
   return (Example, num)
 
 -- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 defaultNum = do
   char '#'
   return (DefaultStyle, 1)
 
 -- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 lowerAlpha = do
   ch <- satisfy isAsciiLower
   return (LowerAlpha, ord ch - ord 'a' + 1)
 
 -- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 upperAlpha = do
   ch <- satisfy isAsciiUpper
   return (UpperAlpha, ord ch - ord 'A' + 1)
 
 -- | Parses a roman numeral i or I
-romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int)
+romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int)
 romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
            (char 'I' >> return (UpperRoman, 1))
 
 -- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes
+anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes
 anyOrderedListMarker = choice
   [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
                            numParser <- [decimal, exampleNum, defaultNum, romanOne,
                            lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
 
 -- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: Stream s m Char
+inPeriod :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m (ListNumberStyle, Int)
          -> ParserT s st m ListAttributes
 inPeriod num = try $ do
@@ -876,7 +964,7 @@ inPeriod num = try $ do
   return (start, style, delim)
 
 -- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: Stream s m Char
+inOneParen :: (Stream s m Char, UpdateSourcePos s Char)
            => ParserT s st m (ListNumberStyle, Int)
            -> ParserT s st m ListAttributes
 inOneParen num = try $ do
@@ -885,7 +973,7 @@ inOneParen num = try $ do
   return (start, style, OneParen)
 
 -- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: Stream s m Char
+inTwoParens :: (Stream s m Char, UpdateSourcePos s Char)
             => ParserT s st m (ListNumberStyle, Int)
             -> ParserT s st m ListAttributes
 inTwoParens num = try $ do
@@ -896,7 +984,7 @@ inTwoParens num = try $ do
 
 -- | Parses an ordered list marker with a given style and delimiter,
 -- returns number.
-orderedListMarker :: Stream s m Char
+orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char)
                   => ListNumberStyle
                   -> ListNumberDelim
                   -> ParserT s ParserState m Int
@@ -919,10 +1007,10 @@ orderedListMarker style delim = do
   return start
 
 -- | Parses a character reference and returns a Str element.
-charRef :: Stream s m Char => ParserT s st m Inline
+charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline
 charRef = Str . T.singleton <$> characterReference
 
-lineBlockLine :: Monad m => ParserT Text st m Text
+lineBlockLine :: Monad m => ParserT Sources st m Text
 lineBlockLine = try $ do
   char '|'
   char ' '
@@ -932,11 +1020,11 @@ lineBlockLine = try $ do
   continuations <- many (try $ char ' ' >> anyLine)
   return $ white <> T.unwords (line : continuations)
 
-blankLineBlockLine :: Stream s m Char => ParserT s st m Char
+blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char
 blankLineBlockLine = try (char '|' >> blankline)
 
 -- | Parses an RST-style line block and returns a list of strings.
-lineBlockLines :: Monad m => ParserT Text st m [Text]
+lineBlockLines :: Monad m => ParserT Sources st m [Text]
 lineBlockLines = try $ do
   lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine))
   skipMany blankline
@@ -944,7 +1032,8 @@ lineBlockLines = try $ do
 
 -- | Parse a table using 'headerParser', 'rowParser',
 -- 'lineParser', and 'footerParser'.
-tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith :: (Stream s m Char, UpdateSourcePos s Char,
+              HasReaderOptions st, Monad mf)
           => ParserT s st m (mf [Blocks], [Alignment], [Int])
           -> ([Int] -> ParserT s st m (mf [Blocks]))
           -> ParserT s st m sep
@@ -964,7 +1053,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do
 
 type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row])
 
-tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf)
+tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
+               HasReaderOptions st, Monad mf)
            => ParserT s st m (mf [Blocks], [Alignment], [Int])
            -> ([Int] -> ParserT s st m (mf [Blocks]))
            -> ParserT s st m sep
@@ -1013,20 +1103,19 @@ widthsFromIndices numColumns' indices =
 -- (which may be grid), then the rows,
 -- which may be grid, separated by blank lines, and
 -- ending with a footer (dashed line followed by blank line).
-gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                  Monad mf, IsString s)
-              => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf)
+              => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
               -> Bool                        -- ^ Headerless table
-              -> ParserT s st m (mf Blocks)
+              -> ParserT Sources st m (mf Blocks)
 gridTableWith blocks headless =
   tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
             (gridTableSep '-') gridTableFooter
 
-gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st,
-                   Monad mf, IsString s)
-               => ParserT s st m (mf Blocks)  -- ^ Block list parser
+gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st,
+                   Monad mf)
+               => ParserT Sources st m (mf Blocks)  -- ^ Block list parser
                -> Bool                        -- ^ Headerless table
-               -> ParserT s st m (TableComponents mf)
+               -> ParserT Sources st m (TableComponents mf)
 gridTableWith' blocks headless =
   tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
              (gridTableSep '-') gridTableFooter
@@ -1035,7 +1124,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text]
 gridTableSplitLine indices line = map removeFinalBar $ tail $
   splitTextByIndices (init indices) $ trimr line
 
-gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
+gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment)
 gridPart ch = do
   leftColon <- option False (True <$ char ':')
   dashes <- many1 (char ch)
@@ -1050,7 +1139,7 @@ gridPart ch = do
                        (False, False) -> AlignDefault
   return ((lengthDashes, lengthDashes + 1), alignment)
 
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
+gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)]
 gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
 
 removeFinalBar :: Text -> Text
@@ -1059,14 +1148,14 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|')
     go c = T.any (== c) " \t"
 
 -- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
+gridTableSep :: Monad m => Char -> ParserT Sources st m Char
 gridTableSep ch = try $ gridDashedLines ch >> return '\n'
 
 -- | Parse header for a grid table.
-gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
+gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st)
                 => Bool -- ^ Headerless table
-                -> ParserT s st m (mf Blocks)
-                -> ParserT s st m (mf [Blocks], [Alignment], [Int])
+                -> ParserT Sources st m (mf Blocks)
+                -> ParserT Sources st m (mf [Blocks], [Alignment], [Int])
 gridTableHeader True _ = do
   optional blanklines
   dashes <- gridDashedLines '-'
@@ -1089,17 +1178,17 @@ gridTableHeader False blocks = try $ do
   heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads
   return (heads, aligns, indices)
 
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text]
+gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text]
 gridTableRawLine indices = do
   char '|'
   line <- many1Till anyChar newline
   return (gridTableSplitLine indices $ T.pack line)
 
 -- | Parse row of grid table.
-gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st)
-             => ParserT s st m (mf Blocks)
+gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st)
+             => ParserT Sources st m (mf Blocks)
              -> [Int]
-             -> ParserT s st m (mf [Blocks])
+             -> ParserT Sources st m (mf [Blocks])
 gridTableRow blocks indices = do
   colLines <- many1 (gridTableRawLine indices)
   let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $
@@ -1120,34 +1209,38 @@ removeOneLeadingSpace xs =
            Just (c, _) -> c == ' '
 
 -- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s st m ()
+gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 gridTableFooter = optional blanklines
 
 ---
 
 -- | Removes the ParsecT layer from the monad transformer stack
-readWithM :: Monad m
-          => ParserT Text st m a -- ^ parser
-          -> st                  -- ^ initial state
-          -> Text                -- ^ input
+readWithM :: (Monad m, ToSources t)
+          => ParserT Sources st m a  -- ^ parser
+          -> st                      -- ^ initial state
+          -> t                       -- ^ input
           -> m (Either PandocError a)
 readWithM parser state input =
-    mapLeft (PandocParsecError input) <$> runParserT parser state "source" input
+    mapLeft (PandocParsecError sources)
+      <$> runParserT parser state (initialSourceName sources) sources
+ where
+   sources = toSources input
 
 -- | Parse a string with a given parser and state
-readWith :: Parser Text st a
+readWith :: ToSources t
+         => Parser Sources st a
          -> st
-         -> Text
+         -> t
          -> Either PandocError a
 readWith p t inp = runIdentity $ readWithM p t inp
 
 -- | Parse a string with @parser@ (for testing).
 testStringWith :: Show a
-               => ParserT Text ParserState Identity a
+               => ParserT Sources ParserState Identity a
                -> Text
                -> IO ()
 testStringWith parser str = UTF8.putStrLn $ tshow $
-                            readWith parser defaultParserState str
+                            readWith parser defaultParserState (toSources str)
 
 -- | Parsing options.
 data ParserState = ParserState
@@ -1394,19 +1487,23 @@ registerHeader (ident,classes,kvs) header' = do
           updateState $ updateIdentifierList $ Set.insert ident
         return (ident,classes,kvs)
 
-smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
+                     HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m Inlines
                  -> ParserT s st m Inlines
 smartPunctuation inlineParser = do
   guardEnabled Ext_smart
   choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ]
 
-quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+quoted :: (HasLastStrPosition st, HasQuoteContext st m,
+           Stream s m Char, UpdateSourcePos s Char)
        => ParserT s st m Inlines
        -> ParserT s st m Inlines
 quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
 
-singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
 singleQuoted inlineParser = do
@@ -1416,7 +1513,8 @@ singleQuoted inlineParser = do
      (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd)))
    <|> pure "\8217"
 
-doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char)
+doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
+                 Stream s m Char, UpdateSourcePos s Char)
              => ParserT s st m Inlines
              -> ParserT s st m Inlines
 doubleQuoted inlineParser = do
@@ -1433,13 +1531,14 @@ failIfInQuoteContext context = do
   context' <- getQuoteContext
   when (context' == context) $ Prelude.fail "already inside quotes"
 
-charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char
+charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char
 charOrRef cs =
   oneOf cs <|> try (do c <- characterReference
                        guard (c `elem` cs)
                        return c)
 
-singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char)
+singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
@@ -1449,7 +1548,7 @@ singleQuoteStart = do
     charOrRef "'\8216\145"
     void $ lookAhead (satisfy (not . isSpaceChar))
 
-singleQuoteEnd :: Stream s m Char
+singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 singleQuoteEnd = try $ do
   charOrRef "'\8217\146"
@@ -1457,7 +1556,7 @@ singleQuoteEnd = try $ do
 
 doubleQuoteStart :: (HasLastStrPosition st,
                      HasQuoteContext st m,
-                     Stream s m Char)
+                     Stream s m Char, UpdateSourcePos s Char)
                  => ParserT s st m ()
 doubleQuoteStart = do
   failIfInQuoteContext InDoubleQuote
@@ -1465,21 +1564,21 @@ doubleQuoteStart = do
   try $ do charOrRef "\"\8220\147"
            void $ lookAhead (satisfy (not . isSpaceChar))
 
-doubleQuoteEnd :: Stream s m Char
+doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
                => ParserT s st m ()
 doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
-apostrophe :: Stream s m Char => ParserT s st m Inlines
+apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
 apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217")
 
-doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines
+doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines
 doubleCloseQuote = B.str "\8221" <$ char '"'
 
-ellipses :: Stream s m Char
+ellipses :: (Stream s m Char, UpdateSourcePos s Char)
          => ParserT s st m Inlines
 ellipses = try (string "..." >> return (B.str "\8230"))
 
-dash :: (HasReaderOptions st, Stream s m Char)
+dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
      => ParserT s st m Inlines
 dash = try $ do
   oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions
@@ -1506,7 +1605,7 @@ nested p = do
   updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
   return res
 
-citeKey :: (Stream s m Char, HasLastStrPosition st)
+citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
         => ParserT s st m (Bool, Text)
 citeKey = try $ do
   guard =<< notAfterString
@@ -1575,10 +1674,11 @@ insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
 insertIncludedFile blocks totoks dirs f =
   runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
 
+-- TODO: replace this with something using addToSources.
 -- | Parse content of include file as future blocks. Circular includes result in
 -- an @PandocParseError@.
 insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT Text st m (Future st Blocks)
+                    => ParserT Sources st m (Future st Blocks)
                     -> [FilePath] -> FilePath
-                    -> ParserT Text st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p id
+                    -> ParserT Sources st m (Future st Blocks)
+insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs
index 7ae9db34f..5106f8058 100644
--- a/src/Text/Pandoc/Readers.hs
+++ b/src/Text/Pandoc/Readers.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE MonoLocalBinds      #-}
+{-# LANGUAGE RankNTypes          #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {- |
@@ -72,6 +73,7 @@ import Text.Pandoc.Error
 import Text.Pandoc.Extensions
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.CommonMark
+import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.Creole
 import Text.Pandoc.Readers.DocBook
 import Text.Pandoc.Readers.Docx
@@ -84,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.JATS (readJATS)
 import Text.Pandoc.Readers.Jira (readJira)
 import Text.Pandoc.Readers.LaTeX
-import Text.Pandoc.Readers.Markdown
 import Text.Pandoc.Readers.MediaWiki
 import Text.Pandoc.Readers.Muse
 import Text.Pandoc.Readers.Native
@@ -102,50 +103,52 @@ import Text.Pandoc.Readers.CSV
 import Text.Pandoc.Readers.CslJson
 import Text.Pandoc.Readers.BibTeX
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
+data Reader m = TextReader (forall a . ToSources a =>
+                                ReaderOptions -> a -> m Pandoc)
               | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
 
 -- | Association list of formats and readers.
 readers :: PandocMonad m => [(Text, Reader m)]
-readers = [ ("native"       , TextReader readNative)
-           ,("json"         , TextReader readJSON)
-           ,("markdown"     , TextReader readMarkdown)
-           ,("markdown_strict" , TextReader readMarkdown)
-           ,("markdown_phpextra" , TextReader readMarkdown)
-           ,("markdown_github" , TextReader readMarkdown)
-           ,("markdown_mmd",  TextReader readMarkdown)
-           ,("commonmark"   , TextReader readCommonMark)
-           ,("commonmark_x" , TextReader readCommonMark)
-           ,("creole"       , TextReader readCreole)
-           ,("dokuwiki"     , TextReader readDokuWiki)
-           ,("gfm"          , TextReader readCommonMark)
-           ,("rst"          , TextReader readRST)
-           ,("mediawiki"    , TextReader readMediaWiki)
-           ,("vimwiki"      , TextReader readVimwiki)
-           ,("docbook"      , TextReader readDocBook)
-           ,("opml"         , TextReader readOPML)
-           ,("org"          , TextReader readOrg)
-           ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
-           ,("html"         , TextReader readHtml)
-           ,("jats"         , TextReader readJATS)
-           ,("jira"         , TextReader readJira)
-           ,("latex"        , TextReader readLaTeX)
-           ,("haddock"      , TextReader readHaddock)
-           ,("twiki"        , TextReader readTWiki)
-           ,("tikiwiki"     , TextReader readTikiWiki)
-           ,("docx"         , ByteStringReader readDocx)
-           ,("odt"          , ByteStringReader readOdt)
-           ,("t2t"          , TextReader readTxt2Tags)
-           ,("epub"         , ByteStringReader readEPUB)
-           ,("muse"         , TextReader readMuse)
-           ,("man"          , TextReader readMan)
-           ,("fb2"          , TextReader readFB2)
-           ,("ipynb"        , TextReader readIpynb)
-           ,("csv"          , TextReader readCSV)
-           ,("csljson"      , TextReader readCslJson)
-           ,("bibtex"       , TextReader readBibTeX)
-           ,("biblatex"     , TextReader readBibLaTeX)
+readers = [("native"       , TextReader readNative)
+          ,("json"         , TextReader readJSON)
+          ,("markdown"     , TextReader readMarkdown)
+          ,("markdown_strict" , TextReader readMarkdown)
+          ,("markdown_phpextra" , TextReader readMarkdown)
+          ,("markdown_github" , TextReader readMarkdown)
+          ,("markdown_mmd",  TextReader readMarkdown)
+          ,("commonmark"   , TextReader readCommonMark)
+          ,("commonmark_x" , TextReader readCommonMark)
+          ,("creole"       , TextReader readCreole)
+          ,("dokuwiki"     , TextReader readDokuWiki)
+          ,("gfm"          , TextReader readCommonMark)
+          ,("rst"          , TextReader readRST)
+          ,("mediawiki"    , TextReader readMediaWiki)
+          ,("vimwiki"      , TextReader readVimwiki)
+          ,("docbook"      , TextReader readDocBook)
+          ,("opml"         , TextReader readOPML)
+          ,("org"          , TextReader readOrg)
+          ,("textile"      , TextReader readTextile) -- TODO : textile+lhs
+          ,("html"         , TextReader readHtml)
+          ,("jats"         , TextReader readJATS)
+          ,("jira"         , TextReader readJira)
+          ,("latex"        , TextReader readLaTeX)
+          ,("haddock"      , TextReader readHaddock)
+          ,("twiki"        , TextReader readTWiki)
+          ,("tikiwiki"     , TextReader readTikiWiki)
+          ,("docx"         , ByteStringReader readDocx)
+          ,("odt"          , ByteStringReader readOdt)
+          ,("t2t"          , TextReader readTxt2Tags)
+          ,("epub"         , ByteStringReader readEPUB)
+          ,("muse"         , TextReader readMuse)
+          ,("man"          , TextReader readMan)
+          ,("fb2"          , TextReader readFB2)
+          ,("ipynb"        , TextReader readIpynb)
+          ,("csv"          , TextReader readCSV)
+          ,("csljson"      , TextReader readCslJson)
+          ,("bibtex"       , TextReader readBibTeX)
+          ,("biblatex"     , TextReader readBibLaTeX)
            ]
 
 -- | Retrieve reader, extensions based on formatSpec (format+extensions).
@@ -173,9 +176,13 @@ getReader s =
                      return (r, exts)
 
 -- | Read pandoc document from JSON format.
-readJSON :: PandocMonad m
-         => ReaderOptions -> Text -> m Pandoc
-readJSON _ t =
-  case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
+readJSON :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
+readJSON _ s =
+  case eitherDecode' . BL.fromStrict . UTF8.fromText
+                     . sourcesToText . toSources $ s of
        Right doc -> return doc
-       Left e    -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e)
+       Left e    -> throwError $ PandocParseError ("JSON parse error: "
+                                                   <> T.pack e)
diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs
index b82a81350..318afda85 100644
--- a/src/Text/Pandoc/Readers/BibTeX.hs
+++ b/src/Text/Pandoc/Readers/BibTeX.hs
@@ -23,30 +23,33 @@ where
 import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
-import Data.Text (Text)
 import Citeproc (Lang(..), parseLang)
 import Citeproc.Locale (getLocale)
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad, lookupEnv)
 import Text.Pandoc.Citeproc.BibTeX as BibTeX
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
+import Text.Pandoc.Sources (ToSources(..))
 import Control.Monad.Except (throwError)
 
 -- | Read BibTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibTeX :: (PandocMonad m, ToSources a)
+           => ReaderOptions -> a -> m Pandoc
 readBibTeX = readBibTeX' BibTeX.Bibtex
 
 -- | Read BibLaTeX from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readBibLaTeX :: (PandocMonad m, ToSources a)
+             => ReaderOptions -> a -> m Pandoc
 readBibLaTeX = readBibTeX' BibTeX.Biblatex
 
-readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc
+readBibTeX' :: (PandocMonad m, ToSources a)
+            => Variant -> ReaderOptions -> a -> m Pandoc
 readBibTeX' variant _opts t = do
   mblangEnv <- lookupEnv "LANG"
   let defaultLang = Lang "en" Nothing (Just "US") [] [] []
@@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do
                    Left _  -> throwError $ PandocCiteprocError e
                Right l -> return l
   case BibTeX.readBibtexString variant locale (const True) t of
-    Left e -> throwError $ PandocParsecError t e
+    Left e -> throwError $ PandocParsecError (toSources t) e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
                          . setMeta "nocite"
diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs
index 2958d6180..eca8f9425 100644
--- a/src/Text/Pandoc/Readers/CSV.hs
+++ b/src/Text/Pandoc/Readers/CSV.hs
@@ -13,23 +13,23 @@
 Conversion from CSV to a 'Pandoc' table.
 -}
 module Text.Pandoc.Readers.CSV ( readCSV ) where
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad)
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.Options (ReaderOptions)
 import Control.Monad.Except (throwError)
 
-readCSV :: PandocMonad m
+readCSV :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
-readCSV _opts s =
-  case parseCSV defaultCSVOptions (crFilter s) of
+readCSV _opts s = do
+  let txt = sourcesToText $ toSources s
+  case parseCSV defaultCSVOptions txt of
     Right (r:rs) -> return $ B.doc $ B.table capt
                                              (zip aligns widths)
                                              (TableHead nullAttr hdrs)
@@ -45,4 +45,4 @@ readCSV _opts s =
              aligns = replicate numcols AlignDefault
              widths = replicate numcols ColWidthDefault
     Right []     -> return $ B.doc mempty
-    Left e       -> throwError $ PandocParsecError s e
+    Left e       -> throwError $ PandocParsecError (toSources [("",txt)]) e
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 244f77940..b099a9b50 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
 import Data.Typeable
-import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine,
-                            runF, defaultParserState, take1WhileP, option)
+import Text.Pandoc.Parsing (runParserT, getPosition,
+                            runF, defaultParserState, option, many1, anyChar,
+                            Sources(..), ToSources(..), ParserT, Future,
+                            sourceName)
 import qualified Data.Text as T
 
 -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readCommonMark :: (PandocMonad m, ToSources a)
+               => ReaderOptions -> a -> m Pandoc
 readCommonMark opts s
-  | isEnabled Ext_yaml_metadata_block opts
-  , "---" `T.isPrefixOf` s = do
-       let metaValueParser = do
-             inp <- option "" $ take1WhileP (const True)
-             case runIdentity
-                    (commonmarkWith (specFor opts) "metadata value" inp) of
-                    Left _ -> mzero
-                    Right (Cm bls :: Cm () Blocks)
-                             -> return $ return $ B.toMetaValue bls
-       res <- runParserT (do meta <- yamlMetaBlock metaValueParser
-                             pos <- getPosition
-                             return (meta, pos))
-                         defaultParserState "YAML metadata" s
-       case res of
-         Left _ -> readCommonMarkBody opts s
-         Right (meta, pos) -> do
-           let dropLines 0 = id
-               dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n')
-           let metaLines = sourceLine pos - 1
-           let body = T.replicate metaLines "\n" <> dropLines metaLines s
-           Pandoc _ bs <- readCommonMarkBody opts body
-           return $ Pandoc (runF meta defaultParserState) bs
-  | otherwise = readCommonMarkBody opts s
+  | isEnabled Ext_yaml_metadata_block opts = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
+                          pos <- getPosition
+                          return (meta, pos))
+                      defaultParserState "YAML metadata" (toSources s)
+    case res of
+      Left _ -> readCommonMarkBody opts sources toks
+      Right (meta, pos) -> do
+        -- strip off metadata section and parse body
+        let body = dropWhile (\t -> tokPos t < pos) toks
+        Pandoc _ bs <- readCommonMarkBody opts sources body
+        return $ Pandoc (runF meta defaultParserState) bs
+  | otherwise = do
+    let sources = toSources s
+    let toks = concatMap sourceToToks (unSources sources)
+    readCommonMarkBody opts sources toks
 
-readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCommonMarkBody opts s
+sourceToToks :: (SourcePos, Text) -> [Tok]
+sourceToToks (pos, s) = tokenize (sourceName pos) s
+
+metaValueParser :: Monad m
+                => ReaderOptions -> ParserT Sources st m (Future st MetaValue)
+metaValueParser opts = do
+  inp <- option "" $ T.pack <$> many1 anyChar
+  let toks = concatMap sourceToToks (unSources (toSources inp))
+  case runIdentity (parseCommonmarkWith (specFor opts) toks) of
+     Left _ -> mzero
+     Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
+
+readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
+readCommonMarkBody opts s toks
   | isEnabled Ext_sourcepos opts =
-    case runIdentity (commonmarkWith (specFor opts) "" s) of
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
       Left err -> throwError $ PandocParsecError s err
       Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
   | otherwise =
-    case runIdentity (commonmarkWith (specFor opts) "" s) of
+    case runIdentity (parseCommonmarkWith (specFor opts) toks) of
       Left err -> throwError $ PandocParsecError s err
       Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
 
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index 2658dfea2..ad848ada7 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed)
-import Text.Pandoc.Shared (crFilter)
-
 
 -- | Read creole from an input string and return a Pandoc document.
-readCreole :: PandocMonad m
+readCreole :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readCreole opts s = do
-  res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n"
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseCreole def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type CRLParser = ParserT Text ParserState
+type CRLParser = ParserT Sources ParserState
 
 --
 -- Utility functions
diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs
index 30bb19483..a0af5c325 100644
--- a/src/Text/Pandoc/Readers/CslJson.hs
+++ b/src/Text/Pandoc/Readers/CslJson.hs
@@ -24,21 +24,22 @@ import Text.Pandoc.Options
 import Text.Pandoc.Definition
 import Text.Pandoc.Builder (setMeta, cite, str)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
 import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue)
 import Control.Monad.Except (throwError)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read CSL JSON from an input string and return a Pandoc document.
 -- The document will have only metadata, with an empty body.
 -- The metadata will contain a `references` field with the
 -- bibliography entries, and a `nocite` field with the wildcard `[@*]`.
-readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readCslJson _opts t =
-  case cslJsonToReferences (UTF8.fromText t) of
+readCslJson :: (PandocMonad m, ToSources a)
+            => ReaderOptions -> a -> m Pandoc
+readCslJson _opts x =
+  case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of
     Left e -> throwError $ PandocParseError $ T.pack e
     Right refs -> return $ setMeta "references"
                               (map referenceToMetaValue refs)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ac3caa2c0..3db459cfd 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -30,7 +30,8 @@ import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.Pandoc.XML.Light
 
@@ -539,11 +540,15 @@ instance Default DBState where
                , dbContent = [] }
 
 
-readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readDocBook _ inp = do
+  let sources = toSources inp
   tree <- either (throwError . PandocXMLError "") return $
             parseXMLContents
-              (TL.fromStrict . handleInstructions $ crFilter inp)
+              (TL.fromStrict . handleInstructions . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index dedc1f03f..db98ac8de 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -29,26 +29,27 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, stringify, tshow)
+import Text.Pandoc.Shared (trim, stringify, tshow)
 
 -- | Read DokuWiki from an input string and return a Pandoc document.
-readDokuWiki :: PandocMonad m
+readDokuWiki :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readDokuWiki opts s = do
-  let input = crFilter s
-  res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
+  let sources = toSources s
+  res <- runParserT parseDokuWiki def {stateOptions = opts }
+           (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
-type DWParser = ParserT Text ParserState
+type DWParser = ParserT Sources ParserState
 
 -- * Utility functions
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 nested :: PandocMonad m => DWParser m a -> DWParser m a
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 66e390bd7..84e5278db 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report)
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter)
 import Text.Pandoc.XML.Light
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type FB2 m = StateT FB2State m
 
@@ -63,9 +63,12 @@ instance HasMeta FB2State where
   setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)}
   deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)}
 
-readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readFB2 :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
 readFB2 _ inp =
-  case parseXMLElement $ TL.fromStrict $ crFilter inp of
+  case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of
     Left msg -> throwError $ PandocXMLError "" msg
     Right el ->  do
       (bs, st) <- runStateT (parseRootElement el) def
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c3e68afd8..f5c8a2277 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -62,21 +62,21 @@ import Text.Pandoc.Options (
     extensionEnabled)
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Shared (
-    addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces,
+    addMetaField, blocksToInlines', escapeURI, extractSpaces,
     htmlSpanLikeElements, renderTags', safeRead, tshow)
 import Text.Pandoc.Walk
 import Text.Parsec.Error
 import Text.TeXMath (readMathML, writeTeX)
 
 -- | Convert HTML-formatted string to 'Pandoc' document.
-readHtml :: PandocMonad m
+readHtml :: (PandocMonad m, ToSources a)
          => ReaderOptions -- ^ Reader options
-         -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+         -> a             -- ^ Input to parse
          -> m Pandoc
 readHtml opts inp = do
   let tags = stripPrefixes $ canonicalizeTags $
              parseTagsOptions parseOptions{ optTagPosition = True }
-             (crFilter inp)
+             (sourcesToText $ toSources inp)
       parseDoc = do
         blocks <- fixPlains False . mconcat <$> manyTill block eof
         meta <- stateMeta . parserState <$> getState
@@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
 
 pTagText :: PandocMonad m => TagParser m Inlines
 pTagText = try $ do
+  pos <- getPosition
   (TagText str) <- pSatisfy isTagText
   st <- getState
   qu <- ask
   parsed <- lift $ lift $
-            flip runReaderT qu $ runParserT (many pTagContents) st "text" str
+            flip runReaderT qu $ runParserT (many pTagContents) st "text"
+               (Sources [(pos, str)])
   case parsed of
        Left _        -> throwError $ PandocParseError $
                         "Could not parse `" <> str <> "'"
        Right result  -> return $ mconcat result
 
-type InlinesParser m = HTMLParser m Text
+type InlinesParser m = HTMLParser m Sources
 
 pTagContents :: PandocMonad m => InlinesParser m Inlines
 pTagContents =
@@ -970,13 +972,14 @@ isCommentTag = tagComment (const True)
 -- | Matches a stretch of HTML in balanced tags.
 htmlInBalanced :: Monad m
                => (Tag Text -> Bool)
-               -> ParserT Text st m Text
+               -> ParserT Sources st m Text
 htmlInBalanced f = try $ do
   lookAhead (char '<')
-  inp <- getInput
-  let ts = canonicalizeTags $
-        parseTagsOptions parseOptions{ optTagWarning = True,
-                                       optTagPosition = True } inp
+  sources <- getInput
+  let ts = canonicalizeTags
+        $ parseTagsOptions parseOptions{ optTagWarning = True,
+                                         optTagPosition = True }
+        $ sourcesToText sources
   case ts of
     (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do
        guard $ f t
@@ -1018,15 +1021,17 @@ hasTagWarning _                = False
 -- | Matches a tag meeting a certain condition.
 htmlTag :: (HasReaderOptions st, Monad m)
         => (Tag Text -> Bool)
-        -> ParserT Text st m (Tag Text, Text)
+        -> ParserT Sources st m (Tag Text, Text)
 htmlTag f = try $ do
   lookAhead (char '<')
   startpos <- getPosition
-  inp <- getInput
+  sources <- getInput
+  let inp = sourcesToText sources
   let ts = canonicalizeTags $ parseTagsOptions
                                parseOptions{ optTagWarning = False
                                            , optTagPosition = True }
-                               (inp <> " ") -- add space to ensure that
+                               (inp <> " ")
+                               -- add space to ensure that
                                -- we get a TagPosition after the tag
   (next, ln, col) <- case ts of
                       (TagPosition{} : next : TagPosition ln col : _)
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 48454e353..35eaac0a9 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -19,7 +19,7 @@ import Control.Monad.Except (throwError)
 import Data.List (intersperse)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (fromMaybe)
-import Data.Text (Text, unpack)
+import Data.Text (unpack)
 import qualified Data.Text as T
 import Documentation.Haddock.Parser
 import Documentation.Haddock.Types as H
@@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
+import Text.Pandoc.Shared (splitTextBy, trim)
 
 
 -- | Parse Haddock markup and return a 'Pandoc' document.
-readHaddock :: PandocMonad m
+readHaddock :: (PandocMonad m, ToSources a)
             => ReaderOptions
-            -> Text
+            -> a
             -> m Pandoc
-readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of
+readHaddock opts s = case readHaddockEither opts
+                           (unpack . sourcesToText . toSources $ s) of
   Right result -> return result
   Left e       -> throwError e
 
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index 70296bb6b..cd1093109 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -39,10 +39,12 @@ import Data.Aeson as Aeson
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
-readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-readIpynb opts t = do
-  let src = BL.fromStrict (TE.encodeUtf8 t)
+readIpynb :: (PandocMonad m, ToSources a)
+          => ReaderOptions -> a -> m Pandoc
+readIpynb opts x = do
+  let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x
   case eitherDecode src of
     Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
     Left _ ->
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c068f3774..9cdbf1611 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
+import Text.Pandoc.Shared (safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 type JATS m = StateT JATSState m
 
@@ -52,10 +53,14 @@ instance Default JATSState where
                  , jatsContent = [] }
 
 
-readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readJATS :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readJATS _ inp = do
+  let sources = toSources inp
   tree <- either (throwError . PandocXMLError "") return $
-            parseXMLContents (TL.fromStrict $ crFilter inp)
+            parseXMLContents (TL.fromStrict . sourcesToText $ sources)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index 89aecbf56..a3b415f09 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell)
 import Text.Pandoc.Error (PandocError (PandocParseError))
 import Text.Pandoc.Options (ReaderOptions)
 import Text.Pandoc.Shared (stringify)
-
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import qualified Text.Jira.Markup as Jira
 
 -- | Read Jira wiki markup.
-readJira :: PandocMonad m
+readJira :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
-readJira _opts s = case parse s of
-  Right d -> return $ jiraToPandoc d
-  Left e  -> throwError . PandocParseError $
-             "Jira parse error" `append` pack (show e)
+readJira _opts inp = do
+  let sources = toSources inp
+  case parse (sourcesToText sources) of
+    Right d -> return $ jiraToPandoc d
+    Left e  -> throwError . PandocParseError $
+               "Jira parse error" `append` pack (show e)
 
 jiraToPandoc :: Jira.Doc -> Pandoc
 jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9ad168293..f90d562ae 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty)
 -- import Debug.Trace (traceShowId)
 
 -- | Parse LaTeX from string and return 'Pandoc' document.
-readLaTeX :: PandocMonad m
+readLaTeX :: (PandocMonad m, ToSources a)
           => ReaderOptions -- ^ Reader options
-          -> Text        -- ^ String to parse (assumes @'\n'@ line endings)
+          -> a             -- ^ Input to parse
           -> m Pandoc
 readLaTeX opts ltx = do
+  let sources = toSources ltx
   parsed <- runParserT parseLaTeX def{ sOptions = opts } "source"
-               (tokenize "source" (crFilter ltx))
+               (tokenizeSources sources)
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError ltx e
+    Left e       -> throwError $ PandocParsecError sources e
 
 parseLaTeX :: PandocMonad m => LP m Pandoc
 parseLaTeX = do
@@ -132,11 +133,11 @@ resolveRefs _ x = x
 
 
 rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-              => ParserT Text s m Text
+              => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -163,11 +164,11 @@ beginOrEndCommand = try $ do
                     (txt <> untokenize rawargs)
 
 rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-               => ParserT Text s m Text
+               => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   raw <- snd <$>
           (   rawLaTeXParser toks True
               (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -178,11 +179,11 @@ rawLaTeXInline = do
   finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
   return $ raw <> T.pack finalbraces
 
-inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
   inp <- getInput
-  let toks = tokenize "source" inp
+  let toks = tokenizeSources inp
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
@@ -641,7 +642,7 @@ opt = do
   parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks
   case parsed of
     Right result -> return result
-    Left e       -> throwError $ PandocParsecError (untokenize toks) e
+    Left e       -> throwError $ PandocParsecError (toSources toks) e
 
 -- block elements:
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
index 655823dab..af97125c6 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs
@@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do
       runParserT (mconcat <$> many inline) st "bracketed option" toks
     case parsed of
       Right result -> return result
-      Left e       -> throwError $ PandocParsecError (untokenize toks) e
+      Left e       -> throwError $ PandocParsecError (toSources toks) e
 
 
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index db58b333d..35ce3509d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , rawLaTeXParser
   , applyMacros
   , tokenize
+  , tokenizeSources
   , untokenize
   , untoken
   , totoks
@@ -248,7 +249,7 @@ withVerbatimMode parser = do
 
 rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
                => [Tok] -> Bool -> LP m a -> LP m a
-               -> ParserT Text s m (a, Text)
+               -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
@@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                _ <- takeP (T.length (untokenize toks'))
+                void $ count (T.length (untokenize toks')) anyChar
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do
                 return (val, result')
 
 applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
-            => Text -> ParserT Text s m Text
+            => Text -> ParserT Sources s m Text
 applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
    do let retokenize = untokenize <$> many (satisfyTok (const True))
       pstate <- getState
@@ -301,6 +302,11 @@ QuickCheck property:
 >   let t = T.pack s in untokenize (tokenize "random" t) == t
 -}
 
+tokenizeSources :: Sources -> [Tok]
+tokenizeSources = concatMap tokenizeSource . unSources
+ where
+   tokenizeSource (pos, t) = totoks pos t
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs
index f8c214318..c20b72bc5 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleInstances #-}
 {- |
    Module      : Text.Pandoc.Readers.LaTeX.Types
    Copyright   : Copyright (C) 2017-2021 John MacFarlane
@@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..)
                                        )
 where
 import Data.Text (Text)
-import Text.Parsec.Pos (SourcePos)
+import Text.Parsec.Pos (SourcePos, sourceName)
+import Text.Pandoc.Sources
+import Data.List (groupBy)
 
 data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
                Esc1    | Esc2   | Arg Int
@@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment |
 data Tok = Tok SourcePos TokType Text
      deriving (Eq, Ord, Show)
 
+instance ToSources [Tok] where
+  toSources = Sources
+    . map (\ts -> case ts of
+                    Tok p _ _ : _ -> (p, mconcat $ map tokToText ts)
+                    _ -> error "toSources [Tok] encountered empty group")
+    . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2)
+
+tokToText :: Tok -> Text
+tokToText (Tok _ _ t) = t
+
 data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed
      deriving (Eq, Ord, Show)
 
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21b8feaab..1141af66f 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void)
 import Control.Monad.Trans (lift)
 import Control.Monad.Except (throwError)
 import Data.Maybe (catMaybes, isJust)
-import Data.List (intersperse, intercalate)
+import Data.List (intersperse)
 import qualified Data.Text as T
 import Text.Pandoc.Builder as B
 import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report)
@@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Walk (query)
-import Text.Pandoc.Shared (crFilter, mapLeft)
+import Text.Pandoc.Shared (mapLeft)
 import Text.Pandoc.Readers.Roff  -- TODO explicit imports
-import Text.Parsec hiding (tokenPrim)
 import qualified Text.Parsec as Parsec
 import Text.Parsec.Pos (updatePosString)
 import qualified Data.Foldable as Foldable
@@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m
 
 
 -- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
-readMan opts txt = do
-  tokenz <- lexRoff (initialPos "input") (crFilter txt)
+readMan :: (PandocMonad m, ToSources a)
+        => ReaderOptions
+        -> a
+        -> m Pandoc
+readMan opts s = do
+  let Sources inps = toSources s
+  tokenz <- mconcat <$> mapM (uncurry lexRoff) inps
   let state = def {readerOptions = opts} :: ManState
+  let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e
+      fixError e = e
   eitherdoc <- readWithMTokens parseMan state
      (Foldable.toList . unRoffTokens $ tokenz)
-  either throwError return eitherdoc
+  either (throwError . fixError) return eitherdoc
+
 
 readWithMTokens :: PandocMonad m
         => ParserT [RoffToken] ManState m a  -- ^ parser
@@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m
         -> [RoffToken]                       -- ^ input
         -> m (Either PandocError a)
 readWithMTokens parser state input =
-  let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
+  let leftF = PandocParsecError mempty
   in mapLeft leftF `liftM` runParserT parser state "source" input
 
+
 parseMan :: PandocMonad m => ManParser m Pandoc
 parseMan = do
   bs <- many parseBlock <* eof
@@ -89,7 +96,7 @@ parseBlock = choice [ parseList
 
 parseTable :: PandocMonad m => ManParser m Blocks
 parseTable = do
-  modifyState $ \st -> st { tableCellsPlain = True }
+  updateState $ \st -> st { tableCellsPlain = True }
   let isTbl Tbl{} = True
       isTbl _     = False
   Tbl _opts rows pos <- msatisfy isTbl
@@ -135,7 +142,7 @@ parseTable = do
         case res' of
           Left _  -> Prelude.fail "Could not parse table cell"
           Right x -> do
-            modifyState $ \s -> s{ tableCellsPlain = False }
+            updateState $ \s -> s{ tableCellsPlain = False }
             return x
       Right x -> return x
 
@@ -222,7 +229,7 @@ parseTitle = do
                       setMeta "section" (linePartsToInlines y)
          [x]       -> setMeta "title" (linePartsToInlines x)
          []        -> id
-  modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
+  updateState $ \st -> st{ metadata = adjustMeta $ metadata st }
   return mempty
 
 linePartsToInlines :: [LinePart] -> Inlines
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index ba8ed147e..69dd51bc4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
 import Text.Pandoc.Shared
 import Text.Pandoc.XML (fromEntities)
 import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock)
+-- import Debug.Trace (traceShowId)
 
-type MarkdownParser m = ParserT Text ParserState m
+type MarkdownParser m = ParserT Sources ParserState m
 
 type F = Future ParserState
 
 -- | Read markdown from an input string and return a Pandoc document.
-readMarkdown :: PandocMonad m
+readMarkdown :: (PandocMonad m, ToSources a)
              => ReaderOptions -- ^ Reader options
-             -> Text      -- ^ String to parse (assuming @'\n'@ line endings)
+             -> a             -- ^ Input
              -> m Pandoc
 readMarkdown opts s = do
   parsed <- readWithM parseMarkdown def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+                (ensureFinalNewlines 3 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do
         meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr
         setPosition oldPos
         return $ runF meta defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do
         refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
         setPosition oldPos
         return $ runF refs defaultParserState
-  parsed <- readWithM parser def{ stateOptions = opts } ""
+  parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -146,14 +147,14 @@ inList = do
   ctx <- stateParserContext <$> getState
   guard (ctx == ListItemState)
 
-spnl :: PandocMonad m => ParserT Text st m ()
+spnl :: PandocMonad m => ParserT Sources st m ()
 spnl = try $ do
   skipSpaces
   optional newline
   skipSpaces
   notFollowedBy (char '\n')
 
-spnl' :: PandocMonad m => ParserT Text st m Text
+spnl' :: PandocMonad m => ParserT Sources st m Text
 spnl' = try $ do
   xs <- many spaceChar
   ys <- option "" $ try $ (:) <$> newline
@@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _)
 -- hrule block
 --
 
-hrule :: PandocMonad m => ParserT Text st m (F Blocks)
+hrule :: PandocMonad m => ParserT Sources st m (F Blocks)
 hrule = try $ do
   skipSpaces
   start <- satisfy isHruleChar
@@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline
 blockDelimiter :: PandocMonad m
                => (Char -> Bool)
                -> Maybe Int
-               -> ParserT Text ParserState m Int
+               -> ParserT Sources ParserState m Int
 blockDelimiter f len = try $ do
   skipNonindentSpaces
   c <- lookAhead (satisfy f)
@@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do
   blanklines
   return $ T.intercalate "\n" lns'
 
-birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
+birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text
 birdTrackLine c = try $ do
   char c
   -- allow html tags on left margin:
@@ -1025,7 +1026,7 @@ para = try $ do
   option (B.plain <$> result)
     $ try $ do
             newline
-            (blanklines >> return mempty)
+            (mempty <$ blanklines)
               <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
               <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
               <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
@@ -1170,7 +1171,7 @@ lineBlock = do
 -- and the length including trailing space.
 dashedLine :: PandocMonad m
            => Char
-           -> ParserT Text st m (Int, Int)
+           -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many spaceChar
@@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m
              -> MarkdownParser m [Text]
 rawTableLine indices = do
   notFollowedBy' (blanklines' <|> tableFooter)
-  line <- take1WhileP (/='\n') <* newline
+  line <- anyLine
   return $ map trim $ tail $
            splitTextByIndices (init indices) line
 
@@ -1390,7 +1391,7 @@ pipeTableCell =
       return $ B.plain <$> result)
     <|> return mempty
 
-pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int)
 pipeTableHeaderPart = try $ do
   skipMany spaceChar
   left <- optionMaybe (char ':')
@@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do
       (Just _,Just _)   -> AlignCenter, len)
 
 -- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT Text st m ()
+scanForPipe :: PandocMonad m => ParserT Sources st m ()
 scanForPipe = do
-  inp <- getInput
-  case T.break (\c -> c == '\n' || c == '|') inp of
+  Sources inps <- getInput
+  let ln = case inps of
+             [] -> ""
+             ((_,t):(_,t'):_) | T.null t -> t'
+             ((_,t):_) -> t
+  case T.break (\c -> c == '\n' || c == '|') ln of
        (_, T.uncons -> Just ('|', _)) -> return ()
        _                              -> mzero
 
@@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
   where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
         regsp = skipMany spaceChar >> return B.space
 
-nonEndline :: PandocMonad m => ParserT Text st m Char
+nonEndline :: PandocMonad m => ParserT Sources st m Char
 nonEndline = satisfy (/='\n')
 
 str :: PandocMonad m => MarkdownParser m (F Inlines)
 str = do
   result <- mconcat <$> many1
-             ( take1WhileP isAlphaNum
+             ( T.pack <$> (many1 alphaNum)
               <|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
   updateLastStrPos
   (do guardEnabled Ext_smart
@@ -1962,7 +1967,7 @@ rawLaTeXInline' = do
     s <- rawLaTeXInline
     return $ return $ B.rawInline "tex" s -- "tex" because it might be context
 
-rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
+rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text
 rawConTeXtEnvironment = try $ do
   string "\\start"
   completion <- inBrackets (letter <|> digit <|> spaceChar)
@@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do
                        (try $ string "\\stop" >> textStr completion)
   return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
 
-inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
+inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text
 inBrackets parser = do
   char '['
   contents <- manyChar parser
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 9f4d5e170..825e4a2eb 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -36,17 +36,18 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
+import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
                            trim, splitTextBy, tshow)
 import Text.Pandoc.Walk (walk)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read mediawiki from an input string and return a Pandoc document.
-readMediaWiki :: PandocMonad m
-              => ReaderOptions -- ^ Reader options
-              -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+readMediaWiki :: (PandocMonad m, ToSources a)
+              => ReaderOptions
+              -> a
               -> m Pandoc
 readMediaWiki opts s = do
+  let sources = toSources s
   parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
                                             , mwMaxNestingLevel = 4
                                             , mwNextLinkNumber  = 1
@@ -55,7 +56,7 @@ readMediaWiki opts s = do
                                             , mwLogMessages = []
                                             , mwInTT = False
                                             }
-            (crFilter s <> "\n")
+            sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
@@ -69,7 +70,7 @@ data MWState = MWState { mwOptions         :: ReaderOptions
                        , mwInTT            :: Bool
                        }
 
-type MWParser m = ParserT Text MWState m
+type MWParser m = ParserT Sources MWState m
 
 instance HasReaderOptions MWState where
   extractReaderOptions = mwOptions
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index cb141cba5..bbcfe62ea 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL
 import qualified Text.Pandoc.UTF8 as UTF8
 
 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st)
-             => ParserT Text st m (Future st MetaValue)
+             => ParserT Sources st m (Future st MetaValue)
              -> BL.ByteString
-             -> ParserT Text st m (Future st Meta)
+             -> ParserT Sources st m (Future st Meta)
 yamlBsToMeta pMetaValue bstr = do
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc (YAML.Mapping _ _ o):_)
@@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing
 
 -- Returns filtered list of references.
 yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st)
-             => ParserT Text st m (Future st MetaValue)
+             => ParserT Sources st m (Future st MetaValue)
              -> (Text -> Bool) -- ^ Filter for id
              -> BL.ByteString
-             -> ParserT Text st m (Future st [MetaValue])
+             -> ParserT Sources st m (Future st [MetaValue])
 yamlBsToRefs pMetaValue idpred bstr =
   case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
        Right (YAML.Doc o@YAML.Mapping{}:_)
@@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t
 nodeToKey _                                   = Nothing
 
 normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st)
-                   => ParserT Text st m (Future st MetaValue)
+                   => ParserT Sources st m (Future st MetaValue)
                    -> Text
-                   -> ParserT Text st m (Future st MetaValue)
+                   -> ParserT Sources st m (Future st MetaValue)
 normalizeMetaValue pMetaValue x =
    -- Note: a standard quoted or unquoted YAML value will
    -- not end in a newline, but a "block" set off with
@@ -133,9 +133,9 @@ checkBoolean t
   | otherwise = Nothing
 
 yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st)
-                => ParserT Text st m (Future st MetaValue)
+                => ParserT Sources st m (Future st MetaValue)
                 -> YAML.Node YE.Pos
-                -> ParserT Text st m (Future st MetaValue)
+                -> ParserT Sources st m (Future st MetaValue)
 yamlToMetaValue pMetaValue (YAML.Scalar _ x) =
   case x of
        YAML.SStr t       -> normalizeMetaValue pMetaValue t
@@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) =
 yamlToMetaValue _ _ = return $ return $ MetaString ""
 
 yamlMap :: (PandocMonad m, HasLastStrPosition st)
-        => ParserT Text st m (Future st MetaValue)
+        => ParserT Sources st m (Future st MetaValue)
         -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
-        -> ParserT Text st m (Future st (M.Map Text MetaValue))
+        -> ParserT Sources st m (Future st (M.Map Text MetaValue))
 yamlMap pMetaValue o = do
     kvs <- forM (M.toList o) $ \(key, v) -> do
              k <- maybe (throwError $ PandocParseError
@@ -177,8 +177,8 @@ yamlMap pMetaValue o = do
 
 -- | Parse a YAML metadata block using the supplied 'MetaValue' parser.
 yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m)
-              => ParserT Text st m (Future st MetaValue)
-              -> ParserT Text st m (Future st Meta)
+              => ParserT Sources st m (Future st MetaValue)
+              -> ParserT Sources st m (Future st Meta)
 yamlMetaBlock parser = try $ do
   string "---"
   blankline
@@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do
   optional blanklines
   yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
 
-stopLine :: Monad m => ParserT Text st m ()
+stopLine :: Monad m => ParserT Sources st m ()
 stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index 698bfd3d7..a0d4534f1 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
-import Text.Pandoc.Shared (crFilter, trimr, tshow)
+import Text.Pandoc.Shared (trimr, tshow)
 
 -- | Read Muse from an input string and return a Pandoc document.
-readMuse :: PandocMonad m
+readMuse :: (PandocMonad m, ToSources a)
          => ReaderOptions
-         -> Text
+         -> a
          -> m Pandoc
 readMuse opts s = do
-  let input = crFilter s
-  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
+  let sources = toSources s
+  res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts }
+              (initialSourceName sources) sources
   case res of
-       Left e  -> throwError $ PandocParsecError input e
+       Left e  -> throwError $ PandocParsecError sources e
        Right d -> return d
 
 type F = Future MuseState
@@ -82,7 +83,7 @@ instance Default MuseEnv where
                 , museInPara = False
                 }
 
-type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m)
+type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m)
 
 instance HasReaderOptions MuseState where
   extractReaderOptions = museOptions
@@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1)
 -- * Parsers
 
 -- | Parse end-of-line, which can be either a newline or end-of-file.
-eol :: Stream s m Char => ParserT s st m ()
+eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m ()
 eol = void newline <|> eof
 
 getIndent :: PandocMonad m
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 9c8bc0374..58f235e81 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -21,6 +21,7 @@ import Control.Monad.Except (throwError)
 import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Error
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 
 -- | Read native formatted text and return a Pandoc document.
 -- The input may be a full pandoc document, a block list, a block,
@@ -32,14 +33,15 @@ import Text.Pandoc.Error
 --
 -- > Pandoc nullMeta [Plain [Str "hi"]]
 --
-readNative :: PandocMonad m
+readNative :: (PandocMonad m, ToSources a)
            => ReaderOptions
-           -> Text       -- ^ String to parse (assuming @'\n'@ line endings)
+           -> a
            -> m Pandoc
 readNative _ s =
-  case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
-    Right doc -> return doc
-    Left _    -> throwError $ PandocParseError "couldn't read native"
+  let t = sourcesToText . toSources $ s
+  in  case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of
+        Right doc -> return doc
+        Left _    -> throwError $ PandocParseError "couldn't read native"
 
 readBlocks :: Text -> Either PandocError [Block]
 readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5f2ddb876..668c9ca11 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -24,7 +24,8 @@ import Text.Pandoc.Options
 import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
-import Text.Pandoc.Shared (crFilter, blocksToInlines')
+import Text.Pandoc.Shared (blocksToInlines')
+import Text.Pandoc.Sources (ToSources(..), sourcesToText)
 import Text.Pandoc.XML.Light
 import Control.Monad.Except (throwError)
 
@@ -46,10 +47,14 @@ instance Default OPMLState where
                  , opmlOptions = def
                  }
 
-readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readOPML :: (PandocMonad m, ToSources a)
+         => ReaderOptions
+         -> a
+         -> m Pandoc
 readOPML opts inp  = do
-  (bs, st') <- runStateT
-                 (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+  let sources = toSources inp
+  (bs, st') <-
+    runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of
                      Left msg -> throwError $ PandocXMLError "" msg
                      Right ns -> mapM parseBlock ns)
                  def{ opmlOptions = opts }
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index afeb27a87..8823befdd 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing (reportLogMessages)
-import Text.Pandoc.Shared (crFilter)
-
+import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines)
 import Control.Monad.Except (throwError)
 import Control.Monad.Reader (runReaderT)
 
-import Data.Text (Text)
-
 -- | Parse org-mode string and return a Pandoc document.
-readOrg :: PandocMonad m
+readOrg :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readOrg opts s = do
   parsed <- flip runReaderT def $
             readWithM parseOrg (optionsToParserState opts)
-            (crFilter s <> "\n\n")
+            (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left  e      -> throwError e
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 519a6ce04..054f2611a 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -29,6 +29,7 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
 import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
+import Text.Pandoc.Sources (ToSources(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 
 import Control.Monad (guard, mplus, mzero, unless, void, when)
@@ -802,7 +803,7 @@ inlineLaTeX = try $ do
    parseAsInlineLaTeX :: PandocMonad m
                       => Text -> TeXExport -> OrgParser m (Maybe Inlines)
    parseAsInlineLaTeX cs = \case
-     TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs
+     TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs)
      TeXIgnore -> return (Just mempty)
      TeXVerbatim -> return (Just $ B.str cs)
 
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 6ed24a602..c7ea02815 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -114,7 +114,7 @@ import Control.Monad (guard)
 import Control.Monad.Reader (ReaderT)
 
 -- | The parser used to read org files.
-type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m)
 
 --
 -- Adaptions and specializations of parsing utilities
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index ac4c0b6cb..a3fcf028c 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -38,25 +38,24 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.Printf (printf)
 import Data.Time.Format
 
 -- TODO:
 -- [ ] .. parsed-literal
 
 -- | Parse reStructuredText string and return Pandoc document.
-readRST :: PandocMonad m
+readRST :: (PandocMonad m, ToSources a)
         => ReaderOptions -- ^ Reader options
-        -> Text          -- ^ Text to parse (assuming @'\n'@ line endings)
+        -> a
         -> m Pandoc
 readRST opts s = do
   parsed <- readWithM parseRST def{ stateOptions = opts }
-               (crFilter s <> "\n\n")
+               (ensureFinalNewlines 2 (toSources s))
   case parsed of
     Right result -> return result
     Left e       -> throwError e
 
-type RSTParser m = ParserT Text ParserState m
+type RSTParser m = ParserT Sources ParserState m
 
 --
 -- Constants and data structure definitions
@@ -151,11 +150,19 @@ parseRST = do
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys were...
-  docMinusKeys <- T.concat <$>
-                  manyTill (referenceKey <|> anchorDef <|>
-                            noteBlock <|> citationBlock <|>
-                            (snd <$> withRaw comment) <|>
-                            headerBlock <|> lineClump) eof
+  let chunk = referenceKey
+              <|> anchorDef
+              <|> noteBlock
+              <|> citationBlock
+              <|> (snd <$> withRaw comment)
+              <|> headerBlock
+              <|> lineClump
+  docMinusKeys <- Sources <$>
+                  manyTill (do pos <- getPosition
+                               t <- chunk
+                               return (pos, t)) eof
+  -- UGLY: we collapse source position information.
+  -- TODO: fix the parser to use the F monad instead of two passes
   setInput docMinusKeys
   setPosition startPos
   st' <- getState
@@ -348,7 +355,7 @@ singleHeader' = try $ do
 -- hrule block
 --
 
-hrule :: Monad m => ParserT Text st m Blocks
+hrule :: Monad m => ParserT Sources st m Blocks
 hrule = try $ do
   chr <- oneOf underlineChars
   count 3 (char chr)
@@ -363,7 +370,7 @@ hrule = try $ do
 
 -- read a line indented by a given string
 indentedLine :: (HasReaderOptions st, Monad m)
-             => Int -> ParserT Text st m Text
+             => Int -> ParserT Sources st m Text
 indentedLine indents = try $ do
   lookAhead spaceChar
   gobbleAtMostSpaces indents
@@ -372,7 +379,7 @@ indentedLine indents = try $ do
 -- one or more indented lines, possibly separated by blank lines.
 -- any amount of indentation will work.
 indentedBlock :: (HasReaderOptions st, Monad m)
-              => ParserT Text st m Text
+              => ParserT Sources st m Text
 indentedBlock = try $ do
   indents <- length <$> lookAhead (many1 spaceChar)
   lns <- many1 $ try $ do b <- option "" blanklines
@@ -381,20 +388,20 @@ indentedBlock = try $ do
   optional blanklines
   return $ T.unlines lns
 
-quotedBlock :: Monad m => ParserT Text st m Text
+quotedBlock :: Monad m => ParserT Sources st m Text
 quotedBlock = try $ do
     quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
     lns <- many1 $ lookAhead (char quote) >> anyLine
     optional blanklines
     return $ T.unlines lns
 
-codeBlockStart :: Monad m => ParserT Text st m Char
+codeBlockStart :: Monad m => ParserT Sources st m Char
 codeBlockStart = string "::" >> blankline >> blankline
 
-codeBlock :: Monad m => ParserT Text ParserState m Blocks
+codeBlock :: Monad m => ParserT Sources ParserState m Blocks
 codeBlock = try $ codeBlockStart >> codeBlockBody
 
-codeBlockBody :: Monad m => ParserT Text ParserState m Blocks
+codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks
 codeBlockBody = do
   lang <- stateRstHighlight <$> getState
   try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$>
@@ -410,14 +417,14 @@ lhsCodeBlock = try $ do
   return $ B.codeBlockWith ("", ["haskell","literate"], [])
          $ T.intercalate "\n" lns
 
-latexCodeBlock :: Monad m => ParserT Text st m [Text]
+latexCodeBlock :: Monad m => ParserT Sources st m [Text]
 latexCodeBlock = try $ do
   try (latexBlockLine "\\begin{code}")
   many1Till anyLine (try $ latexBlockLine "\\end{code}")
  where
   latexBlockLine s = skipMany spaceChar >> string s >> blankline
 
-birdCodeBlock :: Monad m => ParserT Text st m [Text]
+birdCodeBlock :: Monad m => ParserT Sources st m [Text]
 birdCodeBlock = filterSpace <$> many1 birdTrackLine
   where filterSpace lns =
             -- if (as is normal) there is always a space after >, drop it
@@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine
                then map (T.drop 1) lns
                else lns
 
-birdTrackLine :: Monad m => ParserT Text st m Text
+birdTrackLine :: Monad m => ParserT Sources st m Text
 birdTrackLine = char '>' >> anyLine
 
 --
@@ -456,7 +463,6 @@ includeDirective top fields body = do
   let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
   let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
   oldPos <- getPosition
-  oldInput <- getInput
   containers <- stateContainers <$> getState
   when (f `elem` containers) $
     throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
@@ -494,15 +500,11 @@ includeDirective top fields body = do
        Nothing   -> case lookup "literal" fields of
                          Just _  -> return $ B.rawBlock "rst" contents'
                          Nothing -> do
-                           setPosition $ newPos (T.unpack f) 1 1
-                           setInput $ contents' <> "\n"
-                           bs <- optional blanklines >>
-                                  (mconcat <$> many block)
-                           setInput oldInput
-                           setPosition oldPos
+                           addToSources (initialPos (T.unpack f))
+                              (contents' <> "\n")
                            updateState $ \s -> s{ stateContainers =
                                          tail $ stateContainers s }
-                           return bs
+                           return mempty
 
 
 --
@@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks
 definitionList = B.definitionList <$> many1 definitionListItem
 
 -- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT Text st m Int
+bulletListStart :: Monad m => ParserT Sources st m Int
 bulletListStart = try $ do
   notFollowedBy' hrule  -- because hrules start out just like lists
   marker <- oneOf bulletListMarkers
@@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do
 -- plus isolated (no two adjacent) internal hyphens, underscores,
 -- periods, colons and plus signs; no whitespace or other characters
 -- are allowed.
-simpleReferenceName :: Monad m => ParserT Text st m Text
+simpleReferenceName :: Monad m => ParserT Sources st m Text
 simpleReferenceName = do
   x <- alphaNum
   xs <- many $  alphaNum
@@ -1122,7 +1124,7 @@ referenceKey = do
   -- return enough blanks to replace key
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
-targetURI :: Monad m => ParserT Text st m Text
+targetURI :: Monad m => ParserT Sources st m Text
 targetURI = do
   skipSpaces
   optional $ try $ newline >> notFollowedBy blankline
@@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m ()
 anonymousKey = try $ do
   oneOfStrings [".. __:", "__"]
   src <- targetURI
-  pos <- getPosition
-  let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
+  -- we need to ensure that the keys are ordered by occurrence in
+  -- the document.
+  numKeys <- M.size . stateKeys <$> getState
+  let key = toKey $ "_" <> T.pack (show numKeys)
   updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
                           stateKeys s }
 
@@ -1250,13 +1254,13 @@ headerBlock = do
 -- Grid tables TODO:
 --  - column spans
 
-dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many (char ' ')
   return (length dashes, length $ dashes ++ sp)
 
-simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)]
 simpleDashedLines ch = try $ many1 (dashedLine ch)
 
 -- Parse a table row separator
@@ -1382,7 +1386,7 @@ hyphens = do
   -- don't want to treat endline after hyphen or dash as a space
   return $ B.str result
 
-escapedChar :: Monad m => ParserT Text st m Inlines
+escapedChar :: Monad m => ParserT Sources st m Inlines
 escapedChar = do c <- escaped anyChar
                  return $ if c == ' ' || c == '\n' || c == '\r'
                              -- '\ ' is null in RST
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 509ce1377..47f16ef4b 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared (safeRead)
-import Text.Parsec hiding (tokenPrim)
 import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
 import qualified Data.Sequence as Seq
 import qualified Data.Foldable as Foldable
@@ -122,16 +121,16 @@ instance Default RoffState where
                   , afterConditional = False
                   }
 
-type RoffLexer m = ParserT T.Text RoffState m
+type RoffLexer m = ParserT Sources RoffState m
 
 --
 -- Lexer: T.Text -> RoffToken
 --
 
-eofline :: Stream s m Char => ParsecT s u m ()
+eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m ()
 eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
 
-spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char
 spacetab = char ' ' <|> char '\t'
 
 characterCodeMap :: M.Map T.Text Char
@@ -303,8 +302,7 @@ expandString = try $ do
   char '*'
   cs <- escapeArg <|> countChar 1 anyChar
   s <- linePartsToText <$> resolveText cs pos
-  getInput >>= setInput . (s <>)
-  return ()
+  addToInput s
 
 -- Parses: '..'
 quoteArg :: PandocMonad m => RoffLexer m T.Text
@@ -316,7 +314,7 @@ escFont = do
   font' <- if T.null font || font == "P"
               then prevFont <$> getState
               else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
-  modifyState $ \st -> st{ prevFont = currentFont st
+  updateState $ \st -> st{ prevFont = currentFont st
                          , currentFont = font' }
   return [Font font']
   where
@@ -372,8 +370,8 @@ lexTable pos = do
   spaces
   opts <- try tableOptions <|> [] <$ optional (char ';')
   case lookup "tab" opts of
-    Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
-    _                              -> modifyState $ \st -> st{ tableTabChar = '\t' }
+    Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c }
+    _                              -> updateState $ \st -> st{ tableTabChar = '\t' }
   spaces
   skipMany lexComment
   spaces
@@ -489,18 +487,18 @@ lexConditional mname = do
   ifPart <- do
       optional $ try $ char '\\' >> newline
       lexGroup
-       <|> do modifyState $ \s -> s{ afterConditional = True }
+       <|> do updateState $ \s -> s{ afterConditional = True }
               t <- manToken
-              modifyState $ \s -> s{ afterConditional = False }
+              updateState $ \s -> s{ afterConditional = False }
               return t
   case mbtest of
     Nothing    -> do
-      putState st  -- reset state, so we don't record macros in skipped section
+      setState st  -- reset state, so we don't record macros in skipped section
       report $ SkippedContent (T.cons '.' mname) pos
       return mempty
     Just True  -> return ifPart
     Just False -> do
-      putState st
+      setState st
       return mempty
 
 expression :: PandocMonad m => RoffLexer m (Maybe Bool)
@@ -515,7 +513,7 @@ expression = do
       _    -> Nothing
   where
     returnValue v = do
-      modifyState $ \st -> st{ lastExpression = v }
+      updateState $ \st -> st{ lastExpression = v }
       return v
 
 lexGroup :: PandocMonad m => RoffLexer m RoffTokens
@@ -536,7 +534,7 @@ lexIncludeFile args = do
       result <- readFileFromDirs dirs $ T.unpack fp
       case result of
         Nothing  -> report $ CouldNotLoadIncludeFile fp pos
-        Just s   -> getInput >>= setInput . (s <>)
+        Just s   -> addToInput s
       return mempty
     []    -> return mempty
 
@@ -564,13 +562,13 @@ lexStringDef args = do -- string definition
      (x:ys) -> do
        let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
        let stringName = linePartsToText x
-       modifyState $ \st ->
+       updateState $ \st ->
          st{ customMacros = M.insert stringName ts (customMacros st) }
    return mempty
 
 lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
 lexMacroDef args = do -- macro definition
-   modifyState $ \st -> st{ roffMode = CopyMode }
+   updateState $ \st -> st{ roffMode = CopyMode }
    (macroName, stopMacro) <-
      case args of
        (x : y : _) -> return (linePartsToText x, linePartsToText y)
@@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition
          _ <- lexArgs
          return ()
    ts <- mconcat <$> manyTill manToken stop
-   modifyState $ \st ->
+   updateState $ \st ->
      st{ customMacros = M.insert macroName ts (customMacros st)
        , roffMode = NormalMode }
    return mempty
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index c4d7bcc93..276d28aaa 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -28,22 +28,22 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
 import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter, tshow)
+import Text.Pandoc.Shared (tshow)
 import Text.Pandoc.XML (fromEntities)
 
 -- | Read twiki from an input string and return a Pandoc document.
-readTWiki :: PandocMonad m
+readTWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTWiki opts s = do
-  res <- readWithM parseTWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TWParser = ParserT Text ParserState
+type TWParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 8d7900de4..981878206 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -53,30 +53,34 @@ import Text.Pandoc.Options
 import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
 import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, tshow)
+import Text.Pandoc.Shared (trim, tshow)
 
 -- | Parse a Textile text and return a Pandoc document.
-readTextile :: PandocMonad m
+readTextile :: (PandocMonad m, ToSources a)
             => ReaderOptions -- ^ Reader options
-            -> Text          -- ^ String to parse (assuming @'\n'@ line endings)
+            -> a
             -> m Pandoc
 readTextile opts s = do
-  parsed <- readWithM parseTextile def{ stateOptions = opts }
-                (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  parsed <- readWithM parseTextile def{ stateOptions = opts } sources
   case parsed of
      Right result -> return result
      Left e       -> throwError e
 
+type TextileParser = ParserT Sources ParserState
 
 -- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
+parseTextile :: PandocMonad m => TextileParser m Pandoc
 parseTextile = do
   many blankline
   startPos <- getPosition
   -- go through once just to get list of reference keys and notes
   -- docMinusKeys is the raw document with blanks where the keys/notes were...
-  let firstPassParser = noteBlock <|> lineClump
-  manyTill firstPassParser eof >>= setInput . T.concat
+  let firstPassParser = do
+        pos <- getPosition
+        t <- noteBlock <|> lineClump
+        return (pos, t)
+  manyTill firstPassParser eof >>= setInput . Sources
   setPosition startPos
   st' <- getState
   let reversedNotes = stateNotes st'
@@ -84,10 +88,10 @@ parseTextile = do
   -- now parse it for real...
   Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME
 
-noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker :: PandocMonad m => TextileParser m Text
 noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
 
-noteBlock :: PandocMonad m => ParserT Text ParserState m Text
+noteBlock :: PandocMonad m => TextileParser m Text
 noteBlock = try $ do
   startPos <- getPosition
   ref <- noteMarker
@@ -102,11 +106,11 @@ noteBlock = try $ do
   return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
 
 -- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
+parseBlocks :: PandocMonad m => TextileParser m Blocks
 parseBlocks = mconcat <$> manyTill block eof
 
 -- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
+blockParsers :: PandocMonad m => [TextileParser m Blocks]
 blockParsers = [ codeBlock
                , header
                , blockQuote
@@ -121,22 +125,22 @@ blockParsers = [ codeBlock
                ]
 
 -- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT Text ParserState m Blocks
+block :: PandocMonad m => TextileParser m Blocks
 block = do
   res <- choice blockParsers <?> "block"
   trace (T.take 60 $ tshow $ B.toList res)
   return res
 
-commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+commentBlock :: PandocMonad m => TextileParser m Blocks
 commentBlock = try $ do
   string "###."
   manyTill anyLine blanklines
   return mempty
 
-codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlock :: PandocMonad m => TextileParser m Blocks
 codeBlock = codeBlockTextile <|> codeBlockHtml
 
-codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockTextile :: PandocMonad m => TextileParser m Blocks
 codeBlockTextile = try $ do
   string "bc." <|> string "pre."
   extended <- option False (True <$ char '.')
@@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text
 trimTrailingNewlines = T.dropWhileEnd (=='\n')
 
 -- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks
+codeBlockHtml :: PandocMonad m => TextileParser m Blocks
 codeBlockHtml = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
   result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
@@ -174,7 +178,7 @@ codeBlockHtml = try $ do
   return $ B.codeBlockWith (ident,classes,kvs) result'''
 
 -- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT Text ParserState m Blocks
+header :: PandocMonad m => TextileParser m Blocks
 header = try $ do
   char 'h'
   level <- digitToInt <$> oneOf "123456"
@@ -186,14 +190,14 @@ header = try $ do
   return $ B.headerWith attr' level name
 
 -- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
+blockQuote :: PandocMonad m => TextileParser m Blocks
 blockQuote = try $ do
   string "bq" >> attributes >> char '.' >> whitespace
   B.blockQuote <$> para
 
 -- Horizontal rule
 
-hrule :: PandocMonad m => ParserT Text st m Blocks
+hrule :: PandocMonad m => TextileParser m Blocks
 hrule = try $ do
   skipSpaces
   start <- oneOf "-*"
@@ -208,39 +212,39 @@ hrule = try $ do
 -- | Can be a bullet list or an ordered list. This implementation is
 -- strict in the nesting, sublist must start at exactly "parent depth
 -- plus one"
-anyList :: PandocMonad m => ParserT Text ParserState m Blocks
+anyList :: PandocMonad m => TextileParser m Blocks
 anyList = try $ anyListAtDepth 1 <* blanklines
 
 -- | This allow one type of list to be nested into an other type,
 -- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                 orderedListAtDepth depth,
                                 definitionList ]
 
 -- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
 
 -- | Bullet List Item of given depth, depth being the number of
 -- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 bulletListItemAtDepth = genericListItemAtDepth '*'
 
 -- | Ordered List of given depth, depth being the number of
 -- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListAtDepth depth = try $ do
   items <- many1 (orderedListItemAtDepth depth)
   return $ B.orderedList items
 
 -- | Ordered List Item of given depth, depth being the number of
 -- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks
 orderedListItemAtDepth = genericListItemAtDepth '#'
 
 -- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks
 genericListItemAtDepth c depth = try $ do
   count depth (char c) >> attributes >> whitespace
   contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do
   return $ contents <> sublist
 
 -- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
+definitionList :: PandocMonad m => TextileParser m Blocks
 definitionList = try $ B.definitionList <$> many1 definitionListItem
 
 -- | List start character.
-listStart :: PandocMonad m => ParserT Text ParserState m ()
+listStart :: PandocMonad m => TextileParser m ()
 listStart = genericListStart '*'
         <|> () <$ genericListStart '#'
         <|> () <$ definitionListStart
 
-genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
+genericListStart :: PandocMonad m => Char -> TextileParser m ()
 genericListStart c = () <$ try (many1 (char c) >> whitespace)
 
-basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
+basicDLStart :: PandocMonad m => TextileParser m ()
 basicDLStart = do
   char '-'
   whitespace
   notFollowedBy newline
 
-definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
+definitionListStart :: PandocMonad m => TextileParser m Inlines
 definitionListStart = try $ do
   basicDLStart
   trimInlines . mconcat <$>
@@ -281,15 +285,15 @@ definitionListStart = try $ do
 -- the term defined, then spaces and ":=". The definition follows, on
 -- the same single line, or spaned on multiple line, after a line
 -- break.
-definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks])
 definitionListItem = try $ do
   term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart
   def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
   return (term, def')
-  where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+  where inlineDef :: PandocMonad m => TextileParser m [Blocks]
         inlineDef = liftM (\d -> [B.plain d])
                     $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
-        multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
+        multilineDef :: PandocMonad m => TextileParser m [Blocks]
         multilineDef = try $ do
           optional whitespace >> newline
           s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
@@ -300,7 +304,7 @@ definitionListItem = try $ do
 -- raw content
 
 -- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => TextileParser m Blocks
 rawHtmlBlock = try $ do
   skipMany spaceChar
   (_,b) <- htmlTag isBlockTag
@@ -308,14 +312,14 @@ rawHtmlBlock = try $ do
   return $ B.rawBlock "html" b
 
 -- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks
 rawLaTeXBlock' = do
   guardEnabled Ext_raw_tex
   B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
 
 
 -- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT Text ParserState m Blocks
+para :: PandocMonad m => TextileParser m Blocks
 para = B.para . trimInlines . mconcat <$> many1 inline
 
 -- Tables
@@ -326,7 +330,7 @@ toAlignment '>' = AlignRight
 toAlignment '=' = AlignCenter
 toAlignment _   = AlignDefault
 
-cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment)
 cellAttributes = try $ do
   isHeader <- option False (True <$ char '_')
   -- we just ignore colspan and rowspan markers:
@@ -339,7 +343,7 @@ cellAttributes = try $ do
   return (isHeader, alignment)
 
 -- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks)
 tableCell = try $ do
   char '|'
   (isHeader, alignment) <- option (False, AlignDefault) cellAttributes
@@ -350,7 +354,7 @@ tableCell = try $ do
   return ((isHeader, alignment), B.plain content)
 
 -- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)]
 tableRow = try $ do
   -- skip optional row attributes
   optional $ try $ do
@@ -360,7 +364,7 @@ tableRow = try $ do
   many1 tableCell <* char '|' <* blankline
 
 -- | A table with an optional header.
-table :: PandocMonad m => ParserT Text ParserState m Blocks
+table :: PandocMonad m => TextileParser m Blocks
 table = try $ do
   -- ignore table attributes
   caption <- option mempty $ try $ do
@@ -388,7 +392,7 @@ table = try $ do
     (TableFoot nullAttr [])
 
 -- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
+ignorableRow :: PandocMonad m => TextileParser m ()
 ignorableRow = try $ do
   char '|'
   oneOf ":^-~"
@@ -397,7 +401,7 @@ ignorableRow = try $ do
   _ <- anyLine
   return ()
 
-explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> TextileParser m ()
 explicitBlockStart name = try $ do
   string (T.unpack name)
   attributes
@@ -409,8 +413,8 @@ explicitBlockStart name = try $ do
 -- However, they can be used to set HTML/CSS attributes when needed.
 maybeExplicitBlock :: PandocMonad m
                    => Text  -- ^ block tag name
-                   -> ParserT Text ParserState m Blocks -- ^ implicit block
-                   -> ParserT Text ParserState m Blocks
+                   -> TextileParser m Blocks -- ^ implicit block
+                   -> TextileParser m Blocks
 maybeExplicitBlock name blk = try $ do
   optional $ explicitBlockStart name
   blk
@@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do
 
 
 -- | Any inline element
-inline :: PandocMonad m => ParserT Text ParserState m Inlines
+inline :: PandocMonad m => TextileParser m Inlines
 inline = choice inlineParsers <?> "inline"
 
 -- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
+inlineParsers :: PandocMonad m => [TextileParser m Inlines]
 inlineParsers = [ str
                 , whitespace
                 , endline
@@ -447,7 +451,7 @@ inlineParsers = [ str
                 ]
 
 -- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+inlineMarkup :: PandocMonad m => TextileParser m Inlines
 inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       , simpleInline (string "**") B.strong
                       , simpleInline (string "__") B.emph
@@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                       ]
 
 -- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT Text st m Inlines
+mark :: PandocMonad m => TextileParser m Inlines
 mark = try $ char '(' >> (try tm <|> try reg <|> copy)
 
-reg :: PandocMonad m => ParserT Text st m Inlines
+reg :: PandocMonad m => TextileParser m Inlines
 reg = do
   oneOf "Rr"
   char ')'
   return $ B.str "\174"
 
-tm :: PandocMonad m => ParserT Text st m Inlines
+tm :: PandocMonad m => TextileParser m Inlines
 tm = do
   oneOf "Tt"
   oneOf "Mm"
   char ')'
   return $ B.str "\8482"
 
-copy :: PandocMonad m => ParserT Text st m Inlines
+copy :: PandocMonad m => TextileParser m Inlines
 copy = do
   oneOf "Cc"
   char ')'
   return $ B.str "\169"
 
-note :: PandocMonad m => ParserT Text ParserState m Inlines
+note :: PandocMonad m => TextileParser m Inlines
 note = try $ do
   ref <- char '[' *> many1 digit <* char ']'
   notes <- stateNotes <$> getState
@@ -507,13 +511,13 @@ wordBoundaries :: [Char]
 wordBoundaries = markupChars <> stringBreakers
 
 -- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
+hyphenedWords :: PandocMonad m => TextileParser m Text
 hyphenedWords = do
   x <- wordChunk
   xs <-  many (try $ char '-' >> wordChunk)
   return $ T.intercalate "-" (x:xs)
 
-wordChunk :: PandocMonad m => ParserT Text ParserState m Text
+wordChunk :: PandocMonad m => TextileParser m Text
 wordChunk = try $ do
   hd <- noneOf wordBoundaries
   tl <- many ( noneOf wordBoundaries <|>
@@ -522,7 +526,7 @@ wordChunk = try $ do
   return $ T.pack $ hd:tl
 
 -- | Any string
-str :: PandocMonad m => ParserT Text ParserState m Inlines
+str :: PandocMonad m => TextileParser m Inlines
 str = do
   baseStr <- hyphenedWords
   -- RedCloth compliance : if parsed word is uppercase and immediately
@@ -535,11 +539,11 @@ str = do
   return $ B.str fullStr
 
 -- | Some number of space chars
-whitespace :: PandocMonad m => ParserT Text st m Inlines
+whitespace :: PandocMonad m => TextileParser m Inlines
 whitespace = many1 spaceChar >> return B.space <?> "whitespace"
 
 -- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT Text ParserState m Inlines
+endline :: PandocMonad m => TextileParser m Inlines
 endline = try $ do
   newline
   notFollowedBy blankline
@@ -547,18 +551,18 @@ endline = try $ do
   notFollowedBy rawHtmlBlock
   return B.linebreak
 
-rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
+rawHtmlInline :: PandocMonad m => TextileParser m Inlines
 rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
 
 -- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines
 rawLaTeXInline' = try $ do
   guardEnabled Ext_raw_tex
   B.rawInline "latex" <$> rawLaTeXInline
 
 -- | Textile standard link syntax is "label":target. But we
 -- can also have ["label":target].
-link :: PandocMonad m => ParserT Text ParserState m Inlines
+link :: PandocMonad m => TextileParser m Inlines
 link = try $ do
   bracketed <- (True <$ char '[') <|> return False
   char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -578,7 +582,7 @@ link = try $ do
               else B.spanWith attr $ B.link url "" name'
 
 -- | image embedding
-image :: PandocMonad m => ParserT Text ParserState m Inlines
+image :: PandocMonad m => TextileParser m Inlines
 image = try $ do
   char '!' >> notFollowedBy space
   (ident, cls, kvs) <- attributes
@@ -590,51 +594,51 @@ image = try $ do
   char '!'
   return $ B.imageWith attr src alt (B.str alt)
 
-escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedInline :: PandocMonad m => TextileParser m Inlines
 escapedInline = escapedEqs <|> escapedTag
 
-escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs :: PandocMonad m => TextileParser m Inlines
 escapedEqs = B.str . T.pack <$>
   try (string "==" *> manyTill anyChar' (try $ string "=="))
 
 -- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag :: PandocMonad m => TextileParser m Inlines
 escapedTag = B.str . T.pack <$>
   try (string "<notextile>" *>
          manyTill anyChar' (try $ string "</notextile>"))
 
 -- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol :: PandocMonad m => TextileParser m Inlines
 symbol = B.str . T.singleton <$> (notFollowedBy newline *>
                                   notFollowedBy rawHtmlBlock *>
                                   oneOf wordBoundaries)
 
 -- | Inline code
-code :: PandocMonad m => ParserT Text ParserState m Inlines
+code :: PandocMonad m => TextileParser m Inlines
 code = code1 <|> code2
 
 -- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT Text ParserState m Char
+anyChar' :: PandocMonad m => TextileParser m Char
 anyChar' =
   satisfy (/='\n') <|>
   try (char '\n' <* notFollowedBy blankline)
 
-code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 :: PandocMonad m => TextileParser m Inlines
 code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
 
-code2 :: PandocMonad m => ParserT Text ParserState m Inlines
+code2 :: PandocMonad m => TextileParser m Inlines
 code2 = do
   htmlTag (tagOpen (=="tt") null)
   B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
 
 -- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT Text ParserState m Attr
+attributes :: PandocMonad m => TextileParser m Attr
 attributes = foldl' (flip ($)) ("",[],[]) <$>
   try (do special <- option id specialAttribute
           attrs <- many attribute
           return (special : attrs))
 
-specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 specialAttribute = do
   alignStr <- ("center" <$ char '=') <|>
     ("justify" <$ try (string "<>")) <|>
@@ -643,11 +647,11 @@ specialAttribute = do
   notFollowedBy spaceChar
   return $ addStyle $ T.pack $ "text-align:" ++ alignStr
 
-attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => TextileParser m (Attr -> Attr)
 attribute = try $
   (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
 
-classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 classIdAttr = try $ do -- (class class #id)
   char '('
   ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
@@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id)
        classes'
          -> return $ \(_,_,keyvals) -> ("",classes',keyvals)
 
-styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 styleAttr = do
   style <- try $ enclosed (char '{') (char '}') anyChar'
   return $ addStyle $ T.pack style
@@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) =
   where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
         style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
 
-langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => TextileParser m (Attr -> Attr)
 langAttr = do
   lang <- try $ enclosed (char '[') (char ']') alphaNum
   return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
 
 -- | Parses material surrounded by a parser.
 surrounded :: (PandocMonad m, Show t)
-           => ParserT Text st m t   -- ^ surrounding parser
-           -> ParserT Text st m a   -- ^ content parser (to be used repeatedly)
-           -> ParserT Text st m [a]
+           => ParserT Sources st m t   -- ^ surrounding parser
+           -> ParserT Sources st m a   -- ^ content parser (to be used repeatedly)
+           -> ParserT Sources st m [a]
 surrounded border =
   enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
 
 simpleInline :: PandocMonad m
-             => ParserT Text ParserState m t        -- ^ surrounding parser
+             => TextileParser m t        -- ^ surrounding parser
              -> (Inlines -> Inlines)                  -- ^ Inline constructor
-             -> ParserT Text ParserState m Inlines  -- ^ content parser (to be used repeatedly)
+             -> TextileParser m Inlines  -- ^ content parser (to be used repeatedly)
 simpleInline border construct = try $ do
   notAfterString
   border *> notFollowedBy (oneOf " \t\n\r")
@@ -700,7 +704,7 @@ simpleInline border construct = try $ do
            then body
            else B.spanWith attr body
 
-groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines
 groupedInlineMarkup = try $ do
     char '['
     sp1 <- option mempty $ B.space <$ whitespace
@@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do
     char ']'
     return $ sp1 <> result <> sp2
 
-eof' :: Monad m => ParserT Text s m Char
+eof' :: Monad m => ParserT Sources s m Char
 eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index fb4b662c5..5c414fdec 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -30,23 +30,23 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Logging (Verbosity (..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, safeRead)
+import Text.Pandoc.Shared (safeRead)
 import Text.Pandoc.XML (fromEntities)
 import Text.Printf (printf)
 
 -- | Read TikiWiki from an input string and return a Pandoc document.
-readTikiWiki :: PandocMonad m
+readTikiWiki :: (PandocMonad m, ToSources a)
           => ReaderOptions
-          -> Text
+          -> a
           -> m Pandoc
 readTikiWiki opts s = do
-  res <- readWithM parseTikiWiki def{ stateOptions = opts }
-             (crFilter s <> "\n\n")
+  let sources = ensureFinalNewlines 2 (toSources s)
+  res <- readWithM parseTikiWiki def{ stateOptions = opts } sources
   case res of
        Left e  -> throwError e
        Right d -> return d
 
-type TikiWikiParser = ParserT Text ParserState
+type TikiWikiParser = ParserT Sources ParserState
 
 --
 -- utility functions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index d355a4b55..6f92f0063 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (space, spaces, uri)
-import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI)
+import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
 
-type T2T = ParserT Text ParserState (Reader T2TMeta)
+type T2T = ParserT Sources ParserState (Reader T2TMeta)
 
 -- | An object for the T2T macros meta information
 -- the contents of each field is simply substituted verbatim into the file
@@ -68,15 +68,15 @@ getT2TMeta = do
                      (intercalate ", " inps) outp
 
 -- | Read Txt2Tags from an input string returning a Pandoc document
-readTxt2Tags :: PandocMonad m
+readTxt2Tags :: (PandocMonad m, ToSources a)
              => ReaderOptions
-             -> Text
+             -> a
              -> m Pandoc
 readTxt2Tags opts s = do
+  let sources = ensureFinalNewlines 2 (toSources s)
   meta <- getT2TMeta
   let parsed = flip runReader meta $
-        readWithM parseT2T (def {stateOptions = opts}) $
-        crFilter s <> "\n\n"
+        readWithM parseT2T (def {stateOptions = opts}) sources
   case parsed of
     Right result -> return result
     Left e       -> throwError e
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 74dac5ea7..460f304c4 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
                             many1Till, orderedListMarker, readWithM,
                             registerHeader, spaceChar, stateMeta,
                             stateOptions, uri, manyTillChar, manyChar, textStr,
-                            many1Char, countChar, many1TillChar)
-import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+                            many1Char, countChar, many1TillChar,
+                            alphaNum, anyChar, char, newline, noneOf, oneOf,
+                            space, spaces, string)
+import Text.Pandoc.Sources (ToSources(..), Sources)
+import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast,
                            isURI, tshow)
-import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
-                         spaces, string)
 import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
                                manyTill, notFollowedBy, option, skipMany1)
 import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
 
-readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readVimwiki :: (PandocMonad m, ToSources a)
+            => ReaderOptions
+            -> a
+            -> m Pandoc
 readVimwiki opts s = do
-  res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s
+  let sources = toSources s
+  res <- readWithM parseVimwiki def{ stateOptions = opts } sources
   case res of
        Left e       -> throwError e
        Right result -> return result
 
-type VwParser = ParserT Text ParserState
+type VwParser = ParserT Sources ParserState
 
 
 -- constants
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index e389c1727..920edca7b 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -298,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines
                        (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
                        <> go (T.drop 1 s2)
 
+{-# DEPRECATED crFilter "readers filter crs automatically" #-}
 -- | Strip out DOS line endings.
 crFilter :: T.Text -> T.Text
 crFilter = T.filter (/= '\r')
diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs
new file mode 100644
index 000000000..5511ccfb8
--- /dev/null
+++ b/src/Text/Pandoc/Sources.hs
@@ -0,0 +1,195 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.Sources
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Defines Sources object to be used as input to pandoc parsers and redefines Char
+parsers so they get source position information from it.
+-}
+
+module Text.Pandoc.Sources
+  ( Sources(..)
+  , ToSources(..)
+  , UpdateSourcePos(..)
+  , sourcesToText
+  , initialSourceName
+  , addToSources
+  , ensureFinalNewlines
+  , addToInput
+  , satisfy
+  , oneOf
+  , noneOf
+  , anyChar
+  , char
+  , string
+  , newline
+  , space
+  , spaces
+  , letter
+  , digit
+  , hexDigit
+  , alphaNum
+  )
+where
+import qualified Text.Parsec as P
+import Text.Parsec (Stream(..), ParsecT)
+import Text.Parsec.Pos as P
+import Data.Text (Text)
+import qualified Data.Text as T
+import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
+import Data.String (IsString(..))
+import qualified Data.List.NonEmpty as NonEmpty
+
+-- | A list of inputs labeled with source positions.  It is assumed
+-- that the 'Text's have @\n@ line endings.
+newtype Sources = Sources { unSources :: [(SourcePos, Text)] }
+  deriving (Show, Semigroup, Monoid)
+
+instance Monad m => Stream Sources m Char where
+  uncons (Sources []) = return Nothing
+  uncons (Sources ((pos,t):rest)) =
+    case T.uncons t of
+      Nothing -> uncons (Sources rest)
+      Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest))
+
+instance IsString Sources where
+  fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))]
+
+class ToSources a where
+  toSources :: a -> Sources
+
+instance ToSources Text where
+  toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)]
+
+instance ToSources [(FilePath, Text)] where
+  toSources = Sources
+            . map (\(fp,t) ->
+                    (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n'))
+
+instance ToSources Sources where
+  toSources = id
+
+sourcesToText :: Sources -> Text
+sourcesToText (Sources xs) = mconcat $ map snd xs
+
+addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
+addToSources pos t = do
+  curpos <- P.getPosition
+  Sources xs <- P.getInput
+  let xs' = case xs of
+               [] -> []
+               ((_,t'):rest) -> (curpos,t'):rest
+  P.setInput $ Sources ((pos, T.filter (/='\r') t):xs')
+
+ensureFinalNewlines :: Int -- ^ number of trailing newlines
+                    -> Sources
+                    -> Sources
+ensureFinalNewlines n (Sources xs) =
+  case NonEmpty.nonEmpty xs of
+    Nothing -> Sources [(initialPos "", T.replicate n "\n")]
+    Just lst ->
+      case NonEmpty.last lst of
+        (spos, t) ->
+          case T.length (T.takeWhileEnd (=='\n') t) of
+            len | len >= n -> Sources xs
+                | otherwise -> Sources (NonEmpty.init lst ++
+                                        [(spos,
+                                          t <> T.replicate (n - len) "\n")])
+
+class UpdateSourcePos s c where
+  updateSourcePos :: SourcePos -> c -> s -> SourcePos
+
+instance UpdateSourcePos Text Char where
+   updateSourcePos pos c _ = updatePosChar pos c
+
+instance UpdateSourcePos Sources Char where
+   updateSourcePos pos c sources =
+     case sources of
+       Sources [] -> updatePosChar pos c
+       Sources ((_,t):(pos',_):_)
+         | T.null t  -> pos'
+       Sources _ ->
+           case c of
+             '\n' -> incSourceLine (setSourceColumn pos 1) 1
+             '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4))
+             _    -> incSourceColumn pos 1
+
+-- | Get name of first source in 'Sources'.
+initialSourceName :: Sources -> FilePath
+initialSourceName (Sources []) = ""
+initialSourceName (Sources ((pos,_):_)) = sourceName pos
+
+-- | Add some text to the beginning of the input sources.
+-- This simplifies code that expands macros.
+addToInput :: Monad m => Text -> ParsecT Sources u m ()
+addToInput t = do
+  Sources xs <- P.getInput
+  case xs of
+    [] -> P.setInput $ Sources [(initialPos "",t)]
+    (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest)
+
+-- We need to redefine the parsers in Text.Parsec.Char so that they
+-- update source positions properly from the Sources stream.
+
+satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => (Char -> Bool) -> ParsecT s u m Char
+satisfy f = P.tokenPrim show updateSourcePos matcher
+ where
+  matcher c = if f c then Just c else Nothing
+
+oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => [Char] -> ParsecT s u m Char
+oneOf cs = satisfy (`elem` cs)
+
+noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m Char
+noneOf cs = satisfy (`notElem` cs)
+
+anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+anyChar = satisfy (const True)
+
+char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+     => Char -> ParsecT s u m Char
+char c = satisfy (== c)
+
+string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => [Char] -> ParsecT s u m [Char]
+string = mapM char
+
+newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+        => ParsecT s u m Char
+newline = satisfy (== '\n')
+
+space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+space = satisfy isSpace
+
+spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m ()
+spaces = P.skipMany space P.<?> "white space"
+
+letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+       => ParsecT s u m Char
+letter = satisfy isLetter
+
+alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+alphaNum = satisfy isAlphaNum
+
+digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+      => ParsecT s u m Char
+digit = satisfy isDigit
+
+hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
+         => ParsecT s u m Char
+hexDigit = satisfy isHexDigit
diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs
index 6e38da21a..f055ab197 100644
--- a/test/Tests/Readers/Markdown.hs
+++ b/test/Tests/Readers/Markdown.hs
@@ -374,8 +374,8 @@ tests = [ testGroup "inline code"
         , testGroup "lhs"
           [ test (purely $ readMarkdown def{ readerExtensions = enableExtension
                        Ext_literate_haskell pandocExtensions })
-              "inverse bird tracks and html" $
-              "> a\n\n< b\n\n<div>\n"
+              "inverse bird tracks and html"
+              $ ("> a\n\n< b\n\n<div>\n" :: Text)
               =?> codeBlockWith ("",["haskell","literate"],[]) "a"
                   <>
                   codeBlockWith ("",["haskell"],[]) "b"
-- 
cgit v1.2.3


From 05ea507bd75e0bb4bbb8f25cad5fa2f02e4f6796 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 May 2021 15:26:11 -0600
Subject: T.P.Parsing: improve include file functions.

Remove old `insertIncludedFileF`. [API change]
Give `insertIncludedFile` a more general type, allowing it
to be used where `insertIncludedFileF` was.
---
 src/Text/Pandoc/Parsing.hs             | 61 +++++++++++++++++-----------------
 src/Text/Pandoc/Readers/Org/Blocks.hs  |  3 +-
 src/Text/Pandoc/Readers/Org/Parsing.hs |  3 +-
 3 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 11c4c7a62..cbe9993c6 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -121,7 +121,6 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              (<+?>),
                              extractIdClass,
                              insertIncludedFile,
-                             insertIncludedFileF,
                              -- * Re-exports from Text.Parsec
                              Stream,
                              runParser,
@@ -1638,12 +1637,15 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
     cls'   = maybe cls T.words $ lookup "class" kvs
     kvs'   = filter (\(k,_) -> k /= "id" || k /= "class") kvs
 
-insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT a st m (mf Blocks)
-                    -> (Text -> a)
-                    -> [FilePath] -> FilePath
-                    -> ParserT a st m (mf Blocks)
-insertIncludedFile' blocks totoks dirs f = do
+insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
+                   => ParserT a st m b -- ^ parser to apply
+                   -> (Text -> a) -- ^ convert Text to stream type
+                   -> [FilePath]  -- ^ search path (directories)
+                   -> FilePath    -- ^ path of file to include
+                   -> Maybe Int   -- ^ start line (negative counts from end)
+                   -> Maybe Int   -- ^ end line (negative counts from end)
+                   -> ParserT a st m b
+insertIncludedFile parser toStream dirs f mbstartline mbendline = do
   oldPos <- getPosition
   oldInput <- getInput
   containers <- getIncludeFiles <$> getState
@@ -1652,33 +1654,32 @@ insertIncludedFile' blocks totoks dirs f = do
   updateState $ addIncludeFile $ T.pack f
   mbcontents <- readFileFromDirs dirs f
   contents <- case mbcontents of
-                   Just s -> return s
+                   Just s -> return $ exciseLines mbstartline mbendline s
                    Nothing -> do
                      report $ CouldNotLoadIncludeFile (T.pack f) oldPos
                      return ""
-  setPosition $ newPos f 1 1
-  setInput $ totoks contents
-  bs <- blocks
+  setInput $ toStream contents
+  setPosition $ newPos f (fromMaybe 1 mbstartline) 1
+  result <- parser
   setInput oldInput
   setPosition oldPos
   updateState dropLatestIncludeFile
-  return bs
+  return result
+
+exciseLines :: Maybe Int -> Maybe Int -> Text -> Text
+exciseLines Nothing Nothing t = t
+exciseLines mbstartline mbendline t =
+  T.unlines $ take (endline' - (startline' - 1))
+            $ drop (startline' - 1) contentLines
+ where
+  contentLines = T.lines t
+  numLines = length contentLines
+  startline' = case mbstartline of
+                 Nothing -> 1
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
+  endline' = case mbendline of
+                 Nothing -> numLines
+                 Just x | x >= 0 -> x
+                        | otherwise -> numLines + x -- negative from end
 
--- | Parse content of include file as blocks. Circular includes result in an
--- @PandocParseError@.
-insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
-                   => ParserT [a] st m Blocks
-                   -> (Text -> [a])
-                   -> [FilePath] -> FilePath
-                   -> ParserT [a] st m Blocks
-insertIncludedFile blocks totoks dirs f =
-  runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f
-
--- TODO: replace this with something using addToSources.
--- | Parse content of include file as future blocks. Circular includes result in
--- an @PandocParseError@.
-insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
-                    => ParserT Sources st m (Future st Blocks)
-                    -> [FilePath] -> FilePath
-                    -> ParserT Sources st m (Future st Blocks)
-insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)])
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 883434cdc..5e15c2c36 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -44,6 +44,7 @@ import Data.List.NonEmpty (nonEmpty)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.Walk as Walk
+import Text.Pandoc.Sources (ToSources(..))
 
 --
 -- parsing blocks
@@ -527,7 +528,7 @@ include = try $ do
                      _ -> nullAttr
         return $ pure . B.codeBlockWith attr <$> parseRaw
       _ -> return $ return . B.fromList . blockFilter params <$> blockList
-  insertIncludedFileF blocksParser ["."] filename
+  insertIncludedFile blocksParser toSources ["."] filename Nothing Nothing
  where
   includeTarget :: PandocMonad m => OrgParser m FilePath
   includeTarget = do
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index c7ea02815..701bf3398 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -63,8 +63,7 @@ module Text.Pandoc.Readers.Org.Parsing
   , ellipses
   , citeKey
   , gridTableWith
-  , insertIncludedFileF
-  -- * Re-exports from Text.Pandoc.Parsec
+  , insertIncludedFile
   , runParser
   , runParserT
   , getInput
-- 
cgit v1.2.3


From 41a3ac9da99c2701fa7e6adbc85da91f191620f4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 May 2021 16:26:11 -0600
Subject: RST reader: use `insertIncludedFile` from T.P.Parsing...

instead of reproducing much of its code.
---
 src/Text/Pandoc/Readers/RST.hs | 94 ++++++++++++++++--------------------------
 1 file changed, 36 insertions(+), 58 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a3fcf028c..bb70b2620 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -27,8 +27,7 @@ import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines)
 import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem,
-                                      readFileFromDirs, getTimestamp)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, getTimestamp)
 import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
@@ -453,59 +452,38 @@ encoding
 -}
 
 includeDirective :: PandocMonad m
-                 => Text -> [(Text, Text)] -> Text
+                 => Text
+                 -> [(Text, Text)]
+                 -> Text
                  -> RSTParser m Blocks
 includeDirective top fields body = do
-  let f = trim top
-  guard $ not (T.null f)
+  let f = T.unpack $ trim top
+  guard $ not $ null f
   guard $ T.null (trim body)
-  -- options
-  let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
-  let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
-  oldPos <- getPosition
-  containers <- stateContainers <$> getState
-  when (f `elem` containers) $
-    throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
-  updateState $ \s -> s{ stateContainers = f : stateContainers s }
-  mbContents <- readFileFromDirs ["."] $ T.unpack f
-  contentLines <- case mbContents of
-                       Just s -> return $ T.lines s
-                       Nothing -> do
-                         logMessage $ CouldNotLoadIncludeFile f oldPos
-                         return []
-  let numLines = length contentLines
-  let startLine' = case startLine of
-                        Nothing            -> 1
-                        Just x | x >= 0    -> x
-                               | otherwise -> numLines + x -- negative from end
-  let endLine' = case endLine of
-                        Nothing            -> numLines + 1
-                        Just x | x >= 0    -> x
-                               | otherwise -> numLines + x -- negative from end
-  let contentLines' =   drop (startLine' - 1)
-                      $ take (endLine' - 1) contentLines
-  let contentLines'' = (case trim <$> lookup "end-before" fields of
-                             Just patt -> takeWhile (not . (patt `T.isInfixOf`))
-                             Nothing   -> id) .
-                       (case trim <$> lookup "start-after" fields of
-                             Just patt -> drop 1 .
-                                            dropWhile (not . (patt `T.isInfixOf`))
-                             Nothing   -> id) $ contentLines'
-  let contents' = T.unlines contentLines''
-  case lookup "code" fields of
-       Just lang -> do
-         let classes =  maybe [] T.words (lookup "class" fields)
-         let ident = maybe "" trimr $ lookup "name" fields
-         codeblock ident classes fields (trimr lang) contents' False
-       Nothing   -> case lookup "literal" fields of
-                         Just _  -> return $ B.rawBlock "rst" contents'
-                         Nothing -> do
-                           addToSources (initialPos (T.unpack f))
-                              (contents' <> "\n")
-                           updateState $ \s -> s{ stateContainers =
-                                         tail $ stateContainers s }
-                           return mempty
-
+  let startLine = lookup "start-line" fields >>= safeRead
+  let endLine = lookup "end-line" fields >>= safeRead
+  let classes =  maybe [] T.words (lookup "class" fields)
+  let ident = maybe "" trimr $ lookup "name" fields
+  let parser =
+       case lookup "code" fields of
+         Just lang ->
+           (codeblock ident classes fields (trimr lang) False
+            . sourcesToText) <$> getInput
+         Nothing   ->
+           case lookup "literal" fields of
+             Just _  -> B.rawBlock "rst" . sourcesToText <$> getInput
+             Nothing -> parseBlocks
+  let selectLines =
+        (case trim <$> lookup "end-before" fields of
+                         Just patt -> takeWhile (not . (patt `T.isInfixOf`))
+                         Nothing   -> id) .
+        (case trim <$> lookup "start-after" fields of
+                         Just patt -> drop 1 .
+                                        dropWhile (not . (patt `T.isInfixOf`))
+                         Nothing   -> id)
+  let toStream t =
+        toSources [(f, T.unlines . selectLines . T.lines $ t)]
+  insertIncludedFile parser toStream ["."] f startLine endLine
 
 --
 -- list blocks
@@ -734,8 +712,8 @@ directive' = do
                                      ""   -> stateRstHighlight def
                                      lang -> Just lang })
         x | x == "code" || x == "code-block" || x == "sourcecode" ->
-          codeblock name classes (map (second trimr) fields)
-             (trim top) body True
+          return $ codeblock name classes (map (second trimr) fields)
+             (trim top) True body
         "aafig" -> do
           let attribs = (name, ["aafig"], map (second trimr) fields)
           return $ B.codeBlockWith attribs $ stripTrailingNewlines body
@@ -1021,10 +999,10 @@ toChunks = dropWhile T.null
                           then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}"
                           else s
 
-codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Text -> Bool
-          -> RSTParser m Blocks
-codeblock ident classes fields lang body rmTrailingNewlines =
-  return $ B.codeBlockWith attribs $ stripTrailingNewlines' body
+codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Bool -> Text
+          -> Blocks
+codeblock ident classes fields lang rmTrailingNewlines body =
+  B.codeBlockWith attribs $ stripTrailingNewlines' body
     where stripTrailingNewlines' = if rmTrailingNewlines
                                      then stripTrailingNewlines
                                      else id
-- 
cgit v1.2.3


From b2398cd74744ce34fa10cc34f4051555f21feb2c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 May 2021 18:42:42 -0600
Subject: Org reader: Resolve org includes relative to ...

...the directory containing the file containing the
INCLUDE directive.  Closes #5501.
---
 src/Text/Pandoc/Readers/Org/Blocks.hs  | 6 ++++--
 src/Text/Pandoc/Readers/Org/Parsing.hs | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 5e15c2c36..f18d2f9a7 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -40,7 +40,7 @@ import Data.List (foldl', intersperse)
 import Data.Maybe (fromMaybe, isJust, isNothing)
 import Data.Text (Text)
 import Data.List.NonEmpty (nonEmpty)
-
+import System.FilePath
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.Walk as Walk
@@ -528,7 +528,9 @@ include = try $ do
                      _ -> nullAttr
         return $ pure . B.codeBlockWith attr <$> parseRaw
       _ -> return $ return . B.fromList . blockFilter params <$> blockList
-  insertIncludedFile blocksParser toSources ["."] filename Nothing Nothing
+  currentDir <- takeDirectory . sourceName <$> getPosition
+  insertIncludedFile blocksParser toSources
+                     [currentDir] filename Nothing Nothing
  where
   includeTarget :: PandocMonad m => OrgParser m FilePath
   includeTarget = do
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 701bf3398..f0949e205 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -99,6 +99,7 @@ module Text.Pandoc.Readers.Org.Parsing
   , getState
   , updateState
   , SourcePos
+  , sourceName
   , getPosition
   ) where
 
-- 
cgit v1.2.3


From 2a2e08d82335d45c982f4acf14b62130365f2c8e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 9 May 2021 18:50:41 -0600
Subject: RST reader: seek include files in the directory...

...of the file containing the include directive, as
RST requires.

Closes #6632.
---
 src/Text/Pandoc/Readers/RST.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index bb70b2620..35292d949 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -38,6 +38,7 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
 import qualified Text.Pandoc.UTF8 as UTF8
 import Data.Time.Format
+import System.FilePath (takeDirectory)
 
 -- TODO:
 -- [ ] .. parsed-literal
@@ -483,7 +484,8 @@ includeDirective top fields body = do
                          Nothing   -> id)
   let toStream t =
         toSources [(f, T.unlines . selectLines . T.lines $ t)]
-  insertIncludedFile parser toStream ["."] f startLine endLine
+  currentDir <- takeDirectory . sourceName <$> getPosition
+  insertIncludedFile parser toStream [currentDir] f startLine endLine
 
 --
 -- list blocks
-- 
cgit v1.2.3


From ff7176de80726a5db4c4bc4c5a3296b2bcdab192 Mon Sep 17 00:00:00 2001
From: nuew <code@nuew.net>
Date: Mon, 10 May 2021 11:26:32 -0400
Subject: epub Writer: Fix belongs-to-collection XML id choice (#7267)

The epub writer previously used the same XML id for both the book
identifier and the epub collection. This causes an error on epubcheck.
---
 src/Text/Pandoc/Writers/EPUB.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 3f10cb437..3c092a2c1 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -991,12 +991,12 @@ metadataElement version md currentTime =
                showDateTimeISO8601 currentTime | version == EPUB3 ]
         belongsToCollectionNodes = 
             maybe []
-                (\belongsToCollection -> (unode "meta" !  [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection )
+                (\belongsToCollection -> (unode "meta" !  [("property", "belongs-to-collection"), ("id", "epub-collection-1")] $ belongsToCollection )
                 :
-                [unode "meta" !  [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: Text) ])
+                [unode "meta" !  [("refines", "#epub-collection-1"), ("property", "collection-type")] $ ("series" :: Text) ])
                 (epubBelongsToCollection md)++
             maybe []
-                (\groupPosition -> [unode "meta" !  [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ])
+                (\groupPosition -> [unode "meta" !  [("refines", "#epub-collection-1"), ("property", "group-position")] $ groupPosition ])
                 (epubGroupPosition md)
         dcTag n s = unode ("dc:" <> n) s
         dcTag' n s = [dcTag n s]
-- 
cgit v1.2.3


From 2bd5d0cafbe67fa723f49fd2d35d913594fdb935 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 11 May 2021 07:52:05 -0600
Subject: LaTeX writer: better handling of line breaks in simple tables.

Now we also handle the case where they're embedded in other
elements, e.g. spans. Closes #7272.
---
 src/Text/Pandoc/Writers/LaTeX/Table.hs |  4 +---
 test/command/7272.md                   | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 test/command/7272.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index 6f8386937..16f63314b 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -219,9 +219,7 @@ footRows (Ann.TableFoot _attr rows) = map headerRowCells rows
 -- we need to go to some lengths to get line breaks working:
 -- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}.
 fixLineBreaks :: Block -> Block
-fixLineBreaks (Para ils)  = Para $ fixLineBreaks' ils
-fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils
-fixLineBreaks x           = x
+fixLineBreaks = walk fixLineBreaks'
 
 fixLineBreaks' :: [Inline] -> [Inline]
 fixLineBreaks' ils = case splitBy (== LineBreak) ils of
diff --git a/test/command/7272.md b/test/command/7272.md
new file mode 100644
index 000000000..d3a3b2137
--- /dev/null
+++ b/test/command/7272.md
@@ -0,0 +1,24 @@
+```
+% pandoc -t latex -f html
+<table>
+  <tbody>
+    <tr>
+      <td>
+        <span>
+          text
+          <br />
+          text2
+        </span>
+      </td>
+    </tr>
+  </tbody>
+</table>
+^D
+\begin{longtable}[]{@{}
+  >{\raggedright\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{1.00}}@{}}
+\toprule
+\endhead
+{\vtop{\hbox{\strut  text}\hbox{\strut text2 }}} \\
+\bottomrule
+\end{longtable}
+```
-- 
cgit v1.2.3


From a66e50840bdc3c92e661257a4a0c5fec34aa25bc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 11 May 2021 09:01:36 -0600
Subject: T.P.XML.Light - add Eq, Ord instances...

for Content, Element, Attr, CDataKind.
[API change]
---
 src/Text/Pandoc/XML/Light/Types.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/XML/Light/Types.hs b/src/Text/Pandoc/XML/Light/Types.hs
index 03fdc2e4d..ba602ac1f 100644
--- a/src/Text/Pandoc/XML/Light/Types.hs
+++ b/src/Text/Pandoc/XML/Light/Types.hs
@@ -53,7 +53,7 @@ type Line     = Integer
 data Content  = Elem Element
               | Text CData
               | CRef Text
-                deriving (Show, Typeable, Data)
+                deriving (Show, Typeable, Data, Ord, Eq)
 
 -- | XML elements
 data Element  = Element {
@@ -61,7 +61,7 @@ data Element  = Element {
                   elAttribs   :: [Attr],
                   elContent   :: [Content],
                   elLine      :: Maybe Line
-                } deriving (Show, Typeable, Data)
+                } deriving (Show, Typeable, Data, Ord, Eq)
 
 -- | XML attributes
 data Attr     = Attr {
@@ -74,13 +74,13 @@ data CData    = CData {
                   cdVerbatim  :: CDataKind,
                   cdData      :: Text,
                   cdLine      :: Maybe Line
-                } deriving (Show, Typeable, Data)
+                } deriving (Show, Typeable, Data, Ord, Eq)
 
 data CDataKind
  = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
  | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
  | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
-   deriving ( Eq, Show, Typeable, Data )
+   deriving ( Eq, Ord, Show, Typeable, Data )
 
 -- | XML qualified names
 data QName    = QName {
-- 
cgit v1.2.3


From 5eb7ad7d1ebbfe27a282a2d75f199bacf2052be3 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 11 May 2021 22:31:38 -0600
Subject: Improve integration of settings from reference.docx.

The settings we can carry over from a reference.docx are
autoHyphenation, consecutiveHyphenLimit, hyphenationZone,
doNotHyphenateCap, evenAndOddHeaders, and proofState.

Previously this was implemented in a buggy way, so that the
reference doc's values AND the new values were included.

This change allows users to create a reference.docx that
sets w:proofState for spelling or grammar to "dirty,"
so that spell/grammar checking will be triggered on the
generated docx.

Closes #1209.
---
 src/Text/Pandoc/Writers/Docx.hs                    |  27 +++++++++++----------
 test/docx/golden/block_quotes.docx                 | Bin 10067 -> 10061 bytes
 test/docx/golden/codeblock.docx                    | Bin 9916 -> 9910 bytes
 test/docx/golden/comments.docx                     | Bin 10254 -> 10248 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 10017 -> 10011 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10646 -> 10640 bytes
 test/docx/golden/custom_style_reference.docx       | Bin 12400 -> 12397 bytes
 test/docx/golden/definition_list.docx              | Bin 9916 -> 9910 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9922 -> 9916 bytes
 test/docx/golden/document-properties.docx          | Bin 10399 -> 10393 bytes
 test/docx/golden/headers.docx                      | Bin 10055 -> 10049 bytes
 test/docx/golden/image.docx                        | Bin 26733 -> 26727 bytes
 test/docx/golden/inline_code.docx                  | Bin 9855 -> 9849 bytes
 test/docx/golden/inline_formatting.docx            | Bin 10035 -> 10029 bytes
 test/docx/golden/inline_images.docx                | Bin 26789 -> 26783 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10077 -> 10071 bytes
 test/docx/golden/links.docx                        | Bin 10248 -> 10242 bytes
 test/docx/golden/lists.docx                        | Bin 10314 -> 10308 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10110 -> 10104 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10192 -> 10186 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10108 -> 10102 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10212 -> 10206 bytes
 test/docx/golden/notes.docx                        | Bin 10024 -> 10018 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9956 -> 9950 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10090 -> 10084 bytes
 test/docx/golden/table_one_row.docx                | Bin 9925 -> 9920 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10230 -> 10225 bytes
 test/docx/golden/tables.docx                       | Bin 10271 -> 10266 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9899 -> 9893 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9882 -> 9876 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9916 -> 9910 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 10028 -> 10022 bytes
 test/docx/golden/unicode.docx                      | Bin 9841 -> 9835 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9888 -> 9882 bytes
 34 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index e11961bfd..75bed1595 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -469,12 +469,12 @@ writeDocx opts doc = do
   -- adds references to footnotes or endnotes we don't have...
   -- we do, however, copy some settings over from reference
   let settingsPath = "word/settings.xml"
-      settingsList = [ "w:autoHyphenation"
-                     , "w:consecutiveHyphenLimit"
-                     , "w:hyphenationZone"
-                     , "w:doNotHyphenateCap"
-                     , "w:evenAndOddHeaders"
-                     , "w:proofState"
+      settingsList = [ "autoHyphenation"
+                     , "consecutiveHyphenLimit"
+                     , "hyphenationZone"
+                     , "doNotHyphenateCap"
+                     , "evenAndOddHeaders"
+                     , "proofState"
                      ]
   settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
 
@@ -577,16 +577,17 @@ copyChildren :: (PandocMonad m)
 copyChildren refArchive distArchive path timestamp elNames = do
   ref  <- parseXml refArchive distArchive path
   dist <- parseXml distArchive distArchive path
+  let elsToCopy =
+        map cleanElem $ filterChildrenName (\e -> qName e `elem` elNames) ref
+  let elsToKeep =
+        [e | Elem e <- elContent dist, not (any (hasSameNameAs e) elsToCopy)]
   return $ toEntry path timestamp $ renderXml dist{
-      elContent = elContent dist ++ copyContent ref
+      elContent = map Elem elsToKeep ++ map Elem elsToCopy
     }
   where
-    strName QName{qName=name, qPrefix=prefix}
-      | Just p <- prefix = p <> ":" <> name
-      | otherwise        = name
-    shouldCopy = (`elem` elNames) . strName
-    cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
-    copyContent = map cleanElem . filterChildrenName shouldCopy
+    hasSameNameAs (Element {elName = n1}) (Element {elName = n2}) =
+      qName n1 == qName n2
+    cleanElem el@Element{elName=name} = el{elName=name{qURI=Nothing}}
 
 -- this is the lowest number used for a list numId
 baseListId :: Int
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index af1b33ca2..b6973fcfd 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index f748f1f01..f0d35d3ad 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index ac9d56680..fb1c15dec 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index f27727edd..43d536b65 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 1da499d6a..8865010d1 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index 4d2fe245d..10f7bf661 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index f386fcea3..2a7d81a34 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index debe9a3f6..2fcd14908 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index cd17400bf..39533f42d 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index d3af8a3dd..c8d67c45b 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 1c4e738c0..8a7aeec10 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 35f43f19f..969237cec 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 8de3f70f6..cda936a39 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index e76558be9..69991e791 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 88bae8142..40e848195 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index 455adcfc7..28237a30d 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 081d9ddba..bf075805e 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index fc9213fc5..e7d308e13 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index b636fd3f8..9763e347e 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 252623215..b717ca619 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index a8c3f5478..873e731d2 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 43e650ebd..134cb2eaf 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index fe4f7845b..c49ed79c8 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index 45e90608f..1f2cbb214 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index e60bb303f..a0160cdb4 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index a4037cf32..6427d475c 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index bc1bc27f8..470eac2ae 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 247725aaa..3542b8f9c 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index 3863afef2..b36b4485e 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 5c848b63a..e30ab06ae 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index e0c843713..11597d578 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index 78a773bdd..c7bff82e5 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index c66a45b74..c70f6946e 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From 46309319ef9eb66ea3598073df4174fb40b1b0f8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 12 May 2021 06:01:13 -0600
Subject: Fix source position reporting for YAML bibliographies.

Closes #7273.
---
 src/Text/Pandoc/Readers/Markdown.hs | 2 --
 src/Text/Pandoc/Readers/Metadata.hs | 8 ++++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 69dd51bc4..2d20ff018 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -97,12 +97,10 @@ yamlToRefs :: PandocMonad m
            -> m [MetaValue]
 yamlToRefs idpred opts mbfp bstr = do
   let parser = do
-        oldPos <- getPosition
         case mbfp of
           Nothing -> return ()
           Just fp -> setPosition $ initialPos fp
         refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
-        setPosition oldPos
         return $ runF refs defaultParserState
   parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text)
   case parsed of
diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index bbcfe62ea..45eddf25a 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -98,8 +98,12 @@ yamlBsToRefs pMetaValue idpred bstr =
        Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
                 -> return . return $ mempty
        Right _  -> Prelude.fail "expecting YAML object"
-       Left (_pos, err')
-                -> Prelude.fail err'
+       Left (yamlpos, err')
+                -> do pos <- getPosition
+                      setPosition $ incSourceLine
+                            (setSourceColumn pos (YE.posColumn yamlpos))
+                            (YE.posLine yamlpos - 1)
+                      Prelude.fail err'
 
 
 nodeToKey :: YAML.Node YE.Pos -> Maybe Text
-- 
cgit v1.2.3


From 0217ae2a4ffe33bee7cc53ba44817cd540b5f01e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 12 May 2021 11:05:55 -0700
Subject: Hande 'annote' field in bibtex/biblatex writer.

Closes #7266.
---
 src/Text/Pandoc/Citeproc/BibTeX.hs |  2 ++
 test/command/7266.md               | 10 ++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 test/command/7266.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index f6833000c..b17240557 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -156,6 +156,7 @@ writeBibtexString opts variant mblang ref =
            , "langid"
            , "abstract"
            , "keywords"
+           , "annote"
            ]
       Bibtex ->
            [ "author"
@@ -175,6 +176,7 @@ writeBibtexString opts variant mblang ref =
            , "address"
            , "type"
            , "note"
+           , "annote"
            ]
 
   valToInlines (TextVal t) = B.text t
diff --git a/test/command/7266.md b/test/command/7266.md
new file mode 100644
index 000000000..9db833636
--- /dev/null
+++ b/test/command/7266.md
@@ -0,0 +1,10 @@
+```
+% pandoc -f biblatex -t biblatex -s
+@article{id,
+  annote = "annotation"
+}
+^D
+@article{id,
+  annote = {annotation}
+}
+```
-- 
cgit v1.2.3


From edca1d1656a03d7de4901a1f6d356094ec9fb2df Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 12 May 2021 11:12:27 -0700
Subject: Plain writer: handle superscript unicode minus.

Closes #7276.  Note:  currently we still get unwanted
white space around the minus; this needs to be addressed
with a change in texmath.
---
 src/Text/Pandoc/Writers/Shared.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index a09d18571..0b7c6bee0 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -394,6 +394,7 @@ toSuperscript '2' = Just '\x00B2'
 toSuperscript '3' = Just '\x00B3'
 toSuperscript '+' = Just '\x207A'
 toSuperscript '-' = Just '\x207B'
+toSuperscript '\x2212' = Just '\x207B' -- unicode minus
 toSuperscript '=' = Just '\x207C'
 toSuperscript '(' = Just '\x207D'
 toSuperscript ')' = Just '\x207E'
-- 
cgit v1.2.3


From 3f09f53459b877f53072efbf57dec21fa37280b5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 13 May 2021 21:11:52 -0700
Subject: Implement curly-brace syntax for Markdown citation keys.

The change provides a way to use citation keys that contain
special characters not usable with the standard citation
key syntax.  Example: `@{foo_bar{x}'}` for the key `foo_bar{x}`.
Closes #6026.

The change requires adding a new parameter to the `citeKey`
parser from Text.Pandoc.Parsing [API change].

Markdown reader: recognize @{..} syntax for citatinos.

Markdown writer:  use @{..} syntax for citations when needed.

Update manual with curly-brace syntax for citations.

Closes #6026.
---
 MANUAL.txt                                 | 15 ++++++++++-----
 src/Text/Pandoc/Parsing.hs                 | 18 +++++++++++++-----
 src/Text/Pandoc/Readers/Markdown.hs        |  6 +++---
 src/Text/Pandoc/Readers/Org/Inlines.hs     |  8 ++++----
 src/Text/Pandoc/Writers/Markdown/Inline.hs |  9 +++++++--
 test/command/6026.md                       | 19 +++++++++++++++++++
 6 files changed, 56 insertions(+), 19 deletions(-)
 create mode 100644 test/command/6026.md

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 75fe599e2..6f06d1e8a 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -4949,12 +4949,15 @@ Inline and regular footnotes may be mixed freely.
 #### Extension: `citations` ####
 
 Markdown citations go inside square brackets and are separated
-by semicolons.  Each citation must have a key, composed of '@' +
-the citation identifier from the database, and may optionally
-have a prefix, a locator, and a suffix.  The citation key must
-begin with a letter, digit, or `_`, and may contain
+by semicolons.  Each citation must have a key and may optionally
+have a prefix, a locator, and a suffix.  The citation key
+consists of `@` plus the citation identifier, possibly
+enclosed in curly braces.  If the identifier starts
+with a letter, digit, or `_`, followed by zero or more
 alphanumerics, `_`, and internal punctuation characters
-(`:.#$%&-+?<>~/`).  Here are some examples:
+(`:.#$%&-+?<>~/`), then the curly braces may be omitted.
+Identifiers may not contain whitespace characters or unbalanced
+curly braces.  Here are some examples:
 
     Blah blah [see @doe99, pp. 33-35; also @smith04, chap. 1].
 
@@ -4962,6 +4965,8 @@ alphanumerics, `_`, and internal punctuation characters
 
     Blah blah [@smith04; @doe99].
 
+    Blah blah [@{https://example.com/bib?name=foobar&date=2000}, p. 33].
+
 `pandoc` detects locator terms in the [CSL locale files].
 Either abbreviated or unabbreviated forms are accepted. In the `en-US`
 locale, locator terms can be written in either singular or plural forms,
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index cbe9993c6..0bb794ba1 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -1605,19 +1605,27 @@ nested p = do
   return res
 
 citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st)
-        => ParserT s st m (Bool, Text)
-citeKey = try $ do
+        => Bool -- ^ If True, allow expanded @{..} syntax.
+        -> ParserT s st m (Bool, Text)
+citeKey allowBraced = try $ do
   guard =<< notAfterString
   suppress_author <- option False (True <$ char '-')
   char '@'
+  key <- simpleCiteIdentifier
+        <|> if allowBraced
+               then charsInBalanced '{' '}' (satisfy (not . isSpace))
+               else mzero
+  return (suppress_author, key)
+
+simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char)
+                      => ParserT s st m Text
+simpleCiteIdentifier = do
   firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite
   let regchar = satisfy (\c -> isAlphaNum c || c == '_')
   let internal p = try $ p <* lookAhead regchar
   rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|>
                  try (oneOf ":/" <* lookAhead (char '/'))
-  let key = firstChar:rest
-  return (suppress_author, T.pack key)
-
+  return $ T.pack $ firstChar:rest
 
 token :: (Stream s m t)
       => (t -> Text)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 2d20ff018..34f16ab4e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -2094,7 +2094,7 @@ cite = do
 
 textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
 textualCite = try $ do
-  (suppressAuthor, key) <- citeKey
+  (suppressAuthor, key) <- citeKey True
   -- If this is a reference to an earlier example list item,
   -- then don't parse it as a citation.  If the example list
   -- item comes later, we'll parse it here and figure out in
@@ -2174,7 +2174,7 @@ prefix = trimInlinesF . mconcat <$>
   manyTill inline (char ']'
    <|> lookAhead
          (try $ do optional (try (char ';' >> spnl))
-                   citeKey
+                   citeKey True
                    return ']'))
 
 citeList :: PandocMonad m => MarkdownParser m (F [Citation])
@@ -2183,7 +2183,7 @@ citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
 citation :: PandocMonad m => MarkdownParser m (F Citation)
 citation = try $ do
   pref <- prefix
-  (suppress_author, key) <- citeKey
+  (suppress_author, key) <- citeKey True
   suff <- suffix
   noteNum <- stateNoteNumber <$> getState
   return $ do
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 054f2611a..6862dd71e 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -263,7 +263,7 @@ berkeleyCitationList = try $ do
  where
    citationListPart :: PandocMonad m => OrgParser m (F Inlines)
    citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
-     notFollowedBy' citeKey
+     notFollowedBy' $ citeKey False
      notFollowedBy (oneOf ";]")
      inline
 
@@ -278,7 +278,7 @@ berkeleyBareTag' = try $ void (string "cite")
 
 berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
 berkeleyTextualCite = try $ do
-  (suppressAuthor, key) <- citeKey
+  (suppressAuthor, key) <- citeKey False
   returnF . return $ Citation
     { citationId      = key
     , citationPrefix  = mempty
@@ -351,7 +351,7 @@ citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
 citation :: PandocMonad m => OrgParser m (F Citation)
 citation = try $ do
   pref <- prefix
-  (suppress_author, key) <- citeKey
+  (suppress_author, key) <- citeKey False
   suff <- suffix
   return $ do
     x <- pref
@@ -368,7 +368,7 @@ citation = try $ do
       }
  where
    prefix = trimInlinesF . mconcat <$>
-            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+            manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
    suffix = try $ do
      hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
      skipSpaces
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index e35e1a0b9..2062050e4 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -491,11 +491,16 @@ inlineToMarkdown opts (Cite (c:cs) lst)
            rest <- mapM convertOne cs
            let inbr = suffs <+> joincits rest
                br   = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
-           return $ literal ("@" <> citationId c) <+> br
+           return $ literal ("@" <> maybeInBraces (citationId c)) <+> br
          else do
            cits <- mapM convertOne (c:cs)
            return $ literal "[" <> joincits cits <> literal "]"
   where
+        maybeInBraces key =
+          case readWith (citeKey False >> spaces >> eof)
+                 defaultParserState ("@" <> key) of
+            Left _  -> "{" <> key <> "}"
+            Right _ -> key
         joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty)
         convertOne Citation { citationId      = k
                             , citationPrefix  = pinlines
@@ -504,7 +509,7 @@ inlineToMarkdown opts (Cite (c:cs) lst)
                                = do
            pdoc <- inlineListToMarkdown opts pinlines
            sdoc <- inlineListToMarkdown opts sinlines
-           let k' = literal (modekey m <> "@" <> k)
+           let k' = literal (modekey m <> "@" <> maybeInBraces k)
                r = case sinlines of
                         Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc
                         _                                         -> k' <+> sdoc
diff --git a/test/command/6026.md b/test/command/6026.md
new file mode 100644
index 000000000..5e18a5f42
--- /dev/null
+++ b/test/command/6026.md
@@ -0,0 +1,19 @@
+```
+% pandoc -t native
+@{https://openreview.net/forum?id=HkwoSDPgg}
+
+@https://openreview.net/forum?id=HkwoSDPgg
+^D
+[Para [Cite [Citation {citationId = "https://openreview.net/forum?id=HkwoSDPgg", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0}] [Str "@https://openreview.net/forum?id=HkwoSDPgg"]]
+,Para [Cite [Citation {citationId = "https://openreview.net/forum?id", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "@https://openreview.net/forum?id"],Str "=HkwoSDPgg"]]
+```
+```
+% pandoc -t markdown
+@{https://openreview.net/forum?id=HkwoSDPgg}
+
+@https://openreview.net/forum?id=HkwoSDPgg
+^D
+@{https://openreview.net/forum?id=HkwoSDPgg}
+
+@https://openreview.net/forum?id=HkwoSDPgg
+```
-- 
cgit v1.2.3


From 875f8f36545d1c21fa3d29c52c40517a667f2574 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 14 May 2021 11:58:58 +0200
Subject: HTML reader: don't fail on unmatched closing "script" tag.

Prevent the reader from crashing if the HTML input contains an unmatched
closing `</script>` tag.

Fixes: #7282
---
 src/Text/Pandoc/Readers/HTML.hs | 16 +++++++++-------
 test/command/7282.md            |  7 +++++++
 2 files changed, 16 insertions(+), 7 deletions(-)
 create mode 100644 test/command/7282.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f5c8a2277..0a9d67e35 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -942,13 +942,15 @@ getTagName (TagClose t)  = Just t
 getTagName _             = Nothing
 
 isInlineTag :: Tag Text -> Bool
-isInlineTag t =
-  isCommentTag t ||
-  case getTagName t of
-    Nothing  -> False
-    Just "script" -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
-    Just x   -> x `Set.notMember` blockTags ||
-                T.take 1 x == "?" -- processing instr.
+isInlineTag t = isCommentTag t || case t of
+  TagOpen "script" _ -> "math/tex" `T.isPrefixOf` fromAttrib "type" t
+  TagClose "script"  -> True
+  TagOpen name _     -> isInlineTagName name
+  TagClose name      -> isInlineTagName name
+  _                  -> False
+ where isInlineTagName x =
+         x `Set.notMember` blockTags ||
+         T.take 1 x == "?" -- processing instr.
 
 isBlockTag :: Tag Text -> Bool
 isBlockTag t = isBlockTagName || isTagComment t
diff --git a/test/command/7282.md b/test/command/7282.md
new file mode 100644
index 000000000..4ed6b5b3e
--- /dev/null
+++ b/test/command/7282.md
@@ -0,0 +1,7 @@
+Don't crash on unmatched closing tag.
+```
+% pandoc -f html -t native
+</script>
+^D
+[]
+```
-- 
cgit v1.2.3


From 17d96404f5b6f5f080329e220a0784f10c364f2d Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 14 May 2021 16:19:20 +0200
Subject: Docx writer: allow multirow table headers

---
 src/Text/Pandoc/Writers/Docx/Table.hs      |  42 +++++++++++++++++------------
 test/docx/golden/table_with_list_cell.docx | Bin 10225 -> 10238 bytes
 test/docx/golden/tables.docx               | Bin 10266 -> 10279 bytes
 3 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index bb931bf08..ccc325fbe 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -42,11 +42,12 @@ tableToOpenXML blocksToOpenXML gridTable = do
                 then return []
                 else withParaPropM (pStyleM "Table Caption")
                      $ blocksToOpenXML captionBlocks
-  head' <- cellGridToOpenXML blocksToOpenXML thead
-  bodies <- mapM (cellGridToOpenXML blocksToOpenXML) tbodies
-  foot' <- cellGridToOpenXML blocksToOpenXML tfoot
+  head' <- cellGridToOpenXML blocksToOpenXML HeadRow thead
+  bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow) tbodies
+  foot' <- cellGridToOpenXML blocksToOpenXML FootRow tfoot
 
   let hasHeader = not . null . indices . partRowAttrs $ thead
+  let hasFooter = not . null . indices . partRowAttrs $ tfoot
   -- for compatibility with Word <= 2007, we include a val with a bitmask
   -- 0×0020  Apply first row conditional formatting
   -- 0×0040  Apply last row conditional formatting
@@ -61,7 +62,7 @@ tableToOpenXML blocksToOpenXML gridTable = do
           ( mknode "w:tblStyle" [("w:val","Table")] () :
             mknode "w:tblW" tblWattr () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
-                               ,("w:lastRow","0")
+                               ,("w:lastRow",if hasFooter then "1" else "0")
                                ,("w:firstColumn","0")
                                ,("w:lastColumn","0")
                                ,("w:noHBand","0")
@@ -77,6 +78,9 @@ tableToOpenXML blocksToOpenXML gridTable = do
   modify $ \s -> s { stInTable = False }
   return $ captionXml ++ [Elem tbl]
 
+-- | Parts of a table
+data RowType = HeadRow | BodyRow | FootRow
+
 alignmentToString :: Alignment -> Text
 alignmentToString = \case
   AlignLeft    -> "left"
@@ -104,22 +108,23 @@ tableLayout specs =
           , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ])
 
 cellGridToOpenXML :: PandocMonad m
-                   => ([Block] -> WS m [Content])
-                   -> Part
-                   -> WS m [Element]
-cellGridToOpenXML blocksToOpenXML part@(Part _ _ rowAttrs) =
+                  => ([Block] -> WS m [Content])
+                  -> RowType
+                  -> Part
+                  -> WS m [Element]
+cellGridToOpenXML blocksToOpenXML rowType part@(Part _ _ rowAttrs) =
   if null (indices rowAttrs)
   then return mempty
-  else mapM (rowToOpenXML blocksToOpenXML) $ partToRows part
+  else mapM (rowToOpenXML blocksToOpenXML) $ partToRows rowType part
 
 data OOXMLCell
   = OOXMLCell Attr Alignment RowSpan ColSpan [Block]
   | OOXMLCellMerge ColSpan
 
-data OOXMLRow = OOXMLRow Attr [OOXMLCell]
+data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell]
 
-partToRows :: Part -> [OOXMLRow]
-partToRows part =
+partToRows :: RowType -> Part -> [OOXMLRow]
+partToRows rowType part =
   let
     toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell]
     toOOXMLCell ridx cidx = \case
@@ -132,7 +137,7 @@ partToRows part =
                        " at index " ++ show idx'
       _ -> mempty
     mkRow :: (RowIndex, Attr) -> OOXMLRow
-    mkRow (ridx, attr) = OOXMLRow attr
+    mkRow (ridx, attr) = OOXMLRow rowType attr
                        . concatMap (uncurry $ toOOXMLCell ridx)
                        . assocs
                        . rowArray ridx
@@ -143,12 +148,17 @@ rowToOpenXML :: PandocMonad m
              => ([Block] -> WS m [Content])
              -> OOXMLRow
              -> WS m Element
-rowToOpenXML blocksToOpenXML (OOXMLRow _attr cells) = do
+rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do
   xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells
+  let addTrPr = case rowType of
+        HeadRow -> (mknode "w:trPr" []
+                    [mknode "w:tblHeader" [("w:val", "true")] ()] :)
+        BodyRow -> id
+        FootRow -> id
   -- let align' = case align of
   --       AlignDefault -> colAlign
   --       _            -> align
-  return $ mknode "w:tr" [] xmlcells
+  return $ mknode "w:tr" [] (addTrPr xmlcells)
 
 ooxmlCellToOpenXML :: PandocMonad m
                    => ([Block] -> WS m [Content])
@@ -161,8 +171,6 @@ ooxmlCellToOpenXML blocksToOpenXML = \case
                            , mknode "w:vMerge"   [("w:val", "continue")] () ]
       , mknode "w:p" [] [mknode "w:pPr" [] ()]]
   OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do
-    -- we handle rowspans via 'leftpad', so we can ignore those here
-
     compactStyle <- pStyleM "Compact"
     es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents
     -- Table cells require a <w:p> element, even an empty one!
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 6427d475c..e51910770 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 470eac2ae..8029774a9 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
-- 
cgit v1.2.3


From 3ec5726c9b7b9f7cc41ca5f26a80c347e20afc9a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 14 May 2021 16:49:19 +0200
Subject: Docx writer: fix alignment for cells.

This fixes a regression introduced with the in the colspan/rowspan
changes that caused column alignments to be ignored. The column
alignment is used only if a default alignment is specified at the cell
level; otherwise the cell-level alignment takes precedence.
---
 src/Text/Pandoc/Writers/Docx/Table.hs | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index ccc325fbe..e7fc82a10 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -38,13 +38,14 @@ tableToOpenXML blocksToOpenXML gridTable = do
         gridTable
   let (Caption _maybeShortCaption captionBlocks) = caption
   let captionStr = stringify captionBlocks
+  let aligns = map fst $ elems colspecs
   captionXml <- if null captionBlocks
                 then return []
                 else withParaPropM (pStyleM "Table Caption")
                      $ blocksToOpenXML captionBlocks
-  head' <- cellGridToOpenXML blocksToOpenXML HeadRow thead
-  bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow) tbodies
-  foot' <- cellGridToOpenXML blocksToOpenXML FootRow tfoot
+  head' <- cellGridToOpenXML blocksToOpenXML HeadRow aligns thead
+  bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow aligns) tbodies
+  foot' <- cellGridToOpenXML blocksToOpenXML FootRow aligns tfoot
 
   let hasHeader = not . null . indices . partRowAttrs $ thead
   let hasFooter = not . null . indices . partRowAttrs $ tfoot
@@ -110,12 +111,14 @@ tableLayout specs =
 cellGridToOpenXML :: PandocMonad m
                   => ([Block] -> WS m [Content])
                   -> RowType
+                  -> [Alignment]
                   -> Part
                   -> WS m [Element]
-cellGridToOpenXML blocksToOpenXML rowType part@(Part _ _ rowAttrs) =
+cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ _ rowAttrs) =
   if null (indices rowAttrs)
   then return mempty
-  else mapM (rowToOpenXML blocksToOpenXML) $ partToRows rowType part
+  else mapM (rowToOpenXML blocksToOpenXML) $
+       partToRows rowType aligns part
 
 data OOXMLCell
   = OOXMLCell Attr Alignment RowSpan ColSpan [Block]
@@ -123,13 +126,17 @@ data OOXMLCell
 
 data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell]
 
-partToRows :: RowType -> Part -> [OOXMLRow]
-partToRows rowType part =
+partToRows :: RowType -> [Alignment] -> Part -> [OOXMLRow]
+partToRows rowType aligns part =
   let
-    toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell]
-    toOOXMLCell ridx cidx = \case
+    toOOXMLCell :: Alignment -> RowIndex -> ColIndex -> GridCell -> [OOXMLCell]
+    toOOXMLCell columnAlign ridx cidx = \case
       ContentCell attr align rowspan colspan blocks ->
-        [OOXMLCell attr align rowspan colspan blocks]
+        -- Respect non-default, cell specific alignment.
+        let align' = case align of
+              AlignDefault -> columnAlign
+              _            -> align
+        in [OOXMLCell attr align' rowspan colspan blocks]
       ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' ->
         case (partCellArray part)!idx' of
           (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan]
@@ -138,7 +145,9 @@ partToRows rowType part =
       _ -> mempty
     mkRow :: (RowIndex, Attr) -> OOXMLRow
     mkRow (ridx, attr) = OOXMLRow rowType attr
-                       . concatMap (uncurry $ toOOXMLCell ridx)
+                       . mconcat
+                       . zipWith (\align -> uncurry $ toOOXMLCell align ridx)
+                                 aligns
                        . assocs
                        . rowArray ridx
                        $ partCellArray part
@@ -155,9 +164,6 @@ rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do
                     [mknode "w:tblHeader" [("w:val", "true")] ()] :)
         BodyRow -> id
         FootRow -> id
-  -- let align' = case align of
-  --       AlignDefault -> colAlign
-  --       _            -> align
   return $ mknode "w:tr" [] (addTrPr xmlcells)
 
 ooxmlCellToOpenXML :: PandocMonad m
-- 
cgit v1.2.3


From 76a4e7127be1116b67ae531f56aadb05963813b5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 14 May 2021 10:09:02 -0700
Subject: Beamer writer: support exampleblock and alertblock.

A block will be rendered as an exampleblock if the heading
has class `example` and alertblock if it has class `alert`.

Closes #7278.
---
 MANUAL.txt                       |  6 +++++-
 src/Text/Pandoc/Writers/LaTeX.hs | 11 ++++++++---
 test/command/7278.md             | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 4 deletions(-)
 create mode 100644 test/command/7278.md

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 6f06d1e8a..cdfb0e332 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5666,7 +5666,11 @@ rules:
   * A heading at the slide level always starts a new slide.
 
   * Headings *below* the slide level in the hierarchy create
-    headings *within* a slide.
+    headings *within* a slide.  (In beamer, a "block" will be
+    created.  If the heading has the class `example`, an
+    `exampleblock` environment will be used; if it has the class
+    `alert`, an `alertblock` will be used; otherwise a regular
+    `block` will be used.)
 
   * Headings *above* the slide level in the hierarchy create
     "title slides," which just contain the section title
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 8b1f3df1d..d8722876e 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -290,7 +290,12 @@ blockToLaTeX :: PandocMonad m
              => Block     -- ^ Block to convert
              -> LW m (Doc Text)
 blockToLaTeX Null = return empty
-blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do
+blockToLaTeX (Div attr@(identifier,"block":dclasses,_)
+             (Header _ _ ils : bs)) = do
+  let blockname
+        | "example" `elem` dclasses = "exampleblock"
+        | "alert" `elem` dclasses = "alertblock"
+        | otherwise = "block"
   ref <- toLabel identifier
   let anchor = if T.null identifier
                   then empty
@@ -298,8 +303,8 @@ blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do
                        braces (literal ref) <> braces empty
   title' <- inlineListToLaTeX ils
   contents <- blockListToLaTeX bs
-  wrapDiv attr $ ("\\begin{block}" <> braces title' <> anchor) $$
-                 contents $$ "\\end{block}"
+  wrapDiv attr $ ("\\begin" <> braces blockname <> braces title' <> anchor) $$
+                 contents $$ "\\end" <> braces blockname
 blockToLaTeX (Div (identifier,"slide":dclasses,dkvs)
                (Header _ (_,hclasses,hkvs) ils : bs)) = do
   -- note: [fragile] is required or verbatim breaks
diff --git a/test/command/7278.md b/test/command/7278.md
new file mode 100644
index 000000000..dcf71c29f
--- /dev/null
+++ b/test/command/7278.md
@@ -0,0 +1,38 @@
+```
+% pandoc -t beamer
+# Slide
+
+Some blocks:
+
+## example block title {.example}
+
+text in block
+
+## alert block title {.alert}
+
+text in block
+
+## block title
+
+text in block
+^D
+\begin{frame}{Slide}
+\protect\hypertarget{slide}{}
+Some blocks:
+
+\begin{exampleblock}{example block title}
+\protect\hypertarget{example-block-title}{}
+text in block
+\end{exampleblock}
+
+\begin{alertblock}{alert block title}
+\protect\hypertarget{alert-block-title}{}
+text in block
+\end{alertblock}
+
+\begin{block}{block title}
+\protect\hypertarget{block-title}{}
+text in block
+\end{block}
+\end{frame}
+```
-- 
cgit v1.2.3


From 013e4a3164115bf84f3e1964f21b7cd7f020b86a Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 14 May 2021 21:31:24 +0200
Subject: HTML reader: keep h1 tags as normal headers (#7274)

The tags `<title>` and `<h1 class="title">` often contain the same
information, so the latter was dropped from the document. However, as
this can lead to loss of information, the heading is now always
retained.

Use `--shift-heading-level-by=-1` to turn the `<h1>` into the document
title, or a filter to restore the previous behavior.

Closes: #2293
---
 src/Text/Pandoc/Readers/HTML.hs | 6 +-----
 test/html-reader.native         | 3 ++-
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 0a9d67e35..b305de7b5 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -499,17 +499,13 @@ pHeader = try $ do
                            tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
                            (const True)
   let attr = toStringAttr attr'
-  let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text)
-                                               [("class","title")]
   level <- headerLevel tagtype
   contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
   let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] T.words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
   attr'' <- registerHeader (ident, classes, keyvals) contents
-  return $ if bodyTitle
-              then mempty  -- skip a representation of the title in the body
-              else B.headerWith attr'' level contents
+  return $ B.headerWith attr'' level contents
 
 pHrule :: PandocMonad m => TagParser m Blocks
 pHrule = do
diff --git a/test/html-reader.native b/test/html-reader.native
index 04ec55d1e..880561a93 100644
--- a/test/html-reader.native
+++ b/test/html-reader.native
@@ -1,5 +1,6 @@
 Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
-[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
+[Header 1 ("pandoc-test-suite",["title"],[]) [Str "Pandoc",Space,Str "Test",Space,Str "Suite"]
+,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
 ,HorizontalRule
 ,Header 1 ("headers",[],[]) [Str "Headers"]
 ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link ("",[],[]) [Str "embedded",Space,Str "link"] ("/url","")]
-- 
cgit v1.2.3


From 0794862aacdb5d28616dadc3d681a63c36251212 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 15 May 2021 16:36:13 +0200
Subject: HTML writer: parse `<header>` as a Div

HTML5 `<header>` elements are treated like `<div>` elements.
---
 src/Text/Pandoc/Readers/HTML.hs |  2 ++
 test/Tests/Readers/HTML.hs      | 14 +++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b305de7b5..0a9e4addf 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -205,6 +205,7 @@ block = ((do
           | otherwise
           -> pDiv
         "section" -> pDiv
+        "header" -> pDiv
         "main" -> pDiv
         "figure" -> pFigure
         "iframe" -> pIframe
@@ -404,6 +405,7 @@ pLineBlock = try $ do
 isDivLike :: Text -> Bool
 isDivLike "div"     = True
 isDivLike "section" = True
+isDivLike "header"  = True
 isDivLike "main"    = True
 isDivLike _         = False
 
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index f23af2cb1..7f5849991 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -99,11 +99,15 @@ tests = [ testGroup "base tag"
             plain (codeWith ("",["sample"],[]) "Answer is 42")
           ]
         , testGroup "var"
-        [
-          test html "inline var block" $
-          "<var>result</var>" =?>
-          plain (codeWith ("",["variable"],[]) "result")
-        ]
+          [ test html "inline var block" $
+            "<var>result</var>" =?>
+            plain (codeWith ("",["variable"],[]) "result")
+          ]
+        , testGroup "header"
+          [ test htmlNativeDivs "<header> is parsed as a div" $
+            "<header id=\"title\">Title</header>" =?>
+            divWith ("title", mempty, mempty) (plain "Title")
+          ]
         , askOption $ \(QuickCheckTests numtests) ->
             testProperty "Round trip" $
               withMaxSuccess (if QuickCheckTests numtests == defaultValue
-- 
cgit v1.2.3


From 0a4c6925b6db433bdb8b9d57c94a7c36be3daea7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 15 May 2021 15:40:49 -0700
Subject: Docx writer: copy over more settings from referenc.odcx.

From settings.xml in the reference-doc, we now include:
`zoom`, `embedSystemFonts`, `doNotTrackMoves`, `defaultTabStop`,
`drawingGridHorizontalSpacing`, `drawingGridVerticalSpacing`,
`displayHorizontalDrawingGridEvery`, `displayVerticalDrawingGridEvery`,
`characterSpacingControl`, `savePreviewPicture`, `mathPr`, `themeFontLang`,
`decimalSymbol`, `listSeparator`, `autoHyphenation`, `compat`.

Closes #7240.
---
 src/Text/Pandoc/Writers/Docx.hs                     |  17 ++++++++++++++++-
 test/docx/golden/block_quotes.docx                  | Bin 9981 -> 9981 bytes
 test/docx/golden/codeblock.docx                     | Bin 9830 -> 9830 bytes
 test/docx/golden/comments.docx                      | Bin 10168 -> 10168 bytes
 test/docx/golden/custom_style_no_reference.docx     | Bin 9931 -> 9931 bytes
 test/docx/golden/custom_style_preserve.docx         | Bin 10560 -> 10560 bytes
 test/docx/golden/custom_style_reference.docx        | Bin 12317 -> 12368 bytes
 test/docx/golden/definition_list.docx               | Bin 9830 -> 9830 bytes
 .../docx/golden/document-properties-short-desc.docx | Bin 9835 -> 9835 bytes
 test/docx/golden/document-properties.docx           | Bin 10314 -> 10314 bytes
 test/docx/golden/headers.docx                       | Bin 9969 -> 9969 bytes
 test/docx/golden/image.docx                         | Bin 26647 -> 26647 bytes
 test/docx/golden/inline_code.docx                   | Bin 9769 -> 9769 bytes
 test/docx/golden/inline_formatting.docx             | Bin 9949 -> 9949 bytes
 test/docx/golden/inline_images.docx                 | Bin 26703 -> 26703 bytes
 test/docx/golden/link_in_notes.docx                 | Bin 9991 -> 9991 bytes
 test/docx/golden/links.docx                         | Bin 10162 -> 10162 bytes
 test/docx/golden/lists.docx                         | Bin 10228 -> 10228 bytes
 test/docx/golden/lists_continuing.docx              | Bin 10024 -> 10024 bytes
 test/docx/golden/lists_multiple_initial.docx        | Bin 10106 -> 10106 bytes
 test/docx/golden/lists_restarting.docx              | Bin 10022 -> 10022 bytes
 test/docx/golden/nested_anchors_in_header.docx      | Bin 10126 -> 10126 bytes
 test/docx/golden/notes.docx                         | Bin 9938 -> 9938 bytes
 test/docx/golden/raw-blocks.docx                    | Bin 9870 -> 9870 bytes
 test/docx/golden/raw-bookmarks.docx                 | Bin 10004 -> 10004 bytes
 test/docx/golden/table_one_row.docx                 | Bin 9840 -> 9840 bytes
 test/docx/golden/table_with_list_cell.docx          | Bin 10159 -> 10159 bytes
 test/docx/golden/tables.docx                        | Bin 10200 -> 10200 bytes
 test/docx/golden/track_changes_deletion.docx        | Bin 9813 -> 9813 bytes
 test/docx/golden/track_changes_insertion.docx       | Bin 9796 -> 9796 bytes
 test/docx/golden/track_changes_move.docx            | Bin 9830 -> 9830 bytes
 .../golden/track_changes_scrubbed_metadata.docx     | Bin 9942 -> 9942 bytes
 test/docx/golden/unicode.docx                       | Bin 9755 -> 9755 bytes
 test/docx/golden/verbatim_subsuper.docx             | Bin 9802 -> 9802 bytes
 34 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 75bed1595..e7a49ba02 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -469,12 +469,27 @@ writeDocx opts doc = do
   -- adds references to footnotes or endnotes we don't have...
   -- we do, however, copy some settings over from reference
   let settingsPath = "word/settings.xml"
-      settingsList = [ "autoHyphenation"
+      settingsList = [ "zoom"
+                     , "embedSystemFonts"
+                     , "doNotTrackMoves"
+                     , "defaultTabStop"
+                     , "drawingGridHorizontalSpacing"
+                     , "drawingGridVerticalSpacing"
+                     , "displayHorizontalDrawingGridEvery"
+                     , "displayVerticalDrawingGridEvery"
+                     , "characterSpacingControl"
+                     , "savePreviewPicture"
+                     , "mathPr"
+                     , "themeFontLang"
+                     , "decimalSymbol"
+                     , "listSeparator"
+                     , "autoHyphenation"
                      , "consecutiveHyphenLimit"
                      , "hyphenationZone"
                      , "doNotHyphenateCap"
                      , "evenAndOddHeaders"
                      , "proofState"
+                     , "compat"
                      ]
   settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
 
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index f5e4b6428..d05020f82 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 0bbe42bd7..616c9b1d0 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index 6b759b522..aa200c8d6 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index 74e30f651..49cf42f38 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 7ee99c87b..e24940478 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index dfaf16e2b..e7da8f06d 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 02992c6bd..df148cfed 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 9bfe56dca..2d9e96b15 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 2a37045f2..d8f091956 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index ca2c7a261..8c1b3e870 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index c4447d021..48b72e283 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 1824c0aae..048ac8f15 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 82565a5b1..cf1301c4b 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 0416bcc15..662e70556 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index b51ec5220..d800a5fb4 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index ba0d100c8..bffdbbaf8 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 9632c598b..2b201df28 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 3269a0528..257053a78 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 716129170..0a3bf1016 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index af6edfe86..0aa69805f 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index f141425f7..88dd21abd 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 93b4222f6..f02d5951c 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 9d1aa9853..58a101b3f 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index b57289fdd..484c363a2 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index edb23cc72..f75e567ab 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index f9cbed156..a49f70643 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index b585c803d..f24e27516 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 313942750..de7c44bf4 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index f18b3f85f..958533459 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 50bdab767..04fa05062 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 230b27006..ef2dc96f8 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index 627276e4b..a9de2b367 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index 790278108..3ebadc59d 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From 39a69c4f93d46e059c48a740b69dfff219a5d715 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 16 May 2021 11:53:19 -0700
Subject: Markdown writer: improve escaping of `@`.

We need to escape literal `@` before `{` because of
the new citation syntax.
---
 src/Text/Pandoc/Writers/Markdown/Inline.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index 2062050e4..ced5fbacb 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -56,7 +56,7 @@ escapeText opts = T.pack . go . T.unpack
        '@' | isEnabled Ext_citations opts ->
                case cs of
                     (d:_)
-                      | isAlphaNum d || d == '_'
+                      | isAlphaNum d || d == '_' || d == '{'
                          -> '\\':'@':go cs
                     _ -> '@':go cs
        _ | c `elem` ['\\','`','*','_','[',']','#'] ->
-- 
cgit v1.2.3


From 5a6399d9f62c4306fa073ae1311675158dd6a203 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 16 May 2021 12:23:34 -0700
Subject: Markdown writer: fewer unneeded escapes for `#`.

See #6259.
---
 src/Text/Pandoc/Writers/Markdown/Inline.hs | 13 +++++++++++--
 test/command/3792.md                       |  2 +-
 test/command/4164.md                       |  2 +-
 test/command/7208.md                       |  2 +-
 test/command/biblatex-sigfridsson.md       |  2 +-
 test/writer.opml                           |  2 +-
 6 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index ced5fbacb..e6c6da5a9 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -43,7 +43,11 @@ import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..),
 -- | Escape special characters for Markdown.
 escapeText :: WriterOptions -> Text -> Text
 escapeText opts = T.pack . go . T.unpack
-  where
+ where
+  startsWithSpace (' ':_) = True
+  startsWithSpace ('\t':_) = True
+  startsWithSpace [] = True
+  startsWithSpace _ = False
   go [] = []
   go (c:cs) =
     case c of
@@ -59,7 +63,10 @@ escapeText opts = T.pack . go . T.unpack
                       | isAlphaNum d || d == '_' || d == '{'
                          -> '\\':'@':go cs
                     _ -> '@':go cs
-       _ | c `elem` ['\\','`','*','_','[',']','#'] ->
+       '#' | isEnabled Ext_space_in_atx_header opts
+           , startsWithSpace cs
+           -> '\\':'#':go cs
+       _ | c `elem` ['\\','`','*','_','[',']'] ->
               '\\':c:go cs
        '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs
        '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs
@@ -81,6 +88,8 @@ escapeText opts = T.pack . go . T.unpack
                   | isEnabled Ext_intraword_underscores opts
                   , isAlphaNum c
                   , isAlphaNum x -> c : '_' : x : go xs
+                '#':xs           -> c : '#' : go xs
+                '>':xs           -> c : '>' : go xs
                 _                -> c : go cs
 
 attrsToMarkdown :: Attr -> Doc Text
diff --git a/test/command/3792.md b/test/command/3792.md
index eb109b9cc..eff26d517 100644
--- a/test/command/3792.md
+++ b/test/command/3792.md
@@ -6,7 +6,7 @@ and properly escaped.
 ok
 ^D
 ---
-title: \<this\> \*that\*
+title: \<this> \*that\*
 ---
 
 ok
diff --git a/test/command/4164.md b/test/command/4164.md
index 4e7b7e285..68cbd0584 100644
--- a/test/command/4164.md
+++ b/test/command/4164.md
@@ -26,6 +26,6 @@ Here is inline html:
 
 Here is inline html:
 
-\<div\> \<balise\> bla bla \</div\>
+\<div> \<balise> bla bla \</div>
 
 ```
diff --git a/test/command/7208.md b/test/command/7208.md
index e65943ade..fe02ec32e 100644
--- a/test/command/7208.md
+++ b/test/command/7208.md
@@ -2,5 +2,5 @@
 % pandoc -t gfm
 \<hi\>
 ^D
-\<hi\>
+\<hi>
 ```
diff --git a/test/command/biblatex-sigfridsson.md b/test/command/biblatex-sigfridsson.md
index f83c35622..e042a8762 100644
--- a/test/command/biblatex-sigfridsson.md
+++ b/test/command/biblatex-sigfridsson.md
@@ -90,7 +90,7 @@ references:
   - family: Ryde
     given: Ulf
   container-title: Journal of Computational Chemistry
-  doi: "10.1002/(SICI)1096-987X(199803)19:4\\<377::AID-JCC1\\>3.0.CO;2-P"
+  doi: "10.1002/(SICI)1096-987X(199803)19:4\\<377::AID-JCC1>3.0.CO;2-P"
   id: sigfridsson
   issue: 4
   issued: 1998
diff --git a/test/writer.opml b/test/writer.opml
index 6bdcb882e..bfe1e5de6 100644
--- a/test/writer.opml
+++ b/test/writer.opml
@@ -52,7 +52,7 @@
 </outline>
 <outline text="LaTeX" _note="-   &#10;-   2 + 2 = 4&#10;-   *x* ∈ *y*&#10;-   *α* ∧ *ω*&#10;-   223&#10;-   *p*-Tree&#10;-   Here’s some display math:&#10;    $$\\frac{d}{dx}f(x)=\\lim\_{h\\to 0}\\frac{f(x+h)-f(x)}{h}$$&#10;-   Here’s one that has a line break in it: *α* + *ω* × *x*².&#10;&#10;These shouldn’t be math:&#10;&#10;-   To get the famous equation, write `$e = mc^2$`.&#10;-   $22,000 is a *lot* of money. So is $34,000. (It worked if “lot” is&#10;    emphasized.)&#10;-   Shoes ($20) and socks ($5).&#10;-   Escaped `$`: $73 *this should be emphasized* 23$.&#10;&#10;Here’s a LaTeX table:&#10;&#10;------------------------------------------------------------------------">
 </outline>
-<outline text="Special Characters" _note="Here is some unicode:&#10;&#10;-   I hat: Î&#10;-   o umlaut: ö&#10;-   section: §&#10;-   set membership: ∈&#10;-   copyright: ©&#10;&#10;AT&amp;T has an ampersand in their name.&#10;&#10;AT&amp;T is another way to write it.&#10;&#10;This &amp; that.&#10;&#10;4 &amp;lt; 5.&#10;&#10;6 &amp;gt; 5.&#10;&#10;Backslash: \\&#10;&#10;Backtick: \`&#10;&#10;Asterisk: \*&#10;&#10;Underscore: \_&#10;&#10;Left brace: {&#10;&#10;Right brace: }&#10;&#10;Left bracket: \[&#10;&#10;Right bracket: \]&#10;&#10;Left paren: (&#10;&#10;Right paren: )&#10;&#10;Greater-than: &amp;gt;&#10;&#10;Hash: \#&#10;&#10;Period: .&#10;&#10;Bang: !&#10;&#10;Plus: +&#10;&#10;Minus: -&#10;&#10;------------------------------------------------------------------------">
+<outline text="Special Characters" _note="Here is some unicode:&#10;&#10;-   I hat: Î&#10;-   o umlaut: ö&#10;-   section: §&#10;-   set membership: ∈&#10;-   copyright: ©&#10;&#10;AT&amp;T has an ampersand in their name.&#10;&#10;AT&amp;T is another way to write it.&#10;&#10;This &amp; that.&#10;&#10;4 &amp;lt; 5.&#10;&#10;6 &amp;gt; 5.&#10;&#10;Backslash: \\&#10;&#10;Backtick: \`&#10;&#10;Asterisk: \*&#10;&#10;Underscore: \_&#10;&#10;Left brace: {&#10;&#10;Right brace: }&#10;&#10;Left bracket: \[&#10;&#10;Right bracket: \]&#10;&#10;Left paren: (&#10;&#10;Right paren: )&#10;&#10;Greater-than: &amp;gt;&#10;&#10;Hash: #&#10;&#10;Period: .&#10;&#10;Bang: !&#10;&#10;Plus: +&#10;&#10;Minus: -&#10;&#10;------------------------------------------------------------------------">
 </outline>
 <outline text="Links">
   <outline text="Explicit" _note="Just a [URL](/url/).&#10;&#10;[URL and title](/url/ &quot;title&quot;).&#10;&#10;[URL and title](/url/ &quot;title preceded by two spaces&quot;).&#10;&#10;[URL and title](/url/ &quot;title preceded by a tab&quot;).&#10;&#10;[URL and title](/url/ &quot;title with &quot;quotes&quot; in it&quot;)&#10;&#10;[URL and title](/url/ &quot;title with single quotes&quot;)&#10;&#10;[with\_underscore](/url/with_underscore)&#10;&#10;[Email link](mailto:nobody@nowhere.net)&#10;&#10;[Empty]().">
-- 
cgit v1.2.3


From d92622ba3cae5ced69a256472d367a53fc5878a1 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 16 May 2021 21:33:32 +0200
Subject: LaTeX template: define commands for zero width non-joiner character

Closes: #6639

The zero-width non-joiner character is used to avoid ligatures (e.g. in
German).
---
 data/templates/default.latex           | 19 +++++++++++++++++++
 src/Text/Pandoc/Writers/LaTeX.hs       |  3 +--
 src/Text/Pandoc/Writers/LaTeX/Types.hs |  3 ++-
 src/Text/Pandoc/Writers/LaTeX/Util.hs  |  7 ++++---
 4 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'src')

diff --git a/data/templates/default.latex b/data/templates/default.latex
index 142fe3e55..04784b971 100644
--- a/data/templates/default.latex
+++ b/data/templates/default.latex
@@ -156,6 +156,25 @@ $if(CJKmainfont)$
   \fi
 $endif$
 \fi
+$if(zero-width-non-joiner)$
+%% Support for zero-width non-joiner characters.
+\makeatletter
+\def\zerowidthnonjoiner{%
+  % Prevent ligatures and adjust kerning, but still support hyphenating.
+  \texorpdfstring{%
+    \textormath{\nobreak\discretionary{-}{}{\kern.03em}%
+      \ifvmode\else\nobreak\hskip\z@skip\fi}{}%
+  }{}%
+}
+\makeatother
+\ifPDFTeX
+  \DeclareUnicodeCharacter{200C}{\zerowidthnonjoiner}
+\else
+  \catcode`^^^^200c=\active
+  \protected\def ^^^^200c{\zerowidthnonjoiner}
+\fi
+%% End of ZWNJ support
+$endif$
 $if(beamer)$
 $if(theme)$
 \usetheme[$for(themeoptions)$$themeoptions$$sep$,$endfor$]{$theme$}
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index d8722876e..bf57937bd 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -173,6 +173,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                   defField "has-chapters" (stHasChapters st) $
                   defField "has-frontmatter" (documentClass `elem` frontmatterClasses) $
                   defField "listings" (writerListings options || stLHS st) $
+                  defField "zero-width-non-joiner" (stZwnj st) $
                   defField "beamer" beamer $
                   (if stHighlighting st
                       then case writerHighlightStyle options of
@@ -1048,5 +1049,3 @@ extractInline _ _               = []
 -- Look up a key in an attribute and give a list of its values
 lookKey :: Text -> Attr -> [Text]
 lookKey key (_,_,kvs) =  maybe [] T.words $ lookup key kvs
-
-
diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs
index d598794ad..c06b7e923 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Types.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs
@@ -40,7 +40,7 @@ data WriterState =
   , stCsquotes      :: Bool          -- ^ true if document uses csquotes
   , stHighlighting  :: Bool          -- ^ true if document has highlighted code
   , stIncremental   :: Bool          -- ^ true if beamer lists should be
-                                     --   displayed bit by bit
+  , stZwnj          :: Bool          -- ^ true if document has a ZWNJ character
   , stInternalLinks :: [Text]        -- ^ list of internal link targets
   , stBeamer        :: Bool          -- ^ produce beamer
   , stEmptyLine     :: Bool          -- ^ true if no content on line
@@ -74,6 +74,7 @@ startingState options =
   , stCsquotes = False
   , stHighlighting = False
   , stIncremental = writerIncremental options
+  , stZwnj = False
   , stInternalLinks = []
   , stBeamer = False
   , stEmptyLine = True
diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs
index 56bb792ae..c34338121 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Util.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs
@@ -22,6 +22,7 @@ module Text.Pandoc.Writers.LaTeX.Util (
 where
 
 import Control.Applicative ((<|>))
+import Control.Monad (when)
 import Text.Pandoc.Class (PandocMonad, toLang)
 import Text.Pandoc.Options (WriterOptions(..), isEnabled)
 import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..))
@@ -30,7 +31,7 @@ import Text.Pandoc.Highlighting (toListingsLanguage)
 import Text.DocLayout
 import Text.Pandoc.Definition
 import Text.Pandoc.ImageSize (showFl)
-import Control.Monad.State.Strict (gets)
+import Control.Monad.State.Strict (gets, modify)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Pandoc.Extensions (Extension(Ext_smart))
@@ -49,6 +50,8 @@ data StringContext = TextString
 stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text
 stringToLaTeX context zs = do
   opts <- gets stOptions
+  when ('\x200c' `elemText` zs) $
+    modify (\s -> s { stZwnj = True })
   return $ T.pack $
     foldr (go opts context) mempty $ T.unpack $
     if writerPreferAscii opts
@@ -270,5 +273,3 @@ mbBraced :: Text -> Text
 mbBraced x = if not (T.all isAlphaNum x)
                 then "{" <> x <> "}"
                 else x
-
-
-- 
cgit v1.2.3


From 4417dacc440e269c5a4db1d67b52da9e0fe05561 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 17 May 2021 13:00:38 +0200
Subject: ConTeXt writer: use span identifiers as reference anchors.

Closes: #7246
---
 src/Text/Pandoc/Writers/ConTeXt.hs | 8 ++++++--
 test/Tests/Writers/ConTeXt.hs      | 3 +++
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index b694437d8..57d752a67 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -487,7 +487,7 @@ inlineToConTeXt (Note contents) = do
               then literal "\\footnote{" <> nest 2 (chomp contents') <> char '}'
               else literal "\\startbuffer " <> nest 2 (chomp contents') <>
                    literal "\\stopbuffer\\footnote{\\getbuffer}"
-inlineToConTeXt (Span (_,_,kvs) ils) = do
+inlineToConTeXt (Span (ident,_,kvs) ils) = do
   mblang <- fromBCP47 (lookup "lang" kvs)
   let wrapDir txt = case lookup "dir" kvs of
                       Just "rtl" -> braces $ "\\righttoleft " <> txt
@@ -497,7 +497,11 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do
                        Just lng -> braces ("\\language" <>
                                            brackets (literal lng) <> txt)
                        Nothing -> txt
-  wrapLang . wrapDir <$> inlineListToConTeXt ils
+      addReference =
+        if T.null ident
+        then id
+        else (("\\reference" <> brackets (literal ident) <> "{}") <>)
+  addReference . wrapLang . wrapDir <$> inlineListToConTeXt ils
 
 -- | Craft the section header, inserting the section reference, if supplied.
 sectionHeader :: PandocMonad m
diff --git a/test/Tests/Writers/ConTeXt.hs b/test/Tests/Writers/ConTeXt.hs
index a4aa69dcd..fbbf9b948 100644
--- a/test/Tests/Writers/ConTeXt.hs
+++ b/test/Tests/Writers/ConTeXt.hs
@@ -43,6 +43,9 @@ tests =
   [ testGroup "inline code"
     [ "with '}'" =: code "}" =?> "\\mono{\\}}"
     , "without '}'" =: code "]" =?> "\\type{]}"
+    , "span with ID" =:
+      spanWith ("city", [], []) "Berlin" =?>
+      "\\reference[city]{}Berlin"
     , testProperty "code property" $ \s -> null s || '\n' `elem` s ||
             if '{' `elem` s || '}' `elem` s
                then context' (code $ pack s) == "\\mono{" ++
-- 
cgit v1.2.3


From 25f5b927773eb730c2d5ef834bd61e1d2d5f09df Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 17 May 2021 15:37:25 +0200
Subject: HTML writer: ensure headings only have valid attribs in HTML4

Fixes: #5944
---
 src/Text/Pandoc/Writers/HTML.hs |  16 +++++-
 test/Tests/Writers/HTML.hs      | 109 +++++++++++++++++++++-------------------
 2 files changed, 71 insertions(+), 54 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 332de1545..f7a387927 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -882,7 +882,7 @@ blockToHtml opts (BlockQuote blocks) = do
      else do
        contents <- blockListToHtml opts blocks
        return $ H.blockquote $ nl opts >> contents >> nl opts
-blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do
+blockToHtml opts (Header level (ident,classes,kvs) lst) = do
   contents <- inlineListToHtml opts lst
   let secnum = fromMaybe mempty $ lookup "number" kvs
   let contents' = if writerNumberSections opts && not (T.null secnum)
@@ -890,7 +890,13 @@ blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do
                      then (H.span ! A.class_ "header-section-number"
                              $ toHtml secnum) >> strToHtml " " >> contents
                      else contents
-  addAttrs opts attr
+  html5 <- gets stHtml5
+  let kvs' = if html5
+             then kvs
+             else [ (k, v) | (k, v) <- kvs
+                           , k `elem` (["lang", "dir", "title", "style"
+                                      , "align"] ++ intrinsicEventsHTML4)]
+  addAttrs opts (ident,classes,kvs')
          $ case level of
               1 -> H.h1 contents'
               2 -> H.h2 contents'
@@ -1526,6 +1532,12 @@ allowsMathEnvironments MathML      = True
 allowsMathEnvironments (WebTeX _)  = True
 allowsMathEnvironments _           = False
 
+-- | List of intrinsic event attributes allowed on all elements in HTML4.
+intrinsicEventsHTML4 :: [Text]
+intrinsicEventsHTML4 =
+  [ "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover"
+  , "onmouseout", "onmouseout", "onkeypress", "onkeydown", "onkeyup"]
+
 isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool
 isRawHtml f = do
   html5 <- gets stHtml5
diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs
index 328801e31..404f6da98 100644
--- a/test/Tests/Writers/HTML.hs
+++ b/test/Tests/Writers/HTML.hs
@@ -34,55 +34,60 @@ infix 4 =:
 (=:) = test html
 
 tests :: [TestTree]
-tests = [ testGroup "inline code"
-          [ "basic" =: code "@&" =?> "<code>@&amp;</code>"
-          , "haskell" =: codeWith ("",["haskell"],[]) ">>="
-            =?> "<code class=\"sourceCode haskell\"><span class=\"op\">&gt;&gt;=</span></code>"
-          , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>="
-            =?> "<code class=\"nolanguage\">&gt;&gt;=</code>"
-          ]
-        , testGroup "images"
-          [ "alt with formatting" =:
-            image "/url" "title" ("my " <> emph "image")
-            =?> "<img src=\"/url\" title=\"title\" alt=\"my image\" />"
-          ]
-        , testGroup "blocks"
-          [ "definition list with empty <dt>" =:
-            definitionList [(mempty, [para $ text "foo bar"])]
-            =?> "<dl><dt></dt><dd><p>foo bar</p></dd></dl>"
-          ]
-        , testGroup "quotes"
-          [ "quote with cite attribute (without q-tags)" =:
-            doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples"))
-            =?> "“<span cite=\"http://example.org\">examples</span>”"
-          , tQ "quote with cite attribute (with q-tags)" $
-            doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples"))
-            =?> "<q cite=\"http://example.org\">examples</q>"
-          ]
-        , testGroup "sample"
-          [ "sample should be rendered correctly" =:
-            plain (codeWith ("",["sample"],[]) "Answer is 42") =?>
-            "<samp>Answer is 42</samp>"
-          ]
-        , testGroup "variable"
-          [ "variable should be rendered correctly" =:
-            plain (codeWith ("",["variable"],[]) "result") =?>
-            "<var>result</var>"
-          ]
-        , testGroup "sample with style"
-          [ "samp should wrap highlighted code" =:
-            codeWith ("",["sample","haskell"],[]) ">>="
-            =?> ("<samp><code class=\"sourceCode haskell\">" ++
-                "<span class=\"op\">&gt;&gt;=</span></code></samp>")
-          ]
-        , testGroup "variable with style"
-          [ "var should wrap highlighted code" =:
-            codeWith ("",["haskell","variable"],[]) ">>="
-            =?> ("<var><code class=\"sourceCode haskell\">" ++
-                "<span class=\"op\">&gt;&gt;=</span></code></var>")
-          ]
-        ]
-        where
-          tQ :: (ToString a, ToPandoc a)
-               => String -> (a, String) -> TestTree
-          tQ = test htmlQTags
+tests =
+  [ testGroup "inline code"
+    [ "basic" =: code "@&" =?> "<code>@&amp;</code>"
+    , "haskell" =: codeWith ("",["haskell"],[]) ">>="
+      =?> "<code class=\"sourceCode haskell\"><span class=\"op\">&gt;&gt;=</span></code>"
+    , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>="
+      =?> "<code class=\"nolanguage\">&gt;&gt;=</code>"
+    ]
+  , testGroup "images"
+    [ "alt with formatting" =:
+      image "/url" "title" ("my " <> emph "image")
+      =?> "<img src=\"/url\" title=\"title\" alt=\"my image\" />"
+    ]
+  , testGroup "blocks"
+    [ "definition list with empty <dt>" =:
+      definitionList [(mempty, [para $ text "foo bar"])]
+      =?> "<dl><dt></dt><dd><p>foo bar</p></dd></dl>"
+    , "heading with disallowed attributes" =:
+      headerWith ("", [], [("invalid","1"), ("lang", "en")]) 1 "test"
+      =?>
+      "<h1 lang=\"en\">test</h1>"
+    ]
+  , testGroup "quotes"
+    [ "quote with cite attribute (without q-tags)" =:
+      doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples"))
+      =?> "“<span cite=\"http://example.org\">examples</span>”"
+    , tQ "quote with cite attribute (with q-tags)" $
+      doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples"))
+      =?> "<q cite=\"http://example.org\">examples</q>"
+    ]
+  , testGroup "sample"
+    [ "sample should be rendered correctly" =:
+      plain (codeWith ("",["sample"],[]) "Answer is 42") =?>
+      "<samp>Answer is 42</samp>"
+    ]
+  , testGroup "variable"
+    [ "variable should be rendered correctly" =:
+      plain (codeWith ("",["variable"],[]) "result") =?>
+      "<var>result</var>"
+    ]
+  , testGroup "sample with style"
+    [ "samp should wrap highlighted code" =:
+      codeWith ("",["sample","haskell"],[]) ">>="
+      =?> ("<samp><code class=\"sourceCode haskell\">" ++
+          "<span class=\"op\">&gt;&gt;=</span></code></samp>")
+    ]
+  , testGroup "variable with style"
+    [ "var should wrap highlighted code" =:
+      codeWith ("",["haskell","variable"],[]) ">>="
+      =?> ("<var><code class=\"sourceCode haskell\">" ++
+          "<span class=\"op\">&gt;&gt;=</span></code></var>")
+    ]
+  ]
+  where
+    tQ :: (ToString a, ToPandoc a)
+         => String -> (a, String) -> TestTree
+    tQ = test htmlQTags
-- 
cgit v1.2.3


From 1843a8793a9043a45c8c427b06f100461889d7ef Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 17 May 2021 18:08:02 +0200
Subject: HTML writer: keep attributes from code nested below pre tag.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a code block is defined with `<pre><code
class="language-x">…</code></pre>`, where the `<pre>` element has no
attributes, then the attributes from the `<code>` element are used
instead. Any leading `language-` prefix is dropped in the code's *class*
attribute are dropped to improve syntax highlighting.

Closes: #7221
---
 src/Text/Pandoc/Readers/HTML.hs | 13 ++++++++++++-
 test/Tests/Readers/HTML.hs      | 11 +++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 0a9e4addf..fc4575f2d 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -557,7 +557,18 @@ pFigure = try $ do
 pCodeBlock :: PandocMonad m => TagParser m Blocks
 pCodeBlock = try $ do
   TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
-  let attr = toAttr attr'
+  -- if the `pre` has no attributes, try if it is followed by a `code`
+  -- element and use those attributes if possible.
+  attr <- case attr' of
+    _:_ -> pure (toAttr attr')
+    []  -> option nullAttr $ do
+      TagOpen _ codeAttr <- pSatisfy (matchTagOpen "code" [])
+      pure $ toAttr
+        [ (k, v') | (k, v) <- codeAttr
+                    -- strip language from class
+                  , let v' = if k == "class"
+                             then fromMaybe v (T.stripPrefix "language-" v)
+                             else v ]
   contents <- manyTill pAny (pCloses "pre" <|> eof)
   let rawText = T.concat $ map tagToText contents
   -- drop leading newline if any
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index 7f5849991..9bf567194 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -108,6 +108,17 @@ tests = [ testGroup "base tag"
             "<header id=\"title\">Title</header>" =?>
             divWith ("title", mempty, mempty) (plain "Title")
           ]
+        , testGroup "code block"
+          [ test html "attributes in pre > code element" $
+            "<pre><code id=\"a\" class=\"python\">\nprint('hi')\n</code></pre>"
+            =?>
+            codeBlockWith ("a", ["python"], []) "print('hi')"
+
+          , test html "attributes in pre take precendence" $
+            "<pre id=\"c\"><code id=\"d\">\nprint('hi mom!')\n</code></pre>"
+            =?>
+            codeBlockWith ("c", [], []) "print('hi mom!')"
+          ]
         , askOption $ \(QuickCheckTests numtests) ->
             testProperty "Round trip" $
               withMaxSuccess (if QuickCheckTests numtests == defaultValue
-- 
cgit v1.2.3


From 56fb4dae1ba2f6c28f561964249b89385d482f53 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 17 May 2021 20:42:43 -0700
Subject: Citeproc: ensure that CSL-related attributes are passed on...

...to a Div with id 'refs'.  Previously we just left the
attributes of such a Div alone, which meant that style
options like entry-spacing had no effect there.
---
 src/Text/Pandoc/Citeproc.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 2f4936fa6..de63aed1f 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -499,7 +499,7 @@ insertRefs refkvs refclasses meta refs bs =
      put True
      -- refHeader isn't used if you have an explicit references div
      let cs' = ordNub $ cs ++ refclasses
-     return $ Div ("refs",cs',kvs) (xs ++ refs)
+     return $ Div ("refs",cs' ++ refclasses,kvs ++ refkvs) (xs ++ refs)
    go x = return x
 
 refTitle :: Meta -> Maybe [Inline]
-- 
cgit v1.2.3


From eb3dff148e67e84362632e63848d40ba808940f4 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 18 May 2021 22:55:47 +0200
Subject: LaTeX writer: separate successive quote chars with thin space

Successive quote characters are separated with a thin space to improve
readability and to prevent unwanted ligatures. Detection of these quotes
sometimes had failed if the second quote was nested in a span element.

Closes: #6958
---
 src/Text/Pandoc/Writers/LaTeX.hs |  5 +++--
 test/command/6958.md             | 10 ++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 test/command/6958.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index bf57937bd..978f94ea0 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -894,8 +894,9 @@ inlineToLaTeX (Quoted qt lst) = do
                       then char '`' <> inner <> char '\''
                       else char '\x2018' <> inner <> char '\x2019'
     where
-      isQuoted (Quoted _ _) = True
-      isQuoted _            = False
+      isQuoted (Span _ (x:_)) = isQuoted x
+      isQuoted (Quoted _ _)   = True
+      isQuoted _              = False
 inlineToLaTeX (Str str) = do
   setEmptyLine False
   liftM literal $ stringToLaTeX TextString str
diff --git a/test/command/6958.md b/test/command/6958.md
new file mode 100644
index 000000000..230371d7d
--- /dev/null
+++ b/test/command/6958.md
@@ -0,0 +1,10 @@
+Add thin space between single and double quotes.
+```
+% pandoc -t latex+smart
+---
+lang: en-GB
+---
+'["On the Outside"]{}: Constructing Cycling Citizenship.'
+^D
+`\,{``On the Outside''}: Constructing Cycling Citizenship.'
+```
-- 
cgit v1.2.3


From ddbd984a0d8ea7e75f78ad6632fe3568e2390deb Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 17 May 2021 09:31:52 -0700
Subject: Text.Pandoc.MediaBag: change type to use a Text key...

instead of `[FilePath]`.
We normalize the path and use `/` separators for consistency.
---
 src/Text/Pandoc/Class/PandocMonad.hs |  1 +
 src/Text/Pandoc/MediaBag.hs          | 20 +++++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 7559cd7cd..226194503 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE CPP #-}
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index 3249bcdeb..4a9b4efa1 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE CPP                        #-}
+{-# LANGUAGE OverloadedStrings          #-}
 {-# LANGUAGE DeriveDataTypeable         #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {- |
@@ -27,26 +28,31 @@ import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
 import Data.Typeable (Typeable)
 import System.FilePath
-import qualified System.FilePath.Posix as Posix
 import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
+import Data.Text (Text)
+import qualified Data.Text as T
 
 -- | A container for a collection of binary resources, with names and
 -- mime types.  Note that a 'MediaBag' is a Monoid, so 'mempty'
 -- can be used for an empty 'MediaBag', and '<>' can be used to append
 -- two 'MediaBag's.
-newtype MediaBag = MediaBag (M.Map [FilePath] (MimeType, BL.ByteString))
+newtype MediaBag = MediaBag (M.Map Text (MimeType, BL.ByteString))
         deriving (Semigroup, Monoid, Data, Typeable)
 
 instance Show MediaBag where
   show bag = "MediaBag " ++ show (mediaDirectory bag)
 
+-- | We represent paths with /, in normalized form.
+canonicalize :: FilePath -> Text
+canonicalize = T.replace "\\" "/" . T.pack . normalise
+
 -- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds
 -- to the given path.
 deleteMedia :: FilePath       -- ^ relative path and canonical name of resource
             -> MediaBag
             -> MediaBag
 deleteMedia fp (MediaBag mediamap) =
-  MediaBag $ M.delete (splitDirectories fp) mediamap
+  MediaBag $ M.delete (canonicalize fp) mediamap
 
 -- | Insert a media item into a 'MediaBag', replacing any existing
 -- value with the same name.
@@ -56,7 +62,7 @@ insertMedia :: FilePath       -- ^ relative path and canonical name of resource
             -> MediaBag
             -> MediaBag
 insertMedia fp mbMime contents (MediaBag mediamap) =
-  MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap)
+  MediaBag (M.insert (canonicalize fp) (mime, contents) mediamap)
   where mime = fromMaybe fallback mbMime
         fallback = case takeExtension fp of
                         ".gz" -> getMimeTypeDef $ dropExtension fp
@@ -66,16 +72,16 @@ insertMedia fp mbMime contents (MediaBag mediamap) =
 lookupMedia :: FilePath
             -> MediaBag
             -> Maybe (MimeType, BL.ByteString)
-lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap
+lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap
 
 -- | Get a list of the file paths stored in a 'MediaBag', with
 -- their corresponding mime types and the lengths in bytes of the contents.
 mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)]
 mediaDirectory (MediaBag mediamap) =
   M.foldrWithKey (\fp (mime,contents) ->
-      ((Posix.joinPath fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
+      ((T.unpack fp, mime, fromIntegral (BL.length contents)):)) [] mediamap
 
 mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)]
 mediaItems (MediaBag mediamap) =
   M.foldrWithKey (\fp (mime,contents) ->
-      ((Posix.joinPath fp, mime, contents):)) [] mediamap
+      ((T.unpack fp, mime, contents):)) [] mediamap
-- 
cgit v1.2.3


From 9b5798bd9abe267f783dab7fe5295c9b61e2fdce Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 17 May 2021 21:22:46 -0700
Subject: Use fetchItem instead of downloadOrRead in fetchMediaResource.

---
 src/Text/Pandoc/Class/PandocMonad.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 226194503..b12850de5 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -635,7 +635,7 @@ withPaths (p:ps) action fp =
 fetchMediaResource :: PandocMonad m
               => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString)
 fetchMediaResource src = do
-  (bs, mt) <- downloadOrRead src
+  (bs, mt) <- fetchItem src
   let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src)
                       (mt >>= extensionFromMimeType)
   let bs' = BL.fromChunks [bs]
-- 
cgit v1.2.3


From 640dbf8b8f5e652661df42c631b4343570d7448e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 19 May 2021 09:51:50 -0700
Subject: Remove unused pragma.

---
 src/Text/Pandoc/Class/PandocMonad.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index b12850de5..dd6499a73 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -1,5 +1,4 @@
 {-# LANGUAGE CPP #-}
-{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
-- 
cgit v1.2.3


From 5736b331d8ecaa12cc3e2712211ada37c665a93a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 19 May 2021 16:14:49 -0700
Subject: LaTeX reader: better support for `\xspace`.

Previously we only supported it in inline contexts; now
we support it in all contexts, including math.

Partially addresses #7299.
---
 src/Text/Pandoc/Readers/LaTeX.hs         | 12 ------------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 21 +++++++++++++++++++--
 test/command/4442.md                     |  2 +-
 test/command/7299.md                     | 23 +++++++++++++++++++++++
 4 files changed, 43 insertions(+), 15 deletions(-)
 create mode 100644 test/command/7299.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f90d562ae..2ace18d1b 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -232,16 +232,6 @@ mkImage options (T.unpack -> src) = do
                _  -> return src
    return $ imageWith attr (T.pack src') "" alt
 
-doxspace :: PandocMonad m => LP m Inlines
-doxspace =
-  (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty
-  where startsWithLetter (Tok _ Word t) =
-          case T.uncons t of
-               Just (c, _) | isLetter c -> True
-               _           -> False
-        startsWithLetter _ = False
-
-
 removeDoubleQuotes :: Text -> Text
 removeDoubleQuotes t =
   Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -417,8 +407,6 @@ inlineCommands = M.unions
     -- LaTeX colors
     , ("textcolor", coloredInline "color")
     , ("colorbox", coloredInline "background-color")
-    -- xspace
-    , ("xspace", doxspace)
     -- etoolbox
     , ("ifstrequal", ifstrequal)
     , ("newtoggle", braced >>= newToggle)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 35ce3509d..b6804a825 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -464,7 +464,7 @@ satisfyTok f = do
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
   st <- getState
-  unless (sVerbatimMode st || M.null (sMacros st)) $ do
+  unless (sVerbatimMode st) $
     getInput >>= doMacros' 1 >>= setInput
 
 doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok]
@@ -526,7 +526,7 @@ doMacros' n inp =
         $ throwError $ PandocMacroLoop name
       macros <- sMacros <$> getState
       case M.lookup name macros of
-           Nothing -> mzero
+           Nothing -> trySpecialMacro name ts
            Just (Macro expansionPoint argspecs optarg newtoks) -> do
              let getargs' = do
                    args <-
@@ -554,6 +554,23 @@ doMacros' n inp =
                    ExpandWhenUsed    -> doMacros' (n' + 1) result
                    ExpandWhenDefined -> return result
 
+-- | Certain macros do low-level tex manipulations that can't
+-- be represented in our Macro type, so we handle them here.
+trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok]
+trySpecialMacro "xspace" ts = do
+  ts' <- doMacros' 1 ts
+  case ts' of
+    Tok pos Word t : _
+      | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
+    _ -> return ts'
+trySpecialMacro _ _ = mzero
+
+startsWithAlphaNum :: Text -> Bool
+startsWithAlphaNum t =
+  case T.uncons t of
+       Just (c, _) | isAlphaNum c -> True
+       _           -> False
+
 setpos :: SourcePos -> Tok -> Tok
 setpos spos (Tok _ tt txt) = Tok spos tt txt
 
diff --git a/test/command/4442.md b/test/command/4442.md
index 8574fe759..447073406 100644
--- a/test/command/4442.md
+++ b/test/command/4442.md
@@ -5,5 +5,5 @@
 ^D
 \newcommand{\myFruit}{Mango\xspace}
 
-Mango\xspace is the king of fruits.
+Mango is the king of fruits.
 ```
diff --git a/test/command/7299.md b/test/command/7299.md
new file mode 100644
index 000000000..0847c40ce
--- /dev/null
+++ b/test/command/7299.md
@@ -0,0 +1,23 @@
+```
+% pandoc -f latex -t plain
+$1-{\ensuremath{r}\xspace}$
+^D
+1 − r
+```
+
+```
+% pandoc -f latex -t plain
+\newcommand{\foo}{Foo\xspace}
+
+$\text{\foo bar}$
+^D
+Foo bar
+```
+
+```
+% pandoc -f latex -t plain
+a\xspace b
+^D
+a b
+```
+
-- 
cgit v1.2.3


From b6239f41509c368b5befd316c290b5b6cc6f00e3 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Thu, 20 May 2021 10:48:28 +0200
Subject: ZimWiki writer: allow links and emphasis in headers

The latest version of ZimWiki supports this.

Closes: #6605
---
 src/Text/Pandoc/Writers/ZimWiki.hs | 2 +-
 test/writer.zimwiki                | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs
index fcf9e000d..df914f590 100644
--- a/src/Text/Pandoc/Writers/ZimWiki.hs
+++ b/src/Text/Pandoc/Writers/ZimWiki.hs
@@ -116,7 +116,7 @@ blockToZimWiki opts b@(RawBlock f str)
 blockToZimWiki _ HorizontalRule = return "\n----\n"
 
 blockToZimWiki opts (Header level _ inlines) = do
-  contents <- inlineListToZimWiki opts $ removeFormatting inlines   -- emphasis, links etc. not allowed in headers
+  contents <- inlineListToZimWiki opts inlines
   let eqs = T.replicate ( 7 - level ) "="
   return $ eqs <> " " <> contents <> " " <> eqs <> "\n"
 
diff --git a/test/writer.zimwiki b/test/writer.zimwiki
index 4b384fb20..f793e5760 100644
--- a/test/writer.zimwiki
+++ b/test/writer.zimwiki
@@ -8,9 +8,9 @@ This is a set of tests for pandoc. Most of them are adapted from John Gruber’s
 
 ====== Headers ======
 
-===== Level 2 with an embedded link =====
+===== Level 2 with an [[url|embedded link]] =====
 
-==== Level 3 with emphasis ====
+==== Level 3 with //emphasis// ====
 
 === Level 4 ===
 
@@ -18,7 +18,7 @@ This is a set of tests for pandoc. Most of them are adapted from John Gruber’s
 
 ====== Level 1 ======
 
-===== Level 2 with emphasis =====
+===== Level 2 with //emphasis// =====
 
 ==== Level 3 ====
 
-- 
cgit v1.2.3


From 8437a4a002210a33ee721c58f5f95605898a8e1b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 08:15:48 -0700
Subject: LaTeX reader: support `\pm` in `SI{..}`.

Closes #6620.
---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 4 +++-
 test/command/6620.md                     | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 1952f4e1a..c4fb06700 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -45,7 +45,9 @@ doSI tok = do
                       unit]
 
 doSInum :: PandocMonad m => LP m Inlines
-doSInum = skipopts *> (tonum . untokenize <$> braced)
+doSInum = skipopts *> (tonum . untokenize . map convertPM <$> braced)
+  where convertPM (Tok pos (CtrlSeq "pm") _) = Tok pos Word "\xb1\xa0"
+        convertPM t = t
 
 tonum :: Text -> Inlines
 tonum value =
diff --git a/test/command/6620.md b/test/command/6620.md
index 314200d30..00643e57c 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -7,9 +7,12 @@
 \SI{0.135(21)}{\m}
 
 \SI{12.3(60)}{\m}
+
+\SI{10.0 \pm 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
+<p>10.0 ± 3.3 ms</p>
 ```
-- 
cgit v1.2.3


From a366bd6abccd18a49f0033de850ddd53830cc5ed Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:03:29 -0700
Subject: LaTeX reader: Fix parsing of `+-` in siunitx numbers.

See #6658.
---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 11 +++++++----
 test/command/6620.md                     |  5 ++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index c4fb06700..5e140ef7a 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -45,9 +45,7 @@ doSI tok = do
                       unit]
 
 doSInum :: PandocMonad m => LP m Inlines
-doSInum = skipopts *> (tonum . untokenize . map convertPM <$> braced)
-  where convertPM (Tok pos (CtrlSeq "pm") _) = Tok pos Word "\xb1\xa0"
-        convertPM t = t
+doSInum = skipopts *> (tonum . untokenize <$> braced)
 
 tonum :: Text -> Inlines
 tonum value =
@@ -74,12 +72,16 @@ parseNumPart =
   parseDecimalNum <|>
   parseComma <|>
   parsePlusMinus <|>
+  parsePM <|>
   parseI <|>
   parseExp <|>
   parseX <|>
   parseSpace
  where
-  parseDecimalNum = do
+  parseDecimalNum, parsePlusMinus, parsePM,
+    parseComma, parseI, parseX,
+    parseExp, parseSpace :: Parser Text () Inlines
+  parseDecimalNum = try $ do
     pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
     basenum <- (pref <>) . T.pack
                 <$> many1 (satisfy (\c -> isDigit c || c == '.'))
@@ -100,6 +102,7 @@ parseNumPart =
                                                | otherwise -> "." <> t
   parseComma = str "." <$ char ','
   parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-")
+  parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm")
   parseParens =
     char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')'
   parseI = str "i" <$ char 'i'
diff --git a/test/command/6620.md b/test/command/6620.md
index 00643e57c..b19cd1972 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -9,10 +9,13 @@
 \SI{12.3(60)}{\m}
 
 \SI{10.0 \pm 3.3}{\ms}
+
+\SI{10.0 +- 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
-<p>10.0 ± 3.3 ms</p>
+<p>10.0 ± 3.3 ms</p>
+<p>10.0 ± 3.3 ms</p>
 ```
-- 
cgit v1.2.3


From 183ce584779b344ad6a6a3e085ddfdb00faf62aa Mon Sep 17 00:00:00 2001
From: Denis Maier <denis.maier@ub.unibe.ch>
Date: Thu, 20 May 2021 18:59:53 +0200
Subject: ConTeXt reader: improve ordered lists (#7304)

Closes #5016

- change ordered list from itemize to enumerate
- adds new itemgroup for ordered lists
- add fontfeature for table figures
- remove width from itemize in context writer
---
 data/templates/default.context     |  4 ++
 src/Text/Pandoc/Writers/ConTeXt.hs | 14 ++-----
 test/writer.context                | 80 ++++++++++++++++++++------------------
 test/writers-lang-and-dir.context  |  4 ++
 4 files changed, 53 insertions(+), 49 deletions(-)

(limited to 'src')

diff --git a/data/templates/default.context b/data/templates/default.context
index 64de448ca..df39130b2 100644
--- a/data/templates/default.context
+++ b/data/templates/default.context
@@ -51,6 +51,7 @@ $endif$
 
 % use microtypography
 \definefontfeature[default][default][script=latn, protrusion=quality, expansion=quality, itlc=yes, textitalics=yes, onum=yes, pnum=yes]
+\definefontfeature[default:tnum][default][tnum=yes, pnum=no]
 \definefontfeature[smallcaps][script=latn, protrusion=quality, expansion=quality, smcp=yes, onum=yes, pnum=yes]
 \setupalign[hz,hanging]
 \setupitaliccorrection[global, always]
@@ -97,6 +98,9 @@ $endif$
 \setupitemize[autointro]    % prevent orphan list intro
 \setupitemize[indentnext=no]
 
+\defineitemgroup[enumerate]
+\setupenumerate[each][fit][itemalign=left,distance=.5em,style={\feature[+][default:tnum]}]
+
 \setupfloat[figure][default={here,nonumber}]
 \setupfloat[table][default={here,nonumber}]
 
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 57d752a67..3cafcefba 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -16,7 +16,6 @@ module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
 import Control.Monad.State.Strict
 import Data.Char (ord, isDigit)
 import Data.List (intersperse)
-import Data.List.NonEmpty (nonEmpty)
 import Data.Maybe (mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -233,14 +232,7 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
                         Period       -> "stopper=."
                         OneParen     -> "stopper=)"
                         TwoParens    -> "left=(,stopper=)"
-    let width = maybe 0 maximum $ nonEmpty $ map T.length $
-                  take (length contents)
-                       (orderedListMarkers (start, style', delim))
-    let width' = (toEnum width + 1) / 2
-    let width'' = if width' > (1.5 :: Double)
-                     then "width=" <> tshow width' <> "em"
-                     else ""
-    let specs2Items = filter (not . T.null) [start', delim', width'']
+    let specs2Items = filter (not . T.null) [start', delim']
     let specs2 = if null specs2Items
                     then ""
                     else "[" <> T.intercalate "," specs2Items <> "]"
@@ -254,8 +246,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
                           UpperAlpha   -> 'A') :
                        if isTightList lst then ",packed]" else "]"
     let specs = T.pack style'' <> specs2
-    return $ "\\startitemize" <> literal specs $$ vcat contents $$
-             "\\stopitemize" <> blankline
+    return $ "\\startenumerate" <> literal specs $$ vcat contents $$
+             "\\stopenumerate" <> blankline
 blockToConTeXt (DefinitionList lst) =
   liftM vcat $ mapM defListItemToConTeXt lst
 blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
diff --git a/test/writer.context b/test/writer.context
index 77570c249..78667813b 100644
--- a/test/writer.context
+++ b/test/writer.context
@@ -17,6 +17,7 @@
 
 % use microtypography
 \definefontfeature[default][default][script=latn, protrusion=quality, expansion=quality, itlc=yes, textitalics=yes, onum=yes, pnum=yes]
+\definefontfeature[default:tnum][default][tnum=yes, pnum=no]
 \definefontfeature[smallcaps][script=latn, protrusion=quality, expansion=quality, smcp=yes, onum=yes, pnum=yes]
 \setupalign[hz,hanging]
 \setupitaliccorrection[global, always]
@@ -48,6 +49,9 @@
 \setupitemize[autointro]    % prevent orphan list intro
 \setupitemize[indentnext=no]
 
+\defineitemgroup[enumerate]
+\setupenumerate[each][fit][itemalign=left,distance=.5em,style={\feature[+][default:tnum]}]
+
 \setupfloat[figure][default={here,nonumber}]
 \setupfloat[table][default={here,nonumber}]
 
@@ -133,12 +137,12 @@ sub status {
 
 A list:
 
-\startitemize[n,packed][stopper=.]
+\startenumerate[n,packed][stopper=.]
 \item
   item one
 \item
   item two
-\stopitemize
+\stopenumerate
 
 Nested block quotes:
 
@@ -255,51 +259,51 @@ Minuses loose:
 
 Tight:
 
-\startitemize[n,packed][stopper=.]
+\startenumerate[n,packed][stopper=.]
 \item
   First
 \item
   Second
 \item
   Third
-\stopitemize
+\stopenumerate
 
 and:
 
-\startitemize[n,packed][stopper=.]
+\startenumerate[n,packed][stopper=.]
 \item
   One
 \item
   Two
 \item
   Three
-\stopitemize
+\stopenumerate
 
 Loose using tabs:
 
-\startitemize[n][stopper=.]
+\startenumerate[n][stopper=.]
 \item
   First
 \item
   Second
 \item
   Third
-\stopitemize
+\stopenumerate
 
 and using spaces:
 
-\startitemize[n][stopper=.]
+\startenumerate[n][stopper=.]
 \item
   One
 \item
   Two
 \item
   Three
-\stopitemize
+\stopenumerate
 
 Multiple paragraphs:
 
-\startitemize[n][stopper=.]
+\startenumerate[n][stopper=.]
 \item
   Item 1, graf one.
 
@@ -308,7 +312,7 @@ Multiple paragraphs:
   Item 2.
 \item
   Item 3.
-\stopitemize
+\stopenumerate
 
 \subsection[title={Nested},reference={nested}]
 
@@ -327,7 +331,7 @@ Multiple paragraphs:
 
 Here's another:
 
-\startitemize[n,packed][stopper=.]
+\startenumerate[n,packed][stopper=.]
 \item
   First
 \item
@@ -342,11 +346,11 @@ Here's another:
   \stopitemize
 \item
   Third
-\stopitemize
+\stopenumerate
 
 Same thing but with paragraphs:
 
-\startitemize[n][stopper=.]
+\startenumerate[n][stopper=.]
 \item
   First
 \item
@@ -362,7 +366,7 @@ Same thing but with paragraphs:
   \stopitemize
 \item
   Third
-\stopitemize
+\stopenumerate
 
 \subsection[title={Tabs and spaces},reference={tabs-and-spaces}]
 
@@ -382,7 +386,7 @@ Same thing but with paragraphs:
 
 \subsection[title={Fancy list markers},reference={fancy-list-markers}]
 
-\startitemize[n][start=2,left=(,stopper=),width=2.0em]
+\startenumerate[n][start=2,left=(,stopper=)]
 \item
   begins with 2
 \item
@@ -390,51 +394,51 @@ Same thing but with paragraphs:
 
   with a continuation
 
-  \startitemize[r,packed][start=4,stopper=.,width=2.0em]
+  \startenumerate[r,packed][start=4,stopper=.]
   \item
     sublist with roman numerals, starting with 4
   \item
     more items
-    \startitemize[A,packed][left=(,stopper=),width=2.0em]
+    \startenumerate[A,packed][left=(,stopper=)]
     \item
       a subsublist
     \item
       a subsublist
-    \stopitemize
-  \stopitemize
-\stopitemize
+    \stopenumerate
+  \stopenumerate
+\stopenumerate
 
 Nesting:
 
-\startitemize[A,packed][stopper=.]
+\startenumerate[A,packed][stopper=.]
 \item
   Upper Alpha
-  \startitemize[R,packed][stopper=.]
+  \startenumerate[R,packed][stopper=.]
   \item
     Upper Roman.
-    \startitemize[n,packed][start=6,left=(,stopper=),width=2.0em]
+    \startenumerate[n,packed][start=6,left=(,stopper=)]
     \item
       Decimal start with 6
-      \startitemize[a,packed][start=3,stopper=)]
+      \startenumerate[a,packed][start=3,stopper=)]
       \item
         Lower alpha with paren
-      \stopitemize
-    \stopitemize
-  \stopitemize
-\stopitemize
+      \stopenumerate
+    \stopenumerate
+  \stopenumerate
+\stopenumerate
 
 Autonumbering:
 
-\startitemize[n,packed]
+\startenumerate[n,packed]
 \item
   Autonumber.
 \item
   More.
-  \startitemize[a,packed]
+  \startenumerate[a,packed]
   \item
     Nested.
-  \stopitemize
-\stopitemize
+  \stopenumerate
+\stopenumerate
 
 Should not be a list item:
 
@@ -547,12 +551,12 @@ Blank line after term, indented marker, alternate markers:
 \startdescription{orange}
   orange fruit
 
-  \startitemize[n,packed][stopper=.]
+  \startenumerate[n,packed][stopper=.]
   \item
     sublist
   \item
     sublist
-  \stopitemize
+  \stopenumerate
 \stopdescription
 
 \section[title={HTML Blocks},reference={html-blocks}]
@@ -897,10 +901,10 @@ note{]} Here is an inline note.\footnote{This is {\em easier} to type. Inline
 Notes can go in quotes.\footnote{In quote.}
 \stopblockquote
 
-\startitemize[n,packed][stopper=.]
+\startenumerate[n,packed][stopper=.]
 \item
   And in list items.\footnote{In list.}
-\stopitemize
+\stopenumerate
 
 This paragraph should not be part of the note, as it is not indented.
 
diff --git a/test/writers-lang-and-dir.context b/test/writers-lang-and-dir.context
index c4bff1a01..c01ecab58 100644
--- a/test/writers-lang-and-dir.context
+++ b/test/writers-lang-and-dir.context
@@ -15,6 +15,7 @@
 
 % use microtypography
 \definefontfeature[default][default][script=latn, protrusion=quality, expansion=quality, itlc=yes, textitalics=yes, onum=yes, pnum=yes]
+\definefontfeature[default:tnum][default][tnum=yes, pnum=no]
 \definefontfeature[smallcaps][script=latn, protrusion=quality, expansion=quality, smcp=yes, onum=yes, pnum=yes]
 \setupalign[hz,hanging]
 \setupitaliccorrection[global, always]
@@ -46,6 +47,9 @@
 \setupitemize[autointro]    % prevent orphan list intro
 \setupitemize[indentnext=no]
 
+\defineitemgroup[enumerate]
+\setupenumerate[each][fit][itemalign=left,distance=.5em,style={\feature[+][default:tnum]}]
+
 \setupfloat[figure][default={here,nonumber}]
 \setupfloat[table][default={here,nonumber}]
 
-- 
cgit v1.2.3


From 5dc917da3ed997c6e48e22bde242f0f8e1ae5333 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:11:26 -0700
Subject: LaTeX reader siunitx: add leading 0 to numbers starting with .

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 7 +++++--
 test/command/6620.md                     | 3 ---
 test/command/6658.md                     | 9 +++++++++
 3 files changed, 14 insertions(+), 5 deletions(-)
 create mode 100644 test/command/6658.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 5e140ef7a..1474329d4 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -83,8 +83,11 @@ parseNumPart =
     parseExp, parseSpace :: Parser Text () Inlines
   parseDecimalNum = try $ do
     pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
-    basenum <- (pref <>) . T.pack
-                <$> many1 (satisfy (\c -> isDigit c || c == '.'))
+    basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
+    let basenum = pref <> T.pack
+                    (case basenum' of
+                      '.':_ -> '0':basenum'
+                      _ -> basenum')
     uncertainty <- option mempty $ T.pack <$> parseParens
     if T.null uncertainty
        then return $ str basenum
diff --git a/test/command/6620.md b/test/command/6620.md
index b19cd1972..e448ca6b5 100644
--- a/test/command/6620.md
+++ b/test/command/6620.md
@@ -9,13 +9,10 @@
 \SI{12.3(60)}{\m}
 
 \SI{10.0 \pm 3.3}{\ms}
-
-\SI{10.0 +- 3.3}{\ms}
 ^D
 <p>23 ± 2 m</p>
 <p>125 ± 12 m</p>
 <p>0.135 ± 0.021 m</p>
 <p>12.3 ± 6 m</p>
 <p>10.0 ± 3.3 ms</p>
-<p>10.0 ± 3.3 ms</p>
 ```
diff --git a/test/command/6658.md b/test/command/6658.md
new file mode 100644
index 000000000..bcd174465
--- /dev/null
+++ b/test/command/6658.md
@@ -0,0 +1,9 @@
+```
+pandoc -f latex
+\SI{10.0 +- 3.3}{\ms}
+
+\num{.3e45}
+^D
+<p>10.0 ± 3.3 ms</p>
+<p>0.3 × 10<sup>45</sup></p>
+```
-- 
cgit v1.2.3


From bc5058234feab7646f58dc01379b4eadf95bf411 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 09:18:23 -0700
Subject: LaTeX reader sinuitx: fix + sign on ang.

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 9 ++++++---
 test/command/6658.md                     | 3 +++
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 1474329d4..72f81dcde 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -118,11 +118,14 @@ doSIang :: PandocMonad m => LP m Inlines
 doSIang = do
   skipopts
   ps <- T.splitOn ";" . untokenize <$> braced
+  let dropPlus t = case T.uncons t of
+                     Just ('+',t') -> t'
+                     _ -> t
   case ps ++ repeat "" of
     (d:m:s:_) -> return $
-      (if T.null d then mempty else str d <> str "\xb0") <>
-      (if T.null m then mempty else str m <> str "\x2032") <>
-      (if T.null s then mempty else str s <> str "\x2033")
+      (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <>
+      (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <>
+      (if T.null s then mempty else str (dropPlus s) <> str "\x2033")
     _ -> return mempty
 
 -- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms"
diff --git a/test/command/6658.md b/test/command/6658.md
index bcd174465..0a8512f85 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -3,7 +3,10 @@ pandoc -f latex
 \SI{10.0 +- 3.3}{\ms}
 
 \num{.3e45}
+
+\ang{+10;+3;}
 ^D
 <p>10.0 ± 3.3 ms</p>
 <p>0.3 × 10<sup>45</sup></p>
+<p>10°3′</p>
 ```
-- 
cgit v1.2.3


From 4e990a8cf9207f2315d6a55a45c93c2857663316 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 10:12:44 -0700
Subject: LaTeX/siunitx: fix parsing of `\cubic` etc. See #6658.

---
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 85 +++++++++++++++++++-------------
 test/command/6658.md                     |  3 ++
 2 files changed, 53 insertions(+), 35 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 72f81dcde..63ab7267d 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE OverloadedStrings #-}
 module Text.Pandoc.Readers.LaTeX.SIunitx
   ( siunitxCommands )
@@ -154,40 +155,55 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = try (do
-  Tok _ (CtrlSeq name) _ <- anyControlSeq
-  case name of
-    "square" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "2"
-    "cubic" -> do
-       unit <- siUnit tok
-       return $ unit <> superscript "3"
-    "raisetothe" -> do
-       n <- tok
-       unit <- siUnit tok
-       return $ unit <> superscript n
-    _ ->
-       case M.lookup name siUnitMap of
-            Just il ->
-              option il $
-                choice
-                 [ (il <> superscript "2") <$ controlSeq "squared"
-                 , (il <> superscript "3") <$ controlSeq "cubed"
-                 , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok)
-                 ]
-            Nothing -> fail "not an siunit unit command")
- <|> (lookAhead anyControlSeq >> tok)
- <|> (do Tok _ Word t <- satisfyTok isWordTok
-         return $ str t)
- <|> (symbol '^' *> (superscript <$> tok))
- <|> (symbol '_' *> (subscript <$> tok))
- <|> ("\xa0" <$ symbol '.')
- <|> ("\xa0" <$ symbol '~')
- <|> tok
- <|> (do Tok _ _ t <- anyTok
-         return (str t))
+siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines
+siUnit tok = mconcat <$> many1 siUnitPart
+ where
+  siUnitPart :: LP m Inlines
+  siUnitPart =
+        (siPrefix <*> siUnitPart)
+    <|> (do u <- siBase <|> tok
+            option u $ siSuffix <*> pure u)
+  siPrefix :: LP m (Inlines -> Inlines)
+  siPrefix =
+       (do _ <- controlSeq "per"
+           skipopts -- TODO handle option
+           return (str "/" <>))
+   <|> (do _ <- controlSeq "square"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubic"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "raisetothe"
+           skipopts
+           n <- tok
+           return (<> superscript n))
+  siSuffix :: LP m (Inlines -> Inlines)
+  siSuffix =
+       (do _ <- controlSeq "squared"
+           skipopts
+           return (<> superscript "2"))
+   <|> (do _ <- controlSeq "cubed"
+           skipopts
+           return (<> superscript "3"))
+   <|> (do _ <- controlSeq "tothe"
+           skipopts
+           n <- tok
+           return (<> superscript n))
+  siBase :: LP m Inlines
+  siBase = mconcat <$> many1
+    ((try
+       (do Tok _ (CtrlSeq name) _ <- anyControlSeq
+           case M.lookup name siUnitMap of
+              Just il -> pure il
+              Nothing -> fail "not a unit command"))
+    <|> (do Tok _ Word t <- satisfyTok isWordTok
+            return $ str t)
+    <|> (symbol '^' *> (superscript <$> tok))
+    <|> (symbol '_' *> (subscript <$> tok))
+    <|> (str "\xa0" <$ symbol '.')
+    <|> (str "\xa0" <$ symbol '~')
+     )
 
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
@@ -347,7 +363,6 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("per", str "/")
   , ("peta", str "P")
   , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
diff --git a/test/command/6658.md b/test/command/6658.md
index 0a8512f85..549610992 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -5,8 +5,11 @@ pandoc -f latex
 \num{.3e45}
 
 \ang{+10;+3;}
+
+\si{\gram\per\cubic\centi\metre}
 ^D
 <p>10.0 ± 3.3 ms</p>
 <p>0.3 × 10<sup>45</sup></p>
 <p>10°3′</p>
+<p>g/cm<sup>3</sup></p>
 ```
-- 
cgit v1.2.3


From bb11f5fb86993559f9999d4795355b02ae78cc3d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 12:06:15 -0700
Subject: LaTeX reader: More siunitx improvements.  Closes #6658.

There's still one slight divergence from the siunitx behavior:
we get 'kg m/A/s' instead of 'kg m/(A s)'. At the moment I'm
not going to worry about that.
---
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |   3 +-
 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 138 +++++++++++++++++++++----------
 test/command/6658.md                     |  72 ++++++++++++++--
 3 files changed, 161 insertions(+), 52 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index b6804a825..1c77eb299 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -806,7 +806,8 @@ withRaw parser = do
 
 keyval :: PandocMonad m => LP m (Text, Text)
 keyval = try $ do
-  Tok _ Word key <- satisfyTok isWordTok
+  key <- untokenize <$> many1 (notFollowedBy (symbol '=') >>
+                         (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok))
   sp
   val <- option mempty $ do
            symbol '='
diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
index 63ab7267d..b8bf0ce7f 100644
--- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs
@@ -10,27 +10,32 @@ import Text.Pandoc.Class
 import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
                             optional, space, spaces, withRaw, (<|>))
 import Control.Applicative ((<|>))
+import Control.Monad (void)
 import qualified Data.Map as M
 import Data.Char (isDigit)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.List (intersperse)
-
+import qualified Data.Sequence as Seq
+import Text.Pandoc.Walk (walk)
 
 siunitxCommands :: PandocMonad m
                  => LP m Inlines -> M.Map Text (LP m Inlines)
 siunitxCommands tok = M.fromList
-  [ ("si", skipopts *> dosi tok)
+  [ ("si", dosi tok)
   , ("SI", doSI tok)
   , ("SIrange", doSIrange True tok)
   , ("numrange", doSIrange False tok)
   , ("numlist", doSInumlist)
+  , ("SIlist", doSIlist tok)
   , ("num", doSInum)
   , ("ang", doSIang)
   ]
 
 dosi :: PandocMonad m => LP m Inlines -> LP m Inlines
-dosi tok = grouped (siUnit tok) <|> siUnit tok
+dosi tok = do
+  options <- option [] keyvals
+  grouped (siUnit options tok) <|> siUnit options tok
 
 -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
 doSI :: PandocMonad m => LP m Inlines -> LP m Inlines
@@ -65,9 +70,29 @@ doSInumlist = do
              mconcat (intersperse (str "," <> space) (init xs)) <>
              text ", & " <> last xs
 
+doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIlist tok = do
+  options <- option [] keyvals
+  nums <- map tonum . T.splitOn ";" . untokenize <$> braced
+  unit <- grouped (siUnit options tok) <|> siUnit options tok
+  let xs = map (<> (str "\xa0" <> unit)) nums
+  case xs of
+    []  -> return mempty
+    [x] -> return x
+    _   -> return $
+             mconcat (intersperse (str "," <> space) (init xs)) <>
+             text ", & " <> last xs
+
 parseNum :: Parser Text () Inlines
 parseNum = (mconcat <$> many parseNumPart) <* eof
 
+minus :: Text
+minus = "\x2212"
+
+hyphenToMinus :: Inline -> Inline
+hyphenToMinus (Str t) = Str (T.replace "-" minus t)
+hyphenToMinus x = x
+
 parseNumPart :: Parser Text () Inlines
 parseNumPart =
   parseDecimalNum <|>
@@ -83,7 +108,7 @@ parseNumPart =
     parseComma, parseI, parseX,
     parseExp, parseSpace :: Parser Text () Inlines
   parseDecimalNum = try $ do
-    pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-')
+    pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-')
     basenum' <- many1 (satisfy (\c -> isDigit c || c == '.'))
     let basenum = pref <> T.pack
                     (case basenum' of
@@ -155,20 +180,30 @@ doSIrange includeUnits tok = do
 emptyOr160 :: Inlines -> Inlines
 emptyOr160 x = if x == mempty then x else str "\160"
 
-siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines
-siUnit tok = mconcat <$> many1 siUnitPart
+siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines
+siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart
  where
   siUnitPart :: LP m Inlines
-  siUnitPart =
-        (siPrefix <*> siUnitPart)
-    <|> (do u <- siBase <|> tok
-            option u $ siSuffix <*> pure u)
+  siUnitPart = try $ do
+    skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1)
+    x <- ((siPrefix <*> siBase)
+            <|> (do u <- siBase <|> tok
+                    option u $ siSuffix <*> pure u))
+    option x (siInfix x)
+  siInfix :: Inlines -> LP m Inlines
+  siInfix u1 = try $
+       (do _ <- controlSeq "per"
+           u2 <- siUnitPart
+           let useSlash = lookup "per-mode" options == Just "symbol"
+           if useSlash
+              then return (u1 <> str "/" <> u2)
+              else return (u1 <> str "\xa0" <> negateExponent u2))
+   <|> (do _ <- symbol '/'
+           u2 <- siUnitPart
+           return (u1 <> str "/" <> u2))
   siPrefix :: LP m (Inlines -> Inlines)
   siPrefix =
-       (do _ <- controlSeq "per"
-           skipopts -- TODO handle option
-           return (str "/" <>))
-   <|> (do _ <- controlSeq "square"
+       (do _ <- controlSeq "square"
            skipopts
            return (<> superscript "2"))
    <|> (do _ <- controlSeq "cubic"
@@ -176,7 +211,7 @@ siUnit tok = mconcat <$> many1 siUnitPart
            return (<> superscript "3"))
    <|> (do _ <- controlSeq "raisetothe"
            skipopts
-           n <- tok
+           n <- walk hyphenToMinus <$> tok
            return (<> superscript n))
   siSuffix :: LP m (Inlines -> Inlines)
   siSuffix =
@@ -188,23 +223,57 @@ siUnit tok = mconcat <$> many1 siUnitPart
            return (<> superscript "3"))
    <|> (do _ <- controlSeq "tothe"
            skipopts
-           n <- tok
+           n <- walk hyphenToMinus <$> tok
            return (<> superscript n))
+   <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> superscript n)))
+   <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok
+                          return (<> subscript n)))
+  negateExponent :: Inlines -> Inlines
+  negateExponent ils =
+    case Seq.viewr (unMany ils) of
+      xs Seq.:> Superscript ss -> (Many xs) <>
+                                     superscript (str minus <> fromList ss)
+      _ -> ils <> superscript (str (minus <> "1"))
   siBase :: LP m Inlines
-  siBase = mconcat <$> many1
+  siBase =
     ((try
        (do Tok _ (CtrlSeq name) _ <- anyControlSeq
-           case M.lookup name siUnitMap of
-              Just il -> pure il
-              Nothing -> fail "not a unit command"))
+           case M.lookup name siUnitModifierMap of
+              Just il -> (il <>) <$> siBase
+              Nothing ->
+                case M.lookup name siUnitMap of
+                   Just il -> pure il
+                   Nothing -> fail "not a unit command"))
     <|> (do Tok _ Word t <- satisfyTok isWordTok
             return $ str t)
-    <|> (symbol '^' *> (superscript <$> tok))
-    <|> (symbol '_' *> (subscript <$> tok))
-    <|> (str "\xa0" <$ symbol '.')
-    <|> (str "\xa0" <$ symbol '~')
      )
 
+siUnitModifierMap :: M.Map Text Inlines
+siUnitModifierMap = M.fromList
+  [ ("atto", str "a")
+  , ("centi", str "c")
+  , ("deca", str "d")
+  , ("deci", str "d")
+  , ("deka", str "d")
+  , ("exa", str "E")
+  , ("femto", str "f")
+  , ("giga", str "G")
+  , ("hecto", str "h")
+  , ("kilo", str "k")
+  , ("mega", str "M")
+  , ("micro", str "μ")
+  , ("milli", str "m")
+  , ("nano", str "n")
+  , ("peta", str "P")
+  , ("pico", str "p")
+  , ("tera", str "T")
+  , ("yocto", str "y")
+  , ("yotta", str "Y")
+  , ("zepto", str "z")
+  , ("zetta", str "Z")
+  ]
+
 siUnitMap :: M.Map Text Inlines
 siUnitMap = M.fromList
   [ ("fg", str "fg")
@@ -303,7 +372,6 @@ siUnitMap = M.fromList
   , ("arcsecond", str "″")
   , ("astronomicalunit", str "ua")
   , ("atomicmassunit", str "u")
-  , ("atto", str "a")
   , ("bar", str "bar")
   , ("barn", str "b")
   , ("becquerel", str "Bq")
@@ -311,51 +379,38 @@ siUnitMap = M.fromList
   , ("bohr", emph (str "a") <> subscript (str "0"))
   , ("candela", str "cd")
   , ("celsius", str "°C")
-  , ("centi", str "c")
   , ("clight", emph (str "c") <> subscript (str "0"))
   , ("coulomb", str "C")
   , ("dalton", str "Da")
   , ("day", str "d")
-  , ("deca", str "d")
-  , ("deci", str "d")
   , ("decibel", str "db")
   , ("degreeCelsius",str "°C")
   , ("degree", str "°")
-  , ("deka", str "d")
   , ("electronmass", emph (str "m") <> subscript (str "e"))
   , ("electronvolt", str "eV")
   , ("elementarycharge", emph (str "e"))
-  , ("exa", str "E")
   , ("farad", str "F")
-  , ("femto", str "f")
-  , ("giga", str "G")
   , ("gram", str "g")
   , ("gray", str "Gy")
   , ("hartree", emph (str "E") <> subscript (str "h"))
   , ("hectare", str "ha")
-  , ("hecto", str "h")
   , ("henry", str "H")
   , ("hertz", str "Hz")
   , ("hour", str "h")
   , ("joule", str "J")
   , ("katal", str "kat")
   , ("kelvin", str "K")
-  , ("kilo", str "k")
   , ("kilogram", str "kg")
   , ("knot", str "kn")
   , ("liter", str "L")
   , ("litre", str "l")
   , ("lumen", str "lm")
   , ("lux", str "lx")
-  , ("mega", str "M")
   , ("meter", str "m")
   , ("metre", str "m")
-  , ("micro", str "μ")
-  , ("milli", str "m")
   , ("minute", str "min")
   , ("mmHg", str "mmHg")
   , ("mole", str "mol")
-  , ("nano", str "n")
   , ("nauticalmile", str "M")
   , ("neper", str "Np")
   , ("newton", str "N")
@@ -363,24 +418,17 @@ siUnitMap = M.fromList
   , ("Pa", str "Pa")
   , ("pascal", str "Pa")
   , ("percent", str "%")
-  , ("peta", str "P")
-  , ("pico", str "p")
   , ("planckbar", emph (str "\x210f"))
   , ("radian", str "rad")
   , ("second", str "s")
   , ("siemens", str "S")
   , ("sievert", str "Sv")
   , ("steradian", str "sr")
-  , ("tera", str "T")
   , ("tesla", str "T")
   , ("tonne", str "t")
   , ("volt", str "V")
   , ("watt", str "W")
   , ("weber", str "Wb")
-  , ("yocto", str "y")
-  , ("yotta", str "Y")
-  , ("zepto", str "z")
-  , ("zetta", str "Z")
   ]
 
 
diff --git a/test/command/6658.md b/test/command/6658.md
index 549610992..96700c8fe 100644
--- a/test/command/6658.md
+++ b/test/command/6658.md
@@ -1,15 +1,75 @@
 ```
-pandoc -f latex
-\SI{10.0 +- 3.3}{\ms}
+pandoc -f latex -t html
+\num{12345,67890}
+
+\num{1+-2i}
 
 \num{.3e45}
 
-\ang{+10;+3;}
+\num{1.654 x 2.34 x 3.430}
+
+\si{kg.m.s^{-1}}
+
+\si{\kilogram\metre\per\second}
+
+\si[per-mode=symbol]{\kilogram\metre\per\second}
+
+\si[per-mode=symbol]{\kilogram\metre\per\ampere\per\second}
+
+\numlist{10;20;30}
+
+\SIlist{0.13;0.67;0.80}{\milli\metre}
+
+\numrange{10}{20}
+
+\SIrange{0.13}{0.67}{\milli\metre}
+
+\ang{10}
+
+\ang{1;2;3}
+
+\ang{;;1}
+
+\ang{+10;;}
+
+\ang{-0;1;}
+
+\si{kg.m/s^2}
+
+\si{g_{polymer}~mol_{cat}.s^{-1}}
+
+\si{\kilo\gram\metre\per\square\second}
 
 \si{\gram\per\cubic\centi\metre}
+
+\si{\square\volt\cubic\lumen\per\farad}
+
+\si{\metre\squared\per\gray\cubic\lux}
+
+\si{\henry\second}
 ^D
-<p>10.0 ± 3.3 ms</p>
+<p>12345.67890</p>
+<p>1 ± 2i</p>
 <p>0.3 × 10<sup>45</sup></p>
-<p>10°3′</p>
-<p>g/cm<sup>3</sup></p>
+<p>1.654 × 2.34 × 3.430</p>
+<p>kg m s<sup>−1</sup></p>
+<p>kg m s<sup>−1</sup></p>
+<p>kg m/s</p>
+<p>kg m/A/s</p>
+<p>10, 20, &amp; 30</p>
+<p>0.13 mm, 0.67 mm, &amp; 0.80 mm</p>
+<p>10–20</p>
+<p>0.13 mm–0.67 mm</p>
+<p>10°</p>
+<p>1°2′3″</p>
+<p>1″</p>
+<p>10°</p>
+<p>-0°1′</p>
+<p>kg m/s<sup>2</sup></p>
+<p>g<sub>polymer</sub> mol<sub>cat</sub> s<sup>−1</sup></p>
+<p>kg m s<sup>−2</sup></p>
+<p>g cm<sup>−3</sup></p>
+<p>V<sup>2</sup> lm<sup>3</sup> F<sup>−1</sup></p>
+<p>m<sup>2</sup> Gy<sup>−1</sup> lx<sup>3</sup></p>
+<p>H s</p>
 ```
-- 
cgit v1.2.3


From d7b5def287aefe91f881daeecc5f72121c843b66 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 17:12:00 -0700
Subject: Ms writer: handle tables with multiple paragraphs.

Previously they overflowed the table cell width.
We now set line lengths per-cell and restore them
after the table has been written.

Closes #7288.
---
 src/Text/Pandoc/Writers/Ms.hs | 28 ++++++++++++++++++++++------
 test/command/7288.md          | 40 ++++++++++++++++++++++++++++++++++++++++
 test/tables.ms                | 30 ++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 6 deletions(-)
 create mode 100644 test/command/7288.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 0ed7a8a64..97c23f24d 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -245,13 +245,17 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) =
       aligncode AlignDefault = "l"
   in do
   caption' <- inlineListToMs' opts caption
-  let iwidths = if all (== 0) widths
-                   then repeat ""
-                   else map (T.pack . printf "w(%0.1fn)" . (70 *)) widths
+  let isSimple = all (== 0) widths
+  let totalWidth = 70
   -- 78n default width - 8n indent = 70n
   let coldescriptions = literal $ T.unwords
-                        (zipWith (\align width -> aligncode align <> width)
-                        alignments iwidths) <> "."
+                        (zipWith (\align width -> aligncode align <>
+                                    if width == 0
+                                       then ""
+                                       else T.pack $
+                                              printf "w(%0.1fn)"
+                                              (totalWidth * width))
+                        alignments widths) <> "."
   colheadings <- mapM (blockListToMs opts) headers
   let makeRow cols = literal "T{" $$
                      vcat (intersperse (literal "T}\tT{") cols) $$
@@ -260,13 +264,25 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) =
                         then empty
                         else makeRow colheadings $$ char '_'
   body <- mapM (\row -> do
-                         cols <- mapM (blockListToMs opts) row
+                         cols <- mapM (\(cell, w) ->
+                                   (if isSimple
+                                       then id
+                                       else (literal (".nr LL " <>
+                                              T.pack (printf "%0.1fn"
+                                                (w * totalWidth))) $$)) <$>
+                                   blockListToMs opts cell) (zip row widths)
                          return $ makeRow cols) rows
   setFirstPara
   return $ literal ".PP" $$ caption' $$
            literal ".na" $$ -- we don't want justification in table cells
+           (if isSimple
+               then ""
+               else ".nr LLold \\n[LL]") $$
            literal ".TS" $$ literal "delim(@@) tab(\t);" $$ coldescriptions $$
            colheadings' $$ vcat body $$ literal ".TE" $$
+           (if isSimple
+               then ""
+               else ".nr LL \\n[LLold]") $$
            literal ".ad"
 
 blockToMs opts (BulletList items) = do
diff --git a/test/command/7288.md b/test/command/7288.md
new file mode 100644
index 000000000..e94aeeeb3
--- /dev/null
+++ b/test/command/7288.md
@@ -0,0 +1,40 @@
+```
+% pandoc -f rst -t ms
+.. list-table::
+   :widths: 50 50
+   :header-rows: 1
+
+   * - Left
+     - Right
+   * - Long text that should be easy to break up into multiple lines
+     - Another long text that should be easy to break up into multiple lines
+
+       Bar
+^D
+.PP
+.na
+.nr LLold \n[LL]
+.TS
+delim(@@) tab(	);
+lw(35.0n) lw(35.0n).
+T{
+Left
+T}	T{
+Right
+T}
+_
+T{
+.nr LL 35.0n
+.LP
+Long text that should be easy to break up into multiple lines
+T}	T{
+.nr LL 35.0n
+.PP
+Another long text that should be easy to break up into multiple lines
+.PP
+Bar
+T}
+.TE
+.nr LL \n[LLold]
+.ad
+```
diff --git a/test/tables.ms b/test/tables.ms
index 7337db25f..21183a5eb 100644
--- a/test/tables.ms
+++ b/test/tables.ms
@@ -143,6 +143,7 @@ Multiline table with caption:
 .PP
 Here\[cq]s the caption. It may span multiple lines.
 .na
+.nr LLold \n[LL]
 .TS
 delim(@@) tab(	);
 cw(10.5n) lw(9.6n) rw(11.4n) lw(24.5n).
@@ -157,30 +158,40 @@ Default aligned
 T}
 _
 T{
+.nr LL 10.5n
 First
 T}	T{
+.nr LL 9.6n
 row
 T}	T{
+.nr LL 11.4n
 12.0
 T}	T{
+.nr LL 24.5n
 Example of a row that spans multiple lines.
 T}
 T{
+.nr LL 10.5n
 Second
 T}	T{
+.nr LL 9.6n
 row
 T}	T{
+.nr LL 11.4n
 5.0
 T}	T{
+.nr LL 24.5n
 Here\[cq]s another one.
 Note the blank line between rows.
 T}
 .TE
+.nr LL \n[LLold]
 .ad
 .LP
 Multiline table without caption:
 .PP
 .na
+.nr LLold \n[LL]
 .TS
 delim(@@) tab(	);
 cw(10.5n) lw(9.6n) rw(11.4n) lw(24.5n).
@@ -195,25 +206,34 @@ Default aligned
 T}
 _
 T{
+.nr LL 10.5n
 First
 T}	T{
+.nr LL 9.6n
 row
 T}	T{
+.nr LL 11.4n
 12.0
 T}	T{
+.nr LL 24.5n
 Example of a row that spans multiple lines.
 T}
 T{
+.nr LL 10.5n
 Second
 T}	T{
+.nr LL 9.6n
 row
 T}	T{
+.nr LL 11.4n
 5.0
 T}	T{
+.nr LL 24.5n
 Here\[cq]s another one.
 Note the blank line between rows.
 T}
 .TE
+.nr LL \n[LLold]
 .ad
 .LP
 Table without column headers:
@@ -255,27 +275,37 @@ T}
 Multiline table without column headers:
 .PP
 .na
+.nr LLold \n[LL]
 .TS
 delim(@@) tab(	);
 cw(10.5n) lw(9.6n) rw(11.4n) lw(24.5n).
 T{
+.nr LL 10.5n
 First
 T}	T{
+.nr LL 9.6n
 row
 T}	T{
+.nr LL 11.4n
 12.0
 T}	T{
+.nr LL 24.5n
 Example of a row that spans multiple lines.
 T}
 T{
+.nr LL 10.5n
 Second
 T}	T{
+.nr LL 9.6n
 row
 T}	T{
+.nr LL 11.4n
 5.0
 T}	T{
+.nr LL 24.5n
 Here\[cq]s another one.
 Note the blank line between rows.
 T}
 .TE
+.nr LL \n[LLold]
 .ad
-- 
cgit v1.2.3


From 07d299d353761a7c29aa7e7a51371ad7842ec767 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 20 May 2021 18:45:39 -0700
Subject: DocBook reader: ensure that first and last names are separated.

Closes #6541.
---
 src/Text/Pandoc/Readers/DocBook.hs | 20 ++++++++++++++------
 test/command/6541.md               | 27 +++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 6 deletions(-)
 create mode 100644 test/command/6541.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3db459cfd..b01ad3252 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -600,16 +600,24 @@ addMetadataFromElement e = do
          Nothing -> return ()
          Just z  -> addMetaField "author" z
     addMetaField "subtitle" e
-    addMetaField "author" e
+    addAuthor e
     addMetaField "date" e
     addMetaField "release" e
     addMetaField "releaseinfo" e
     return mempty
-  where addMetaField fieldname elt =
-            case filterChildren (named fieldname) elt of
-                   []  -> return ()
-                   [z] -> getInlines z >>= addMeta fieldname
-                   zs  -> mapM getInlines zs >>= addMeta fieldname
+  where
+   addAuthor elt =
+     case filterChildren (named "author") elt of
+       [] -> return ()
+       [z] -> fromAuthor z >>= addMeta "author"
+       zs  -> mapM fromAuthor zs >>= addMeta "author"
+   fromAuthor elt =
+     mconcat . intersperse space <$> mapM getInlines (elChildren elt)
+   addMetaField fieldname elt =
+     case filterChildren (named fieldname) elt of
+       []  -> return ()
+       [z] -> getInlines z >>= addMeta fieldname
+       zs  -> mapM getInlines zs >>= addMeta fieldname
 
 addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m ()
 addMeta field val = modify (setMeta field val)
diff --git a/test/command/6541.md b/test/command/6541.md
new file mode 100644
index 000000000..956340d4c
--- /dev/null
+++ b/test/command/6541.md
@@ -0,0 +1,27 @@
+```
+% pandoc -f docbook -t markdown -s
+<?xml version="1.0"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+  "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+<book>
+
+<bookinfo>
+<title>Title</title>
+<author>
+<firstname>Firstname</firstname><surname>Lastname</surname>
+</author>
+<releaseinfo>1.17</releaseinfo>
+</bookinfo>
+
+<para>Text.</para>
+
+</book>
+^D
+---
+author: Firstname Lastname
+releaseinfo: 1.17
+title: Title
+---
+
+Text.
+```
-- 
cgit v1.2.3


From f76fe2ab56606528d4710cc6c40bceb5788c3906 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 22 May 2021 13:29:13 +0200
Subject: HTML reader: simplify col width parsing

---
 src/Text/Pandoc/Readers/HTML/Table.hs | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index ad0b51253..6537bbce9 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -1,6 +1,5 @@
 {-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
    Copyright   : © 2006-2021 John MacFarlane,
@@ -42,18 +41,15 @@ pCol = try $ do
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let width = case lookup "width" attribs of
-                Nothing -> case lookup "style" attribs of
-                  Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    fromMaybe 0.0 $ safeRead (T.filter
-                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
-                  _ -> 0.0
-                Just (T.unsnoc -> Just (xs, '%')) ->
-                  fromMaybe 0.0 $ safeRead xs
-                _ -> 0.0
-  if width > 0.0
-    then return $ ColWidth $ width / 100.0
-    else return ColWidthDefault
+  let toColWidth = maybe ColWidthDefault (ColWidth . (/100.0)) . safeRead
+  return $ fromMaybe ColWidthDefault $
+    (case lookup "width" attribs >>= T.unsnoc of
+       Just (xs, '%') -> Just (toColWidth xs)
+       _ -> Nothing) <|>
+    (case lookup "style" attribs >>= T.stripPrefix "width" of
+        Just xs | T.any (== '%') xs -> Just . toColWidth $
+                  T.filter (`notElem` (" \t\r\n%'\";" :: [Char])) xs
+        _ -> Nothing)
 
 pColgroup :: PandocMonad m => TagParser m [ColWidth]
 pColgroup = try $ do
-- 
cgit v1.2.3


From 80b4b3fe82a19a4ea1e76fc4a81c9c88676c7ce0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 22 May 2021 22:03:51 -0700
Subject: Revert "HTML reader: simplify col width parsing"

This reverts commit f76fe2ab56606528d4710cc6c40bceb5788c3906.
---
 src/Text/Pandoc/Readers/HTML/Table.hs | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 6537bbce9..ad0b51253 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -1,5 +1,6 @@
 {-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns      #-}
 {- |
    Module      : Text.Pandoc.Readers.HTML.Table
    Copyright   : © 2006-2021 John MacFarlane,
@@ -41,15 +42,18 @@ pCol = try $ do
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let toColWidth = maybe ColWidthDefault (ColWidth . (/100.0)) . safeRead
-  return $ fromMaybe ColWidthDefault $
-    (case lookup "width" attribs >>= T.unsnoc of
-       Just (xs, '%') -> Just (toColWidth xs)
-       _ -> Nothing) <|>
-    (case lookup "style" attribs >>= T.stripPrefix "width" of
-        Just xs | T.any (== '%') xs -> Just . toColWidth $
-                  T.filter (`notElem` (" \t\r\n%'\";" :: [Char])) xs
-        _ -> Nothing)
+  let width = case lookup "width" attribs of
+                Nothing -> case lookup "style" attribs of
+                  Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
+                    fromMaybe 0.0 $ safeRead (T.filter
+                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+                  _ -> 0.0
+                Just (T.unsnoc -> Just (xs, '%')) ->
+                  fromMaybe 0.0 $ safeRead xs
+                _ -> 0.0
+  if width > 0.0
+    then return $ ColWidth $ width / 100.0
+    else return ColWidthDefault
 
 pColgroup :: PandocMonad m => TagParser m [ColWidth]
 pColgroup = try $ do
-- 
cgit v1.2.3


From 1af2cfb2873c5bb6ddd9fc00d076088b2e62af30 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 22 May 2021 21:56:10 -0700
Subject: Handle relative lengths (e.g. `2*`) in HTML column widths.

See <https://www.w3.org/TR/html4/types.html#h-6.6>.

"A relative length has the form "i*", where "i" is an integer. When
allotting space among elements competing for that space, user agents
allot pixel and percentage lengths first, then divide up remaining
available space among relative lengths. Each relative length receives a
portion of the available space that is proportional to the integer
preceding the "*". The value "*" is equivalent to "1*". Thus, if 60
pixels of space are available after the user agent allots pixel and
percentage space, and the competing relative lengths are 1*, 2*, and 3*,
the 1* will be alloted 10 pixels, the 2* will be alloted 20 pixels, and
the 3* will be alloted 30 pixels."

Closes #4063.
---
 src/Text/Pandoc/Readers/HTML/Table.hs | 47 ++++++++++++++++++++++++-----------
 test/command/4063.md                  | 29 +++++++++++++++++++++
 2 files changed, 62 insertions(+), 14 deletions(-)
 create mode 100644 test/command/4063.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index ad0b51253..3a569dd0a 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where
 
 import Control.Applicative ((<|>))
 import Data.Maybe (fromMaybe)
+import Data.Either (lefts, rights)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import Text.HTML.TagSoup
@@ -33,34 +34,51 @@ import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 
--- | Parses a @<col>@ element, returning the column's width. Defaults to
--- @'ColWidthDefault'@ if the width is not set or cannot be determined.
-pCol :: PandocMonad m => TagParser m ColWidth
+-- | Parses a @<col>@ element, returning the column's width.
+-- An Either value is used:  Left i means a "relative length" with
+-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6);
+-- Right w means a regular width.  Defaults to @'Right ColWidthDefault'@
+-- if the width is not set or cannot be determined.
+pCol :: PandocMonad m => TagParser m (Either Int ColWidth)
 pCol = try $ do
   TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
   let attribs = toStringAttr attribs'
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let width = case lookup "width" attribs of
+  return $ case lookup "width" attribs of
                 Nothing -> case lookup "style" attribs of
                   Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    fromMaybe 0.0 $ safeRead (T.filter
-                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
-                  _ -> 0.0
+                    maybe (Right ColWidthDefault) (Right . ColWidth)
+                      $ safeRead (T.filter
+                                   (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+                  _ -> Right ColWidthDefault
+                Just (T.unsnoc -> Just (xs, '*')) ->
+                  maybe (Left 1) Left $ safeRead xs
                 Just (T.unsnoc -> Just (xs, '%')) ->
-                  fromMaybe 0.0 $ safeRead xs
-                _ -> 0.0
-  if width > 0.0
-    then return $ ColWidth $ width / 100.0
-    else return ColWidthDefault
+                  maybe (Right ColWidthDefault)
+                        (Right . ColWidth . (/ 100.0)) $ safeRead xs
+                _ -> Right ColWidthDefault
 
-pColgroup :: PandocMonad m => TagParser m [ColWidth]
+pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth]
 pColgroup = try $ do
   pSatisfy (matchTagOpen "colgroup" [])
   skipMany pBlank
   manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
 
+resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
+resolveRelativeLengths ws =
+  let remaining = 1 - sum (map getColWidth $ rights ws)
+      relatives = sum $ lefts ws
+      relUnit = remaining / fromIntegral relatives
+      toColWidth (Right x) = x
+      toColWidth (Left i) = ColWidth (fromIntegral i * relUnit)
+  in  map toColWidth ws
+
+getColWidth :: ColWidth -> Double
+getColWidth ColWidthDefault = 0
+getColWidth (ColWidth w) = w
+
 data CellType
   = HeaderCell
   | BodyCell
@@ -182,7 +200,8 @@ pTable :: PandocMonad m
 pTable block = try $ do
   TagOpen _ attribs <- pSatisfy (matchTagOpen "table" [])  <* skipMany pBlank
   caption <- option mempty $ pInTags "caption" block       <* skipMany pBlank
-  widths  <- ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
+  widths <- resolveRelativeLengths <$>
+              ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
   thead   <- pTableHead block               <* skipMany pBlank
   topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank
   tbodies <- many (pTableBody block)        <* skipMany pBlank
diff --git a/test/command/4063.md b/test/command/4063.md
new file mode 100644
index 000000000..838472b46
--- /dev/null
+++ b/test/command/4063.md
@@ -0,0 +1,29 @@
+```
+% pandoc -f html -t native
+<table>
+<colgroup>
+  <col width="30%" />
+  <col width="*" />
+</colgroup>
+<tr>
+  <td>1</td>
+  <td>2</td>
+</tr>
+</table>
+^D
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidth 0.3)
+ ,(AlignDefault,ColWidth 0.7)]
+ (TableHead ("",[],[])
+ [])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]]])]
+ (TableFoot ("",[],[])
+ [])]
+```
-- 
cgit v1.2.3


From 58fbf56548bf985b40e4338befaf5b11a0665cbe Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 24 May 2021 09:56:02 +0200
Subject: Jira writer: use `{color}` when span has a color attribute

Closes: tarleb/jira-wiki-markup#10
---
 src/Text/Pandoc/Writers/Jira.hs | 10 +++++++---
 test/Tests/Writers/Jira.hs      |  4 ++++
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index aa78d9419..cf4dadebc 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -309,9 +309,13 @@ quotedToJira qtype xs = do
 spanToJira :: PandocMonad m
            => Attr -> [Inline]
            -> JiraConverter m [Jira.Inline]
-spanToJira (ident, _classes, _attribs) inls = case ident of
-  "" -> toJiraInlines inls
-  _  -> (Jira.Anchor ident :) <$> toJiraInlines inls
+spanToJira (ident, _classes, attribs) inls =
+  let wrap = case lookup "color" attribs of
+               Nothing -> id
+               Just color -> singleton . Jira.ColorInline (Jira.ColorName color)
+  in wrap <$> case ident of
+    "" -> toJiraInlines inls
+    _  -> (Jira.Anchor ident :) <$> toJiraInlines inls
 
 registerNotes :: PandocMonad m => [Block] -> JiraConverter m [Jira.Inline]
 registerNotes contents = do
diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs
index 0c6f48853..d8e856e34 100644
--- a/test/Tests/Writers/Jira.hs
+++ b/test/Tests/Writers/Jira.hs
@@ -67,6 +67,10 @@ tests =
       [ "id is used as anchor" =:
         spanWith ("unicorn", [], []) (str "Unicorn") =?>
         "{anchor:unicorn}Unicorn"
+
+      , "use `color` attribute" =:
+        spanWith ("",[],[("color","red")]) "ruby" =?>
+        "{color:red}ruby{color}"
       ]
 
     , testGroup "code"
-- 
cgit v1.2.3


From 8511f6fdf6c9fbc2cc926538bca4ae9f554b4ed9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 23 May 2021 22:57:02 -0700
Subject: MediaBag improvements.

In the current dev version, we will sometimes add
a version of an image with a hashed name, keeping
the original version with the original name, which
would leave to undesirable duplication.

This change separates the media's filename from the
media's canonical name (which is the path of the link
in the document itself).  Filenames are based on SHA1
hashes and assigned automatically.

In Text.Pandoc.MediaBag:

- Export MediaItem type [API change].
- Change MediaBag type to a map from Text to MediaItem [API change].
- `lookupMedia` now returns a `MediaItem` [API change].
- Change `insertMedia` so it sets the `mediaPath` to
  a filename based on the SHA1 hash of the contents.
  This will be used when contents are extracted.

In Text.Pandoc.Class.PandocMonad:

- Remove `fetchMediaResource` [API change].

Lua MediaBag module has been changed minimally. In the future
it would be better, probably, to give Lua access to the full
MediaItem type.
---
 src/Text/Pandoc/Class/IO.hs            |  9 ++++---
 src/Text/Pandoc/Class/PandocMonad.hs   | 43 ++++++++++++++--------------------
 src/Text/Pandoc/Lua/Module/MediaBag.hs |  6 ++---
 src/Text/Pandoc/MediaBag.hs            | 35 ++++++++++++++++++++-------
 test/Tests/Readers/Docx.hs             | 10 ++++----
 5 files changed, 55 insertions(+), 48 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index bb4e2b732..f12c0a938 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image))
 import Text.Pandoc.Error (PandocError (..))
 import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage)
 import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory)
 import Text.Pandoc.Walk (walk)
 import qualified Control.Exception as E
 import qualified Data.ByteString as B
@@ -213,14 +213,13 @@ writeMedia :: (PandocMonad m, MonadIO m)
 writeMedia dir mediabag subpath = do
   -- we join and split to convert a/b/c to a\b\c on Windows;
   -- in zip containers all paths use /
-  let fullpath = dir </> unEscapeString (normalise subpath)
   let mbcontents = lookupMedia subpath mediabag
   case mbcontents of
        Nothing -> throwError $ PandocResourceNotFound $ pack subpath
-       Just (_, bs) -> do
-         report $ Extracting $ pack fullpath
+       Just item -> do
+         let fullpath = dir </> mediaPath item
          liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
-         logIOError $ BL.writeFile fullpath bs
+         logIOError $ BL.writeFile fullpath $ mediaContents item
 
 -- | If the given Inline element is an image with a @src@ path equal to
 -- one in the list of @paths@, then prepends @dir@ to the image source;
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index dd6499a73..ae6917e06 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -37,7 +37,6 @@ module Text.Pandoc.Class.PandocMonad
   , setUserDataDir
   , getUserDataDir
   , fetchItem
-  , fetchMediaResource
   , getInputFiles
   , setInputFiles
   , getOutputFile
@@ -57,8 +56,6 @@ module Text.Pandoc.Class.PandocMonad
 import Codec.Archive.Zip
 import Control.Monad.Except (MonadError (catchError, throwError),
                              MonadTrans, lift, when)
-import Data.Digest.Pure.SHA (sha1, showDigest)
-import Data.Maybe (fromMaybe)
 import Data.List (foldl')
 import Data.Time (UTCTime)
 import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds,
@@ -67,7 +64,7 @@ import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime)
 import Network.URI ( escapeURIString, nonStrictRelativeTo,
                      unEscapeString, parseURIReference, isAllowedInURI,
                      parseURI, URI(..) )
-import System.FilePath ((</>), (<.>), takeExtension, dropExtension,
+import System.FilePath ((</>), takeExtension, dropExtension,
                         isRelative, splitDirectories)
 import System.Random (StdGen)
 import Text.Collate.Lang (Lang(..), parseLang, renderLang)
@@ -75,8 +72,8 @@ import Text.Pandoc.Class.CommonState (CommonState (..))
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.Logging
-import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType)
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
+import Text.Pandoc.MIME (MimeType, getMimeType)
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, MediaItem(..))
 import Text.Pandoc.Shared (uriPathToPath, safeRead)
 import Text.Pandoc.Translations (Term(..), Translations, lookupTerm,
                                  readTranslations)
@@ -376,7 +373,8 @@ fetchItem :: PandocMonad m
 fetchItem s = do
   mediabag <- getMediaBag
   case lookupMedia (T.unpack s) mediabag of
-    Just (mime, bs) -> return (BL.toStrict bs, Just mime)
+    Just item -> return (BL.toStrict (mediaContents item),
+                         Just (mediaMimeType item))
     Nothing -> downloadOrRead s
 
 -- | Returns the content and, if available, the MIME type of a resource.
@@ -629,19 +627,6 @@ withPaths (p:ps) action fp =
   catchError (action (p </> fp))
              (\_ -> withPaths ps action fp)
 
--- | Fetch local or remote resource (like an image) and provide data suitable
--- for adding it to the MediaBag.
-fetchMediaResource :: PandocMonad m
-              => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString)
-fetchMediaResource src = do
-  (bs, mt) <- fetchItem src
-  let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src)
-                      (mt >>= extensionFromMimeType)
-  let bs' = BL.fromChunks [bs]
-  let basename = showDigest $ sha1 bs'
-  let fname = basename <.> T.unpack ext
-  return (fname, mt, bs')
-
 -- | Traverse tree, filling media bag for any images that
 -- aren't already in the media bag.
 fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc
@@ -649,12 +634,18 @@ fillMediaBag d = walkM handleImage d
   where handleImage :: PandocMonad m => Inline -> m Inline
         handleImage (Image attr lab (src, tit)) = catchError
           (do mediabag <- getMediaBag
-              case lookupMedia (T.unpack src) mediabag of
-                Just (_, _) -> return $ Image attr lab (src, tit)
-                Nothing -> do
-                  (fname, mt, bs) <- fetchMediaResource src
-                  insertMedia fname mt bs
-                  return $ Image attr lab (T.pack fname, tit))
+              let fp = T.unpack src
+              src' <- T.pack <$> case lookupMedia fp mediabag of
+                        Just item -> return $ mediaPath item
+                        Nothing -> do
+                          (bs, mt) <- fetchItem src
+                          insertMedia fp mt (BL.fromStrict bs)
+                          mediabag' <- getMediaBag
+                          case lookupMedia fp mediabag' of
+                             Just item -> return $ mediaPath item
+                             Nothing -> throwError $ PandocSomeError $
+                               src <> " not successfully inserted into MediaBag"
+              return $ Image attr lab (src', tit))
           (\e ->
               case e of
                 PandocResourceNotFound _ -> do
diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs
index 78b699176..3eed50fca 100644
--- a/src/Text/Pandoc/Lua/Module/MediaBag.hs
+++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs
@@ -73,9 +73,9 @@ lookup fp = do
   res <- MB.lookupMedia fp <$> getMediaBag
   liftPandocLua $ case res of
     Nothing -> 1 <$ Lua.pushnil
-    Just (mimeType, contents) -> do
-      Lua.push mimeType
-      Lua.push contents
+    Just item -> do
+      Lua.push $ MB.mediaMimeType item
+      Lua.push $ MB.mediaContents item
       return 2
 
 list :: PandocLua NumResults
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index 4a9b4efa1..a65f315fc 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -15,6 +15,7 @@ Definition of a MediaBag object to hold binary resources, and an
 interface for interacting with it.
 -}
 module Text.Pandoc.MediaBag (
+                     MediaItem(..),
                      MediaBag,
                      deleteMedia,
                      lookupMedia,
@@ -28,15 +29,23 @@ import qualified Data.Map as M
 import Data.Maybe (fromMaybe)
 import Data.Typeable (Typeable)
 import System.FilePath
-import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
+import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType)
 import Data.Text (Text)
 import qualified Data.Text as T
+import Data.Digest.Pure.SHA (sha1, showDigest)
+
+data MediaItem =
+  MediaItem
+  { mediaMimeType :: MimeType
+  , mediaPath :: FilePath
+  , mediaContents :: BL.ByteString
+  } deriving (Eq, Ord, Show, Data, Typeable)
 
 -- | A container for a collection of binary resources, with names and
 -- mime types.  Note that a 'MediaBag' is a Monoid, so 'mempty'
 -- can be used for an empty 'MediaBag', and '<>' can be used to append
 -- two 'MediaBag's.
-newtype MediaBag = MediaBag (M.Map Text (MimeType, BL.ByteString))
+newtype MediaBag = MediaBag (M.Map Text MediaItem)
         deriving (Semigroup, Monoid, Data, Typeable)
 
 instance Show MediaBag where
@@ -62,26 +71,34 @@ insertMedia :: FilePath       -- ^ relative path and canonical name of resource
             -> MediaBag
             -> MediaBag
 insertMedia fp mbMime contents (MediaBag mediamap) =
-  MediaBag (M.insert (canonicalize fp) (mime, contents) mediamap)
-  where mime = fromMaybe fallback mbMime
+  MediaBag (M.insert (canonicalize fp) mediaItem mediamap)
+  where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <>
+                                 "." <> ext
+                             , mediaContents = contents
+                             , mediaMimeType = mt }
         fallback = case takeExtension fp of
                         ".gz" -> getMimeTypeDef $ dropExtension fp
                         _     -> getMimeTypeDef fp
+        mt = fromMaybe fallback mbMime
+        ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt
+
 
 -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
 lookupMedia :: FilePath
             -> MediaBag
-            -> Maybe (MimeType, BL.ByteString)
+            -> Maybe MediaItem
 lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap
 
 -- | Get a list of the file paths stored in a 'MediaBag', with
 -- their corresponding mime types and the lengths in bytes of the contents.
 mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)]
 mediaDirectory (MediaBag mediamap) =
-  M.foldrWithKey (\fp (mime,contents) ->
-      ((T.unpack fp, mime, fromIntegral (BL.length contents)):)) [] mediamap
+  M.foldrWithKey (\fp item ->
+      ((T.unpack fp, mediaMimeType item,
+        fromIntegral (BL.length (mediaContents item))):)) [] mediamap
 
 mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)]
 mediaItems (MediaBag mediamap) =
-  M.foldrWithKey (\fp (mime,contents) ->
-      ((T.unpack fp, mime, contents):)) [] mediamap
+  M.foldrWithKey (\fp item ->
+      ((T.unpack fp, mediaMimeType item, mediaContents item):))
+     [] mediamap
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
index 2cce70cc5..939ff9939 100644
--- a/test/Tests/Readers/Docx.hs
+++ b/test/Tests/Readers/Docx.hs
@@ -24,7 +24,7 @@ import Test.Tasty.HUnit
 import Tests.Helpers
 import Text.Pandoc
 import qualified Text.Pandoc.Class as P
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import qualified Text.Pandoc.MediaBag as MB
 import Text.Pandoc.UTF8 as UTF8
 
 -- We define a wrapper around pandoc that doesn't normalize in the
@@ -91,11 +91,11 @@ getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString)
 getMedia archivePath mediaPath = fmap fromEntry . findEntryByPath
     ("word/" ++ mediaPath) . toArchive <$> B.readFile archivePath
 
-compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool
+compareMediaPathIO :: FilePath -> MB.MediaBag -> FilePath -> IO Bool
 compareMediaPathIO mediaPath mediaBag docxPath = do
   docxMedia <- getMedia docxPath mediaPath
-  let mbBS   = case lookupMedia mediaPath mediaBag of
-                 Just (_, bs) -> bs
+  let mbBS   = case MB.lookupMedia mediaPath mediaBag of
+                 Just item    -> MB.mediaContents item
                  Nothing      -> error ("couldn't find " ++
                                         mediaPath ++
                                         " in media bag")
@@ -110,7 +110,7 @@ compareMediaBagIO docxFile = do
     mb <- runIOorExplode $ readDocx defopts df >> P.getMediaBag
     bools <- mapM
              (\(fp, _, _) -> compareMediaPathIO fp mb docxFile)
-             (mediaDirectory mb)
+             (MB.mediaDirectory mb)
     return $ and bools
 
 testMediaBagIO :: String -> FilePath -> IO TestTree
-- 
cgit v1.2.3


From d46ea7d7da3f842d265f09730c90cfc3691576ef Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 25 May 2021 16:54:42 +0200
Subject: Jira: add support for "smart" links

Support has been added for the new
`[alias|https://example.com|smart-card]` syntax.
---
 pandoc.cabal                    | 2 +-
 src/Text/Pandoc/Readers/Jira.hs | 2 ++
 src/Text/Pandoc/Writers/Jira.hs | 2 ++
 stack.yaml                      | 2 +-
 test/Tests/Readers/Jira.hs      | 8 ++++++++
 test/Tests/Writers/Jira.hs      | 8 ++++++++
 6 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 4e10e01f3..241196d7c 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -470,7 +470,7 @@ library
                  http-client-tls       >= 0.2.4    && < 0.4,
                  http-types            >= 0.8      && < 0.13,
                  ipynb                 >= 0.1      && < 0.2,
-                 jira-wiki-markup      >= 1.3.5    && < 1.4,
+                 jira-wiki-markup      >= 1.4      && < 1.5,
                  mtl                   >= 2.2      && < 2.3,
                  network               >= 2.6,
                  network-uri           >= 2.6      && < 2.8,
diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs
index a3b415f09..cf111f173 100644
--- a/src/Text/Pandoc/Readers/Jira.hs
+++ b/src/Text/Pandoc/Readers/Jira.hs
@@ -172,6 +172,8 @@ jiraLinkToPandoc linkType alias url =
     Jira.Email      -> link ("mailto:" <> url') "" alias'
     Jira.Attachment -> linkWith ("", ["attachment"], []) url' "" alias'
     Jira.User       -> linkWith ("", ["user-account"], []) url' "" alias'
+    Jira.SmartCard  -> linkWith ("", ["smart-card"], []) url' "" alias'
+    Jira.SmartLink  -> linkWith ("", ["smart-link"], []) url' "" alias'
 
 -- | Get unicode representation of a Jira icon.
 iconUnicode :: Jira.Icon -> Text
diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs
index cf4dadebc..1351814e9 100644
--- a/src/Text/Pandoc/Writers/Jira.hs
+++ b/src/Text/Pandoc/Writers/Jira.hs
@@ -280,6 +280,8 @@ toJiraLink (_, classes, _) (url, _) alias = do
     | Just email <- T.stripPrefix "mailto:" url' = (Jira.Email, email)
     | "user-account" `elem` classes              = (Jira.User, dropTilde url)
     | "attachment" `elem` classes                = (Jira.Attachment, url)
+    | "smart-card" `elem` classes                = (Jira.SmartCard, url)
+    | "smart-link" `elem` classes                = (Jira.SmartLink, url)
     | otherwise                                  = (Jira.External, url)
   dropTilde txt = case T.uncons txt of
     Just ('~', username) -> username
diff --git a/stack.yaml b/stack.yaml
index 809157425..16e5ad2cf 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -9,7 +9,7 @@ packages:
 extra-deps:
 - hslua-1.3.0
 - hslua-module-path-0.1.0
-- jira-wiki-markup-1.3.5
+- jira-wiki-markup-1.4.0
 - random-1.2.0
 - unicode-collation-0.1.3
 - citeproc-0.4
diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs
index cb7dde4ea..b7194a3b9 100644
--- a/test/Tests/Readers/Jira.hs
+++ b/test/Tests/Readers/Jira.hs
@@ -167,6 +167,14 @@ tests =
       , "user with description" =:
         "[John Doe|~johndoe]" =?>
         para (linkWith ("", ["user-account"], []) "~johndoe" "" "John Doe")
+
+      , "'smart' link" =:
+        "[x|http://example.com|smart-link]" =?>
+        para (linkWith ("", ["smart-link"], []) "http://example.com" "" "x")
+
+      , "'smart' card" =:
+        "[x|http://example.com|smart-card]" =?>
+        para (linkWith ("", ["smart-card"], []) "http://example.com" "" "x")
       ]
 
     , "image" =:
diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs
index d8e856e34..00a7ae931 100644
--- a/test/Tests/Writers/Jira.hs
+++ b/test/Tests/Writers/Jira.hs
@@ -61,6 +61,14 @@ tests =
       , "user link with user as description" =:
         linkWith ("", ["user-account"], []) "~johndoe" "" "~johndoe" =?>
         "[~johndoe]"
+
+      , "'smart' link" =:
+        para (linkWith ("", ["smart-link"], []) "http://example.com" "" "x") =?>
+        "[x|http://example.com|smart-link]"
+
+      , "'smart' card" =:
+        para (linkWith ("", ["smart-card"], []) "http://example.org" "" "x") =?>
+        "[x|http://example.org|smart-card]"
       ]
 
     , testGroup "spans"
-- 
cgit v1.2.3


From fb40c8109dc969dce74c8153ad1c0d4b33d54a6c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 May 2021 10:07:24 -0700
Subject: Logging: add LoadedResource constructor to LogMessage.

[API change]

This is for INFO-level messages telling where image data has been
loaded from.  (This can vary because of the resource path.)
---
 src/Text/Pandoc/Logging.hs | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 8c7292b69..2642d72ac 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -85,6 +85,7 @@ data LogMessage =
   | CouldNotParseCSS Text
   | Fetching Text
   | Extracting Text
+  | LoadedResource FilePath FilePath
   | NoTitleElement Text
   | NoLangSpecified
   | InvalidLang Text
@@ -195,6 +196,9 @@ instance ToJSON LogMessage where
            ["path" .= fp]
       Extracting fp ->
            ["path" .= fp]
+      LoadedResource orig found ->
+           ["for"  .= orig
+           ,"from" .= found]
       NoTitleElement fallback ->
            ["fallback" .= fallback]
       NoLangSpecified -> []
@@ -309,6 +313,8 @@ showLogMessage msg =
          "Fetching " <> fp <> "..."
        Extracting fp ->
          "Extracting " <> fp <> "..."
+       LoadedResource orig found ->
+         "Loaded " <> Text.pack orig <> " from " <> Text.pack found
        NoTitleElement fallback ->
          "This document format requires a nonempty <title> element.\n" <>
          "Defaulting to '" <> fallback <> "' as the title.\n" <>
@@ -389,6 +395,7 @@ messageVerbosity msg =
        CouldNotParseCSS{}            -> WARNING
        Fetching{}                    -> INFO
        Extracting{}                  -> INFO
+       LoadedResource{}              -> INFO
        NoTitleElement{}              -> WARNING
        NoLangSpecified               -> INFO
        InvalidLang{}                 -> WARNING
-- 
cgit v1.2.3


From f2c1b5746912db945be780961b6503e38c3c7e1e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 May 2021 10:08:30 -0700
Subject: PandocMonad: add info message in `downloadOrRead`...

indicating what path local resources have been loaded from.
---
 src/Text/Pandoc/Class/PandocMonad.hs | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index ae6917e06..b5f401619 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE CPP #-}
+{-# LANGUAGE TupleSections #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
@@ -409,9 +410,10 @@ downloadOrRead s = do
             _ -> readLocalFile fp -- get from local file system
    where readLocalFile f = do
              resourcePath <- getResourcePath
-             cont <- if isRelative f
-                        then withPaths resourcePath readFileStrict f
-                        else readFileStrict f
+             (fp', cont) <- if isRelative f
+                               then withPaths resourcePath readFileStrict f
+                               else (f,) <$> readFileStrict f
+             report $ LoadedResource f fp'
              return (cont, mime)
          httpcolon = URI{ uriScheme = "http:",
                           uriAuthority = Nothing,
@@ -621,10 +623,11 @@ makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
 -- that filepath. Returns the result of the first successful execution
 -- of the action, or throws a @PandocResourceNotFound@ exception if the
 -- action errors for all filepaths.
-withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a
+withPaths :: PandocMonad m
+          => [FilePath] -> (FilePath -> m a) -> FilePath -> m (FilePath, a)
 withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp
 withPaths (p:ps) action fp =
-  catchError (action (p </> fp))
+  catchError ((p </> fp,) <$> action (p </> fp))
              (\_ -> withPaths ps action fp)
 
 -- | Traverse tree, filling media bag for any images that
-- 
cgit v1.2.3


From bb2530caa414234f00e7c89ef18a538708b2297c Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 25 May 2021 17:49:48 +0200
Subject: Use haddock-library-1.10.0

---
 pandoc.cabal                       | 2 +-
 src/Text/Pandoc/Readers/Haddock.hs | 3 ++-
 stack.yaml                         | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 241196d7c..c8e49c620 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -461,7 +461,7 @@ library
                  exceptions            >= 0.8      && < 0.11,
                  file-embed            >= 0.0      && < 0.1,
                  filepath              >= 1.1      && < 1.5,
-                 haddock-library       >= 1.8      && < 1.10,
+                 haddock-library       >= 1.10     && < 1.11,
                  hslua                 >= 1.1      && < 1.4,
                  hslua-module-path     >= 0.1.0    && < 0.2.0,
                  hslua-module-system   >= 0.2      && < 0.3,
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 35eaac0a9..67b3af2d3 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -131,7 +131,8 @@ docHToInlines isCode d' =
           DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
           _               -> mempty
     DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
-    DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s
+    DocModule s -> B.codeWith ("",["haskell","module"],[]) $
+                   T.pack (modLinkName s)
     DocWarning _ -> mempty -- TODO
     DocEmphasis d -> B.emph (docHToInlines isCode d)
     DocMonospaced (DocString s) -> B.code $ T.pack s
diff --git a/stack.yaml b/stack.yaml
index 16e5ad2cf..ea48fbb88 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -7,6 +7,7 @@ flags:
 packages:
 - '.'
 extra-deps:
+- haddock-library-1.10.0
 - hslua-1.3.0
 - hslua-module-path-0.1.0
 - jira-wiki-markup-1.4.0
-- 
cgit v1.2.3


From 105a50569be6d2c1b37cc290abcbfbae1cd0fd1e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Tue, 25 May 2021 17:49:48 +0200
Subject: Allow compilation with base 4.15

---
 src/Text/Pandoc/Options.hs                   | 34 +++++-----
 src/Text/Pandoc/Readers/Docx/Combine.hs      |  6 +-
 src/Text/Pandoc/Readers/Odt/ContentReader.hs | 11 ++--
 src/Text/Pandoc/Writers/OpenDocument.hs      | 98 +++++++++++++---------------
 4 files changed, 72 insertions(+), 77 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 92bda36b2..85d9aa103 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -315,6 +315,23 @@ defaultMathJaxURL = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.j
 defaultKaTeXURL :: Text
 defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/"
 
+-- Update documentation in doc/filters.md if this is changed.
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''TrackChanges)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''WrapOption)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated . drop 8
+                           } ''TopLevelDivision)
+
+$(deriveJSON defaultOptions{ constructorTagModifier =
+                               camelCaseStrToHyphenated
+                           } ''ReferenceLocation)
+
 -- Update documentation in doc/filters.md if this is changed.
 $(deriveJSON defaultOptions ''ReaderOptions)
 
@@ -338,20 +355,3 @@ $(deriveJSON defaultOptions{ constructorTagModifier =
                            } ''ObfuscationMethod)
 
 $(deriveJSON defaultOptions ''HTMLSlideVariant)
-
--- Update documentation in doc/filters.md if this is changed.
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''TrackChanges)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''WrapOption)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated . drop 8
-                           } ''TopLevelDivision)
-
-$(deriveJSON defaultOptions{ constructorTagModifier =
-                               camelCaseStrToHyphenated
-                           } ''ReferenceLocation)
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index 7c6d01769..6e4faa639 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -61,7 +61,7 @@ import Data.List
 import Data.Bifunctor
 import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl
                      , (><), (|>) )
-import Text.Pandoc.Builder
+import Text.Pandoc.Builder as B
 
 data Modifier a = Modifier (a -> a)
                 | AttrModifier (Attr -> a -> a) Attr
@@ -116,12 +116,12 @@ ilModifierAndInnards ils = case viewl $ unMany ils of
 
 inlinesL :: Inlines -> (Inlines, Inlines)
 inlinesL ils = case viewl $ unMany ils of
-  (s :< sq) -> (singleton s, Many sq)
+  (s :< sq) -> (B.singleton s, Many sq)
   _         -> (mempty, ils)
 
 inlinesR :: Inlines -> (Inlines, Inlines)
 inlinesR ils = case viewr $ unMany ils of
-  (sq :> s) -> (Many sq, singleton s)
+  (sq :> s) -> (Many sq, B.singleton s)
   _         -> (ils, mempty)
 
 combineInlines :: Inlines -> Inlines -> Inlines
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index c4220b0db..5520d039f 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE Arrows            #-}
+{-# LANGUAGE CPP               #-}
 {-# LANGUAGE DeriveFoldable    #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE PatternGuards     #-}
@@ -33,7 +34,7 @@ import Data.List (find)
 import qualified Data.Map as M
 import qualified Data.Text as T
 import Data.Maybe
-import Data.Semigroup (First(..), Option(..))
+import Data.Monoid (Alt (..))
 
 import Text.TeXMath (readMathML, writeTeX)
 import qualified Text.Pandoc.XML.Light as XML
@@ -505,13 +506,11 @@ type InlineMatcher = ElementMatcher Inlines
 
 type BlockMatcher  = ElementMatcher Blocks
 
-
-newtype FirstMatch a = FirstMatch (Option (First a))
-                     deriving (Foldable, Monoid, Semigroup)
+newtype FirstMatch a = FirstMatch (Alt Maybe a)
+  deriving (Foldable, Monoid, Semigroup)
 
 firstMatch :: a -> FirstMatch a
-firstMatch = FirstMatch . Option . Just . First
-
+firstMatch = FirstMatch . Alt . Just
 
 --
 matchingElement :: (Monoid e)
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 34a3a4aa5..5f3224c2f 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleContexts  #-}
+{-# LANGUAGE LambdaCase        #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE PatternGuards     #-}
 {-# LANGUAGE ViewPatterns      #-}
@@ -372,36 +373,32 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
 
 -- | Convert a Pandoc block element to OpenDocument.
 blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m (Doc Text)
-blockToOpenDocument o bs
-    | Plain          b <- bs = if null b
-                                  then return empty
-                                  else inParagraphTags =<< inlinesToOpenDocument o b
-    | Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] <- bs
-                             = figure attr c s t
-    | Para           b <- bs = if null b &&
-                                    not (isEnabled Ext_empty_paragraphs o)
-                                  then return empty
-                                  else inParagraphTags =<< inlinesToOpenDocument o b
-    | LineBlock      b <- bs = blockToOpenDocument o $ linesToPara b
-    | Div attr xs      <- bs = mkDiv attr xs
-    | Header     i (ident,_,_) b
-                       <- bs = setFirstPara >> (inHeaderTags i ident
-                                  =<< inlinesToOpenDocument o b)
-    | BlockQuote     b <- bs = setFirstPara >> mkBlockQuote b
-    | DefinitionList b <- bs = setFirstPara >> defList b
-    | BulletList     b <- bs = setFirstPara >> bulletListToOpenDocument o b
-    | OrderedList  a b <- bs = setFirstPara >> orderedList a b
-    | CodeBlock    _ s <- bs = setFirstPara >> preformatted s
-    | Table a bc s th tb tf <- bs =  setFirstPara >> table (Ann.toTable a bc s th tb tf)
-    | HorizontalRule   <- bs = setFirstPara >> return (selfClosingTag "text:p"
-                                [ ("text:style-name", "Horizontal_20_Line") ])
-    | RawBlock f     s <- bs = if f == Format "opendocument"
-                                  then return $ text $ T.unpack s
-                                  else do
-                                    report $ BlockNotRendered bs
-                                    return empty
-    | Null             <- bs = return empty
-    | otherwise              = return empty
+blockToOpenDocument o = \case
+    Plain          b -> if null b
+                        then return empty
+                        else inParagraphTags =<< inlinesToOpenDocument o b
+    Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] -> figure attr c s t
+    Para           b -> if null b &&
+                           not (isEnabled Ext_empty_paragraphs o)
+                        then return empty
+                        else inParagraphTags =<< inlinesToOpenDocument o b
+    LineBlock      b -> blockToOpenDocument o $ linesToPara b
+    Div attr xs      -> mkDiv attr xs
+    Header     i (ident,_,_) b -> do
+      setFirstPara
+      inHeaderTags i ident =<< inlinesToOpenDocument o b
+    BlockQuote     b -> setFirstPara >> mkBlockQuote b
+    DefinitionList b -> setFirstPara >> defList b
+    BulletList     b -> setFirstPara >> bulletListToOpenDocument o b
+    OrderedList  a b -> setFirstPara >> orderedList a b
+    CodeBlock    _ s -> setFirstPara >> preformatted s
+    Table a bc s th tb tf -> setFirstPara >> table (Ann.toTable a bc s th tb tf)
+    HorizontalRule   -> setFirstPara >> return (selfClosingTag "text:p"
+                         [ ("text:style-name", "Horizontal_20_Line") ])
+    b@(RawBlock f s) -> if f == Format "opendocument"
+                        then return $ text $ T.unpack s
+                        else empty <$ report (BlockNotRendered b)
+    Null             -> return empty
     where
       defList       b = do setInDefinitionList True
                            r <- vcat  <$> mapM (deflistItemToOpenDocument o) b
@@ -874,27 +871,26 @@ data TextStyle = Italic
 textStyleAttr :: Map.Map Text Text
               -> TextStyle
               -> Map.Map Text Text
-textStyleAttr m s
-    | Italic <- s = Map.insert "fo:font-style" "italic" .
-                    Map.insert "style:font-style-asian" "italic" .
-                    Map.insert "style:font-style-complex" "italic" $ m
-    | Bold   <- s = Map.insert "fo:font-weight" "bold" .
-                    Map.insert "style:font-weight-asian" "bold" .
-                    Map.insert "style:font-weight-complex" "bold" $ m
-    | Under  <- s = Map.insert "style:text-underline-style" "solid" .
-                    Map.insert "style:text-underline-width" "auto" .
-                    Map.insert "style:text-underline-color" "font-color" $ m
-    | Strike <- s = Map.insert "style:text-line-through-style" "solid" m
-    | Sub    <- s = Map.insert "style:text-position" "sub 58%" m
-    | Sup    <- s = Map.insert "style:text-position" "super 58%" m
-    | SmallC <- s = Map.insert "fo:font-variant" "small-caps" m
-    | Pre    <- s = Map.insert "style:font-name" "Courier New" .
-                    Map.insert "style:font-name-asian" "Courier New" .
-                    Map.insert "style:font-name-complex" "Courier New" $ m
-    | Language lang <- s
-                  = Map.insert "fo:language" (langLanguage lang) .
-                    maybe id (Map.insert "fo:country") (langRegion lang) $ m
-    | otherwise   = m
+textStyleAttr m = \case
+  Italic -> Map.insert "fo:font-style" "italic" .
+                Map.insert "style:font-style-asian" "italic" .
+                Map.insert "style:font-style-complex" "italic" $ m
+  Bold   -> Map.insert "fo:font-weight" "bold" .
+                Map.insert "style:font-weight-asian" "bold" .
+                Map.insert "style:font-weight-complex" "bold" $ m
+  Under  -> Map.insert "style:text-underline-style" "solid" .
+                Map.insert "style:text-underline-width" "auto" .
+                Map.insert "style:text-underline-color" "font-color" $ m
+  Strike -> Map.insert "style:text-line-through-style" "solid" m
+  Sub    -> Map.insert "style:text-position" "sub 58%" m
+  Sup    -> Map.insert "style:text-position" "super 58%" m
+  SmallC -> Map.insert "fo:font-variant" "small-caps" m
+  Pre    -> Map.insert "style:font-name" "Courier New" .
+            Map.insert "style:font-name-asian" "Courier New" .
+            Map.insert "style:font-name-complex" "Courier New" $ m
+  Language lang ->
+            Map.insert "fo:language" (langLanguage lang) .
+            maybe id (Map.insert "fo:country") (langRegion lang) $ m
 
 withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
 withLangFromAttr (_,_,kvs) action =
-- 
cgit v1.2.3


From 8d5014fdfc57b80be54a3d23358e92c3b45a7e7d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 25 May 2021 10:36:51 -0700
Subject: Logging: remove single quotes around paths in messages.

We weren't doing it consistently and it seems unnecessary.
---
 src/Text/Pandoc/Logging.hs | 12 ++++++------
 test/command/7099.md       |  2 +-
 test/command/svg.md        |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 2642d72ac..193b8b61c 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -282,7 +282,7 @@ showLogMessage msg =
        ParsingUnescaped s pos ->
          "Parsing unescaped '" <> s <> "' at " <> showPos pos
        CouldNotLoadIncludeFile fp pos ->
-         "Could not load include file '" <> fp <> "' at " <> showPos pos
+         "Could not load include file " <> fp <> " at " <> showPos pos
        MacroAlreadyDefined name pos ->
          "Macro '" <> name <> "' already defined, ignoring at " <> showPos pos
        InlineNotRendered il ->
@@ -294,18 +294,18 @@ showLogMessage msg =
        IgnoredIOError s ->
          "IO Error (ignored): " <> s
        CouldNotFetchResource fp s ->
-         "Could not fetch resource '" <> fp <> "'" <>
+         "Could not fetch resource " <> fp <>
            if Text.null s then "" else ": " <> s
        CouldNotDetermineImageSize fp s ->
-         "Could not determine image size for '" <> fp <> "'" <>
+         "Could not determine image size for " <> fp <>
            if Text.null s then "" else ": " <> s
        CouldNotConvertImage fp s ->
-         "Could not convert image '" <> fp <> "'" <>
+         "Could not convert image " <> fp <>
            if Text.null s then "" else ": " <> s
        CouldNotDetermineMimeType fp ->
-         "Could not determine mime type for '" <> fp <> "'"
+         "Could not determine mime type for " <> fp
        CouldNotConvertTeXMath s m ->
-         "Could not convert TeX math '" <> s <> "', rendering as TeX" <>
+         "Could not convert TeX math " <> s <> ", rendering as TeX" <>
            if Text.null m then "" else ":\n" <> m
        CouldNotParseCSS m ->
          "Could not parse CSS" <> if Text.null m then "" else ":\n" <> m
diff --git a/test/command/7099.md b/test/command/7099.md
index 33ac8aea1..467b22a16 100644
--- a/test/command/7099.md
+++ b/test/command/7099.md
@@ -11,7 +11,7 @@
 <iframe src="h:invalid@url"></iframe>
 ^D
 [INFO] Fetching h:invalid@url...
-[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url
+[WARNING] Could not fetch resource h:invalid@url: Could not fetch h:invalid@url
   InvalidUrlException "h:invalid@url" "Invalid scheme"
 [INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29
 []
diff --git a/test/command/svg.md b/test/command/svg.md
index 4ba836b20..57c99db33 100644
--- a/test/command/svg.md
+++ b/test/command/svg.md
@@ -2,7 +2,7 @@
 % pandoc -f latex -t icml
 \includegraphics{command/corrupt.svg}
 ^D
-[WARNING] Could not determine image size for 'command/corrupt.svg': could not determine image type
+[WARNING] Could not determine image size for command/corrupt.svg: could not determine image type
 <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph">
   <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle">
     <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 150 -100">
-- 
cgit v1.2.3


From 81eadfd99ad3e905b806cc6c80ab0fea0185286f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 26 May 2021 22:50:35 -0700
Subject: LaTeX reader: improve `\def` and implement `\newif`.

- Improve parsing of `\def` macros.  We previously set "verbatim mode"
  even for parsing the initial `\def`; this caused problems for things
  like
  ```
  \def\foo{\def\bar{BAR}}
  \foo
  \bar
  ```
- Implement `\newif`.
- Add tests.
---
 src/Text/Pandoc/Readers/LaTeX/Macro.hs   | 59 ++++++++++++++++++++++++--------
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 19 +++++++++-
 test/command/newif.md                    | 55 +++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+), 15 deletions(-)
 create mode 100644 test/command/newif.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
index 607f5438c..5495a8e74 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs
@@ -14,6 +14,7 @@ import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline,
 import Control.Applicative ((<|>), optional)
 import qualified Data.Map as M
 import Data.Text (Text)
+import qualified Data.Text as T
 
 macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
 macroDef constructor = do
@@ -22,9 +23,11 @@ macroDef constructor = do
       guardDisabled Ext_latex_macros)
      <|> return mempty
   where commandDef = do
-          (name, macro') <- newcommand <|> letmacro <|> defmacro
+          nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif
           guardDisabled Ext_latex_macros <|>
-           updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) })
+           mapM_ (\(name, macro') ->
+                   updateState (\s -> s{ sMacros = M.insert name macro'
+                                          (sMacros s) })) nameMacroPairs
         environmentDef = do
           mbenv <- newenvironment
           case mbenv of
@@ -40,7 +43,7 @@ macroDef constructor = do
         -- @\newcommand{\envname}[n-args][default]{begin}@
         -- @\newcommand{\endenvname}@
 
-letmacro :: PandocMonad m => LP m (Text, Macro)
+letmacro :: PandocMonad m => LP m [(Text, Macro)]
 letmacro = do
   controlSeq "let"
   (name, contents) <- withVerbatimMode $ do
@@ -53,18 +56,47 @@ letmacro = do
     contents <- bracedOrToken
     return (name, contents)
   contents' <- doMacros' 0 contents
-  return (name, Macro ExpandWhenDefined [] Nothing contents')
+  return [(name, Macro ExpandWhenDefined [] Nothing contents')]
 
-defmacro :: PandocMonad m => LP m (Text, Macro)
-defmacro = try $
+defmacro :: PandocMonad m => LP m [(Text, Macro)]
+defmacro = do
   -- we use withVerbatimMode, because macros are to be expanded
   -- at point of use, not point of definition
+  controlSeq "def"
   withVerbatimMode $ do
-    controlSeq "def"
     Tok _ (CtrlSeq name) _ <- anyControlSeq
     argspecs <- many (argspecArg <|> argspecPattern)
     contents <- bracedOrToken
-    return (name, Macro ExpandWhenUsed argspecs Nothing contents)
+    return [(name, Macro ExpandWhenUsed argspecs Nothing contents)]
+
+-- \newif\iffoo' defines:
+-- \iffoo to be \iffalse
+-- \footrue to be a command that defines \iffoo to be \iftrue
+-- \foofalse to be a command that defines \iffoo to be \iffalse
+newif :: PandocMonad m => LP m [(Text, Macro)]
+newif = do
+  controlSeq "newif"
+  withVerbatimMode $ do
+    Tok pos (CtrlSeq name) _ <- anyControlSeq
+    -- \def\iffoo\iffalse
+    -- \def\footrue{\def\iffoo\iftrue}
+    -- \def\foofalse{\def\iffoo\iffalse}
+    let base = T.drop 2 name
+    return [ (name, Macro ExpandWhenUsed [] Nothing
+                    [Tok pos (CtrlSeq "iffalse") "\\iffalse"])
+           , (base <> "true",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iftrue") "\\iftrue"
+                   ])
+           , (base <> "false",
+                   Macro ExpandWhenUsed [] Nothing
+                   [ Tok pos (CtrlSeq "def") "\\def"
+                   , Tok pos (CtrlSeq name) ("\\" <> name)
+                   , Tok pos (CtrlSeq "iffalse") "\\iffalse"
+                   ])
+           ]
 
 argspecArg :: PandocMonad m => LP m ArgSpec
 argspecArg = do
@@ -77,10 +109,9 @@ argspecPattern =
                               (toktype' == Symbol || toktype' == Word) &&
                               (txt /= "{" && txt /= "\\" && txt /= "}")))
 
-newcommand :: PandocMonad m => LP m (Text, Macro)
+newcommand :: PandocMonad m => LP m [(Text, Macro)]
 newcommand = do
-  pos <- getPosition
-  Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
+  Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|>
                              controlSeq "renewcommand" <|>
                              controlSeq "providecommand" <|>
                              controlSeq "DeclareMathOperator" <|>
@@ -112,9 +143,9 @@ newcommand = do
         Just macro
           | mtype == "newcommand" -> do
               report $ MacroAlreadyDefined txt pos
-              return (name, macro)
-          | mtype == "providecommand" -> return (name, macro)
-        _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
+              return [(name, macro)]
+          | mtype == "providecommand" -> return [(name, macro)]
+        _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)]
 
 newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro))
 newenvironment = do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 1c77eb299..a17b1f324 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -113,7 +113,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
                                         ArgSpec (..), Tok (..), TokType (..))
 import Text.Pandoc.Shared
 import Text.Parsec.Pos
--- import Debug.Trace
 
 newtype DottedNum = DottedNum [Int]
   deriving (Show, Eq)
@@ -563,8 +562,26 @@ trySpecialMacro "xspace" ts = do
     Tok pos Word t : _
       | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts'
     _ -> return ts'
+trySpecialMacro "iftrue" ts = handleIf True ts
+trySpecialMacro "iffalse" ts = handleIf False ts
 trySpecialMacro _ _ = mzero
 
+handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok]
+handleIf b ts = do
+  res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts
+  case res' of
+    Left _ -> Prelude.fail "Could not parse conditional"
+    Right ts' -> return ts'
+
+ifParser :: PandocMonad m => Bool -> LP m [Tok]
+ifParser b = do
+  ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi")
+                    *> anyTok)
+  elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi"))
+                 <|> ([] <$ controlSeq "fi")
+  rest <- getInput
+  return $ (if b then ifToks else elseToks) ++ rest
+
 startsWithAlphaNum :: Text -> Bool
 startsWithAlphaNum t =
   case T.uncons t of
diff --git a/test/command/newif.md b/test/command/newif.md
new file mode 100644
index 000000000..f444f14c9
--- /dev/null
+++ b/test/command/newif.md
@@ -0,0 +1,55 @@
+```
+% pandoc -f latex -t plain
+\iftrue
+should print
+\iftrue
+should print
+\else
+should not print
+\fi
+\else
+should not print
+\fi
+
+\iffalse
+should not print
+\else
+\iftrue
+should print
+\else
+should not print
+\fi
+\fi
+
+\newif\ifepub
+
+\ifepub
+should not print
+\fi
+
+\epubtrue
+
+\ifepub
+should print
+\else
+should not print
+\fi
+
+\epubfalse
+
+\ifepub
+should not print
+\else
+should print
+\fi
+^D
+should print
+
+should print
+
+should print
+
+should print
+
+should print
+```
-- 
cgit v1.2.3


From 834da53058069fe50da510fa86e0807a7ff7868f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 24 May 2021 10:17:37 -0700
Subject: Add `rebase_relative_paths` extension.

- Add manual entry for (non-default) extension
  `rebase_relative_paths`.
- Add constructor `Ext_rebase_relative_paths` to `Extensions`
  in Text.Pandoc.Extensions [API change]. When enabled, this
  extension rewrites relative image and link paths by prepending
  the (relative) directory of the containing file.
- Make Markdown reader sensitive to the new extension.
- Add tests for #3752.

Closes #3752.

NB. currently the extension applies to markdown and associated
readers but not commonmark/gfm.
---
 MANUAL.txt                          |  28 ++++++++++++++++++++++++++--
 pandoc.cabal                        |   4 ++++
 src/Text/Pandoc/Extensions.hs       |   3 +++
 src/Text/Pandoc/Readers/Markdown.hs |  36 +++++++++++++++++++++++++++++-------
 test/command/3752.md                |  35 +++++++++++++++++++++++++++++++++++
 test/command/chap1/spider.png       | Bin 0 -> 63531 bytes
 test/command/chap1/text.md          |  11 +++++++++++
 test/command/chap2/spider.png       | Bin 0 -> 9861 bytes
 test/command/chap2/text.md          |   3 +++
 9 files changed, 111 insertions(+), 9 deletions(-)
 create mode 100644 test/command/3752.md
 create mode 100644 test/command/chap1/spider.png
 create mode 100644 test/command/chap1/text.md
 create mode 100644 test/command/chap2/spider.png
 create mode 100644 test/command/chap2/text.md

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 6dc783e8c..48bf60d6e 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3755,7 +3755,7 @@ definition:
 Note that space between items in a definition list is required.
 (A variant that loosens this requirement, but disallows "lazy"
 hard wrapping, can be activated with `compact_definition_lists`: see
-[Non-pandoc extensions], below.)
+[Non-default extensions], below.)
 
 [^3]:  I have been influenced by the suggestions of [David
   Wheeler](https://justatheory.com/2009/02/modest-markdown-proposal/).
@@ -5051,13 +5051,37 @@ author-in-text style inside notes when using a note style.
 [finding and editing styles]: https://citationstyles.org/authors/
 [CSL locale files]: https://github.com/citation-style-language/locales
 
-## Non-pandoc extensions
+## Non-default extensions
 
 The following Markdown syntax extensions are not enabled by default
 in pandoc, but may be enabled by adding `+EXTENSION` to the format
 name, where `EXTENSION` is the name of the extension.  Thus, for
 example, `markdown+hard_line_breaks` is Markdown with hard line breaks.
 
+#### Extension: `rebase_relative_paths` ####
+
+Rewrite relative paths for Markdown links and images, depending
+on the path of the file containing the link or image link.  For
+each link or image, pandoc will compute the directory of the
+containing file, relative to the working directory, and prepend
+the resulting path to the link or image path.
+
+The use of this extension is best understood by example.
+Suppose you have a a subdirectory for each chapter of a book,
+`chap1`, `chap2`, `chap3`. Each contains a file `text.md` and a
+number of images used in the chapter.  You would like to have
+`![image](spider.jpg)` in `chap1/text.md` refer to
+`chap1/spider.jpg` and `![image](spider.jpg)` in `chap2/text.md`
+refer to `chap2/spider.jpg`.  To do this, use
+
+    pandoc chap*/*.md -f markdown+rebase_relative_paths
+
+Without this extension, you would have to use
+`![image](chap1/spider.jpg)` in `chap1/text.md` and
+`![image](chap2/spider.jpg)` in `chap2/text.md`.  Links with
+relative paths will be rewritten in the same way as images.
+*This option currently only affects Markdown input.*
+
 #### Extension: `attributes` ####
 
 Allows attributes to be attached to any inline or block-level
diff --git a/pandoc.cabal b/pandoc.cabal
index c74df3e69..c8343d16e 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -214,6 +214,10 @@ extra-source-files:
                  test/command/C.txt
                  test/command/D.txt
                  test/command/01.csv
+                 test/command/chap1/spider.png
+                 test/command/chap2/spider.png
+                 test/command/chap1/text.md
+                 test/command/chap2/text.md
                  test/command/defaults1.yaml
                  test/command/defaults2.yaml
                  test/command/defaults3.yaml
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 6423d5f56..c4d54c06e 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -136,6 +136,8 @@ data Extension =
     | Ext_raw_html            -- ^ Allow raw HTML
     | Ext_raw_tex             -- ^ Allow raw TeX (other than math)
     | Ext_raw_markdown        -- ^ Parse markdown in ipynb as raw markdown
+    | Ext_rebase_relative_paths -- ^ Rebase relative image and link paths,
+                                -- relative to directory of containing file
     | Ext_shortcut_reference_links -- ^ Shortcut reference links
     | Ext_simple_tables       -- ^ Pandoc-style simple tables
     | Ext_smart               -- ^ "Smart" quotes, apostrophes, ellipses, dashes
@@ -462,6 +464,7 @@ getAllExtensions f = universalExtensions <> getAll f
        , Ext_gutenberg
        , Ext_smart
        , Ext_literate_haskell
+       , Ext_rebase_relative_paths
        ]
   getAll "markdown_strict"   = allMarkdownExtensions
   getAll "markdown_phpextra" = allMarkdownExtensions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 34f16ab4e..968c6c165 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -29,7 +29,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.ByteString.Lazy as BL
-import System.FilePath (addExtension, takeExtension)
+import System.FilePath (addExtension, takeExtension, isAbsolute, takeDirectory)
 import Text.HTML.TagSoup hiding (Row)
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
@@ -1836,9 +1836,12 @@ regLink :: PandocMonad m
         -> MarkdownParser m (F Inlines)
 regLink constructor lab = try $ do
   (src, tit) <- source
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  pos <- getPosition
+  let src' = if rebase then rebasePath pos src else src
   attr <- option nullAttr $
           guardEnabled Ext_link_attributes >> attributes
-  return $ constructor attr src tit <$> lab
+  return $ constructor attr src' tit <$> lab
 
 -- a link like [this][ref] or [this][] or [this]
 referenceLink :: PandocMonad m
@@ -1854,6 +1857,8 @@ referenceLink constructor (lab, raw) = do
                          return (mempty, "")))
       <|>
       try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  pos <- getPosition
   when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
   let labIsRef = raw' == "" || raw' == "[]"
   let key = toKey $ if labIsRef then raw else raw'
@@ -1878,7 +1883,9 @@ referenceLink constructor (lab, raw) = do
                    Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
                    Nothing              -> makeFallback
             else makeFallback
-       Just ((src,tit), attr) -> constructor attr src tit <$> lab
+       Just ((src,tit), attr) ->
+          let src' = if rebase then rebasePath pos src else src
+           in constructor attr src' tit <$> lab
 
 dropBrackets :: Text -> Text
 dropBrackets = dropRB . dropLB
@@ -1911,15 +1918,30 @@ autoLink = try $ do
   return $ return $ B.linkWith attr (src <> escapeURI extra) ""
                      (B.str $ orig <> extra)
 
+-- | Rebase a relative path, by adding the (relative) directory
+-- of the containing source position.  Absolute links and URLs
+-- are untouched.
+rebasePath :: SourcePos -> Text -> Text
+rebasePath pos path = do
+  let fp = sourceName pos
+   in if isAbsolute (T.unpack path) || isURI path
+         then path
+         else
+           case takeDirectory fp of
+             ""  -> path
+             "." -> path
+             d   -> T.pack d <> "/" <> path
+
 image :: PandocMonad m => MarkdownParser m (F Inlines)
 image = try $ do
   char '!'
   (lab,raw) <- reference
   defaultExt <- getOption readerDefaultImageExtension
-  let constructor attr' src = case takeExtension (T.unpack src) of
-                          "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
-                                                          $ T.unpack defaultExt)
-                          _  -> B.imageWith attr' src
+  let constructor attr' src =
+         case takeExtension (T.unpack src) of
+            "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
+                                            $ T.unpack defaultExt)
+            _  -> B.imageWith attr' src
   regLink constructor lab <|> referenceLink constructor (lab,raw)
 
 note :: PandocMonad m => MarkdownParser m (F Inlines)
diff --git a/test/command/3752.md b/test/command/3752.md
new file mode 100644
index 000000000..76d51989b
--- /dev/null
+++ b/test/command/3752.md
@@ -0,0 +1,35 @@
+```
+% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx | pandoc -f docx -t plain
+^D
+[INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png
+[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
+[INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png
+Chapter one
+
+A spider: [spider]
+
+The moon: [moon]
+
+Link to spider picture.
+
+URL left alone: manual.
+
+Absolute path left alone: absolute.
+
+Chapter two
+
+A spider: [spider]
+```
+
+```
+% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths -t html
+^D
+<h1 id="chapter-one">Chapter one</h1>
+<p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p>
+<p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p>
+<p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
+<p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
+<p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
+<h1 id="chapter-two">Chapter two</h1>
+<p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
+```
diff --git a/test/command/chap1/spider.png b/test/command/chap1/spider.png
new file mode 100644
index 000000000..7ee9fe339
Binary files /dev/null and b/test/command/chap1/spider.png differ
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
new file mode 100644
index 000000000..88b30313d
--- /dev/null
+++ b/test/command/chap1/text.md
@@ -0,0 +1,11 @@
+# Chapter one
+
+A spider: ![spider](spider.png)
+
+The moon: ![moon](../../lalune.jpg)
+
+Link to [spider picture](spider.png).
+
+URL left alone: [manual](https://pandoc.org/MANUAL.html).
+
+Absolute path left alone: [absolute](/foo/bar/baz.png).
diff --git a/test/command/chap2/spider.png b/test/command/chap2/spider.png
new file mode 100644
index 000000000..5377e940b
Binary files /dev/null and b/test/command/chap2/spider.png differ
diff --git a/test/command/chap2/text.md b/test/command/chap2/text.md
new file mode 100644
index 000000000..435a266d7
--- /dev/null
+++ b/test/command/chap2/text.md
@@ -0,0 +1,3 @@
+# Chapter two
+
+A spider: ![spider](spider.png)
-- 
cgit v1.2.3


From cbe16b2866abd8d0e4c15f027562b4b2bed3f01e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 27 May 2021 10:49:45 -0700
Subject: Citeproc: Don't detect math elements as locators.

Closes #7321.
---
 MANUAL.txt                          |  2 +-
 src/Text/Pandoc/Citeproc/Locator.hs |  7 +++++++
 test/command/7321.md                | 24 ++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7321.md

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 48bf60d6e..2ca50e44a 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5015,7 +5015,7 @@ the suffix as locator by prepending curly braces:
 
     [@smith{ii, A, D-Z}, with a suffix]
     [@smith, {pp. iv, vi-xi, (xv)-(xvii)} with suffix here]
-    [@smith{}, $a^2$ and following]
+    [@smith{}, 99 years later]
 
 A minus sign (`-`) before the `@` will suppress mention of
 the author in the citation.  This can be useful when the
diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
index 44416ca12..dbedc08d9 100644
--- a/src/Text/Pandoc/Citeproc/Locator.hs
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -181,6 +181,7 @@ pPageUnit = roman <|> plainUnit
       plainUnit = do
           ts <- many1 (notFollowedBy pSpace >>
                        notFollowedBy pLocatorPunct >>
+                       notFollowedBy pMath >>
                        anyToken)
           let s = stringify ts
           -- otherwise look for actual digits or -s
@@ -211,6 +212,12 @@ pMatchChar msg f = satisfyTok f' <?> msg
 pSpace :: LocatorParser Inline
 pSpace = satisfyTok (\t -> isSpacey t || t == Str "\160") <?> "space"
 
+pMath :: LocatorParser Inline
+pMath = satisfyTok isMath
+ where
+  isMath (Math{}) = True
+  isMath _ = False
+
 satisfyTok :: (Inline -> Bool) -> LocatorParser Inline
 satisfyTok f = tokenPrim show (\sp _ _ -> sp) (\tok -> if f tok
                                                           then Just tok
diff --git a/test/command/7321.md b/test/command/7321.md
new file mode 100644
index 000000000..f5e644bee
--- /dev/null
+++ b/test/command/7321.md
@@ -0,0 +1,24 @@
+```
+% pandoc -t plain --citeproc --wrap=none
+---
+references:
+- id: fenner2012a
+  title: One-click science marketing
+  author:
+  - family: Fenner
+    given: Martin
+  container-title: Nature Materials
+  volume: 11
+  issue: 4
+  publisher: Nature Publishing Group
+  type: article-journal
+  issued:
+    year: 2012
+---
+
+[@fenner2012a, $a^2+b^2=c^2$]
+^D
+(Fenner 2012, a² + b² = c²)
+
+Fenner, Martin. 2012. “One-Click Science Marketing.” Nature Materials 11 (4).
+```
-- 
cgit v1.2.3


From 6972a7dc9158c1d56b2dc9a06f596386f6b30860 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 27 May 2021 11:26:38 -0700
Subject: Modify rebase_reference_links treatment of reference links/images.

The directory is based on the file containing the link
reference, not the file containing the link, if these differ.
---
 MANUAL.txt                          | 6 +++++-
 src/Text/Pandoc/Readers/Markdown.hs | 9 ++++-----
 test/command/3752.md                | 5 ++++-
 test/command/chap1/text.md          | 2 ++
 test/command/chap2/text.md          | 2 ++
 5 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 2ca50e44a..d755c0c00 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5080,7 +5080,11 @@ Without this extension, you would have to use
 `![image](chap1/spider.jpg)` in `chap1/text.md` and
 `![image](chap2/spider.jpg)` in `chap2/text.md`.  Links with
 relative paths will be rewritten in the same way as images.
-*This option currently only affects Markdown input.*
+
+Note that relative paths in reference links and images will
+be rewritten relative to the file containing the link
+reference definition, not the file containing the reference link
+or image itself, if these differ.
 
 #### Extension: `attributes` ####
 
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 968c6c165..e5cbadc94 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -336,7 +336,9 @@ referenceKey = try $ do
                     notFollowedBy' (() <$ reference)
                     many1Char $ notFollowedBy space >> litChar
   let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
-  src <- try betweenAngles <|> sourceURL
+  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+  src <- (if rebase then rebasePath pos else id) <$>
+             (try betweenAngles <|> sourceURL)
   tit <- option "" referenceTitle
   attr   <- option nullAttr $ try $
               do guardEnabled Ext_link_attributes
@@ -1857,8 +1859,6 @@ referenceLink constructor (lab, raw) = do
                          return (mempty, "")))
       <|>
       try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
-  rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
-  pos <- getPosition
   when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
   let labIsRef = raw' == "" || raw' == "[]"
   let key = toKey $ if labIsRef then raw else raw'
@@ -1884,8 +1884,7 @@ referenceLink constructor (lab, raw) = do
                    Nothing              -> makeFallback
             else makeFallback
        Just ((src,tit), attr) ->
-          let src' = if rebase then rebasePath pos src else src
-           in constructor attr src' tit <$> lab
+           constructor attr src tit <$> lab
 
 dropBrackets :: Text -> Text
 dropBrackets = dropRB . dropLB
diff --git a/test/command/3752.md b/test/command/3752.md
index 76d51989b..6d31eb450 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -2,12 +2,14 @@
 % pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx | pandoc -f docx -t plain
 ^D
 [INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png
-[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
 [INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png
+[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
 Chapter one
 
 A spider: [spider]
 
+Another spider: [another spider]
+
 The moon: [moon]
 
 Link to spider picture.
@@ -26,6 +28,7 @@ A spider: [spider]
 ^D
 <h1 id="chapter-one">Chapter one</h1>
 <p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p>
+<p>Another spider: <img src="command/chap2/spider.png" alt="another spider" /></p>
 <p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p>
 <p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
 <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
index 88b30313d..d25514241 100644
--- a/test/command/chap1/text.md
+++ b/test/command/chap1/text.md
@@ -2,6 +2,8 @@
 
 A spider: ![spider](spider.png)
 
+Another spider: ![another spider][refspider]
+
 The moon: ![moon](../../lalune.jpg)
 
 Link to [spider picture](spider.png).
diff --git a/test/command/chap2/text.md b/test/command/chap2/text.md
index 435a266d7..082a1d79e 100644
--- a/test/command/chap2/text.md
+++ b/test/command/chap2/text.md
@@ -1,3 +1,5 @@
 # Chapter two
 
 A spider: ![spider](spider.png)
+
+[refspider]: spider.png
-- 
cgit v1.2.3


From 0661ce699fe83c64dd5a5874e8ab17241b19550c Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 27 May 2021 13:53:26 -0700
Subject: rebase_relative_paths extension: don't change fragment paths.

We don't want a pure fragment path to be rewritten, since
these are used for cross-referencing.
---
 MANUAL.txt                          | 3 +++
 src/Text/Pandoc/Readers/Markdown.hs | 3 ++-
 test/command/3752.md                | 3 +++
 test/command/chap1/text.md          | 2 ++
 4 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index d755c0c00..fb1a76e9a 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5081,6 +5081,9 @@ Without this extension, you would have to use
 `![image](chap2/spider.jpg)` in `chap2/text.md`.  Links with
 relative paths will be rewritten in the same way as images.
 
+Absolute paths and URLs are not changed.  Neither are paths
+consisting entirely of a fragment, e.g. `[section one](#foo)`.
+
 Note that relative paths in reference links and images will
 be rewritten relative to the file containing the link
 reference definition, not the file containing the reference link
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e5cbadc94..74f2668e4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1923,7 +1923,8 @@ autoLink = try $ do
 rebasePath :: SourcePos -> Text -> Text
 rebasePath pos path = do
   let fp = sourceName pos
-   in if isAbsolute (T.unpack path) || isURI path
+      isFragment = T.take 1 path == "#"
+   in if isFragment || isAbsolute (T.unpack path) || isURI path
          then path
          else
            case takeDirectory fp of
diff --git a/test/command/3752.md b/test/command/3752.md
index 6d31eb450..2765d79ac 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -18,6 +18,8 @@ URL left alone: manual.
 
 Absolute path left alone: absolute.
 
+Link to fragment: chapter two.
+
 Chapter two
 
 A spider: [spider]
@@ -33,6 +35,7 @@ A spider: [spider]
 <p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
 <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
 <p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
+<p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p>
 <h1 id="chapter-two">Chapter two</h1>
 <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
 ```
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
index d25514241..68a317161 100644
--- a/test/command/chap1/text.md
+++ b/test/command/chap1/text.md
@@ -11,3 +11,5 @@ Link to [spider picture](spider.png).
 URL left alone: [manual](https://pandoc.org/MANUAL.html).
 
 Absolute path left alone: [absolute](/foo/bar/baz.png).
+
+Link to fragment: [chapter two](#chapter-two).
-- 
cgit v1.2.3


From 4b16d181e7219ed161a0e03c0c5ee9dec4b526b4 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 27 May 2021 14:16:37 -0700
Subject: rebase_relative_paths: leave empty paths unchanged.

---
 MANUAL.txt                          | 4 ++--
 src/Text/Pandoc/Readers/Markdown.hs | 2 +-
 test/command/3752.md                | 3 +++
 test/command/chap1/text.md          | 2 ++
 4 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index fb1a76e9a..c234debc9 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -5081,8 +5081,8 @@ Without this extension, you would have to use
 `![image](chap2/spider.jpg)` in `chap2/text.md`.  Links with
 relative paths will be rewritten in the same way as images.
 
-Absolute paths and URLs are not changed.  Neither are paths
-consisting entirely of a fragment, e.g. `[section one](#foo)`.
+Absolute paths and URLs are not changed.  Neither are empty
+paths or paths consisting entirely of a fragment, e.g., `#foo`.
 
 Note that relative paths in reference links and images will
 be rewritten relative to the file containing the link
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 74f2668e4..bc5e3e30f 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1924,7 +1924,7 @@ rebasePath :: SourcePos -> Text -> Text
 rebasePath pos path = do
   let fp = sourceName pos
       isFragment = T.take 1 path == "#"
-   in if isFragment || isAbsolute (T.unpack path) || isURI path
+   in if T.null path || isFragment || isAbsolute (T.unpack path) || isURI path
          then path
          else
            case takeDirectory fp of
diff --git a/test/command/3752.md b/test/command/3752.md
index 2765d79ac..df8af0ba1 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -20,6 +20,8 @@ Absolute path left alone: absolute.
 
 Link to fragment: chapter two.
 
+Empty path: empty.
+
 Chapter two
 
 A spider: [spider]
@@ -36,6 +38,7 @@ A spider: [spider]
 <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
 <p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
 <p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p>
+<p>Empty path: <a href="">empty</a>.</p>
 <h1 id="chapter-two">Chapter two</h1>
 <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
 ```
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
index 68a317161..54f0104de 100644
--- a/test/command/chap1/text.md
+++ b/test/command/chap1/text.md
@@ -13,3 +13,5 @@ URL left alone: [manual](https://pandoc.org/MANUAL.html).
 Absolute path left alone: [absolute](/foo/bar/baz.png).
 
 Link to fragment: [chapter two](#chapter-two).
+
+Empty path: [empty]().
-- 
cgit v1.2.3


From 4842c5fb828c3c34d816fa7bccd4656857742a0b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 27 May 2021 18:28:52 -0700
Subject: Two citeproc locator/suffix improvements:

- Recognize locators spelled with a capital letter.
  Closes #7323.
- Add a comma and a space in front of the suffix if it doesn't start
  with space or punctuation.  Closes #7324.
---
 src/Text/Pandoc/Citeproc/Locator.hs | 14 +++++++++++---
 test/command/7323.md                | 29 +++++++++++++++++++++++++++++
 test/command/7324.md                | 25 +++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 3 deletions(-)
 create mode 100644 test/command/7323.md
 create mode 100644 test/command/7324.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs
index dbedc08d9..f8931d7b5 100644
--- a/src/Text/Pandoc/Citeproc/Locator.hs
+++ b/src/Text/Pandoc/Citeproc/Locator.hs
@@ -20,7 +20,7 @@ parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline])
 parseLocator locale inp =
   case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of
        Right r -> r
-       Left _  -> (Nothing, inp)
+       Left _  -> (Nothing, maybeAddComma inp)
 
 splitInp :: [Inline] -> [Inline]
 splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':'))
@@ -42,9 +42,17 @@ pLocatorWords locMap = do
   -- i.e. the first one will be " 9"
   return $
     if T.null la && T.null lo
-       then (Nothing, s)
+       then (Nothing, maybeAddComma s)
        else (Just (la, T.strip lo), s)
 
+maybeAddComma :: [Inline] -> [Inline]
+maybeAddComma [] = []
+maybeAddComma ils@(Space : _) = ils
+maybeAddComma ils@(Str t : _)
+  | Just (c, _) <- T.uncons t
+  , isPunctuation c = ils
+maybeAddComma ils = Str "," : Space : ils
+
 pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text)
 pLocatorDelimited locMap = try $ do
   _ <- pMatchChar "{" (== '{')
@@ -97,7 +105,7 @@ pLocatorLabel' locMap lim = go ""
           t <- anyToken
           ts <- manyTill anyToken (try $ lookAhead lim)
           let s = acc <> stringify (t:ts)
-          case M.lookup (T.strip s) locMap of
+          case M.lookup (T.toCaseFold $ T.strip s) locMap of
             -- try to find a longer one, or return this one
             Just l -> go s <|> return (l, False)
             Nothing -> go s
diff --git a/test/command/7323.md b/test/command/7323.md
new file mode 100644
index 000000000..7de29cfe2
--- /dev/null
+++ b/test/command/7323.md
@@ -0,0 +1,29 @@
+```
+% pandoc --citeproc -t plain
+---
+references:
+- id: smith
+  author: John Smith
+  issued: 2019
+  title: Insects
+  type: book
+...
+
+@smith [chap. 6]
+
+@smith [chapter 6]
+
+@smith [Chap. 6]
+
+@smith [Chapter 6]
+^D
+John Smith (2019, chap. 6)
+
+John Smith (2019, chap. 6)
+
+John Smith (2019, chap. 6)
+
+John Smith (2019, chap. 6)
+
+John Smith. 2019. Insects.
+```
diff --git a/test/command/7324.md b/test/command/7324.md
new file mode 100644
index 000000000..fae1b9923
--- /dev/null
+++ b/test/command/7324.md
@@ -0,0 +1,25 @@
+```
+% pandoc --citeproc -t plain
+---
+references:
+- id: smith
+  author: John Smith
+  issued: 2019
+  title: Insects
+  type: book
+...
+
+@smith [, among others]
+
+@smith [ among others]
+
+@smith [among others]
+^D
+John Smith (2019, among others)
+
+John Smith (2019 among others)
+
+John Smith (2019, among others)
+
+John Smith. 2019. Insects.
+```
-- 
cgit v1.2.3


From 44484d0dee1bd095240b9faf26f8d1dad8e560ea Mon Sep 17 00:00:00 2001
From: Emily Bourke <undergroundquizscene@gmail.com>
Date: Sun, 11 Apr 2021 21:42:53 +0100
Subject: Docx reader: Read table column widths.

---
 src/Text/Pandoc/Readers/Docx.hs             |   5 +-
 src/Text/Pandoc/Readers/Docx/Parse.hs       |   2 +-
 test/Tests/Writers/Docx.hs                  |   5 ++
 test/docx/0_level_headers.native            |   4 +-
 test/docx/golden/table_one_row.docx         | Bin 9840 -> 9840 bytes
 test/docx/golden/table_with_list_cell.docx  | Bin 10159 -> 10162 bytes
 test/docx/golden/tables-default-widths.docx | Bin 0 -> 10200 bytes
 test/docx/golden/tables.docx                | Bin 10200 -> 10202 bytes
 test/docx/sdt_elements.native               |   6 +-
 test/docx/table_one_row.native              |   8 +--
 test/docx/table_variable_width.native       |  12 ++--
 test/docx/table_with_list_cell.native       |   6 +-
 test/docx/tables-default-widths.native      |  92 ++++++++++++++++++++++++++++
 test/docx/tables.native                     |  18 +++---
 14 files changed, 128 insertions(+), 30 deletions(-)
 create mode 100644 test/docx/golden/tables-default-widths.docx
 create mode 100644 test/docx/tables-default-widths.native

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 22dd54193..375bb7338 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -639,7 +639,7 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
     bodyPartToBlocks $ Paragraph pPr' parparts
 bodyPartToBlocks (Tbl _ _ _ []) =
   return $ para mempty
-bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
+bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do
   let cap' = simpleCaption $ plain $ text cap
       (hdr, rows) = case firstRowFormatting look of
         True | null rs -> (Nothing, [r])
@@ -669,7 +669,8 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
       -- so should be possible. Alignment might be more difficult,
       -- since there doesn't seem to be a column entity in docx.
   let alignments = replicate width AlignDefault
-      widths = replicate width ColWidthDefault
+      totalWidth = sum grid
+      widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
 
   return $ table cap'
                  (zip alignments widths)
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 7325ff300..978d6ff3a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -563,7 +563,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid
 elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
   let cols = findChildrenByName ns "w" "gridCol" element
   in
-   mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
+   mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger))
    cols
 elemToTblGrid _ _ = throwError WrongElem
 
diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs
index 2e0f1e3fb..da25b95e0 100644
--- a/test/Tests/Writers/Docx.hs
+++ b/test/Tests/Writers/Docx.hs
@@ -111,6 +111,11 @@ tests = [ testGroup "inlines"
             def
             "docx/tables.native"
             "docx/golden/tables.docx"
+          , docxTest
+            "tables without explicit column widths"
+            def
+            "docx/tables-default-widths.native"
+            "docx/golden/tables-default-widths.docx"
           , docxTest
             "tables with lists in cells"
             def
diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native
index 7f875891e..ed589b029 100644
--- a/test/docx/0_level_headers.native
+++ b/test/docx/0_level_headers.native
@@ -1,6 +1,6 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 1.0)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -49,4 +49,4 @@
 ,Para [Strong [Str "Table",Space,Str "Page"]]
 ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
 ,Header 1 ("introduction",[],[]) [Str "Introduction"]
-,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
\ No newline at end of file
+,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index f75e567ab..a7a8f2519 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index a49f70643..1362d4609 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables-default-widths.docx b/test/docx/golden/tables-default-widths.docx
new file mode 100644
index 000000000..f24e27516
Binary files /dev/null and b/test/docx/golden/tables-default-widths.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index f24e27516..9dcbbc9d0 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native
index dca82f0a0..a072c0d39 100644
--- a/test/docx/sdt_elements.native
+++ b/test/docx/sdt_elements.native
@@ -1,8 +1,8 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.22069570301081556)
+ ,(AlignDefault,ColWidth 0.22069570301081556)
+ ,(AlignDefault,ColWidth 0.5586085939783689)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
diff --git a/test/docx/table_one_row.native b/test/docx/table_one_row.native
index e9188b145..88d5e3af5 100644
--- a/test/docx/table_one_row.native
+++ b/test/docx/table_one_row.native
@@ -1,8 +1,8 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.3333333333333333)
+ ,(AlignDefault,ColWidth 0.3333333333333333)
+ ,(AlignDefault,ColWidth 0.3333333333333333)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -15,4 +15,4 @@
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "Table"]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native
index 229cb83b1..43ac40cca 100644
--- a/test/docx/table_variable_width.native
+++ b/test/docx/table_variable_width.native
@@ -1,10 +1,10 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 2.0096205237840725e-2)
+ ,(AlignDefault,ColWidth 1.9882415820416888e-2)
+ ,(AlignDefault,ColWidth 0.22202030999465527)
+ ,(AlignDefault,ColWidth 0.4761090326028862)
+ ,(AlignDefault,ColWidth 1.0689470871191876e-4)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -42,4 +42,4 @@
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     []]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/table_with_list_cell.native b/test/docx/table_with_list_cell.native
index 06d8606da..51a35184b 100644
--- a/test/docx/table_with_list_cell.native
+++ b/test/docx/table_with_list_cell.native
@@ -1,7 +1,7 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -22,4 +22,4 @@
      ,[Para [Str "A"]]
      ,[Para [Str "Numbered",Space,Str "list."]]]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/tables-default-widths.native b/test/docx/tables-default-widths.native
new file mode 100644
index 000000000..e541e5a6e
--- /dev/null
+++ b/test/docx/tables-default-widths.native
@@ -0,0 +1,92 @@
+[Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"]
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Name"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Game"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Fame"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Blame"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Lebron",Space,Str "James"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Basketball"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Very",Space,Str "High"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Leaving",Space,Str "Cleveland"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Ryan",Space,Str "Braun"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Baseball"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Moderate"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Steroids"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Russell",Space,Str "Wilson"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Football"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "High"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Tacky",Space,Str "uniform"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Sinple"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Table"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Without"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Header"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Simple"]
+    ,Para [Str "Multiparagraph"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Table"]
+    ,Para [Str "Full"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Of"]
+    ,Para [Str "Paragraphs"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "In",Space,Str "each"]
+    ,Para [Str "Cell."]]]])]
+ (TableFoot ("",[],[])
+ [])]
\ No newline at end of file
diff --git a/test/docx/tables.native b/test/docx/tables.native
index e541e5a6e..5a89496be 100644
--- a/test/docx/tables.native
+++ b/test/docx/tables.native
@@ -1,10 +1,10 @@
 [Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"]
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -48,8 +48,8 @@
  [])
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -68,8 +68,8 @@
  [])
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -89,4 +89,4 @@
     [Para [Str "In",Space,Str "each"]
     ,Para [Str "Cell."]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
-- 
cgit v1.2.3


From 56b211120c62a01f8aba1c4512acfe4677d8c7d0 Mon Sep 17 00:00:00 2001
From: Emily Bourke <undergroundquizscene@protonmail.com>
Date: Thu, 18 Jun 2020 09:53:32 +0100
Subject: Docx reader: Support new table features.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Column spans
* Row spans
  - The spec says that if the `val` attribute is ommitted, its value
    should be assumed to be `continue`, and that its values are
    restricted to {`restart`, `continue`}. If the value has any other
    value, I think it seems reasonable to default it to `continue`. It
    might cause problems if the spec is extended in the future by adding
    a third possible value, in which case this would probably give
    incorrect behaviour, and wouldn't error.
* Allow multiple header rows
* Include table description in simple caption
  - The table description element is like alt text for a table (along
    with the table caption element). It seems like we should include
    this somewhere, but I’m not 100% sure how – I’m pairing it with the
    simple caption for the moment. (Should it maybe go in the block
    caption instead?)
* Detect table captions
  - Check for caption paragraph style /and/ either the simple or
    complex table field. This means the caption detection fails for
    captions which don’t contain a field, as in an example doc I added
    as a test. However, I think it’s better to be too conservative: a
    missed table caption will still show up as a paragraph next to the
    table, whereas if I incorrectly classify something else as a table
    caption it could cause havoc by pairing it up with a table it’s
    not at all related to, or dropping it entirely.
* Update tests and add new ones

Partially fixes: #6316
---
 src/Text/Pandoc/Readers/Docx.hs            |  92 ++++++++------
 src/Text/Pandoc/Readers/Docx/Parse.hs      | 113 +++++++++++++++--
 src/Text/Pandoc/Readers/Docx/Util.hs       |   7 ++
 test/Tests/Readers/Docx.hs                 |  16 +++
 test/docx/sdt_elements.native              |  13 +-
 test/docx/table_captions_no_field.docx     | Bin 0 -> 40482 bytes
 test/docx/table_captions_no_field.native   |  34 ++++++
 test/docx/table_captions_with_field.docx   | Bin 0 -> 41091 bytes
 test/docx/table_captions_with_field.native |  54 +++++++++
 test/docx/table_header_rowspan.docx        | Bin 0 -> 15826 bytes
 test/docx/table_header_rowspan.native      | 189 +++++++++++++++++++++++++++++
 test/docx/table_one_header_row.docx        | Bin 0 -> 12185 bytes
 test/docx/table_one_header_row.native      |  18 +++
 test/docx/table_one_row.docx               | Bin 25251 -> 12148 bytes
 test/docx/table_variable_width.native      |  19 ++-
 15 files changed, 487 insertions(+), 68 deletions(-)
 create mode 100644 test/docx/table_captions_no_field.docx
 create mode 100644 test/docx/table_captions_no_field.native
 create mode 100644 test/docx/table_captions_with_field.docx
 create mode 100644 test/docx/table_captions_with_field.native
 create mode 100644 test/docx/table_header_rowspan.docx
 create mode 100644 test/docx/table_header_rowspan.native
 create mode 100644 test/docx/table_one_header_row.docx
 create mode 100644 test/docx/table_one_header_row.native

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 375bb7338..c06adf7e3 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -61,13 +61,14 @@ module Text.Pandoc.Readers.Docx
 import Codec.Archive.Zip
 import Control.Monad.Reader
 import Control.Monad.State.Strict
+import Data.Bifunctor (bimap, first)
 import qualified Data.ByteString.Lazy as B
 import Data.Default (Default)
-import Data.List (delete, intersect)
+import Data.List (delete, intersect, foldl')
 import Data.Char (isSpace)
 import qualified Data.Map as M
 import qualified Data.Text as T
-import Data.Maybe (isJust, fromMaybe)
+import Data.Maybe (catMaybes, isJust, fromMaybe)
 import Data.Sequence (ViewL (..), viewl)
 import qualified Data.Sequence as Seq
 import qualified Data.Set as Set
@@ -113,6 +114,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
                      -- restarting
                      , docxListState :: M.Map (T.Text, T.Text) Integer
                      , docxPrevPara  :: Inlines
+                     , docxTableCaptions :: [Blocks]
                      }
 
 instance Default DState where
@@ -123,6 +125,7 @@ instance Default DState where
                , docxDropCap   = mempty
                , docxListState = M.empty
                , docxPrevPara  = mempty
+               , docxTableCaptions = []
                }
 
 data DEnv = DEnv { docxOptions       :: ReaderOptions
@@ -491,15 +494,32 @@ singleParaToPlain blks
       singleton $ Plain ils
 singleParaToPlain blks = blks
 
-cellToBlocks :: PandocMonad m => Docx.Cell -> DocxContext m Blocks
-cellToBlocks (Docx.Cell bps) = do
+cellToCell :: PandocMonad m => RowSpan -> Docx.Cell -> DocxContext m Pandoc.Cell
+cellToCell rowSpan (Docx.Cell gridSpan _ bps) = do
   blks <- smushBlocks <$> mapM bodyPartToBlocks bps
-  return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+  let blks' = singleParaToPlain $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
+  return (cell AlignDefault rowSpan (ColSpan (fromIntegral gridSpan)) blks')
+
+rowsToRows :: PandocMonad m => [Docx.Row] -> DocxContext m [Pandoc.Row]
+rowsToRows rows = do
+  let rowspans = (fmap . fmap) (first RowSpan) (Docx.rowsToRowspans rows)
+  cells <- traverse (traverse (uncurry cellToCell)) rowspans
+  return (fmap (Pandoc.Row nullAttr) cells)
+
+splitHeaderRows :: Bool -> [Docx.Row] -> ([Docx.Row], [Docx.Row])
+splitHeaderRows hasFirstRowFormatting rs = bimap reverse reverse $ fst
+  $ if hasFirstRowFormatting
+    then foldl' f ((take 1 rs, []), True) (drop 1 rs)
+    else foldl' f (([], []), False) rs
+  where
+    f ((headerRows, bodyRows), previousRowWasHeader) r@(Docx.Row h cs)
+      | h == HasTblHeader || (previousRowWasHeader && any isContinuationCell cs)
+        = ((r : headerRows, bodyRows), True)
+      | otherwise
+        = ((headerRows, r : bodyRows), False)
+
+    isContinuationCell (Docx.Cell _ vm _) = vm == Docx.Continue
 
-rowToBlocksList :: PandocMonad m => Docx.Row -> DocxContext m [Blocks]
-rowToBlocksList (Docx.Row cells) = do
-  blksList <- mapM cellToBlocks cells
-  return $ map singleParaToPlain blksList
 
 -- like trimInlines, but also take out linebreaks
 trimSps :: Inlines -> Inlines
@@ -546,6 +566,11 @@ normalizeToClassName = T.map go . fromStyleName
   where go c | isSpace c = '-'
              | otherwise = c
 
+bodyPartToTableCaption :: PandocMonad m => BodyPart -> DocxContext m (Maybe Blocks)
+bodyPartToTableCaption (TblCaption pPr parparts) =
+  Just <$> bodyPartToBlocks (Paragraph pPr parparts)
+bodyPartToTableCaption _ = pure Nothing
+
 bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
 bodyPartToBlocks (Paragraph pPr parparts)
   | Just True <- pBidi pPr = do
@@ -637,50 +662,43 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
   let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
   in
     bodyPartToBlocks $ Paragraph pPr' parparts
+bodyPartToBlocks (TblCaption _ _) =
+  return $ para mempty -- collected separately
 bodyPartToBlocks (Tbl _ _ _ []) =
   return $ para mempty
-bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do
-  let cap' = simpleCaption $ plain $ text cap
-      (hdr, rows) = case firstRowFormatting look of
-        True | null rs -> (Nothing, [r])
-             | otherwise -> (Just r, rs)
-        False -> (Nothing, r:rs)
-
-  cells <- mapM rowToBlocksList rows
+bodyPartToBlocks (Tbl cap grid look parts) = do
+  captions <- gets docxTableCaptions
+  fullCaption <- case captions of
+    c : cs -> do
+      modify (\s -> s { docxTableCaptions = cs })
+      return c
+    [] -> return $ if T.null cap then mempty else plain (text cap)
+  let shortCaption = if T.null cap then Nothing else Just (toList (text cap))
+      cap' = caption shortCaption fullCaption
+      (hdr, rows) = splitHeaderRows (firstRowFormatting look) parts
 
   let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
       rowLength :: Docx.Row -> Int
-      rowLength (Docx.Row c) = length c
+      rowLength (Docx.Row _ c) = sum (fmap (\(Docx.Cell gridSpan _ _) -> fromIntegral gridSpan) c)
 
-  let toRow = Pandoc.Row nullAttr . map simpleCell
-      toHeaderRow l = [toRow l | not (null l)]
+  headerCells <- rowsToRows hdr
+  bodyCells <- rowsToRows rows
 
-  -- pad cells.  New Text.Pandoc.Builder will do that for us,
-  -- so this is for compatibility while we switch over.
-  let cells' = map (\row -> toRow $ take width (row ++ repeat mempty)) cells
-
-  hdrCells <- case hdr of
-    Just r' -> toHeaderRow <$> rowToBlocksList r'
-    Nothing -> return []
-
-      -- The two following variables (horizontal column alignment and
-      -- relative column widths) go to the default at the
-      -- moment. Width information is in the TblGrid field of the Tbl,
-      -- so should be possible. Alignment might be more difficult,
-      -- since there doesn't seem to be a column entity in docx.
+      -- Horizontal column alignment goes to the default at the moment. Getting
+      -- it might be difficult, since there doesn't seem to be a column entity
+      -- in docx.
   let alignments = replicate width AlignDefault
       totalWidth = sum grid
       widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
 
   return $ table cap'
                  (zip alignments widths)
-                 (TableHead nullAttr hdrCells)
-                 [TableBody nullAttr 0 [] cells']
+                 (TableHead nullAttr headerCells)
+                 [TableBody nullAttr 0 [] bodyCells]
                  (TableFoot nullAttr [])
 bodyPartToBlocks (OMathPara e) =
   return $ para $ displayMath (writeTeX e)
 
-
 -- replace targets with generated anchors.
 rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
 rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
@@ -716,6 +734,8 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
 bodyToOutput (Body bps) = do
   let (metabps, blkbps) = sepBodyParts bps
   meta <- bodyPartsToMeta metabps
+  captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps
+  modify (\s -> s { docxTableCaptions = captions })
   blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
   blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
   blks'' <- removeOrphanAnchors blks'
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 978d6ff3a..aaa8f4ad0 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -33,7 +33,9 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , ParStyle
                                       , CharStyle(cStyleData)
                                       , Row(..)
+                                      , TblHeader(..)
                                       , Cell(..)
+                                      , VMerge(..)
                                       , TrackedChange(..)
                                       , ChangeType(..)
                                       , ChangeInfo(..)
@@ -50,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
                                       , pHeading
                                       , constructBogusParStyleData
                                       , leftBiasedMergeRunStyle
+                                      , rowsToRowspans
                                       ) where
 import Text.Pandoc.Readers.Docx.Parse.Styles
 import Codec.Archive.Zip
@@ -225,6 +228,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
 data BodyPart = Paragraph ParagraphStyle [ParPart]
               | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
               | Tbl T.Text TblGrid TblLook [Row]
+              | TblCaption ParagraphStyle [ParPart]
               | OMathPara [Exp]
               deriving Show
 
@@ -236,12 +240,61 @@ newtype TblLook = TblLook {firstRowFormatting::Bool}
 defaultTblLook :: TblLook
 defaultTblLook = TblLook{firstRowFormatting = False}
 
-newtype Row = Row [Cell]
-           deriving Show
+data Row = Row TblHeader [Cell] deriving Show
+
+data TblHeader = HasTblHeader | NoTblHeader deriving (Show, Eq)
 
-newtype Cell = Cell [BodyPart]
+data Cell = Cell GridSpan VMerge [BodyPart]
             deriving Show
 
+type GridSpan = Integer
+
+data VMerge = Continue
+            -- ^ This cell should be merged with the one above it
+            | Restart
+            -- ^ This cell should not be merged with the one above it
+            deriving (Show, Eq)
+
+rowsToRowspans :: [Row] -> [[(Int, Cell)]]
+rowsToRowspans rows = let
+  removeMergedCells = fmap (filter (\(_, Cell _ vmerge _) -> vmerge == Restart))
+  in removeMergedCells (foldr f [] rows)
+  where
+    f :: Row -> [[(Int, Cell)]] -> [[(Int, Cell)]]
+    f (Row _ cells) acc = let
+      spans = g cells Nothing (listToMaybe acc)
+      in spans : acc
+
+    g ::
+      -- | The current row
+      [Cell] ->
+      -- | Number of columns left below
+      Maybe Integer ->
+      -- | (rowspan so far, cell) for the row below this one
+      Maybe [(Int, Cell)] ->
+      -- | (rowspan so far, cell) for this row
+      [(Int, Cell)]
+    g cells _ Nothing = zip (repeat 1) cells
+    g cells columnsLeftBelow (Just rowBelow) =
+        case cells of
+          [] -> []
+          thisCell@(Cell thisGridSpan _ _) : restOfRow -> case rowBelow of
+            [] -> zip (repeat 1) cells
+            (spanSoFarBelow, Cell gridSpanBelow vmerge _) : _ ->
+              let spanSoFar = case vmerge of
+                    Restart -> 1
+                    Continue -> 1 + spanSoFarBelow
+                  columnsToDrop = thisGridSpan + (gridSpanBelow - fromMaybe gridSpanBelow columnsLeftBelow)
+                  (newColumnsLeftBelow, restOfRowBelow) = dropColumns columnsToDrop rowBelow
+              in (spanSoFar, thisCell) : g restOfRow (Just newColumnsLeftBelow) (Just restOfRowBelow)
+
+    dropColumns :: Integer -> [(a, Cell)] -> (Integer, [(a, Cell)])
+    dropColumns n [] = (n, [])
+    dropColumns n cells@((_, Cell gridSpan _ _) : otherCells) =
+      if n < gridSpan
+      then (gridSpan - n, cells)
+      else dropColumns (n - gridSpan) otherCells
+
 leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle
 leftBiasedMergeRunStyle a b = RunStyle
     { isBold = isBold a <|> isBold b
@@ -587,14 +640,31 @@ elemToRow ns element | isElem ns "w" "tr" element =
   do
     let cellElems = findChildrenByName ns "w" "tc" element
     cells <- mapD (elemToCell ns) cellElems
-    return $ Row cells
+    let hasTblHeader = maybe NoTblHeader (const HasTblHeader)
+          (findChildByName ns "w" "trPr" element
+           >>= findChildByName ns "w" "tblHeader")
+    return $ Row hasTblHeader cells
 elemToRow _ _ = throwError WrongElem
 
 elemToCell :: NameSpaces -> Element -> D Cell
 elemToCell ns element | isElem ns "w" "tc" element =
   do
+    let properties = findChildByName ns "w" "tcPr" element
+    let gridSpan = properties
+                     >>= findChildByName ns "w" "gridSpan"
+                     >>= findAttrByName ns "w" "val"
+                     >>= stringToInteger
+    let vMerge = case properties >>= findChildByName ns "w" "vMerge" of
+                   Nothing -> Restart
+                   Just e ->
+                     fromMaybe Continue $ do
+                       s <- findAttrByName ns "w" "val" e
+                       case s of
+                         "continue" -> Just Continue
+                         "restart" -> Just Restart
+                         _ -> Nothing
     cellContents <- mapD (elemToBodyPart ns) (elChildren element)
-    return $ Cell cellContents
+    return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents
 elemToCell _ _ = throwError WrongElem
 
 elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
@@ -626,10 +696,9 @@ pNumInfo = getParStyleField numInfo . pStyle
 elemToBodyPart :: NameSpaces -> Element -> D BodyPart
 elemToBodyPart ns element
   | isElem ns "w" "p" element
-  , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
-      do
-        expsLst <- eitherToD $ readOMML $ showElement c
-        return $ OMathPara expsLst
+  , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do
+      expsLst <- eitherToD $ readOMML $ showElement c
+      return $ OMathPara expsLst
 elemToBodyPart ns element
   | isElem ns "w" "p" element
   , Just (numId, lvl) <- getNumInfo ns element = do
@@ -647,13 +716,31 @@ elemToBodyPart ns element
         Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
                     levelInfo <- lookupLevel numId lvl <$> asks envNumbering
                     return $ ListItem parstyle numId lvl levelInfo parparts
-        _ -> return $ Paragraph parstyle parparts
+        _ -> let
+          hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+          hasSimpleTableField = fromMaybe False $ do
+            fldSimple <- findChildByName ns "w" "fldSimple" element
+            instr <- findAttrByName ns "w" "instr" fldSimple
+            pure ("Table" `elem` T.words instr)
+
+          hasComplexTableField = fromMaybe False $ do
+            instrText <- findElementByName ns "w" "instrText" element
+            pure ("Table" `elem` T.words (strContent instrText))
+
+          in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
+             then return $ TblCaption parstyle parparts
+             else return $ Paragraph parstyle parparts
+
 elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
-    let caption' = findChildByName ns "w" "tblPr" element
+    let tblProperties = findChildByName ns "w" "tblPr" element
+        caption = fromMaybe "" $ tblProperties
                    >>= findChildByName ns "w" "tblCaption"
                    >>= findAttrByName ns "w" "val"
-        caption = fromMaybe "" caption'
+        description = fromMaybe "" $ tblProperties
+                       >>= findChildByName ns "w" "tblDescription"
+                       >>= findAttrByName ns "w" "val"
         grid' = case findChildByName ns "w" "tblGrid" element of
           Just g  -> elemToTblGrid ns g
           Nothing -> return []
@@ -666,7 +753,7 @@ elemToBodyPart ns element
     grid <- grid'
     tblLook <- tblLook'
     rows <- mapD (elemToRow ns) (elChildren element)
-    return $ Tbl caption grid tblLook rows
+    return $ Tbl (caption <> description) grid tblLook rows
 elemToBodyPart _ _ = throwError WrongElem
 
 lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index ac331cba6..970697a2d 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -19,6 +19,7 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , elemToNameSpaces
                                       , findChildByName
                                       , findChildrenByName
+                                      , findElementByName
                                       , findAttrByName
                                       ) where
 
@@ -56,6 +57,12 @@ findChildrenByName ns pref name el =
   let ns' = ns <> elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
+-- | Like 'findChildrenByName', but searches descendants.
+findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
+findElementByName ns pref name el =
+  let ns' = ns <> elemToNameSpaces el
+  in  findElement (elemName ns' pref name) el
+
 findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
   let ns' = ns <> elemToNameSpaces el
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
index 939ff9939..220c7d9c5 100644
--- a/test/Tests/Readers/Docx.hs
+++ b/test/Tests/Readers/Docx.hs
@@ -317,14 +317,30 @@ tests = [ testGroup "document"
             "tables with lists in cells"
             "docx/table_with_list_cell.docx"
             "docx/table_with_list_cell.native"
+          , testCompare
+            "a table with a header which contains rowspans greater than 1"
+            "docx/table_header_rowspan.docx"
+            "docx/table_header_rowspan.native"
           , testCompare
             "tables with one row"
             "docx/table_one_row.docx"
             "docx/table_one_row.native"
+          , testCompare
+            "tables with just one row, which is a header"
+            "docx/table_one_header_row.docx"
+            "docx/table_one_header_row.native"
           , testCompare
             "tables with variable width"
             "docx/table_variable_width.docx"
             "docx/table_variable_width.native"
+          , testCompare
+            "tables with captions which contain a Table field"
+            "docx/table_captions_with_field.docx"
+            "docx/table_captions_with_field.native"
+          , testCompare
+            "tables with captions which don't contain a Table field"
+            "docx/table_captions_no_field.docx"
+            "docx/table_captions_no_field.native"
           , testCompare
             "code block"
             "docx/codeblock.docx"
diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native
index a072c0d39..d2aa00994 100644
--- a/test/docx/sdt_elements.native
+++ b/test/docx/sdt_elements.native
@@ -4,17 +4,16 @@
  ,(AlignDefault,ColWidth 0.22069570301081556)
  ,(AlignDefault,ColWidth 0.5586085939783689)]
  (TableHead ("",[],[])
- [])
- [(TableBody ("",[],[]) (RowHeadColumns 0)
-  []
-  [Row ("",[],[])
+ [Row ("",[],[])
    [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Strong [Str "col1Header"]]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Strong [Str "col2Header"]]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-    [Plain [Strong [Str "col3Header"]]]]
-  ,Row ("",[],[])
+    [Plain [Strong [Str "col3Header"]]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
    [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "col1",Space,Str "content"]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -22,4 +21,4 @@
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "col3",Space,Str "content"]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/table_captions_no_field.docx b/test/docx/table_captions_no_field.docx
new file mode 100644
index 000000000..1687d32a2
Binary files /dev/null and b/test/docx/table_captions_no_field.docx differ
diff --git a/test/docx/table_captions_no_field.native b/test/docx/table_captions_no_field.native
new file mode 100644
index 000000000..b8f54d541
--- /dev/null
+++ b/test/docx/table_captions_no_field.native
@@ -0,0 +1,34 @@
+[Para [Str "See",Space,Str "Table",Space,Str "5.1."]
+,Para [Str "Table",Space,Str "5.1"]
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidth 0.7605739372523825)
+ ,(AlignDefault,ColWidth 0.11971303137380876)
+ ,(AlignDefault,ColWidth 0.11971303137380876)]
+ (TableHead ("",[],[])
+[Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  []
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "Count"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "%"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "First",Space,Str "option"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "242"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "45"]]]
+,Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "Second",Space,Str "option"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "99"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "18"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Header 2 ("section", [], []) []]
diff --git a/test/docx/table_captions_with_field.docx b/test/docx/table_captions_with_field.docx
new file mode 100644
index 000000000..db6de3088
Binary files /dev/null and b/test/docx/table_captions_with_field.docx differ
diff --git a/test/docx/table_captions_with_field.native b/test/docx/table_captions_with_field.native
new file mode 100644
index 000000000..deb8afc6b
--- /dev/null
+++ b/test/docx/table_captions_with_field.native
@@ -0,0 +1,54 @@
+[Para [Str "See",Space,Str "Table",Space,Str "1."]
+,Para []
+,Table ("",[],[]) (Caption Nothing
+ [Para [Str "Table",Space,Str "1"]])
+ [(AlignDefault,ColWidth 0.7605739372523825)
+ ,(AlignDefault,ColWidth 0.11971303137380876)
+ ,(AlignDefault,ColWidth 0.11971303137380876)]
+ (TableHead ("",[],[])
+[Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  []
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "Count"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "%"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "First",Space,Str "option"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "242"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "45"]]]
+,Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "Second",Space,Str "option"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "99"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Str "18"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Header 2 ("section", [], []) []
+,Table ("",[],[]) (Caption Nothing
+ [Para [Str "Table",Space,Str "2"]])
+ [(AlignDefault,ColWidth 0.3332963620230701)
+ ,(AlignDefault,ColWidth 0.3332963620230701)
+ ,(AlignDefault,ColWidth 0.3334072759538598)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "One"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Two"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Three"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [])]
+ (TableFoot ("",[],[])
+ [])
+,Para []
+,Para [Str "See",Space,Str "Table",Space,Str "2."]]
diff --git a/test/docx/table_header_rowspan.docx b/test/docx/table_header_rowspan.docx
new file mode 100644
index 000000000..1cc32a105
Binary files /dev/null and b/test/docx/table_header_rowspan.docx differ
diff --git a/test/docx/table_header_rowspan.native b/test/docx/table_header_rowspan.native
new file mode 100644
index 000000000..d951f29e4
--- /dev/null
+++ b/test/docx/table_header_rowspan.native
@@ -0,0 +1,189 @@
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidth 0.30701754385964913)
+ ,(AlignDefault,ColWidth 0.1364522417153996)
+ ,(AlignDefault,ColWidth 0.10009746588693957)
+ ,(AlignDefault,ColWidth 9.707602339181287e-2)
+ ,(AlignDefault,ColWidth 7.719298245614035e-2)
+ ,(AlignDefault,ColWidth 7.085769980506823e-2)
+ ,(AlignDefault,ColWidth 7.09551656920078e-2)
+ ,(AlignDefault,ColWidth 0.14035087719298245)]
+ (TableHead ("",[],[])
+[Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Str "A"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Strong [Str "B"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Strong [Str "C"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Strong [Str "D"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 3)
+  [Plain [Str "E"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1)
+  [Plain [Str "F"]]]
+,Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Strong [Str "G"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Strong [Str "H"]]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  [Plain [Strong [Str "I"]]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "1"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "2"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "3"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "4"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "5"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "6"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "7"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "8"]]]
+  ])]
+ (TableFoot ("",[],[])
+ [])]
diff --git a/test/docx/table_one_header_row.docx b/test/docx/table_one_header_row.docx
new file mode 100644
index 000000000..db715dda8
Binary files /dev/null and b/test/docx/table_one_header_row.docx differ
diff --git a/test/docx/table_one_header_row.native b/test/docx/table_one_header_row.native
new file mode 100644
index 000000000..4aae830ac
--- /dev/null
+++ b/test/docx/table_one_header_row.native
@@ -0,0 +1,18 @@
+[Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidth 0.33302433371958284)
+ ,(AlignDefault,ColWidth 0.3332560834298957)
+ ,(AlignDefault,ColWidth 0.33371958285052145)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "One"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Row"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Table"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [])]
+ (TableFoot ("",[],[])
+ [])]
diff --git a/test/docx/table_one_row.docx b/test/docx/table_one_row.docx
index f7e0ebe43..d05a856b5 100644
Binary files a/test/docx/table_one_row.docx and b/test/docx/table_one_row.docx differ
diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native
index 43ac40cca..ff1cc0dc4 100644
--- a/test/docx/table_variable_width.native
+++ b/test/docx/table_variable_width.native
@@ -4,7 +4,8 @@
  ,(AlignDefault,ColWidth 1.9882415820416888e-2)
  ,(AlignDefault,ColWidth 0.22202030999465527)
  ,(AlignDefault,ColWidth 0.4761090326028862)
- ,(AlignDefault,ColWidth 1.0689470871191876e-4)]
+ ,(AlignDefault,ColWidth 1.0689470871191876e-4)
+ ,(AlignDefault,ColWidth 0.26178514163548905)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -13,33 +14,27 @@
    []
   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
    [Plain [Str "h3"]]
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2)
    [Plain [Str "h4"]]
   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
    [Plain [Str "h5"]]]])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
-   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 3)
     [Plain [Str "c11"]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-    []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-    []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2)
     []]
   ,Row ("",[],[])
    [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2)
     [Plain [Str "c22"]]
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "c23"]]
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-    []
-   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2)
     []]])]
  (TableFoot ("",[],[])
  [])]
-- 
cgit v1.2.3


From b6b2331fdcee37f1bfb3fcc21816d73d6d56cfae Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 28 May 2021 13:58:44 -0700
Subject: Support `rebase_relative_paths` for commonmark based formats.

(Including `gfm`.)
---
 src/Text/Pandoc/Extensions.hs         |  1 +
 src/Text/Pandoc/Readers/CommonMark.hs |  4 +++-
 test/command/3752.md                  | 16 ++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index c4d54c06e..9c55d0a7a 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -517,6 +517,7 @@ getAllExtensions f = universalExtensions <> getAll f
     , Ext_attributes
     , Ext_sourcepos
     , Ext_yaml_metadata_block
+    , Ext_rebase_relative_paths
     ]
   getAll "commonmark_x"    = getAll "commonmark"
   getAll "org"             = autoIdExtensions <>
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index b099a9b50..228e65312 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -112,5 +112,7 @@ specFor opts = foldr ($) defaultSyntaxSpec exts
            | isEnabled Ext_implicit_header_references opts ] ++
          [ (footnoteSpec <>) | isEnabled Ext_footnotes opts ] ++
          [ (definitionListSpec <>) | isEnabled Ext_definition_lists opts ] ++
-         [ (taskListSpec <>) | isEnabled Ext_task_lists opts ]
+         [ (taskListSpec <>) | isEnabled Ext_task_lists opts ] ++
+         [ (rebaseRelativePathsSpec <>)
+           | isEnabled Ext_rebase_relative_paths opts ]
 
diff --git a/test/command/3752.md b/test/command/3752.md
index df8af0ba1..2e96b531e 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -42,3 +42,19 @@ A spider: [spider]
 <h1 id="chapter-two">Chapter two</h1>
 <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
 ```
+
+```
+% pandoc command/chap1/text.md command/chap2/text.md -f commonmark+rebase_relative_paths -t html
+^D
+<h1>Chapter one</h1>
+<p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p>
+<p>Another spider: <img src="command/chap2/spider.png" alt="another spider" /></p>
+<p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p>
+<p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
+<p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
+<p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
+<p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p>
+<p>Empty path: <a href="">empty</a>.</p>
+<h1>Chapter two</h1>
+<p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
+```
-- 
cgit v1.2.3


From 0d7103de7e893002eedcc50fea7f81aae9535106 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 29 May 2021 14:41:28 -0700
Subject: In rebasePath, check for absolute paths two ways.

isAbsolute from FilePath doesn't return True on Windows
for paths beginning with `/`, so we check that separately.
---
 src/Text/Pandoc/Readers/Markdown.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index bc5e3e30f..9a5e0889e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1924,7 +1924,10 @@ rebasePath :: SourcePos -> Text -> Text
 rebasePath pos path = do
   let fp = sourceName pos
       isFragment = T.take 1 path == "#"
-   in if T.null path || isFragment || isAbsolute (T.unpack path) || isURI path
+      -- check for leading / because on Windows this won't be
+      -- recognized as absolute by isAbsolute
+      isAbsolutePath = isAbsolute (T.unpack path) || T.take 1 path == "/"
+   in if T.null path || isFragment || isAbsolutePath || isURI path
          then path
          else
            case takeDirectory fp of
-- 
cgit v1.2.3


From cc6dcf03928404c08cfb3075a90d814b25fd7c69 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 29 May 2021 17:36:30 -0700
Subject: Markdown reader: in rebasePaths, check for both Windows and Posix

absolute paths.  Previously Windows pandoc was treating
`/foo/bar.jpg` as non-absolute.
---
 src/Text/Pandoc/Readers/Markdown.hs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 9a5e0889e..1761ee2c1 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -29,7 +29,9 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.ByteString.Lazy as BL
-import System.FilePath (addExtension, takeExtension, isAbsolute, takeDirectory)
+import System.FilePath (addExtension, takeExtension, takeDirectory)
+import qualified System.FilePath.Windows as Windows
+import qualified System.FilePath.Posix as Posix
 import Text.HTML.TagSoup hiding (Row)
 import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
@@ -1924,9 +1926,8 @@ rebasePath :: SourcePos -> Text -> Text
 rebasePath pos path = do
   let fp = sourceName pos
       isFragment = T.take 1 path == "#"
-      -- check for leading / because on Windows this won't be
-      -- recognized as absolute by isAbsolute
-      isAbsolutePath = isAbsolute (T.unpack path) || T.take 1 path == "/"
+      path' = T.unpack path
+      isAbsolutePath = Posix.isAbsolute path' || Windows.isAbsolute path'
    in if T.null path || isFragment || isAbsolutePath || isURI path
          then path
          else
-- 
cgit v1.2.3


From c2f46e6df4a80a313ab4329b740770dbdfbe1578 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 30 May 2021 10:07:28 -0700
Subject: Docx writer: fix regression on captions.

The "Table Caption" style was no longer getting applied.
(It was overwritten by "Compact.")

Closes #7328.
---
 src/Text/Pandoc/Writers/Docx/Table.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index e7fc82a10..13cb31040 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -33,7 +33,6 @@ tableToOpenXML :: PandocMonad m
                -> WS m [Content]
 tableToOpenXML blocksToOpenXML gridTable = do
   setFirstPara
-  modify $ \s -> s { stInTable = True }
   let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) =
         gridTable
   let (Caption _maybeShortCaption captionBlocks) = caption
@@ -43,6 +42,9 @@ tableToOpenXML blocksToOpenXML gridTable = do
                 then return []
                 else withParaPropM (pStyleM "Table Caption")
                      $ blocksToOpenXML captionBlocks
+  -- We set "in table" after processing the caption, because we don't
+  -- want the "Table Caption" style to be overwritten with "Compact".
+  modify $ \s -> s { stInTable = True }
   head' <- cellGridToOpenXML blocksToOpenXML HeadRow aligns thead
   bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow aligns) tbodies
   foot' <- cellGridToOpenXML blocksToOpenXML FootRow aligns tfoot
-- 
cgit v1.2.3


From cc206af392a40dd7b01b714ae7f33b2fbf4925cc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 30 May 2021 10:22:02 -0700
Subject: Have LoadedResource use relative paths.

The immediate reason for this is to allow the test output of #3752
to work on both windows and linux.
---
 src/Text/Pandoc/Class/PandocMonad.hs | 4 ++--
 test/command/3752.md                 | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index b5f401619..4eb80df29 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -66,7 +66,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo,
                      unEscapeString, parseURIReference, isAllowedInURI,
                      parseURI, URI(..) )
 import System.FilePath ((</>), takeExtension, dropExtension,
-                        isRelative, splitDirectories)
+                        isRelative, splitDirectories, makeRelative)
 import System.Random (StdGen)
 import Text.Collate.Lang (Lang(..), parseLang, renderLang)
 import Text.Pandoc.Class.CommonState (CommonState (..))
@@ -413,7 +413,7 @@ downloadOrRead s = do
              (fp', cont) <- if isRelative f
                                then withPaths resourcePath readFileStrict f
                                else (f,) <$> readFileStrict f
-             report $ LoadedResource f fp'
+             report $ LoadedResource f (makeRelative "." fp')
              return (cont, mime)
          httpcolon = URI{ uriScheme = "http:",
                           uriAuthority = Nothing,
diff --git a/test/command/3752.md b/test/command/3752.md
index 6ac025ebe..863e3f2d4 100644
--- a/test/command/3752.md
+++ b/test/command/3752.md
@@ -1,9 +1,9 @@
 ```
 % pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx -o - | pandoc -f docx -t plain
 ^D
-[INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png
-[INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png
-[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
+[INFO] Loaded command/chap1/spider.png from command/chap1/spider.png
+[INFO] Loaded command/chap2/spider.png from command/chap2/spider.png
+[INFO] Loaded command/chap1/../../lalune.jpg from command/chap1/../../lalune.jpg
 Chapter one
 
 A spider: [spider]
-- 
cgit v1.2.3


From fc70f44ee2814914c0d32f6dff30fb36cc51bf11 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 30 May 2021 17:15:14 -0700
Subject: HTML reader: fix column width regression.

Column widths specified with a style attribute were
off by a factor of 100 in 2.14.

Closes #7334.
---
 src/Text/Pandoc/Readers/HTML/Table.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index 3a569dd0a..6e62e12f5 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -49,7 +49,7 @@ pCol = try $ do
   return $ case lookup "width" attribs of
                 Nothing -> case lookup "style" attribs of
                   Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    maybe (Right ColWidthDefault) (Right . ColWidth)
+                    maybe (Right ColWidthDefault) (Right . ColWidth . (/ 100.0))
                       $ safeRead (T.filter
                                    (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
                   _ -> Right ColWidthDefault
-- 
cgit v1.2.3


From 62f46b3995425c9a3ec87cba0eb8a4d736adec07 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 31 May 2021 21:34:51 -0600
Subject: Fix regression with commonmark/gfm yaml metdata block parsing.

A regression in 2.14 led to the document body being omitted
after YAML metadata in some cases.  This is now fixed.

Closes #7339.
---
 src/Text/Pandoc/Readers/CommonMark.hs | 10 +++++-----
 test/command/7339.md                  | 11 +++++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)
 create mode 100644 test/command/7339.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 228e65312..411d64278 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -30,7 +30,7 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock)
 import Control.Monad.Except
 import Data.Functor.Identity (runIdentity)
 import Data.Typeable
-import Text.Pandoc.Parsing (runParserT, getPosition,
+import Text.Pandoc.Parsing (runParserT, getInput,
                             runF, defaultParserState, option, many1, anyChar,
                             Sources(..), ToSources(..), ParserT, Future,
                             sourceName)
@@ -44,14 +44,14 @@ readCommonMark opts s
     let sources = toSources s
     let toks = concatMap sourceToToks (unSources sources)
     res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts)
-                          pos <- getPosition
-                          return (meta, pos))
+                          rest <- getInput
+                          return (meta, rest))
                       defaultParserState "YAML metadata" (toSources s)
     case res of
       Left _ -> readCommonMarkBody opts sources toks
-      Right (meta, pos) -> do
+      Right (meta, rest) -> do
         -- strip off metadata section and parse body
-        let body = dropWhile (\t -> tokPos t < pos) toks
+        let body = concatMap sourceToToks (unSources rest)
         Pandoc _ bs <- readCommonMarkBody opts sources body
         return $ Pandoc (runF meta defaultParserState) bs
   | otherwise = do
diff --git a/test/command/7339.md b/test/command/7339.md
new file mode 100644
index 000000000..9697c1c32
--- /dev/null
+++ b/test/command/7339.md
@@ -0,0 +1,11 @@
+```
+% pandoc -f gfm -s -t native
+---
+title: Test
+---
+
+Hi
+^D
+Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Test"])]})
+[Para [Str "Hi"]]
+```
-- 
cgit v1.2.3


From abb59bd58222c67fd36a8e447c01de3404a7ed1d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 1 Jun 2021 13:54:51 -0600
Subject: LaTeX reader: don't allow optional * on symbol control sequences.

Generally we allow optional starred variants of LaTeX commands
(since many allow them, and if we don't accept these explicitly,
ignoring the star usually gives acceptable results).  But we
don't want to do this for `\(*\)` and similar cases.

Closes #7340.
---
 src/Text/Pandoc/Readers/LaTeX.hs | 6 ++++--
 test/command/7340.md             | 6 ++++++
 2 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 test/command/7340.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 2ace18d1b..9e14c159a 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -24,7 +24,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX,
 import Control.Applicative (many, optional, (<|>))
 import Control.Monad
 import Control.Monad.Except (throwError)
-import Data.Char (isDigit, isLetter, toUpper, chr)
+import Data.Char (isDigit, isLetter, isAlphaNum, toUpper, chr)
 import Data.Default
 import Data.List (intercalate)
 import qualified Data.Map as M
@@ -300,7 +300,9 @@ inlineCommand' :: PandocMonad m => LP m Inlines
 inlineCommand' = try $ do
   Tok _ (CtrlSeq name) cmd <- anyControlSeq
   guard $ name /= "begin" && name /= "end" && name /= "and"
-  star <- option "" ("*" <$ symbol '*' <* sp)
+  star <- if T.all isAlphaNum name
+             then option "" ("*" <$ symbol '*' <* sp)
+             else pure ""
   overlay <- option "" overlaySpecification
   let name' = name <> star <> overlay
   let names = ordNub [name', name] -- check non-starred as fallback
diff --git a/test/command/7340.md b/test/command/7340.md
new file mode 100644
index 000000000..25decd732
--- /dev/null
+++ b/test/command/7340.md
@@ -0,0 +1,6 @@
+```
+% pandoc -f latex -t native
+\(*\)
+^D
+[Para [Math InlineMath "*"]]
+```
-- 
cgit v1.2.3


From 2e4ef14d9112f230e336a1f3767f87293bdfb73e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 1 Jun 2021 21:44:55 -0600
Subject: Markdown reader: fix pipe table regression in 2.11.4.

Previously pipe tables with empty headers (that is, a header
line with all empty cells) would be rendered as headerless
tables.  This broke in 2.11.4.

The fix here is to produce an AST with an empty table head
when a pipe table has all empty header cells.

Closes #7343.
---
 src/Text/Pandoc/Readers/Markdown.hs |  2 +-
 test/pipe-tables.native             | 12 ++----------
 2 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1761ee2c1..1e9867d07 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -2254,4 +2254,4 @@ toRow :: [Blocks] -> Row
 toRow = Row nullAttr . map B.simpleCell
 
 toHeaderRow :: [Blocks] -> [Row]
-toHeaderRow l = [toRow l | not (null l)]
+toHeaderRow l = [toRow l | not (null l) && not (all null l)]
diff --git a/test/pipe-tables.native b/test/pipe-tables.native
index 557cd0642..249eec17e 100644
--- a/test/pipe-tables.native
+++ b/test/pipe-tables.native
@@ -131,13 +131,7 @@
  ,(AlignLeft,ColWidthDefault)
  ,(AlignCenter,ColWidthDefault)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []
-  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
@@ -213,9 +207,7 @@
  [])
  [(AlignCenter,ColWidthDefault)]
  (TableHead ("",[],[])
- [Row ("",[],[])
-  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
-   []]])
+ [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
   []
   [Row ("",[],[])
-- 
cgit v1.2.3


From 3b628f7664a95e6f3b3d7da177c83333ec2bc0fa Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 1 Jun 2021 21:57:49 -0600
Subject: HTML writer: Don't omit width attribute on div.

Closes #7342.
---
 src/Text/Pandoc/Writers/HTML.hs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index f7a387927..b1dd9a659 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -770,9 +770,10 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
                                            lookup "entry-spacing" kvs' >>=
                                            safeRead }
   let isCslBibEntry = "csl-entry" `elem` classes
-  let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++
-            [("style", "width:" <> w <> ";") | "column" `elem` classes,
-             ("width", w) <- kvs'] ++
+  let kvs = [(k,v) | (k,v) <- kvs'
+                   , k /= "width" || "column" `notElem` classes] ++
+            [("style", "width:" <> w <> ";") | "column" `elem` classes
+                                             , ("width", w) <- kvs'] ++
             [("role", "doc-bibliography") | isCslBibBody && html5] ++
             [("role", "doc-biblioentry") | isCslBibEntry && html5]
   let speakerNotes = "notes" `elem` classes
-- 
cgit v1.2.3


From 2b5dad9912de659424246657c91f70417590c3fe Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 2 Jun 2021 10:42:22 -0600
Subject: Fix regression in 2.14 for generation of PDFs with SVGs.

Closes #7344.
---
 src/Text/Pandoc/PDF.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 6f462aad5..7fce17cea 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -202,7 +202,7 @@ convertImage opts tmpdir fname = do
     Just "image/svg+xml" -> E.catch (do
       (exit, _) <- pipeProcess Nothing "rsvg-convert"
                      ["-f","pdf","-a","--dpi-x",dpi,"--dpi-y",dpi,
-                      "-o",pdfOut,fname] BL.empty
+                      "-o",pdfOut,svgIn] BL.empty
       if exit == ExitSuccess
          then return $ Right pdfOut
          else return $ Left "conversion from SVG failed")
@@ -217,6 +217,7 @@ convertImage opts tmpdir fname = do
   where
     pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir
     pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir
+    svgIn = tmpdir </> fname
     mime = getMimeType fname
     doNothing = return (Right fname)
 
-- 
cgit v1.2.3


From 311736fb0ae21e5b5c78d1ad6fa4b3f4840941f9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 2 Jun 2021 15:21:13 -0600
Subject: Text.Pandoc.PDF: only print relevant part of environment on
 `--verbose`.

---
 src/Text/Pandoc/PDF.hs | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 7fce17cea..f85ef5b1f 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -507,8 +507,20 @@ showVerboseInfo mbTmpDir program programArgs env source = do
   UTF8.hPutStrLn stderr $
        T.pack program <> " " <> T.pack (unwords (map show programArgs))
   UTF8.hPutStr stderr "\n"
-  UTF8.hPutStrLn stderr "[makePDF] Environment:"
-  mapM_ (UTF8.hPutStrLn stderr . tshow) env
+  UTF8.hPutStrLn stderr "[makePDF] Relevant environment variables:"
+  -- we filter out irrelevant stuff to avoid leaking passwords and keys!
+  let isRelevant ("PATH",_) = True
+      isRelevant ("TMPDIR",_) = True
+      isRelevant ("PWD",_) = True
+      isRelevant ("LANG",_) = True
+      isRelevant ("HOME",_) = True
+      isRelevant ("LUA_PATH",_) = True
+      isRelevant ("LUA_CPATH",_) = True
+      isRelevant ("SHELL",_) = True
+      isRelevant ("TEXINPUTS",_) = True
+      isRelevant ("TEXMFOUTPUT",_) = True
+      isRelevant _ = False
+  mapM_ (UTF8.hPutStrLn stderr . tshow) (filter isRelevant env)
   UTF8.hPutStr stderr "\n"
   UTF8.hPutStrLn stderr "[makePDF] Source:"
   UTF8.hPutStrLn stderr source
-- 
cgit v1.2.3


From b6c04383e403b0962db09e6748760d3ec376f2ed Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 3 Jun 2021 18:34:38 -0600
Subject: T.P.Class.IO: normalise path in writeMedia.

This ensures that we get `\` separators on Windows.
---
 src/Text/Pandoc/Class/IO.hs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index f12c0a938..6df39d4d0 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -211,13 +211,12 @@ writeMedia :: (PandocMonad m, MonadIO m)
            => FilePath -> MediaBag -> FilePath
            -> m ()
 writeMedia dir mediabag subpath = do
-  -- we join and split to convert a/b/c to a\b\c on Windows;
-  -- in zip containers all paths use /
   let mbcontents = lookupMedia subpath mediabag
   case mbcontents of
        Nothing -> throwError $ PandocResourceNotFound $ pack subpath
        Just item -> do
-         let fullpath = dir </> mediaPath item
+         -- we normalize to get proper path separators for the platform
+         let fullpath = dir </> normalise (mediaPath item)
          liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
          logIOError $ BL.writeFile fullpath $ mediaContents item
 
-- 
cgit v1.2.3


From af9de925de4a441f4c45a977363f9a56589f57bc Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sat, 5 Jun 2021 14:16:44 +0200
Subject: DocBook writer: Remove non-existent admonitions

attention, error and hint are actually just reStructuredText specific.
danger was too until introduced in DocBook 5.2: https://github.com/docbook/docbook/issues/55
---
 src/Text/Pandoc/Writers/Docbook.hs |  3 +--
 test/Tests/Writers/Docbook.hs      | 12 ++++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 25bd308bf..33a6f5f0c 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -198,8 +198,7 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs))
 blockToDocbook opts (Div (ident,classes,_) bs) = do
   version <- ask
   let identAttribs = [(idName version, ident) | not (T.null ident)]
-      admonitions = ["attention","caution","danger","error","hint",
-                     "important","note","tip","warning"]
+      admonitions = ["caution","danger","important","note","tip","warning"]
   case classes of
     (l:_) | l `elem` admonitions -> do
         let (mTitleBs, bodyBs) =
diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs
index 46203eeae..f517f803a 100644
--- a/test/Tests/Writers/Docbook.hs
+++ b/test/Tests/Writers/Docbook.hs
@@ -83,32 +83,32 @@ tests = [ testGroup "line blocks"
                                     , "</warning>"
                                     ]
           , "admonition-with-title" =:
-                            divWith ("foo", ["attention"], []) (
+                            divWith ("foo", ["note"], []) (
                               divWith ("foo", ["title"], [])
                                 (plain (text "This is title")) <>
                               para "This is a test"
                             )
                               =?> unlines
-                                    [ "<attention id=\"foo\">"
+                                    [ "<note id=\"foo\">"
                                     , "  <title>This is title</title>"
                                     , "  <para>"
                                     , "    This is a test"
                                     , "  </para>"
-                                    , "</attention>"
+                                    , "</note>"
                                     ]
           , "admonition-with-title-in-para" =:
-                            divWith ("foo", ["attention"], []) (
+                            divWith ("foo", ["note"], []) (
                               divWith ("foo", ["title"], [])
                                 (para "This is title") <>
                               para "This is a test"
                             )
                               =?> unlines
-                                    [ "<attention id=\"foo\">"
+                                    [ "<note id=\"foo\">"
                                     , "  <title>This is title</title>"
                                     , "  <para>"
                                     , "    This is a test"
                                     , "  </para>"
-                                    , "</attention>"
+                                    , "</note>"
                                     ]
           , "single-child" =:
                             divWith ("foo", [], []) (para "This is a test")
-- 
cgit v1.2.3


From c8ab8bccf25ad01b9a98b2e59c84c151c5b0c371 Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sat, 5 Jun 2021 14:26:55 +0200
Subject: DocBook reader: Add support for danger element

Added in DocBook 5.2:

- https://github.com/docbook/docbook/pull/64
- https://tdg.docbook.org/tdg/5.2/danger.html
---
 src/Text/Pandoc/Readers/DocBook.hs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index b01ad3252..6ac1c99f9 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -134,6 +134,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [ ] corpcredit - A corporation or organization credited in a document
 [ ] corpname - The name of a corporation
 [ ] country - The name of a country
+[x] danger - An admonition set off from the text indicating hazardous situation
 [ ] database - The name of a database, or part of a database
 [x] date - The date of publication or revision of a document
 [ ] dedication - A wrapper for the dedication section of a book
@@ -718,7 +719,7 @@ blockTags =
   ] ++ admonitionTags
 
 admonitionTags :: [Text]
-admonitionTags = ["important","caution","note","tip","warning"]
+admonitionTags = ["caution","danger","important","note","tip","warning"]
 
 -- Trim leading and trailing newline characters
 trimNl :: Text -> Text
-- 
cgit v1.2.3


From c6f8c38c49e9579bf0bc0c91131bb206f0617e6b Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sat, 5 Jun 2021 16:31:12 +0200
Subject: Markdown writer: re-use functions from Inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of duplicating linkAttributes and attrsToMarkdown, let’s just use those from the Inline module.
---
 src/Text/Pandoc/Writers/Markdown.hs        | 29 +----------------------------
 src/Text/Pandoc/Writers/Markdown/Inline.hs |  4 +++-
 2 files changed, 4 insertions(+), 29 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 2ad9eabd9..38227dfa8 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -43,7 +43,7 @@ import Text.Pandoc.Templates (renderTemplate)
 import Text.DocTemplates (Val(..), Context(..), FromContext(..))
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.HTML (writeHtml5String)
-import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown)
+import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown, linkAttributes, attrsToMarkdown)
 import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..),
                                            WriterState(..),
                                            WriterEnv(..),
@@ -257,39 +257,12 @@ noteToMarkdown opts num blocks = do
               then hang (writerTabStop opts) (marker <> spacer) contents
               else marker <> spacer <> contents
 
-attrsToMarkdown :: Attr -> Doc Text
-attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
-        where attribId = case attribs of
-                                ("",_,_) -> empty
-                                (i,_,_)  -> "#" <> escAttr i
-              attribClasses = case attribs of
-                                (_,[],_) -> empty
-                                (_,cs,_) -> hsep $
-                                            map (escAttr . ("."<>))
-                                            cs
-              attribKeys = case attribs of
-                                (_,_,[]) -> empty
-                                (_,_,ks) -> hsep $
-                                            map (\(k,v) -> escAttr k
-                                              <> "=\"" <>
-                                              escAttr v <> "\"") ks
-              escAttr          = mconcat . map escAttrChar . T.unpack
-              escAttrChar '"'  = literal "\\\""
-              escAttrChar '\\' = literal "\\\\"
-              escAttrChar c    = literal $ T.singleton c
-
 -- | (Code) blocks with a single class can just use it standalone,
 -- no need to bother with curly braces.
 classOrAttrsToMarkdown :: Attr -> Doc Text
 classOrAttrsToMarkdown ("",[cls],_) = literal cls
 classOrAttrsToMarkdown attrs = attrsToMarkdown attrs
 
-linkAttributes :: WriterOptions -> Attr -> Doc Text
-linkAttributes opts attr =
-  if isEnabled Ext_link_attributes opts && attr /= nullAttr
-     then attrsToMarkdown attr
-     else empty
-
 -- | Ordered list start parser for use in Para below.
 olMarker :: Parser Text ParserState ()
 olMarker = do (start, style', delim) <- anyOrderedListMarker
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index e6c6da5a9..e66258220 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -11,7 +11,9 @@
    Portability : portable
 -}
 module Text.Pandoc.Writers.Markdown.Inline (
-  inlineListToMarkdown
+  inlineListToMarkdown,
+  linkAttributes,
+  attrsToMarkdown
   ) where
 import Control.Monad.Reader
 import Control.Monad.State.Strict
-- 
cgit v1.2.3


From 7a3ee9d3d83b73cb53de80e01a9968ebf8f7cf12 Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sat, 5 Jun 2021 15:53:24 +0200
Subject: CommonMark writer: do not throw away attributes when Ext_attributes
 is enabled

Ext_attributes covers at least the following:

- Ext_fenced_code_attributes
- Ext_header_attributes
- Ext_inline_code_attributes
- Ext_link_attributes
---
 src/Text/Pandoc/Writers/Markdown.hs        |  8 +++++---
 src/Text/Pandoc/Writers/Markdown/Inline.hs | 22 ++++++++++++----------
 2 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 38227dfa8..6316d9419 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -366,7 +366,7 @@ blockToMarkdown' opts (Plain inlines) = do
 -- title beginning with fig: indicates figure
 blockToMarkdown' opts (Para [Image attr alt (src,tgt@(T.stripPrefix "fig:" -> Just tit))])
   | isEnabled Ext_raw_html opts &&
-    not (isEnabled Ext_link_attributes opts) &&
+    not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) &&
     attr /= nullAttr = -- use raw HTML
     (<> blankline) . literal . T.strip <$>
       writeHtml5String opts{ writerTemplate = Nothing }
@@ -431,7 +431,8 @@ blockToMarkdown' opts (Header level attr inlines) = do
                                  && id' == autoId -> empty
                    (id',_,_)   | isEnabled Ext_mmd_header_identifiers opts ->
                                     space <> brackets (literal id')
-                   _ | isEnabled Ext_header_attributes opts ->
+                   _ | isEnabled Ext_header_attributes opts ||
+                       isEnabled Ext_attributes opts ->
                                     space <> attrsToMarkdown attr
                      | otherwise -> empty
   contents <- inlineListToMarkdown opts $
@@ -490,7 +491,8 @@ blockToMarkdown' opts (CodeBlock attribs str) = do
      endline c = literal $ T.replicate (endlineLen c) $ T.singleton c
      backticks = endline '`'
      tildes = endline '~'
-     attrs  = if isEnabled Ext_fenced_code_attributes opts
+     attrs  = if isEnabled Ext_fenced_code_attributes opts ||
+                 isEnabled Ext_attributes opts
                  then nowrap $ " " <> classOrAttrsToMarkdown attribs
                  else case attribs of
                             (_,cls:_,_) -> " " <> literal cls
diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs
index e66258220..cd5f5b896 100644
--- a/src/Text/Pandoc/Writers/Markdown/Inline.hs
+++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs
@@ -117,7 +117,7 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
 
 linkAttributes :: WriterOptions -> Attr -> Doc Text
 linkAttributes opts attr =
-  if isEnabled Ext_link_attributes opts && attr /= nullAttr
+  if (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr
      then attrsToMarkdown attr
      else empty
 
@@ -394,13 +394,15 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do
                    then "&ldquo;" <> contents <> "&rdquo;"
                    else "“" <> contents <> "”"
 inlineToMarkdown opts (Code attr str) = do
-  let tickGroups = filter (T.any (== '`')) $ T.group str
-  let longest    = maybe 0 maximum $ nonEmpty $ map T.length tickGroups
-  let marker     = T.replicate (longest + 1) "`"
-  let spacer     = if longest == 0 then "" else " "
-  let attrs      = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr
-                      then attrsToMarkdown attr
-                      else empty
+  let tickGroups   = filter (T.any (== '`')) $ T.group str
+  let longest      = maybe 0 maximum $ nonEmpty $ map T.length tickGroups
+  let marker       = T.replicate (longest + 1) "`"
+  let spacer       = if longest == 0 then "" else " "
+  let attrsEnabled = isEnabled Ext_inline_code_attributes opts ||
+                     isEnabled Ext_attributes opts
+  let attrs        = if attrsEnabled && attr /= nullAttr
+                        then attrsToMarkdown attr
+                        else empty
   variant <- asks envVariant
   case variant of
      PlainText -> return $ literal str
@@ -559,7 +561,7 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do
                            else "[" <> reftext <> "]"
            in  return $ first <> second
       | isEnabled Ext_raw_html opts
-      , not (isEnabled Ext_link_attributes opts)
+      , not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts)
       , attr /= nullAttr -> -- use raw HTML to render attributes
           literal . T.strip <$>
             writeHtml5String opts{ writerTemplate = Nothing }
@@ -569,7 +571,7 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do
          linkAttributes opts attr
 inlineToMarkdown opts img@(Image attr alternate (source, tit))
   | isEnabled Ext_raw_html opts &&
-    not (isEnabled Ext_link_attributes opts) &&
+    not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) &&
     attr /= nullAttr = -- use raw HTML
     literal . T.strip <$>
       writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]])
-- 
cgit v1.2.3


From c550bf8482310dfbcb20694e7bc969d19acc5f7d Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Sat, 5 Jun 2021 16:34:27 +0200
Subject: CommonMark writer: do not use simple class for fenced-divs

In https://github.com/jgm/pandoc/pull/7242, we introduced a simple attribute style for for code blocks and fenced divs with a single class but turns out the CommonMark extension does not support it for fenced divs.

https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/fenced_divs.md
---
 src/Text/Pandoc/Writers/Markdown.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 6316d9419..b16c71358 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -322,9 +322,12 @@ blockToMarkdown' opts (Div attrs ils) = do
     case () of
          _ | isEnabled Ext_fenced_divs opts &&
              attrs /= nullAttr ->
-                nowrap (literal ":::" <+> classOrAttrsToMarkdown attrs) $$
-                chomp contents $$
-                literal ":::" <> blankline
+                let attrsToMd = if variant == Commonmark
+                                then attrsToMarkdown
+                                else classOrAttrsToMarkdown
+                in nowrap (literal ":::" <+> attrsToMd attrs) $$
+                   chomp contents $$
+                   literal ":::" <> blankline
            | isEnabled Ext_native_divs opts ||
              (isEnabled Ext_raw_html opts &&
               (variant == Commonmark ||
-- 
cgit v1.2.3


From 21cc52abe33997ea2f2c539f10d26684b7633bc0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 5 Jun 2021 14:13:58 -0600
Subject: LaTeX writer: Fix regression in table header position.

In recent versions the table headers were no longer bottom-aligned
(if more than one line).  This patch fixes that by using minipages
for table headers in non-simple tables.

Closes #7347.
---
 src/Text/Pandoc/Writers/LaTeX/Table.hs | 13 ++++++++++---
 test/command/5367.md                   |  8 ++++++--
 test/tables.latex                      | 30 +++++++++++++++++++++++++++---
 test/tables/nordics.latex              | 26 ++++++++++++++++++++++----
 test/tables/planets.latex              |  8 ++++----
 test/tables/students.latex             | 12 ++++++++++--
 6 files changed, 79 insertions(+), 18 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index 16f63314b..8dc7d1162 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -16,6 +16,7 @@ module Text.Pandoc.Writers.LaTeX.Table
   ) where
 import Control.Monad.State.Strict
 import Data.List (intersperse)
+import qualified Data.List.NonEmpty as NonEmpty
 import Data.List.NonEmpty (NonEmpty ((:|)))
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -243,8 +244,13 @@ cellToLaTeX :: PandocMonad m
             -> Ann.Cell
             -> LW m (Doc Text)
 cellToLaTeX blockListToLaTeX celltype annotatedCell = do
-  let (Ann.Cell _specs _colnum cell) = annotatedCell
-  let (Cell _attr align rowspan colspan blocks) = cell
+  let (Ann.Cell specs _colnum cell) = annotatedCell
+  let hasWidths = snd (NonEmpty.head specs) /= ColWidthDefault
+  let specAlign = fst (NonEmpty.head specs)
+  let (Cell _attr align' rowspan colspan blocks) = cell
+  let align = case align' of
+                AlignDefault -> specAlign
+                _            -> align'
   beamer <- gets stBeamer
   externalNotes <- gets stExternalNotes
   inMinipage <- gets stInMinipage
@@ -256,7 +262,7 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do
         Plain{} -> True
         _       -> False
   result <-
-    if all isPlainOrPara blocks
+    if not hasWidths || (celltype /= HeaderCell && all isPlainOrPara blocks)
        then
          blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
        else do
@@ -290,3 +296,4 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do
 data CellType
   = HeaderCell
   | BodyCell
+  deriving Eq
diff --git a/test/command/5367.md b/test/command/5367.md
index 2d3a5e52e..a67011c2f 100644
--- a/test/command/5367.md
+++ b/test/command/5367.md
@@ -24,11 +24,15 @@ hello\footnote{doc footnote}
   >{\centering\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{0.17}}@{}}
 \caption[Sample table.]{Sample table.\footnote{caption footnote}}\tabularnewline
 \toprule
-Fruit\footnote{header footnote} \\
+\begin{minipage}[b]{\linewidth}\centering
+Fruit\footnote{header footnote}
+\end{minipage} \\
 \midrule
 \endfirsthead
 \toprule
-Fruit{} \\
+\begin{minipage}[b]{\linewidth}\centering
+Fruit{}
+\end{minipage} \\
 \midrule
 \endhead
 Bans\footnote{table cell footnote} \\
diff --git a/test/tables.latex b/test/tables.latex
index afa14d845..9d111fa7a 100644
--- a/test/tables.latex
+++ b/test/tables.latex
@@ -56,11 +56,27 @@ Multiline table with caption:
   >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}}
 \caption{Here's the caption. It may span multiple lines.}\tabularnewline
 \toprule
-Centered Header & Left Aligned & Right Aligned & Default aligned \\
+\begin{minipage}[b]{\linewidth}\centering
+Centered Header
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Left Aligned
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedleft
+Right Aligned
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Default aligned
+\end{minipage} \\
 \midrule
 \endfirsthead
 \toprule
-Centered Header & Left Aligned & Right Aligned & Default aligned \\
+\begin{minipage}[b]{\linewidth}\centering
+Centered Header
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Left Aligned
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedleft
+Right Aligned
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Default aligned
+\end{minipage} \\
 \midrule
 \endhead
 First & row & 12.0 & Example of a row that spans multiple lines. \\
@@ -76,7 +92,15 @@ Multiline table without caption:
   >{\raggedleft\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.16}}
   >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}}
 \toprule
-Centered Header & Left Aligned & Right Aligned & Default aligned \\
+\begin{minipage}[b]{\linewidth}\centering
+Centered Header
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Left Aligned
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedleft
+Right Aligned
+\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright
+Default aligned
+\end{minipage} \\
 \midrule
 \endhead
 First & row & 12.0 & Example of a row that spans multiple lines. \\
diff --git a/test/tables/nordics.latex b/test/tables/nordics.latex
index 1b5929bad..1dcac7319 100644
--- a/test/tables/nordics.latex
+++ b/test/tables/nordics.latex
@@ -5,13 +5,31 @@
   >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}}@{}}
 \caption{States belonging to the \emph{Nordics.}}\tabularnewline
 \toprule
-Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} &
-\vtop{\hbox{\strut Area}\hbox{\strut (in km\textsuperscript{2})}} \\
+\begin{minipage}[b]{\linewidth}\centering
+Name
+\end{minipage} & \begin{minipage}[b]{\linewidth}\centering
+Capital
+\end{minipage} & \begin{minipage}[b]{\linewidth}\centering
+Population\\
+(in 2018)
+\end{minipage} & \begin{minipage}[b]{\linewidth}\centering
+Area\\
+(in km\textsuperscript{2})
+\end{minipage} \\
 \midrule
 \endfirsthead
 \toprule
-Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} &
-\vtop{\hbox{\strut Area}\hbox{\strut (in km\textsuperscript{2})}} \\
+\begin{minipage}[b]{\linewidth}\centering
+Name
+\end{minipage} & \begin{minipage}[b]{\linewidth}\centering
+Capital
+\end{minipage} & \begin{minipage}[b]{\linewidth}\centering
+Population\\
+(in 2018)
+\end{minipage} & \begin{minipage}[b]{\linewidth}\centering
+Area\\
+(in km\textsuperscript{2})
+\end{minipage} \\
 \midrule
 \endhead
 Denmark & Copenhagen & 5,809,502 & 43,094 \\
diff --git a/test/tables/planets.latex b/test/tables/planets.latex
index 8238c43f3..b22c3adeb 100644
--- a/test/tables/planets.latex
+++ b/test/tables/planets.latex
@@ -1,18 +1,18 @@
 \begin{longtable}[]{@{}cclrrrrrrrrl@{}}
 \caption{Data about the planets of our solar system.}\tabularnewline
 \toprule
-\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
+\multicolumn{2}{c}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
 (kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun
 (10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\
 \midrule
 \endfirsthead
 \toprule
-\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
+\multicolumn{2}{c}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density
 (kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun
 (10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\
 \midrule
 \endhead
-\multicolumn{2}{l}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 &
+\multicolumn{2}{c}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 &
 4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the Sun \\
 & & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0 & \\
 & & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our world \\
@@ -24,7 +24,7 @@ planet \\
 & \multirow{2}{*}{Ice giants} & Uranus & 86.8 & 51,118 & 1271 & 8.7 & 17.2 &
 2872.5 & -195 & 27 & \\
 & & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14 & \\
-\multicolumn{2}{l}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 &
+\multicolumn{2}{c}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 &
 153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\
 \bottomrule
 \end{longtable}
diff --git a/test/tables/students.latex b/test/tables/students.latex
index 87efb0851..3d4d287d9 100644
--- a/test/tables/students.latex
+++ b/test/tables/students.latex
@@ -3,11 +3,19 @@
   >{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}@{}}
 \caption{List of Students}\tabularnewline
 \toprule
-Student ID & Name \\
+\begin{minipage}[b]{\linewidth}\centering
+Student ID
+\end{minipage} & \begin{minipage}[b]{\linewidth}\centering
+Name
+\end{minipage} \\
 \midrule
 \endfirsthead
 \toprule
-Student ID & Name \\
+\begin{minipage}[b]{\linewidth}\centering
+Student ID
+\end{minipage} & \begin{minipage}[b]{\linewidth}\centering
+Name
+\end{minipage} \\
 \midrule
 \endhead
 \multicolumn{2}{l}{Computer Science} \\
-- 
cgit v1.2.3


From 76e5f047b0ef4c293687f9ddce62d601b23058a9 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 8 Jun 2021 08:20:22 -0600
Subject: Citeproc: avoid duplicate classes and attributes on refs div.

---
 src/Text/Pandoc/Citeproc.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index de63aed1f..ad3b26c0f 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -49,7 +49,6 @@ import qualified Data.Text as T
 import System.FilePath (takeExtension)
 import Safe (lastMay, initSafe)
 
-
 processCitations  :: PandocMonad m => Pandoc -> m Pandoc
 processCitations (Pandoc meta bs) = do
   style <- getStyle (Pandoc meta bs)
@@ -499,7 +498,8 @@ insertRefs refkvs refclasses meta refs bs =
      put True
      -- refHeader isn't used if you have an explicit references div
      let cs' = ordNub $ cs ++ refclasses
-     return $ Div ("refs",cs' ++ refclasses,kvs ++ refkvs) (xs ++ refs)
+     let kvs' = ordNub $ kvs ++ refkvs
+     return $ Div ("refs",cs',kvs') (xs ++ refs)
    go x = return x
 
 refTitle :: Meta -> Maybe [Inline]
-- 
cgit v1.2.3


From 55bcd4b4fb1dced6c6e316db6cd117b52bbadee5 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Thu, 10 Jun 2021 18:26:53 +0200
Subject: Lua utils: fix handling of table headers in `from_simple_table`

Passing an empty list of header cells now results in an empty table
header.

Fixes: #7369
---
 src/Text/Pandoc/Lua/Module/Utils.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs
index 1b04021a7..3ec3afc26 100644
--- a/src/Text/Pandoc/Lua/Module/Utils.hs
+++ b/src/Text/Pandoc/Lua/Module/Utils.hs
@@ -146,7 +146,7 @@ from_simple_table (SimpleTable capt aligns widths head' body) = do
     nullAttr
     (Caption Nothing [Plain capt])
     (zipWith (\a w -> (a, toColWidth w)) aligns widths)
-    (TableHead nullAttr [blockListToRow head'])
+    (TableHead nullAttr [blockListToRow head' | not (null head') ])
     [TableBody nullAttr 0 [] $ map blockListToRow body]
     (TableFoot nullAttr [])
   return (NumResults 1)
-- 
cgit v1.2.3


From c7dd33d5aaa30f4c95e141b10e6d8fe0212edf1e Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Thu, 10 Jun 2021 18:34:20 +0200
Subject: Docx writer: fix handling of empty table headers

A table header which does not contain any cells is now treated as an
empty header.

Fixes: #7369
---
 src/Text/Pandoc/Writers/Docx/Table.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index 13cb31040..49917e315 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -116,8 +116,8 @@ cellGridToOpenXML :: PandocMonad m
                   -> [Alignment]
                   -> Part
                   -> WS m [Element]
-cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ _ rowAttrs) =
-  if null (indices rowAttrs)
+cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ cellArray _) =
+  if null (elems cellArray)
   then return mempty
   else mapM (rowToOpenXML blocksToOpenXML) $
        partToRows rowType aligns part
-- 
cgit v1.2.3


From aa79b3035c3343adf1bb41b37266049a65ab5da7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 10 Jun 2021 16:36:54 -0700
Subject: T.P.MIME, extensionFromMimeType: add a few special cases.

When we do a reverse lookup in the MIME table, we just get the
last match, so when the same mime type is associated with several
different extensions, we sometimes got weird results, e.g. `.vs`
for `text/plain`.  These special cases help us get the most standard
extensions for mime types like `text/plain`.
---
 src/Text/Pandoc/MIME.hs | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 3d06e1579..77c7069e9 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -43,6 +43,16 @@ getMimeTypeDef :: FilePath -> MimeType
 getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType
 
 extensionFromMimeType :: MimeType -> Maybe T.Text
+-- few special cases, where there are multiple options:
+extensionFromMimeType "text/plain" = Just "txt"
+extensionFromMimeType "video/quicktime" = Just "mov"
+extensionFromMimeType "video/mpeg" = Just "mpeg"
+extensionFromMimeType "video/dv" = Just "dv"
+extensionFromMimeType "image/vnd.djvu" = Just "djvu"
+extensionFromMimeType "image/tiff" = Just "tiff"
+extensionFromMimeType "image/jpeg" = Just "jpg"
+extensionFromMimeType "application/xml" = Just "xml"
+extensionFromMimeType "application/ogg" = Just "ogg"
 extensionFromMimeType mimetype =
   M.lookup (T.takeWhile (/=';') mimetype) reverseMimeTypes
   -- note:  we just look up the basic mime type, dropping the content-encoding etc.
-- 
cgit v1.2.3


From 3776e828a83048697e5c64d9fb4bedc0145197dc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 10 Jun 2021 16:47:02 -0700
Subject: Fix MediaBag regressions.

With the 2.14 release `--extract-media` stopped working as before;
there could be mismatches between the paths in the rendered document and
the extracted media.

This patch makes several changes (while keeping the same API).

The `mediaPath` in 2.14 was always constructed from the SHA1 hash of
the media contents.  Now, we preserve the original path unless it's
an absolute path or contains `..` segments (in that case we use a path
based on the SHA1 hash of the contents).

When constructing a path from the SHA1 hash, we always use the
original extension, if there is one. Otherwise we look up an
appropriate extension for the mime type.

`mediaDirectory` and `mediaItems` now use the `mediaPath`, rather
than the mediabag key, for the first component of the tuple.
This makes more sense, I think, and fits with the documentation
of these functions; eventually, though, we should rework the API so that
`mediaItems` returns both the keys and the MediaItems.

Rewriting of source paths in `extractMedia` has been fixed.

`fillMediaBag` has been modified so that it doesn't modify
image paths (that was part of the problem in #7345).

We now do path normalization (e.g. `\` separators on Windows) only
in writing the media; the paths are left unchanged in the image
links (sensibly, since they might be URLs and not file paths).

These changes should restore the original behavior from before 2.14.

Closes #7345.
---
 MANUAL.txt                           | 12 +++++------
 src/Text/Pandoc/Class/IO.hs          | 41 ++++++++++++++++++------------------
 src/Text/Pandoc/Class/PandocMonad.hs | 17 ++++++---------
 src/Text/Pandoc/MediaBag.hs          | 25 ++++++++++++----------
 4 files changed, 47 insertions(+), 48 deletions(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index b3a1f95e2..ef569433a 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -675,12 +675,12 @@ header when requesting a document from a URL:
 :   Extract images and other media contained in or linked from
     the source document to the path *DIR*, creating it if
     necessary, and adjust the images references in the document
-    so they point to the extracted files.  If the source format is
-    a binary container (docx, epub, or odt), the media is
-    extracted from the container and the original
-    filenames are used. Otherwise the media is read from the
-    file system or downloaded, and new filenames are constructed
-    based on SHA1 hashes of the contents.
+    so they point to the extracted files.  Media are downloaded,
+    read from the file system, or extracted from a binary
+    container (e.g. docx), as needed.  The original file paths
+    are used if they are relative paths not containing `..`.
+    Otherwise filenames are constructed from the SHA1 hash of
+    the contents.
 
 `--abbreviations=`*FILE*
 
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index 6df39d4d0..169074860 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image))
 import Text.Pandoc.Error (PandocError (..))
 import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage)
 import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory)
+import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems)
 import Text.Pandoc.Walk (walk)
 import qualified Control.Exception as E
 import qualified Data.ByteString as B
@@ -200,31 +200,32 @@ alertIndent (l:ls) = do
 extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
 extractMedia dir d = do
   media <- getMediaBag
-  case [fp | (fp, _, _) <- mediaDirectory media] of
-    []  -> return d
-    fps -> do
-      mapM_ (writeMedia dir media) fps
-      return $ walk (adjustImagePath dir fps) d
+  let items = mediaItems media
+  if null items
+    then return d
+    else do
+      mapM_ (writeMedia dir) items
+      return $ walk (adjustImagePath dir media) d
 
 -- | Write the contents of a media bag to a path.
 writeMedia :: (PandocMonad m, MonadIO m)
-           => FilePath -> MediaBag -> FilePath
+           => FilePath
+           -> (FilePath, MimeType, BL.ByteString)
            -> m ()
-writeMedia dir mediabag subpath = do
-  let mbcontents = lookupMedia subpath mediabag
-  case mbcontents of
-       Nothing -> throwError $ PandocResourceNotFound $ pack subpath
-       Just item -> do
-         -- we normalize to get proper path separators for the platform
-         let fullpath = dir </> normalise (mediaPath item)
-         liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
-         logIOError $ BL.writeFile fullpath $ mediaContents item
+writeMedia dir (fp, _mt, bs) = do
+  -- we normalize to get proper path separators for the platform
+  let fullpath = normalise $ dir </> fp
+  liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+  logIOError $ BL.writeFile fullpath bs
 
 -- | If the given Inline element is an image with a @src@ path equal to
 -- one in the list of @paths@, then prepends @dir@ to the image source;
 -- returns the element unchanged otherwise.
-adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
-adjustImagePath dir paths (Image attr lab (src, tit))
-   | unpack src `elem` paths
-     = Image attr lab (pack (normalise $ dir </> unpack src), tit)
+adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline
+adjustImagePath dir mediabag (Image attr lab (src, tit)) =
+  case lookupMedia (T.unpack src) mediabag of
+    Nothing -> Image attr lab (src, tit)
+    Just item ->
+      let fullpath = dir </> mediaPath item
+      in  Image attr lab (T.pack fullpath, tit)
 adjustImagePath _ _ x = x
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 4eb80df29..439aec071 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -638,17 +638,12 @@ fillMediaBag d = walkM handleImage d
         handleImage (Image attr lab (src, tit)) = catchError
           (do mediabag <- getMediaBag
               let fp = T.unpack src
-              src' <- T.pack <$> case lookupMedia fp mediabag of
-                        Just item -> return $ mediaPath item
-                        Nothing -> do
-                          (bs, mt) <- fetchItem src
-                          insertMedia fp mt (BL.fromStrict bs)
-                          mediabag' <- getMediaBag
-                          case lookupMedia fp mediabag' of
-                             Just item -> return $ mediaPath item
-                             Nothing -> throwError $ PandocSomeError $
-                               src <> " not successfully inserted into MediaBag"
-              return $ Image attr lab (src', tit))
+              case lookupMedia fp mediabag of
+                Just _ -> return ()
+                Nothing -> do
+                  (bs, mt) <- fetchItem src
+                  insertMedia fp mt (BL.fromStrict bs)
+              return $ Image attr lab (src, tit))
           (\e ->
               case e of
                 PandocResourceNotFound _ -> do
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index a65f315fc..06fba5632 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -71,16 +71,21 @@ insertMedia :: FilePath       -- ^ relative path and canonical name of resource
             -> MediaBag
             -> MediaBag
 insertMedia fp mbMime contents (MediaBag mediamap) =
-  MediaBag (M.insert (canonicalize fp) mediaItem mediamap)
-  where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <>
-                                 "." <> ext
+  MediaBag (M.insert fp' mediaItem mediamap)
+  where mediaItem = MediaItem{ mediaPath = newpath
                              , mediaContents = contents
                              , mediaMimeType = mt }
+        fp' = canonicalize fp
+        newpath = if isRelative fp && ".." `notElem` splitPath fp
+                     then T.unpack fp'
+                     else showDigest (sha1 contents) <> "." <> ext
         fallback = case takeExtension fp of
                         ".gz" -> getMimeTypeDef $ dropExtension fp
                         _     -> getMimeTypeDef fp
         mt = fromMaybe fallback mbMime
-        ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt
+        ext = case takeExtension fp of
+               '.':e  -> e
+               _      -> maybe "" T.unpack $ extensionFromMimeType mt
 
 
 -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
@@ -92,13 +97,11 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap
 -- | Get a list of the file paths stored in a 'MediaBag', with
 -- their corresponding mime types and the lengths in bytes of the contents.
 mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)]
-mediaDirectory (MediaBag mediamap) =
-  M.foldrWithKey (\fp item ->
-      ((T.unpack fp, mediaMimeType item,
-        fromIntegral (BL.length (mediaContents item))):)) [] mediamap
+mediaDirectory mediabag =
+  map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs)))
+    (mediaItems mediabag)
 
 mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)]
 mediaItems (MediaBag mediamap) =
-  M.foldrWithKey (\fp item ->
-      ((T.unpack fp, mediaMimeType item, mediaContents item):))
-     [] mediamap
+  map (\item -> (mediaPath item, mediaMimeType item, mediaContents item))
+      (M.elems mediamap)
-- 
cgit v1.2.3


From b0cd6c622494666add6bdd7674ec5b7791bc83d0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 12 Jun 2021 10:16:44 -0700
Subject: Fix regression in citeproc processing.

If inline references are used (in the metadata `references` field),
we should still only include in the bibliography items that are
actually cited -- unless `nocite` is used.

Closes #7376.
---
 src/Text/Pandoc/Citeproc.hs         |  4 +++-
 test/command/7376.md                | 16 ++++++++++++++++
 test/command/pandoc-citeproc-356.md |  9 ++-------
 3 files changed, 21 insertions(+), 8 deletions(-)
 create mode 100644 test/command/7376.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index ad3b26c0f..a5b26c9b4 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -194,7 +194,9 @@ getReferences mblocale (Pandoc meta bs) = do
                   then const True
                   else (`Set.member` citeIds)
   let inlineRefs = case lookupMeta "references" meta of
-                    Just (MetaList rs) -> mapMaybe metaValueToReference rs
+                    Just (MetaList rs) ->
+                      filter (idpred . unItemId . referenceId)
+                         $  mapMaybe metaValueToReference rs
                     _                  -> []
   externalRefs <- case lookupMeta "bibliography" meta of
                     Just (MetaList xs) ->
diff --git a/test/command/7376.md b/test/command/7376.md
new file mode 100644
index 000000000..229c61cfb
--- /dev/null
+++ b/test/command/7376.md
@@ -0,0 +1,16 @@
+```
+% pandoc --citeproc -t plain
+---
+references:
+- id: item1
+  type: book
+  author:
+    - family: Doe
+      given: Jane
+  issued: 2020
+  title: The title
+...
+^D
+
+
+```
diff --git a/test/command/pandoc-citeproc-356.md b/test/command/pandoc-citeproc-356.md
index 4463ef63f..b4f998dae 100644
--- a/test/command/pandoc-citeproc-356.md
+++ b/test/command/pandoc-citeproc-356.md
@@ -15,11 +15,6 @@ references:
 
 [@bar]
 ^D
-(Alice 2042)
-
-::: {#refs .references .csl-bib-body .hanging-indent}
-::: {#ref-foo .csl-entry}
-Alice. 2042.
-:::
-:::
+[WARNING] Citeproc: citation bar not found
+(**bar?**)
 ```
-- 
cgit v1.2.3


From ea53a1dc5c68e6a1a2e450422147a66fd2aa9efd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 12 Jun 2021 10:20:19 -0700
Subject: Markdown writer: allow `pipe_tables` to be disabled for commonmark...

(commonmark_x, gfm).  Closes #7375.
---
 src/Text/Pandoc/Writers/Markdown.hs | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index b16c71358..425ea07ca 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -75,7 +75,6 @@ writeCommonMark opts document =
                    -- we set them here so that escapeText will behave
                    -- properly.
                    enableExtension Ext_all_symbols_escapable $
-                   enableExtension Ext_pipe_tables $
                    enableExtension Ext_intraword_underscores $
                      writerExtensions opts }
 
-- 
cgit v1.2.3


From cfa26e3ca0346397f41af9aed5b4cd1d86be1220 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 12 Jun 2021 13:56:09 -0700
Subject: Docx reader: handle absolute URIs in Relationship Target.

Closes #7374.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index aaa8f4ad0..dbb16a821 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -479,20 +479,26 @@ filePathToRelType path docXmlPath =
   then Just InDocument
   else Nothing
 
-relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
-relElemToRelationship relType element | qName (elName element) == "Relationship" =
+relElemToRelationship :: FilePath -> DocumentLocation -> Element
+                      -> Maybe Relationship
+relElemToRelationship fp relType element | qName (elName element) == "Relationship" =
   do
     relId <- findAttr (QName "Id" Nothing Nothing) element
     target <- findAttr (QName "Target" Nothing Nothing) element
-    return $ Relationship relType relId target
-relElemToRelationship _ _ = Nothing
+    -- target may be relative (media/image1.jpeg) or absolute
+    -- (/word/media/image1.jpeg); we need to relativize it (see #7374)
+    let frontOfFp = T.pack $ takeWhile (/= '_') fp
+    let target' = fromMaybe target $
+           T.stripPrefix frontOfFp $ T.dropWhile (== '/') target
+    return $ Relationship relType relId target'
+relElemToRelationship _ _ _ = Nothing
 
 filePathToRelationships :: Archive -> FilePath -> FilePath ->  [Relationship]
 filePathToRelationships ar docXmlPath fp
   | Just relType <- filePathToRelType fp docXmlPath
   , Just entry <- findEntryByPath fp ar
   , Just relElems <- parseXMLFromEntry entry =
-  mapMaybe (relElemToRelationship relType) $ elChildren relElems
+  mapMaybe (relElemToRelationship fp relType) $ elChildren relElems
 filePathToRelationships _ _ _ = []
 
 archiveToRelationships :: Archive -> FilePath -> [Relationship]
-- 
cgit v1.2.3


From 3fb5499dd638dae7156fba63ba5c1522bed5e46d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 18 Jun 2021 12:06:20 -0700
Subject: insertMediaBag: ensure we get sane mediaPath for URLs.

Long URLs cannot be treated as mediaPaths, but System.FilePath's
`isRelative` often returns True for them.  So we add a check
for an absolute URL.  We also ensure that extensions are derived
only from the path portion of URLs (previously a following query
was being included).

Closes #7391.
---
 src/Text/Pandoc/MediaBag.hs | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index 06fba5632..098e484ee 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -26,13 +26,14 @@ module Text.Pandoc.MediaBag (
 import qualified Data.ByteString.Lazy as BL
 import Data.Data (Data)
 import qualified Data.Map as M
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, isNothing)
 import Data.Typeable (Typeable)
 import System.FilePath
 import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType)
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.Digest.Pure.SHA (sha1, showDigest)
+import Network.URI (URI (..), parseURI)
 
 data MediaItem =
   MediaItem
@@ -76,16 +77,20 @@ insertMedia fp mbMime contents (MediaBag mediamap) =
                              , mediaContents = contents
                              , mediaMimeType = mt }
         fp' = canonicalize fp
-        newpath = if isRelative fp && ".." `notElem` splitPath fp
+        uri = parseURI fp
+        newpath = if isRelative fp
+                       && isNothing uri
+                       && ".." `notElem` splitPath fp
                      then T.unpack fp'
                      else showDigest (sha1 contents) <> "." <> ext
         fallback = case takeExtension fp of
                         ".gz" -> getMimeTypeDef $ dropExtension fp
                         _     -> getMimeTypeDef fp
         mt = fromMaybe fallback mbMime
-        ext = case takeExtension fp of
-               '.':e  -> e
-               _      -> maybe "" T.unpack $ extensionFromMimeType mt
+        path = maybe fp uriPath uri
+        ext = case takeExtension path of
+                '.':e -> e
+                _ -> maybe "" T.unpack $ extensionFromMimeType mt
 
 
 -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
-- 
cgit v1.2.3


From 82ad855f38b8fa8dc1cbfc14fa294dfd5f9f02ab Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 08:49:00 -0700
Subject: Markdown writer: Fix regression in code blocks with attributes.

Code blocks with a single class but nonempty attributes
were having attributes drop as a result of #7242.

Closes #7397.
---
 src/Text/Pandoc/Writers/Markdown.hs |  6 +++---
 test/command/7397.md                | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)
 create mode 100644 test/command/7397.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 425ea07ca..b13ab57ee 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -256,10 +256,10 @@ noteToMarkdown opts num blocks = do
               then hang (writerTabStop opts) (marker <> spacer) contents
               else marker <> spacer <> contents
 
--- | (Code) blocks with a single class can just use it standalone,
--- no need to bother with curly braces.
+-- | (Code) blocks with a single class and no attributes can just use it
+-- standalone, no need to bother with curly braces.
 classOrAttrsToMarkdown :: Attr -> Doc Text
-classOrAttrsToMarkdown ("",[cls],_) = literal cls
+classOrAttrsToMarkdown ("",[cls],[]) = literal cls
 classOrAttrsToMarkdown attrs = attrsToMarkdown attrs
 
 -- | Ordered list start parser for use in Para below.
diff --git a/test/command/7397.md b/test/command/7397.md
new file mode 100644
index 000000000..ca8b6a482
--- /dev/null
+++ b/test/command/7397.md
@@ -0,0 +1,14 @@
+```
+% pandoc -t markdown
+~~~~ { .haskell startFrom="100"}
+qsort []     = []
+qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
+               qsort (filter (>= x) xs)
+~~~~
+^D
+``` {.haskell startFrom="100"}
+qsort []     = []
+qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
+               qsort (filter (>= x) xs)
+```
+```
-- 
cgit v1.2.3


From 14b2eb2aeb21b3dee7d07c7348ebd2c586d6a866 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 16:40:52 -0700
Subject: reveal.js writer: better handling of options.

Previously it was impossible to specify false values for
options that default to true; setting the option to false
just caused the portion of the template setting the option
to be omitted.

Now we prepopulate all the variables with their default
values, including them unconditionally and allowing them
to be overridden.
---
 data/templates/default.revealjs | 198 ++++++++++++++--------------------------
 src/Text/Pandoc/Writers/HTML.hs |  50 ++++++++++
 2 files changed, 120 insertions(+), 128 deletions(-)

(limited to 'src')

diff --git a/data/templates/default.revealjs b/data/templates/default.revealjs
index 8a77674dd..203983522 100644
--- a/data/templates/default.revealjs
+++ b/data/templates/default.revealjs
@@ -90,256 +90,198 @@ $endif$
       // Full list of configuration options available at:
       // https://revealjs.com/config/
       Reveal.initialize({
-$if(center)$
-        // Determines whether slide content should be vertically centered
-        center: $center$,
-$endif$      
-$if(controls)$
         // Display controls in the bottom right corner
         controls: $controls$,
-$endif$
-$if(controlsTutorial)$
+
         // Help the user learn the controls by providing hints, for example by
         // bouncing the down arrow when they first encounter a vertical slide
         controlsTutorial: $controlsTutorial$,
-$endif$
-$if(controlsLayout)$
+
         // Determines where controls appear, "edges" or "bottom-right"
         controlsLayout: '$controlsLayout$',
-$endif$
-$if(controlsBackArrows)$
+
         // Visibility rule for backwards navigation arrows; "faded", "hidden"
         // or "visible"
         controlsBackArrows: '$controlsBackArrows$',
-$endif$
-$if(progress)$
+
         // Display a presentation progress bar
         progress: $progress$,
-$endif$
-$if(slideNumber)$
+
         // Display the page number of the current slide
         slideNumber: $slideNumber$,
-$endif$
-$if(showSlideNumber)$
+
         // 'all', 'print', or 'speaker'
         showSlideNumber: '$showSlideNumber$',
-$endif$
+
         // Add the current slide number to the URL hash so that reloading the
         // page/copying the URL will return you to the same slide
-        hash: $if(hash)$$hash$$else$true$endif$,
-$if(hashOneBasedIndex)$
+        hash: $hash$,
+
         // Start with 1 for the hash rather than 0
         hashOneBasedIndex: $hashOneBasedIndex$,
-$endif$
-$if(history)$
+
+        // Flags if we should monitor the hash and change slides accordingly
+        respondToHashChanges: $respondToHashChanges$,
+
         // Push each slide change to the browser history
         history: $history$,
-$endif$
-$if(keyboard)$
+
         // Enable keyboard shortcuts for navigation
         keyboard: $keyboard$,
-$endif$
-$if(overview)$
+
         // Enable the slide overview mode
         overview: $overview$,
-$endif$
-$if(center)$
+
+        // Disables the default reveal.js slide layout (scaling and centering)
+        // so that you can use custom CSS layout
+        disableLayout: false,
+
         // Vertical centering of slides
         center: $center$,
-$endif$
-$if(touch)$
+
         // Enables touch navigation on devices with touch input
         touch: $touch$,
-$endif$
-$if(loop)$
+
         // Loop the presentation
         loop: $loop$,
-$endif$
-$if(rtl)$
+
         // Change the presentation direction to be RTL
         rtl: $rtl$,
-$endif$
-$if(navigationMode)$
+
         // see https://revealjs.com/vertical-slides/#navigation-mode
         navigationMode: '$navigationMode$',
-$endif$
-$if(shuffle)$
+
         // Randomizes the order of slides each time the presentation loads
         shuffle: $shuffle$,
-$endif$
-$if(fragments)$
+
         // Turns fragments on and off globally
         fragments: $fragments$,
-$endif$
-$if(fragmentInURL)$
+
         // Flags whether to include the current fragment in the URL,
         // so that reloading brings you to the same fragment position
         fragmentInURL: $fragmentInURL$,
-$endif$
-$if(embedded)$
+
         // Flags if the presentation is running in an embedded mode,
         // i.e. contained within a limited portion of the screen
         embedded: $embedded$,
-$endif$
-$if(help)$
+
         // Flags if we should show a help overlay when the questionmark
         // key is pressed
         help: $help$,
-$endif$
-$if(pause)$
-       // Flags if it should be possible to pause the presentation (blackout)
-       pause: $pause$,
-$endif$
-$if(showNotes)$
+
+        // Flags if it should be possible to pause the presentation (blackout)
+        pause: $pause$,
+
         // Flags if speaker notes should be visible to all viewers
         showNotes: $showNotes$,
-$endif$
-$if(autoPlayMedia)$
-        // Global override for autoplaying embedded media (video/audio/iframe)
-        // - null: Media will only autoplay if data-autoplay is present
-        // - true: All media will autoplay, regardless of individual setting
-        // - false: No media will autoplay, regardless of individual setting
+
+        // Global override for autoplaying embedded media (null/true/false)
         autoPlayMedia: $autoPlayMedia$,
-$endif$
-$if(preloadIframes)$
-        // Global override for preloading lazy-loaded iframes
-        // - null: Iframes with data-src AND data-preload will be loaded when within
-        //   the viewDistance, iframes with only data-src will be loaded when visible
-        // - true: All iframes with data-src will be loaded when within the viewDistance
-        // - false: All iframes with data-src will be loaded only when visible
+
+        // Global override for preloading lazy-loaded iframes (null/true/false)
         preloadIframes: $preloadIframes$,
-$endif$
-$if(autoSlide)$
+
         // Number of milliseconds between automatically proceeding to the
         // next slide, disabled when set to 0, this value can be overwritten
         // by using a data-autoslide attribute on your slides
         autoSlide: $autoSlide$,
-$endif$
-$if(autoSlideStoppable)$
+
         // Stop auto-sliding after user input
         autoSlideStoppable: $autoSlideStoppable$,
-$endif$
-$if(autoSlideMethod)$
+
         // Use this method for navigation when auto-sliding
         autoSlideMethod: $autoSlideMethod$,
-$endif$
-$if(defaultTiming)$
+
         // Specify the average time in seconds that you think you will spend
         // presenting each slide. This is used to show a pacing timer in the
         // speaker view
         defaultTiming: $defaultTiming$,
-$endif$
-$if(totalTime)$
-        // Specify the total time in seconds that is available to
-        // present.  If this is set to a nonzero value, the pacing
-        // timer will work out the time available for each slide,
-        // instead of using the defaultTiming value
-        totalTime: $totalTime$,
-$endif$
-$if(minimumTimePerSlide)$
-        // Specify the minimum amount of time you want to allot to
-        // each slide, if using the totalTime calculation method.  If
-        // the automated time allocation causes slide pacing to fall
-        // below this threshold, then you will see an alert in the
-        // speaker notes window
-        minimumTimePerSlide: $minimumTimePerSlide$,
-$endif$
-$if(mouseWheel)$
+
         // Enable slide navigation via mouse wheel
         mouseWheel: $mouseWheel$,
-$endif$
-$if(rollingLinks)$
-        // Apply a 3D roll to links on hover
-        rollingLinks: $rollingLinks$,
-$endif$
-$if(hideInactiveCursor)$
+
+        // The display mode that will be used to show slides
+        display: '$display$',
+
         // Hide cursor if inactive
         hideInactiveCursor: $hideInactiveCursor$,
-$endif$
-$if(hideCursorTime)$
+
         // Time before the cursor is hidden (in ms)
         hideCursorTime: $hideCursorTime$,
-$endif$
-$if(hideAddressBar)$
-        // Hides the address bar on mobile devices
-        hideAddressBar: $hideAddressBar$,
-$endif$
-$if(previewLinks)$
+
         // Opens links in an iframe preview overlay
         previewLinks: $previewLinks$,
-$endif$
-$if(transition)$
-        // Transition style
-        transition: '$transition$', // none/fade/slide/convex/concave/zoom
-$endif$
-$if(transitionSpeed)$
-        // Transition speed
-        transitionSpeed: '$transitionSpeed$', // default/fast/slow
-$endif$
-$if(backgroundTransition)$
+
+        // Transition style (none/fade/slide/convex/concave/zoom)
+        transition: '$transition$',
+
+        // Transition speed (default/fast/slow)
+        transitionSpeed: '$transitionSpeed$',
+
         // Transition style for full page slide backgrounds
-        backgroundTransition: '$backgroundTransition$', // none/fade/slide/convex/concave/zoom
-$endif$
-$if(viewDistance)$
+        // (none/fade/slide/convex/concave/zoom)
+        backgroundTransition: '$backgroundTransition$',
+
         // Number of slides away from the current that are visible
         viewDistance: $viewDistance$,
-$endif$
-$if(mobileViewDistance)$
+
         // Number of slides away from the current that are visible on mobile
         // devices. It is advisable to set this to a lower number than
         // viewDistance in order to save resources.
         mobileViewDistance: $mobileViewDistance$,
-$endif$
 $if(parallaxBackgroundImage)$
+
         // Parallax background image
         parallaxBackgroundImage: '$parallaxBackgroundImage$', // e.g. "'https://s3.amazonaws.com/hakim-static/reveal-js/reveal-parallax-1.jpg'"
 $else$
 $if(background-image)$
+
        // Parallax background image
        parallaxBackgroundImage: '$background-image$', // e.g. "'https://s3.amazonaws.com/hakim-static/reveal-js/reveal-parallax-1.jpg'"
 $endif$
 $endif$
 $if(parallaxBackgroundSize)$
+
         // Parallax background size
         parallaxBackgroundSize: '$parallaxBackgroundSize$', // CSS syntax, e.g. "2100px 900px"
 $endif$
 $if(parallaxBackgroundHorizontal)$
+
         // Amount to move parallax background (horizontal and vertical) on slide change
         // Number, e.g. 100
         parallaxBackgroundHorizontal: $parallaxBackgroundHorizontal$,
 $endif$
 $if(parallaxBackgroundVertical)$
+
         parallaxBackgroundVertical: $parallaxBackgroundVertical$,
 $endif$
 $if(width)$
+
         // The "normal" size of the presentation, aspect ratio will be preserved
         // when the presentation is scaled to fit different resolutions. Can be
         // specified using percentage units.
         width: $width$,
 $endif$
 $if(height)$
+
         height: $height$,
 $endif$
 $if(margin)$
+
         // Factor of the display size that should remain empty around the content
         margin: $margin$,
 $endif$
 $if(minScale)$
+
         // Bounds for smallest/largest possible scale to apply to content
         minScale: $minScale$,
 $endif$
 $if(maxScale)$
+
         maxScale: $maxScale$,
 $endif$
-$if(zoomKey)$
-        // Modifier key used to click-zoom to part of the slide
-        zoomKey: '$zoomKey$',
-$endif$
-$if(display)$
-        // The display mode that will be used to show slides
-        display: '$display$',
-$endif$
 $if(mathjax)$
+
         math: {
           mathjax: '$mathjaxurl$',
           config: 'TeX-AMS_HTML-full',
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index b1dd9a659..4d513df3b 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -317,6 +317,10 @@ pandocToHtml opts (Pandoc meta blocks) = do
                           | otherwise -> mempty
                     Nothing -> mempty
   let mCss :: Maybe [Text] = lookupContext "css" metadata
+  let true :: Text
+      true = "true"
+  let false :: Text
+      false = "false"
   let context =   (if stHighlighting st
                       then case writerHighlightStyle opts of
                                 Just sty -> defField "highlighting-css"
@@ -344,6 +348,52 @@ pandocToHtml opts (Pandoc meta blocks) = do
                         PlainMath -> defField "displaymath-css" True
                         WebTeX _  -> defField "displaymath-css" True
                         _         -> id) .
+                  (if slideVariant == RevealJsSlides
+                      then -- set boolean options explicitly, since
+                           -- template can't distinguish False/undefined
+                         defField "controls" true .
+                         defField "controlsTutorial" true .
+                         defField "controlsLayout" ("bottom-right" :: Text) .
+                         defField "controlsBackArrows" ("faded" :: Text) .
+                         defField "progress" true .
+                         defField "slideNumber" false .
+                         defField "showSlideNumber" ("all" :: Text) .
+                         defField "hashOneBasedIndex" false .
+                         defField "hash" false .
+                         defField "respondToHashChanges" true .
+                         defField "history" false .
+                         defField "keyboard" true .
+                         defField "overview" true .
+                         defField "disableLayout" false .
+                         defField "center" true .
+                         defField "touch" true .
+                         defField "loop" false .
+                         defField "rtl" false .
+                         defField "navigationMode" ("default" :: Text) .
+                         defField "shuffle" false .
+                         defField "fragments" true .
+                         defField "fragmentInURL" true .
+                         defField "embedded" false .
+                         defField "help" true .
+                         defField "pause" true .
+                         defField "showNotes" false .
+                         defField "autoPlayMedia" ("null" :: Text) .
+                         defField "preloadIframes" ("null" :: Text) .
+                         defField "autoSlide" ("0" :: Text) .
+                         defField "autoSlideStoppable" true .
+                         defField "autoSlideMethod" ("null" :: Text) .
+                         defField "defaultTiming" ("null" :: Text) .
+                         defField "mouseWheel" false .
+                         defField "display" ("block" :: Text) .
+                         defField "hideInactiveCursor" true .
+                         defField "hideCursorTime" ("5000" :: Text) .
+                         defField "previewLinks" false .
+                         defField "transition" ("slide" :: Text) .
+                         defField "transitionSpeed" ("default" :: Text) .
+                         defField "backgroundTransition" ("fade" :: Text) .
+                         defField "viewDistance" ("3" :: Text) .
+                         defField "mobileViewDistance" ("2" :: Text)
+                      else id) .
                   defField "document-css" (isNothing mCss && slideVariant == NoSlides) .
                   defField "quotes" (stQuotes st) .
                   -- for backwards compatibility we populate toc
-- 
cgit v1.2.3


From eee648447a05accb5680b5a1a6e69a7765af2db5 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 18:25:07 -0700
Subject: LaTeX writer: Use `\strut` instead of `~` before `\\` in empty line.

---
 src/Text/Pandoc/Writers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 978f94ea0..063e347fb 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -919,7 +919,7 @@ inlineToLaTeX il@(RawInline f str) = do
 inlineToLaTeX LineBreak = do
   emptyLine <- gets stEmptyLine
   setEmptyLine True
-  return $ (if emptyLine then "~" else "") <> "\\\\" <> cr
+  return $ (if emptyLine then "\\strut " else "") <> "\\\\" <> cr
 inlineToLaTeX SoftBreak = do
   wrapText <- gets (writerWrapText . stOptions)
   case wrapText of
-- 
cgit v1.2.3


From ed3974a254c3e0c4e7a34d5d25ddef90c25d2092 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 18:25:36 -0700
Subject: LaTeX writer: always use a minipage for cells with line breaks...

if width information is available.  Otherwise the way we treat them can
lead to content that overflows a cell.

Closes #7393.
---
 src/Text/Pandoc/Writers/LaTeX/Table.hs | 9 +++++++--
 test/command/7272.md                   | 5 ++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index 8dc7d1162..abdc26649 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -26,7 +26,8 @@ import Text.DocLayout
   ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest
   , text, vcat, ($$) )
 import Text.Pandoc.Shared (blocksToInlines, splitBy, tshow)
-import Text.Pandoc.Walk (walk)
+import Text.Pandoc.Walk (walk, query)
+import Data.Monoid (Any(..))
 import Text.Pandoc.Writers.LaTeX.Caption (getCaption)
 import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX)
 import Text.Pandoc.Writers.LaTeX.Types
@@ -261,8 +262,12 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do
         Para{}  -> True
         Plain{} -> True
         _       -> False
+  let hasLineBreak LineBreak = Any True
+      hasLineBreak _ = Any False
   result <-
-    if not hasWidths || (celltype /= HeaderCell && all isPlainOrPara blocks)
+    if not hasWidths || (celltype /= HeaderCell
+                           && all isPlainOrPara blocks
+                           && not (getAny (query hasLineBreak blocks)))
        then
          blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
        else do
diff --git a/test/command/7272.md b/test/command/7272.md
index d3a3b2137..3b9064c9c 100644
--- a/test/command/7272.md
+++ b/test/command/7272.md
@@ -18,7 +18,10 @@
   >{\raggedright\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{1.00}}@{}}
 \toprule
 \endhead
-{\vtop{\hbox{\strut  text}\hbox{\strut text2 }}} \\
+\begin{minipage}[t]{\linewidth}\raggedright
+{ text\\
+text2 }
+\end{minipage} \\
 \bottomrule
 \end{longtable}
 ```
-- 
cgit v1.2.3


From 0352f7845bfa2053797850c3639414978285b63e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 22:35:07 -0700
Subject: Improve emailAddress in Text.Pandoc.Parsing.

Previously the parser would accept characters in domains
that are illegal in domains, and this sometimes caused it
to gobble bits of the following text.

Closes #7398.

Note that this change, by itself, caused some txt2tag reader
tests to fail. txt2tags allows bare email addresses with
a following form query.  So, in addition to the change
to emailAddress, we modify the txt2tags parser so it can
still handle these cases.
---
 src/Text/Pandoc/Parsing.hs          |  7 +++----
 src/Text/Pandoc/Readers/Txt2Tags.hs | 22 +++++++++++++++++++++-
 2 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 0bb794ba1..082d9565b 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -693,13 +693,12 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
        mailbox           = intercalate "." <$> (emailWord `sepBy1'` dot)
        domain            = intercalate "." <$> (subdomain `sepBy1'` dot)
        dot               = char '.'
-       subdomain         = many1 $ alphaNum <|> innerPunct
+       subdomain         = many1 $ alphaNum <|> innerPunct (=='-')
        -- this excludes some valid email addresses, since an
        -- email could contain e.g. '__', but gives better results
        -- for our purposes, when combined with markdown parsing:
-       innerPunct        = try (satisfy (\c -> isEmailPunct c || c == '@')
-                                 <* notFollowedBy space
-                                 <* notFollowedBy (satisfy isPunctuation))
+       innerPunct f      = try (satisfy f
+                                 <* notFollowedBy (satisfy (not . isAlphaNum)))
        -- technically an email address could begin with a symbol,
        -- but allowing this creates too many problems.
        -- See e.g. https://github.com/jgm/pandoc/issues/2940
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 6f92f0063..b5cf5a0f3 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -478,9 +478,29 @@ macro = try $ do
 -- raw URLs in text are automatically linked
 url :: T2T Inlines
 url = try $ do
-  (rawUrl, escapedUrl) <- try uri <|> emailAddress
+  (rawUrl, escapedUrl) <- try uri <|> emailAddress'
   return $ B.link rawUrl "" (B.str escapedUrl)
 
+emailAddress' :: T2T (Text, Text)
+emailAddress' = do
+  (base, mailURI) <- emailAddress
+  query <- option "" emailQuery
+  return (base <> query, mailURI <> query)
+
+emailQuery :: T2T Text
+emailQuery = do
+  char '?'
+  parts <- kv `sepBy1` (char '&')
+  return $ "?" <> T.intercalate "&" parts
+
+kv :: T2T Text
+kv = do
+  k <- T.pack <$> many1 alphaNum
+  char '='
+  let vchar = alphaNum <|> try (oneOf "%._/~:,=$@&+-;*" <* lookAhead alphaNum)
+  v <- T.pack <$> many1 vchar
+  return (k <> "=" <> v)
+
 uri :: T2T (Text, Text)
 uri = try $ do
   address <- t2tURI
-- 
cgit v1.2.3


From e2a7ecb5f73b12c8141ebf873a494652fc53babd Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 23:17:43 -0700
Subject: LaTeX writer: put a strut after a line break (`\\`).

This ensures that we have proper spacing before the next
line (which might e.g. be a table bottom border).
This gives better results in cases like test/command/7272.md.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 063e347fb..32d1bc2a5 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -919,7 +919,7 @@ inlineToLaTeX il@(RawInline f str) = do
 inlineToLaTeX LineBreak = do
   emptyLine <- gets stEmptyLine
   setEmptyLine True
-  return $ (if emptyLine then "\\strut " else "") <> "\\\\" <> cr
+  return $ (if emptyLine then "\\strut " else "") <> "\\\\ \\strut" <> cr
 inlineToLaTeX SoftBreak = do
   wrapText <- gets (writerWrapText . stOptions)
   case wrapText of
-- 
cgit v1.2.3


From 9867231779c32ddc2cce85d869de2ef586707244 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 23:19:40 -0700
Subject: Revert "LaTeX writer: put a strut after a line break (`\\`)."

This reverts commit e2a7ecb5f73b12c8141ebf873a494652fc53babd.
---
 src/Text/Pandoc/Writers/LaTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 32d1bc2a5..063e347fb 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -919,7 +919,7 @@ inlineToLaTeX il@(RawInline f str) = do
 inlineToLaTeX LineBreak = do
   emptyLine <- gets stEmptyLine
   setEmptyLine True
-  return $ (if emptyLine then "\\strut " else "") <> "\\\\ \\strut" <> cr
+  return $ (if emptyLine then "\\strut " else "") <> "\\\\" <> cr
 inlineToLaTeX SoftBreak = do
   wrapText <- gets (writerWrapText . stOptions)
   case wrapText of
-- 
cgit v1.2.3


From 8eed5b90d09a4a0c2592c92215fa96c69cf35234 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 21 Jun 2021 23:31:27 -0700
Subject: LaTeX writer: add strut at end of minipage if it contains...

line breaks.  Without them, the last line is shorter
than it should be, at least in some cases.
---
 src/Text/Pandoc/Writers/LaTeX/Table.hs | 7 +++++--
 test/command/7272.md                   | 2 +-
 test/tables/nordics.latex              | 8 ++++----
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs
index abdc26649..27a8a0257 100644
--- a/src/Text/Pandoc/Writers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs
@@ -264,10 +264,11 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do
         _       -> False
   let hasLineBreak LineBreak = Any True
       hasLineBreak _ = Any False
+  let hasLineBreaks = getAny $ query hasLineBreak blocks
   result <-
     if not hasWidths || (celltype /= HeaderCell
                            && all isPlainOrPara blocks
-                           && not (getAny (query hasLineBreak blocks)))
+                           && not hasLineBreaks)
        then
          blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks
        else do
@@ -280,7 +281,9 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do
          let halign = literal $ alignCommand align
          return $ "\\begin{minipage}" <> valign <>
                   braces "\\linewidth" <> halign <> cr <>
-                  cellContents <> cr <>
+                  cellContents <>
+                  (if hasLineBreaks then "\\strut" else mempty)
+                  <> cr <>
                   "\\end{minipage}"
   modify $ \st -> st{ stExternalNotes = externalNotes }
   when (rowspan /= RowSpan 1) $
diff --git a/test/command/7272.md b/test/command/7272.md
index 3b9064c9c..d27b25143 100644
--- a/test/command/7272.md
+++ b/test/command/7272.md
@@ -20,7 +20,7 @@
 \endhead
 \begin{minipage}[t]{\linewidth}\raggedright
 { text\\
-text2 }
+text2 }\strut
 \end{minipage} \\
 \bottomrule
 \end{longtable}
diff --git a/test/tables/nordics.latex b/test/tables/nordics.latex
index 1dcac7319..6f17a163e 100644
--- a/test/tables/nordics.latex
+++ b/test/tables/nordics.latex
@@ -11,10 +11,10 @@ Name
 Capital
 \end{minipage} & \begin{minipage}[b]{\linewidth}\centering
 Population\\
-(in 2018)
+(in 2018)\strut
 \end{minipage} & \begin{minipage}[b]{\linewidth}\centering
 Area\\
-(in km\textsuperscript{2})
+(in km\textsuperscript{2})\strut
 \end{minipage} \\
 \midrule
 \endfirsthead
@@ -25,10 +25,10 @@ Name
 Capital
 \end{minipage} & \begin{minipage}[b]{\linewidth}\centering
 Population\\
-(in 2018)
+(in 2018)\strut
 \end{minipage} & \begin{minipage}[b]{\linewidth}\centering
 Area\\
-(in km\textsuperscript{2})
+(in km\textsuperscript{2})\strut
 \end{minipage} \\
 \midrule
 \endhead
-- 
cgit v1.2.3


From 086790d986af35a5e6d68013a2c15ae10511db40 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 22 Jun 2021 09:49:24 -0700
Subject: Fix unneeded import

---
 src/Text/Pandoc/Parsing.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 082d9565b..09445622d 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -188,7 +188,7 @@ import Control.Monad.Identity
 import Control.Monad.Reader
     ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) )
 import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower,
-                  isPunctuation, isSpace, ord, toLower, toUpper)
+                  isSpace, ord, toLower, toUpper)
 import Data.Default ( Default(..) )
 import Data.Functor (($>))
 import Data.List (intercalate, transpose)
-- 
cgit v1.2.3


From 1b07997f4a6870650f20702ed6d962f9471e3d40 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 22 Jun 2021 09:55:50 -0700
Subject: Fix regression with comment-only YAML metadata blocks.

Closes #7400.
---
 src/Text/Pandoc/Readers/Metadata.hs | 3 +++
 test/command/7400.md                | 9 +++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 test/command/7400.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs
index 45eddf25a..cbc523b25 100644
--- a/src/Text/Pandoc/Readers/Metadata.hs
+++ b/src/Text/Pandoc/Readers/Metadata.hs
@@ -45,6 +45,9 @@ yamlBsToMeta pMetaValue bstr = do
        Right [] -> return . return $ mempty
        Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
                 -> return . return $ mempty
+       -- the following is what we get from a comment:
+       Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))]
+                -> return . return $ mempty
        Right _  -> Prelude.fail "expected YAML object"
        Left (yamlpos, err')
                 -> do pos <- getPosition
diff --git a/test/command/7400.md b/test/command/7400.md
new file mode 100644
index 000000000..d4be32d72
--- /dev/null
+++ b/test/command/7400.md
@@ -0,0 +1,9 @@
+```
+% pandoc -t native -s
+---
+# Comment only
+...
+^D
+Pandoc (Meta {unMeta = fromList []})
+[]
+```
-- 
cgit v1.2.3


From 235cdea629de95f5a6c40a043e8dff6b19a46d3d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 23 Jun 2021 09:54:14 -0700
Subject: reveal.js writer: Go back to setting boolean values for variables.

In a previous commit we used strings because boolean False
wouldn't render as `false`. This is changed in the dev
version ofdoctemplates, so we can go back to the more
straightforward approach.
---
 src/Text/Pandoc/Writers/HTML.hs | 56 +++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 30 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 4d513df3b..b99b1a413 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -317,10 +317,6 @@ pandocToHtml opts (Pandoc meta blocks) = do
                           | otherwise -> mempty
                     Nothing -> mempty
   let mCss :: Maybe [Text] = lookupContext "css" metadata
-  let true :: Text
-      true = "true"
-  let false :: Text
-      false = "false"
   let context =   (if stHighlighting st
                       then case writerHighlightStyle opts of
                                 Just sty -> defField "highlighting-css"
@@ -351,43 +347,43 @@ pandocToHtml opts (Pandoc meta blocks) = do
                   (if slideVariant == RevealJsSlides
                       then -- set boolean options explicitly, since
                            -- template can't distinguish False/undefined
-                         defField "controls" true .
-                         defField "controlsTutorial" true .
+                         defField "controls" True .
+                         defField "controlsTutorial" True .
                          defField "controlsLayout" ("bottom-right" :: Text) .
                          defField "controlsBackArrows" ("faded" :: Text) .
-                         defField "progress" true .
-                         defField "slideNumber" false .
+                         defField "progress" True .
+                         defField "slideNumber" False .
                          defField "showSlideNumber" ("all" :: Text) .
-                         defField "hashOneBasedIndex" false .
-                         defField "hash" false .
-                         defField "respondToHashChanges" true .
-                         defField "history" false .
-                         defField "keyboard" true .
-                         defField "overview" true .
-                         defField "disableLayout" false .
-                         defField "center" true .
-                         defField "touch" true .
-                         defField "loop" false .
-                         defField "rtl" false .
+                         defField "hashOneBasedIndex" False .
+                         defField "hash" False .
+                         defField "respondToHashChanges" True .
+                         defField "history" False .
+                         defField "keyboard" True .
+                         defField "overview" True .
+                         defField "disableLayout" False .
+                         defField "center" True .
+                         defField "touch" True .
+                         defField "loop" False .
+                         defField "rtl" False .
                          defField "navigationMode" ("default" :: Text) .
-                         defField "shuffle" false .
-                         defField "fragments" true .
-                         defField "fragmentInURL" true .
-                         defField "embedded" false .
-                         defField "help" true .
-                         defField "pause" true .
-                         defField "showNotes" false .
+                         defField "shuffle" False .
+                         defField "fragments" True .
+                         defField "fragmentInURL" True .
+                         defField "embedded" False .
+                         defField "help" True .
+                         defField "pause" True .
+                         defField "showNotes" False .
                          defField "autoPlayMedia" ("null" :: Text) .
                          defField "preloadIframes" ("null" :: Text) .
                          defField "autoSlide" ("0" :: Text) .
-                         defField "autoSlideStoppable" true .
+                         defField "autoSlideStoppable" True .
                          defField "autoSlideMethod" ("null" :: Text) .
                          defField "defaultTiming" ("null" :: Text) .
-                         defField "mouseWheel" false .
+                         defField "mouseWheel" False .
                          defField "display" ("block" :: Text) .
-                         defField "hideInactiveCursor" true .
+                         defField "hideInactiveCursor" True .
                          defField "hideCursorTime" ("5000" :: Text) .
-                         defField "previewLinks" false .
+                         defField "previewLinks" False .
                          defField "transition" ("slide" :: Text) .
                          defField "transitionSpeed" ("default" :: Text) .
                          defField "backgroundTransition" ("fade" :: Text) .
-- 
cgit v1.2.3


From 4a7a0cff294c48d1acd690ea593f4ab2c817ec27 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 23 Jun 2021 11:39:50 -0700
Subject: ImageSize: Add Tiff constructor for ImageType.

[Minor API change]

This allows pandoc to get size information from tiff images.
Closes #7405.
---
 src/Text/Pandoc/ImageSize.hs                 | 6 +++++-
 src/Text/Pandoc/Writers/Docx.hs              | 1 +
 src/Text/Pandoc/Writers/Powerpoint/Output.hs | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index bb1aa6351..2b7d10611 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -57,7 +57,8 @@ import Codec.Picture (decodeImageWithMetadata)
 -- quick and dirty functions to get image sizes
 -- algorithms borrowed from wwwis.pl
 
-data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf deriving Show
+data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf | Tiff
+                 deriving Show
 data Direction = Width | Height
 instance Show Direction where
   show Width  = "width"
@@ -100,6 +101,8 @@ imageType :: ByteString -> Maybe ImageType
 imageType img = case B.take 4 img of
                      "\x89\x50\x4e\x47" -> return Png
                      "\x47\x49\x46\x38" -> return Gif
+                     "\x49\x49\x2a\x00" -> return Tiff
+                     "\x4D\x4D\x00\x2a" -> return Tiff
                      "\xff\xd8\xff\xe0" -> return Jpeg  -- JFIF
                      "\xff\xd8\xff\xe1" -> return Jpeg  -- Exif
                      "%PDF"             -> return Pdf
@@ -124,6 +127,7 @@ imageSize opts img = checkDpi <$>
        Just Png  -> getSize img
        Just Gif  -> getSize img
        Just Jpeg -> getSize img
+       Just Tiff -> getSize img
        Just Svg  -> mbToEither "could not determine SVG size" $ svgSize opts img
        Just Eps  -> mbToEither "could not determine EPS size" $ epsSize img
        Just Pdf  -> mbToEither "could not determine PDF size" $ pdfSize img
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index e7a49ba02..d1065eb7d 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1289,6 +1289,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
             Just Eps  -> ".eps"
             Just Svg  -> ".svg"
             Just Emf  -> ".emf"
+            Just Tiff -> ".tiff"
             Nothing   -> ""
         imgpath = "media/" <> ident <> imgext
         mbMimeType = mt <|> getMimeType (T.unpack imgpath)
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index f2f54a91c..157810216 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -537,6 +537,7 @@ registerMedia fp caption = do
                  Just Eps  -> Just ".eps"
                  Just Svg  -> Just ".svg"
                  Just Emf  -> Just ".emf"
+                 Just Tiff -> Just ".tiff"
                  Nothing   -> Nothing
 
   let newGlobalId = fromMaybe (maxGlobalId + 1) (M.lookup fp globalIds)
-- 
cgit v1.2.3


From dd098d4e15090d12cc71301f91a159c5bfb29b50 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 28 Jun 2021 11:27:37 -0700
Subject: Markdown writer: put space between Plain and following fenced Div.

Closes #4465.
---
 src/Text/Pandoc/Writers/Markdown.hs |  3 +++
 test/command/4465.md                | 15 +++++++++++++++
 2 files changed, 18 insertions(+)
 create mode 100644 test/command/4465.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index b13ab57ee..fda2bbcef 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -791,6 +791,9 @@ blockListToMarkdown opts blocks = do
            b1 : commentSep : fixBlocks (b2:bs)
       fixBlocks (Plain ils : bs@(RawBlock{}:_)) =
            Plain ils : fixBlocks bs
+      fixBlocks (Plain ils : bs@(Div{}:_))
+          | isEnabled Ext_fenced_divs opts =
+           Para ils : fixBlocks bs
       fixBlocks (Plain ils : bs) | inlist =
            Plain ils : fixBlocks bs
       fixBlocks (Plain ils : bs) =
diff --git a/test/command/4465.md b/test/command/4465.md
new file mode 100644
index 000000000..eaffcf7f3
--- /dev/null
+++ b/test/command/4465.md
@@ -0,0 +1,15 @@
+```
+% pandoc -f html -t markdown
+<ol>
+  <li>An ordered list can contain block-level elements ind html, it means that divs are also allowed.</li>
+  <li>Let's see the problem! <div class="example">This is an example.</div></li>
+</ol>
+^D
+1.  An ordered list can contain block-level elements ind html, it means
+    that divs are also allowed.
+2.  Let\'s see the problem!
+
+    ::: example
+    This is an example.
+    :::
+```
-- 
cgit v1.2.3


From 4262898fe9cc71abecffc1ffa165dd714c04407d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 28 Jun 2021 13:28:02 -0700
Subject: Set proper initial source name in parsing BibTeX.

(For better error messages.)
---
 src/Text/Pandoc/Citeproc/BibTeX.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index b17240557..1fe4a2730 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -71,9 +71,11 @@ readBibtexString variant locale idpred contents = do
                       filter (\item -> idpred (identifier item) &&
                                         entryType item /= "xdata"))
            (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
-           "" (toSources contents) of
+           (initialSourceName sources) sources of
           Left err -> Left err
           Right xs -> return xs
+ where
+  sources = toSources contents
 
 -- | Write BibTeX or BibLaTeX given given a 'Reference'.
 writeBibtexString :: WriterOptions       -- ^ options (for writing LaTex)
-- 
cgit v1.2.3


From f045e59248116228093bfebf03ce9f7f4f0cc4ce Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 28 Jun 2021 13:28:22 -0700
Subject: Text.Pandoc.Error: fix line calculations in reporting parsec errors.

Also remove a spurious initial newline in the error report.
---
 src/Text/Pandoc/Error.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 81eb41f85..9dee8356b 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -97,13 +97,13 @@ renderError e =
                 []  -> ""
                 ((pos,txt):_) ->
                   let ls = T.lines txt <> [""]
-                      ln = errLine - sourceLine pos
-                   in if length ls > ln - 1
+                      ln = (errLine - sourceLine pos) + 1
+                   in if length ls > ln && ln >= 1
                          then T.concat ["\n", ls !! (ln - 1)
                                        ,"\n", T.replicate (errColumn - 1) " "
                                        ,"^"]
                          else ""
-        in  "\nError at " <> tshow  err' <> errorInFile
+        in  "Error at " <> tshow  err' <> errorInFile
     PandocMakePDFError s -> s
     PandocOptionError s -> s
     PandocSyntaxMapError s -> s
-- 
cgit v1.2.3


From 97b0aa667cce07e6552d0abb4f93469ba14f4eb0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 28 Jun 2021 13:34:12 -0700
Subject: Allow `$` characters in bibtex keys.

Closes #7409.
---
 src/Text/Pandoc/Citeproc/BibTeX.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 1fe4a2730..c178de6e9 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -868,7 +868,7 @@ fieldName = resolveAlias . T.toLower
 
 isBibtexKeyChar :: Char -> Bool
 isBibtexKeyChar c =
-  isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char])
+  isAlphaNum c || c `elem` (".:;?!`'()$/*@_+=-[]*&" :: [Char])
 
 bibItem :: BibParser Item
 bibItem = do
-- 
cgit v1.2.3


From 851d037b3eee4516fde50b81eb8a0fc9b2f1545b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 28 Jun 2021 22:41:14 -0700
Subject: Improve punctuation moving with `--citeproc`.

Previously, using `--citeproc` could cause punctuation to move in
quotes even when there aer no citations. This has been changed;
now, punctuation moving is limited to citations.

In addition, we only move footnotes around punctuation if the
style is a note style, even if `notes-after-punctuation` is `true`.
---
 src/Text/Pandoc/Citeproc.hs         | 29 +++++++++++++++--------------
 test/command/6890.md                |  4 ++--
 test/command/pandoc-citeproc-322.md |  2 +-
 3 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index a5b26c9b4..a2fca106a 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -90,21 +90,15 @@ processCitations (Pandoc meta bs) = do
                          walk (convertQuotes locale) .
                          insertSpace $ out)
                       (resultBibliography result)
-  let moveNotes = maybe True truish $
-                        lookupMeta "notes-after-punctuation" meta
+  let moveNotes = styleIsNoteStyle sopts &&
+           maybe True truish (lookupMeta "notes-after-punctuation" meta)
   let cits = map (walk (convertQuotes locale)) $
                resultCitations result
 
-  let fixQuotes = case localePunctuationInQuote locale of
-                    Just True ->
-                      B.toList . movePunctuationInsideQuotes .  B.fromList
-                    _ -> id
-
   let metanocites = lookupMeta "nocite" meta
   let Pandoc meta'' bs' =
          maybe id (setMeta "nocite") metanocites .
-         walk (map capitalizeNoteCitation .
-                fixQuotes .  mvPunct moveNotes locale) .
+         walk (map capitalizeNoteCitation .  mvPunct moveNotes locale) .
          walk deNote .
          evalState (walkM insertResolvedCitations $ Pandoc meta' bs)
          $ cits
@@ -375,7 +369,6 @@ formatFromExtension fp = case dropWhile (== '.') $ takeExtension fp of
 
 
 isNote :: Inline -> Bool
-isNote (Note _)          = True
 isNote (Cite _ [Note _]) = True
  -- the following allows citation styles that are "in-text" but use superscript
  -- references to be treated as if they are "notes" for the purposes of moving
@@ -388,6 +381,12 @@ isSpacy Space     = True
 isSpacy SoftBreak = True
 isSpacy _         = False
 
+movePunctInsideQuotes :: Locale -> [Inline] -> [Inline]
+movePunctInsideQuotes locale
+  | localePunctuationInQuote locale == Just True
+    = B.toList . movePunctuationInsideQuotes . B.fromList
+  | otherwise
+    = id
 
 mvPunct :: Bool -> Locale -> [Inline] -> [Inline]
 mvPunct moveNotes locale (x : xs)
@@ -400,7 +399,8 @@ mvPunct moveNotes locale (q : s : x : ys)
     in  if moveNotes
            then if T.null spunct
                    then q : x : mvPunct moveNotes locale ys
-                   else q : Str spunct : x : mvPunct moveNotes locale
+                   else movePunctInsideQuotes locale
+                        [q , Str spunct , x] ++ mvPunct moveNotes locale
                         (B.toList
                           (dropTextWhile isPunctuation (B.fromList ys)))
            else q : x : mvPunct moveNotes locale ys
@@ -412,9 +412,10 @@ mvPunct moveNotes locale (Cite cs ils : ys)
    , moveNotes
    = let s = stringify ys
          spunct = T.takeWhile isPunctuation s
-     in  Cite cs (init ils
-                  ++ [Str spunct | not (endWithPunct False (init ils))]
-                  ++ [last ils]) :
+     in  Cite cs (movePunctInsideQuotes locale $
+                    init ils
+                    ++ [Str spunct | not (endWithPunct False (init ils))]
+                    ++ [last ils]) :
          mvPunct moveNotes locale
            (B.toList (dropTextWhile isPunctuation (B.fromList ys)))
 mvPunct moveNotes locale (s : x : ys) | isSpacy s, isNote x =
diff --git a/test/command/6890.md b/test/command/6890.md
index e4129e2a9..e36c12771 100644
--- a/test/command/6890.md
+++ b/test/command/6890.md
@@ -23,12 +23,12 @@ references:
 
 @fruchtel-sozialer-2013a
 
-Some text [^1].
+Some text.[^1]
 
 [^1]: @fruchtel-sozialer-2013a
 ^D
 [Para [Cite [Citation {citationId = "fruchtel-sozialer-2013a", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0}] [Str "Fr\252chtel,",Space,Str "Budde,",Space,Str "and",Space,Str "Cyprian",Space,Str "(2013)"]]
-,Para [Str "Some",Space,Str "text",Str ".",Note [Para [Cite [Citation {citationId = "fruchtel-sozialer-2013a", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "Fr\252chtel,",Space,Str "Budde,",Space,Str "and",Space,Str "Cyprian",Space,Str "(2013)"]]],Str ""]
+,Para [Str "Some",Space,Str "text.",Note [Para [Cite [Citation {citationId = "fruchtel-sozialer-2013a", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "Fr\252chtel,",Space,Str "Budde,",Space,Str "and",Space,Str "Cyprian",Space,Str "(2013)"]]]]
 ,Div ("refs",["references","csl-bib-body","hanging-indent"],[])
  [Div ("ref-fruchtel-sozialer-2013a",["csl-entry"],[])
   [Para [Str "Fr\252chtel,",Space,Str "Frank,",Space,Str "Wolfgang",Space,Str "Budde,",Space,Str "and",Space,Str "Gudrun",Space,Str "Cyprian.",Space,Str "2013.",Space,Emph [Str "Sozialer",Space,Str "Raum",Space,Str "und",Space,Str "Soziale",Space,Str "Arbeit",Space,Str "Fieldbook:",Space,Str "Methoden",Space,Str "und",Space,Str "Techniken"],Str ".",Space,Str "3rd",Space,Str "ed.",Space,Str "Wiesbaden,",Space,Str "Germany:",Space,Str "Springer",Space,Str "VS."]]]]
diff --git a/test/command/pandoc-citeproc-322.md b/test/command/pandoc-citeproc-322.md
index 78494f0c4..c70eae755 100644
--- a/test/command/pandoc-citeproc-322.md
+++ b/test/command/pandoc-citeproc-322.md
@@ -19,7 +19,7 @@ references:
   type: 'article-journal'
 ---
 
-Foo[@timmory__justice_1950].
+Foo [@timmory__justice_1950].
 ^D
 Foo.[^1]
 
-- 
cgit v1.2.3


From f4ef652a4165fefe4b587882cc75d1e24971ef1f Mon Sep 17 00:00:00 2001
From: Aner Lucero <4rgento@gmail.com>
Date: Tue, 29 Jun 2021 08:49:36 -0300
Subject: Remove duplicated alt text in HTML output.

---
 src/Text/Pandoc/Writers/HTML.hs |  5 +++--
 test/command/7416.md            | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 test/command/7416.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index b99b1a413..df01fc35f 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -1433,7 +1433,7 @@ inlineToHtml opts inline = do
                         return $ if T.null tit
                                     then link'
                                     else link' ! A.title (toValue tit)
-    (Image attr txt (s,tit)) -> do
+    (Image attr@(_, _, attrList) txt (s, tit)) -> do
                         let alternate = stringify txt
                         slideVariant <- gets stSlideVariant
                         let isReveal = slideVariant == RevealJsSlides
@@ -1446,7 +1446,8 @@ inlineToHtml opts inline = do
                               [A.title $ toValue tit | not (T.null tit)] ++
                               attrs
                             imageTag = (if html5 then H5.img else H.img
-                              , [A.alt $ toValue alternate | not (null txt)] )
+                              , [A.alt $ toValue alternate | not (null txt) &&
+                                  isNothing (lookup "alt" attrList)] )
                             mediaTag tg fallbackTxt =
                               let linkTxt = if null txt
                                             then fallbackTxt
diff --git a/test/command/7416.md b/test/command/7416.md
new file mode 100644
index 000000000..70a0257fa
--- /dev/null
+++ b/test/command/7416.md
@@ -0,0 +1,19 @@
+```
+% pandoc -f markdown -t html
+![caption](../media/rId25.jpg "title"){alt="alt"}
+
+^D
+<figure>
+<img src="../media/rId25.jpg" title="title" alt="alt" /><figcaption aria-hidden="true">caption</figcaption>
+</figure>
+```
+
+```
+% pandoc -f markdown -t html
+![caption](../media/rId25.jpg "title")
+
+^D
+<figure>
+<img src="../media/rId25.jpg" title="title" alt="caption" /><figcaption aria-hidden="true">caption</figcaption>
+</figure>
+```
-- 
cgit v1.2.3


From a3d745e48560a55d9a9ea9fa43ffdd5a8b84987f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 29 Jun 2021 09:44:37 -0700
Subject: Docx writer: support figure numbers.

These are set up in such a way that they will work with Word's
automatic table of figures.

Closes #7392.
---
 src/Text/Pandoc/Writers/Docx.hs       |  22 +++++++++++++++++++---
 src/Text/Pandoc/Writers/Docx/Types.hs |   2 ++
 test/docx/golden/image.docx           | Bin 26647 -> 26774 bytes
 3 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index d1065eb7d..b3e008b8a 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,8 @@ import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
 import Text.Collate.Lang (renderLang)
-import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang, translateTerm)
+import qualified Text.Pandoc.Translations as Term
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
 import Text.Pandoc.UTF8 (fromTextLazy)
@@ -854,14 +855,29 @@ blockToOpenXML' opts (Plain lst) = do
 -- title beginning with fig: indicates that the image is a figure
 blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do
   setFirstPara
+  fignum <- gets stNextFigureNum
+  modify $ \st -> st{ stNextFigureNum = fignum + 1 }
+  let figid = "fig" <> tshow fignum
+  figname <- translateTerm Term.Figure
   prop <- pStyleM $
         if null alt
         then "Figure"
         else "Captioned Figure"
   paraProps <- local (\env -> env { envParaProperties = EnvProps (Just prop) [] <> envParaProperties env }) (getParaProps False)
   contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
-  captionNode <- withParaPropM (pStyleM "Image Caption")
-                 $ blockToOpenXML opts (Para alt)
+  captionNode <- if null alt
+                    then return []
+                    else withParaPropM (pStyleM "Image Caption")
+                         $ blockToOpenXML opts
+                            (Para $ Span (figid,[],[])
+                               [Str "Figure\160",
+                                RawInline (Format "openxml")
+                                ("<w:fldSimple w:instr=\"SEQ "
+                                <> figname
+                                <> " \\* ARABIC \"><w:r><w:t>"
+                                <> tshow fignum
+                                <> "</w:t></w:r></w:fldSimple>"),
+                                Str ":", Space] : alt)
   return $
     Elem (mknode "w:p" [] (map Elem paraProps ++ contents))
     : captionNode
diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs
index 006584c30..36ac45ad2 100644
--- a/src/Text/Pandoc/Writers/Docx/Types.hs
+++ b/src/Text/Pandoc/Writers/Docx/Types.hs
@@ -117,6 +117,7 @@ data WriterState = WriterState{
        , stDynamicParaProps :: Set.Set ParaStyleName
        , stDynamicTextProps :: Set.Set CharStyleName
        , stCurId          :: Int
+       , stNextFigureNum  :: Int
        }
 
 defaultWriterState :: WriterState
@@ -137,6 +138,7 @@ defaultWriterState = WriterState{
       , stDynamicParaProps = Set.empty
       , stDynamicTextProps = Set.empty
       , stCurId          = 20
+      , stNextFigureNum  = 1
       }
 
 setFirstPara :: PandocMonad m => WS m ()
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 48b72e283..9fe65326f 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
-- 
cgit v1.2.3


From a01ba4463f1f0d14a8032f147cddb76dadb4b853 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 29 Jun 2021 11:15:13 -0700
Subject: Docx writer: Fixed a couple bugs in Figure numbering.

---
 src/Text/Pandoc/Writers/Docx.hs | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index b3e008b8a..a3c4b6be1 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -856,7 +856,7 @@ blockToOpenXML' opts (Plain lst) = do
 blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do
   setFirstPara
   fignum <- gets stNextFigureNum
-  modify $ \st -> st{ stNextFigureNum = fignum + 1 }
+  unless (null alt) $ modify $ \st -> st{ stNextFigureNum = fignum + 1 }
   let figid = "fig" <> tshow fignum
   figname <- translateTerm Term.Figure
   prop <- pStyleM $
@@ -870,10 +870,9 @@ blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit
                     else withParaPropM (pStyleM "Image Caption")
                          $ blockToOpenXML opts
                             (Para $ Span (figid,[],[])
-                               [Str "Figure\160",
+                               [Str (figname <> "\160"),
                                 RawInline (Format "openxml")
-                                ("<w:fldSimple w:instr=\"SEQ "
-                                <> figname
+                                ("<w:fldSimple w:instr=\"SEQ Figure"
                                 <> " \\* ARABIC \"><w:r><w:t>"
                                 <> tshow fignum
                                 <> "</w:t></w:r></w:fldSimple>"),
-- 
cgit v1.2.3


From 0948af9cc549f0ea3b85fa760aa521b8deaad2c0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 29 Jun 2021 11:15:40 -0700
Subject: Docx writer: Add table numbering for captioned tables.

The numbers are added using fields, so that Word can
create a list of tables that will update automatically.
---
 src/Text/Pandoc/Writers/Docx/Table.hs |  31 ++++++++++++++++++++++++++++---
 src/Text/Pandoc/Writers/Docx/Types.hs |   2 ++
 test/Tests/Writers/OOXML.hs           |   4 +++-
 test/docx/golden/image.docx           | Bin 26774 -> 26776 bytes
 4 files changed, 33 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index 49917e315..7a84c5278 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -17,7 +17,7 @@ import Control.Monad.State.Strict
 import Data.Array
 import Data.Text (Text)
 import Text.Pandoc.Definition
-import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm)
 import Text.Pandoc.Writers.Docx.Types
 import Text.Pandoc.Shared
 import Text.Printf (printf)
@@ -25,6 +25,7 @@ import Text.Pandoc.Writers.GridTable hiding (Table)
 import Text.Pandoc.Writers.OOXML
 import Text.Pandoc.XML.Light as XML hiding (Attr)
 import qualified Data.Text as T
+import qualified Text.Pandoc.Translations as Term
 import qualified Text.Pandoc.Writers.GridTable as Grid
 
 tableToOpenXML :: PandocMonad m
@@ -33,15 +34,23 @@ tableToOpenXML :: PandocMonad m
                -> WS m [Content]
 tableToOpenXML blocksToOpenXML gridTable = do
   setFirstPara
-  let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) =
+  let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) =
         gridTable
   let (Caption _maybeShortCaption captionBlocks) = caption
+  tablenum <- gets stNextTableNum
+  unless (null captionBlocks) $
+    modify $ \st -> st{ stNextTableNum = tablenum + 1 }
+  let tableid = if T.null ident
+                   then "table" <> tshow tablenum
+                   else ident
+  tablename <- translateTerm Term.Table
   let captionStr = stringify captionBlocks
   let aligns = map fst $ elems colspecs
   captionXml <- if null captionBlocks
                 then return []
                 else withParaPropM (pStyleM "Table Caption")
-                     $ blocksToOpenXML captionBlocks
+                     $ blocksToOpenXML
+                     $ addLabel tableid tablename tablenum captionBlocks
   -- We set "in table" after processing the caption, because we don't
   -- want the "Table Caption" style to be overwritten with "Compact".
   modify $ \s -> s { stInTable = True }
@@ -81,6 +90,22 @@ tableToOpenXML blocksToOpenXML gridTable = do
   modify $ \s -> s { stInTable = False }
   return $ captionXml ++ [Elem tbl]
 
+addLabel :: Text -> Text -> Int -> [Block] -> [Block]
+addLabel tableid tablename tablenum bs =
+  case bs of
+    (Para ils : rest)  -> Para (label : Space : ils) : rest
+    (Plain ils : rest) -> Plain (label : Space : ils) : rest
+    _ -> Para [label] : bs
+ where
+  label = Span (tableid,[],[])
+            [Str (tablename <> "\160"),
+             RawInline (Format "openxml")
+               ("<w:fldSimple w:instr=\"SEQ Table"
+               <> " \\* ARABIC \"><w:r><w:t>"
+               <> tshow tablenum
+               <> "</w:t></w:r></w:fldSimple>"),
+             Str ":"]
+
 -- | Parts of a table
 data RowType = HeadRow | BodyRow | FootRow
 
diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs
index 36ac45ad2..74b8d2753 100644
--- a/src/Text/Pandoc/Writers/Docx/Types.hs
+++ b/src/Text/Pandoc/Writers/Docx/Types.hs
@@ -118,6 +118,7 @@ data WriterState = WriterState{
        , stDynamicTextProps :: Set.Set CharStyleName
        , stCurId          :: Int
        , stNextFigureNum  :: Int
+       , stNextTableNum   :: Int
        }
 
 defaultWriterState :: WriterState
@@ -139,6 +140,7 @@ defaultWriterState = WriterState{
       , stDynamicTextProps = Set.empty
       , stCurId          = 20
       , stNextFigureNum  = 1
+      , stNextTableNum   = 1
       }
 
 setFirstPara :: PandocMonad m => WS m ()
diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs
index c1e47622d..83f05cfec 100644
--- a/test/Tests/Writers/OOXML.hs
+++ b/test/Tests/Writers/OOXML.hs
@@ -55,7 +55,9 @@ testArchive :: (WriterOptions -> Pandoc -> PandocIO BL.ByteString)
             -> IO Archive
 testArchive writerFn opts fp = do
   txt <- T.readFile fp
-  bs <- runIOorExplode $ readNative def txt >>= writerFn opts
+  bs <- runIOorExplode $ do
+    setTranslations "en-US"
+    readNative def txt >>= writerFn opts
   return $ toArchive bs
 
 compareFileList :: FilePath -> Archive -> Archive -> Maybe String
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 9fe65326f..7c2d8a9ac 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
-- 
cgit v1.2.3


From cb038bb3125028e17a5b05c32495219a17fb6537 Mon Sep 17 00:00:00 2001
From: Aner Lucero <4rgento@gmail.com>
Date: Fri, 2 Jul 2021 11:17:14 -0300
Subject: HTML5 writer, remove aria-hidden when explicit atl text is provided.

---
 src/Text/Pandoc/Writers/HTML.hs | 11 +++++++----
 test/command/7416.md            |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index df01fc35f..7eb8dfe12 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -659,17 +659,20 @@ dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height
 figure :: PandocMonad m
        => WriterOptions -> Attr -> [Inline] -> (Text, Text)
        -> StateT WriterState m Html
-figure opts attr txt (s,tit) = do
+figure opts attr@(_, _, attrList) txt (s,tit) = do
   html5 <- gets stHtml5
   -- Screen-readers will normally read the @alt@ text and the figure; we
   -- want to avoid them reading the same text twice. With HTML5 we can
   -- use aria-hidden for the caption; with HTML4, we use an empty
   -- alt-text instead.
+  -- When the alt text differs from the caption both should be read.
   let alt = if html5 then txt else [Str ""]
   let tocapt = if html5
-                  then H5.figcaption !
-                       H5.customAttribute (textTag "aria-hidden")
-                                          (toValue @Text "true")
+                  then (H5.figcaption !) $
+                       if isJust (lookup "alt" attrList)
+                          then mempty
+                          else H5.customAttribute (textTag "aria-hidden")
+                                                  (toValue @Text "true")
                   else H.p ! A.class_ "caption"
   img <- inlineToHtml opts (Image attr alt (s,tit))
   capt <- if null txt
diff --git a/test/command/7416.md b/test/command/7416.md
index 70a0257fa..2f9577f10 100644
--- a/test/command/7416.md
+++ b/test/command/7416.md
@@ -4,7 +4,7 @@
 
 ^D
 <figure>
-<img src="../media/rId25.jpg" title="title" alt="alt" /><figcaption aria-hidden="true">caption</figcaption>
+<img src="../media/rId25.jpg" title="title" alt="alt" /><figcaption>caption</figcaption>
 </figure>
 ```
 
-- 
cgit v1.2.3


From ff26af59acda8d57d1f0d5faf24a2da203dc2f4e Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 5 Jul 2021 13:18:25 -0700
Subject: Revamp note citation handling.

Use latest citeproc, which uses a Span with a class rather
than a Note for notes.  This helps us distinguish between
user notes and citation notes.

Don't put citations at the beginning of a note in parentheses.
(Closes #7394.)
---
 cabal.project               |  2 +-
 src/Text/Pandoc/Citeproc.hs | 44 ++++++++++++++++++++++++++++++--------------
 stack.yaml                  |  2 +-
 3 files changed, 32 insertions(+), 16 deletions(-)

(limited to 'src')

diff --git a/cabal.project b/cabal.project
index 99c7b3556..e18577471 100644
--- a/cabal.project
+++ b/cabal.project
@@ -10,4 +10,4 @@ source-repository-package
 source-repository-package
   type: git
   location: https://github.com/jgm/citeproc
-  tag: 85277c4baa6c0350baf29247583ae0f438f7c9c5
+  tag: 4ce0501cd6f9c86eee087afcf878c256e49b9615
diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index a2fca106a..b19494dc0 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -27,7 +27,7 @@ import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Extensions (pandocExtensions)
 import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options (ReaderOptions(..))
-import Text.Pandoc.Shared (stringify, ordNub, blocksToInlines, tshow)
+import Text.Pandoc.Shared (stringify, ordNub, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Walk (query, walk, walkM)
 import Control.Applicative ((<|>))
@@ -49,6 +49,7 @@ import qualified Data.Text as T
 import System.FilePath (takeExtension)
 import Safe (lastMay, initSafe)
 
+
 processCitations  :: PandocMonad m => Pandoc -> m Pandoc
 processCitations (Pandoc meta bs) = do
   style <- getStyle (Pandoc meta bs)
@@ -99,7 +100,9 @@ processCitations (Pandoc meta bs) = do
   let Pandoc meta'' bs' =
          maybe id (setMeta "nocite") metanocites .
          walk (map capitalizeNoteCitation .  mvPunct moveNotes locale) .
-         walk deNote .
+         (if styleIsNoteStyle sopts
+             then walk addNote .  walk deNote
+             else id) .
          evalState (walkM insertResolvedCitations $ Pandoc meta' bs)
          $ cits
   return $ Pandoc meta''
@@ -568,33 +571,46 @@ capitalizeNoteCitation (Cite cs [Note [Para ils]]) =
               $ B.fromList ils]]
 capitalizeNoteCitation x = x
 
+addNote :: Inline -> Inline
+addNote (Span ("",["csl-note"],[]) ils) = Note [Para ils]
+addNote x = x
+
 deNote :: [Inline] -> [Inline]
 deNote [] = []
 deNote (Note bs:rest) =
-  Note (walk go bs) : deNote rest
+  case bs of
+    [Para (cit@(Cite (c:_) _) : ils)]
+       | citationMode c /= AuthorInText ->
+         -- if citation is first in note, no need to parenthesize.
+         Note [Para (walk removeNotes $ cit : walk addParens ils)]
+             : deNote rest
+    _ -> Note (walk removeNotes . walk addParens $ bs) : deNote rest
  where
-  go [] = []
-  go (Cite (c:cs) ils : zs)
+  addParens [] = []
+  addParens (Cite (c:cs) ils : zs)
     | citationMode c == AuthorInText
-      = Cite (c:cs) (concatMap (noteAfterComma (needsPeriod zs)) ils) : go zs
+      = Cite (c:cs) (concatMap (noteAfterComma (needsPeriod zs)) ils) :
+        addParens zs
     | otherwise
-      = Cite (c:cs) (concatMap noteInParens ils) : go zs
-  go (x:xs) = x : go xs
+      = Cite (c:cs) (concatMap noteInParens ils) : addParens zs
+  addParens (x:xs) = x : addParens xs
+  removeNotes (Span ("",["csl-note"],[]) ils) = Span ("",[],[]) ils
+  removeNotes x = x
   needsPeriod [] = True
   needsPeriod (Str t:_) = case T.uncons t of
                             Nothing    -> False
                             Just (c,_) -> isUpper c
   needsPeriod (Space:zs) = needsPeriod zs
   needsPeriod _ = False
-  noteInParens (Note bs')
+  noteInParens (Span ("",["csl-note"],[]) ils)
        = Space : Str "(" :
-         removeFinalPeriod (blocksToInlines bs') ++ [Str ")"]
+         removeFinalPeriod ils ++ [Str ")"]
   noteInParens x = [x]
-  noteAfterComma needsPer (Note bs')
+  noteAfterComma needsPer (Span ("",["csl-note"],[]) ils)
        = Str "," : Space :
-         (if needsPer
-             then id
-             else removeFinalPeriod) (blocksToInlines bs')
+         if needsPer
+            then ils
+            else removeFinalPeriod ils
   noteAfterComma _ x = [x]
 deNote (x:xs) = x : deNote xs
 
diff --git a/stack.yaml b/stack.yaml
index 2db0a626f..48a608db3 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -14,7 +14,7 @@ extra-deps:
 - git: https://github.com/jgm/doctemplates
   commit: 428c26d5303cf7a2b1051fe1ffd9aafe9ba71c81
 - git: https://github.com/jgm/citeproc
-  commit: 85277c4baa6c0350baf29247583ae0f438f7c9c5
+  commit: 4ce0501cd6f9c86eee087afcf878c256e49b9615
 ghc-options:
    "$locals": -fhide-source-paths -Wno-missing-home-modules
 resolver: lts-18.0
-- 
cgit v1.2.3


From 77537b17657e29d047a2f8df1feda2ed540c6e08 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 5 Jul 2021 13:28:58 -0700
Subject: Citeproc: cleanup and efficiency improvement in deNote.

---
 src/Text/Pandoc/Citeproc.hs | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index b19494dc0..6e2d9ce7e 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -99,7 +99,7 @@ processCitations (Pandoc meta bs) = do
   let metanocites = lookupMeta "nocite" meta
   let Pandoc meta'' bs' =
          maybe id (setMeta "nocite") metanocites .
-         walk (map capitalizeNoteCitation .  mvPunct moveNotes locale) .
+         walk (mvPunct moveNotes locale) .
          (if styleIsNoteStyle sopts
              then walk addNote .  walk deNote
              else id) .
@@ -564,27 +564,28 @@ extractText (FancyVal x) = toText x
 extractText (NumVal n)   = T.pack (show n)
 extractText _            = mempty
 
-capitalizeNoteCitation :: Inline -> Inline
-capitalizeNoteCitation (Cite cs [Note [Para ils]]) =
-  Cite cs
-  [Note [Para $ B.toList $ addTextCase Nothing CapitalizeFirst
-              $ B.fromList ils]]
-capitalizeNoteCitation x = x
-
+-- Here we take the Spans with class csl-note that are left
+-- after deNote has removed nested ones, and convert them
+-- into real notes.
 addNote :: Inline -> Inline
-addNote (Span ("",["csl-note"],[]) ils) = Note [Para ils]
+addNote (Span ("",["csl-note"],[]) ils) =
+  Note [Para $
+         B.toList . addTextCase Nothing CapitalizeFirst . B.fromList $ ils]
 addNote x = x
 
-deNote :: [Inline] -> [Inline]
-deNote [] = []
-deNote (Note bs:rest) =
+-- Here we handle citation notes that occur inside footnotes
+-- or other citation notes, in a note style.  We don't want
+-- notes inside notes, so we convert these to parenthesized
+-- or comma-separated citations.
+deNote :: Inline -> Inline
+deNote (Note bs) =
   case bs of
     [Para (cit@(Cite (c:_) _) : ils)]
        | citationMode c /= AuthorInText ->
          -- if citation is first in note, no need to parenthesize.
          Note [Para (walk removeNotes $ cit : walk addParens ils)]
-             : deNote rest
-    _ -> Note (walk removeNotes . walk addParens $ bs) : deNote rest
+    _ -> Note (walk removeNotes . walk addParens $ bs)
+
  where
   addParens [] = []
   addParens (Cite (c:cs) ils : zs)
@@ -594,25 +595,30 @@ deNote (Note bs:rest) =
     | otherwise
       = Cite (c:cs) (concatMap noteInParens ils) : addParens zs
   addParens (x:xs) = x : addParens xs
+
   removeNotes (Span ("",["csl-note"],[]) ils) = Span ("",[],[]) ils
   removeNotes x = x
+
   needsPeriod [] = True
   needsPeriod (Str t:_) = case T.uncons t of
                             Nothing    -> False
                             Just (c,_) -> isUpper c
   needsPeriod (Space:zs) = needsPeriod zs
   needsPeriod _ = False
+
   noteInParens (Span ("",["csl-note"],[]) ils)
        = Space : Str "(" :
          removeFinalPeriod ils ++ [Str ")"]
   noteInParens x = [x]
+
   noteAfterComma needsPer (Span ("",["csl-note"],[]) ils)
        = Str "," : Space :
          if needsPer
             then ils
             else removeFinalPeriod ils
   noteAfterComma _ x = [x]
-deNote (x:xs) = x : deNote xs
+
+deNote x = x
 
 -- Note: we can't use dropTextWhileEnd indiscriminately,
 -- because this would remove the final period on abbreviations like Ibid.
-- 
cgit v1.2.3


From 3a31fe68efcf80888294a1d3d33751d266d4de07 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 5 Jul 2021 14:45:07 -0700
Subject: Add command test for #7394.

And fix a small bug in handling of citations in notes, which
led to commas at the end of sentences in some cases.
---
 src/Text/Pandoc/Citeproc.hs |  1 +
 test/command/7394.md        | 85 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 test/command/7394.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs
index 6e2d9ce7e..246f54516 100644
--- a/src/Text/Pandoc/Citeproc.hs
+++ b/src/Text/Pandoc/Citeproc.hs
@@ -612,6 +612,7 @@ deNote (Note bs) =
   noteInParens x = [x]
 
   noteAfterComma needsPer (Span ("",["csl-note"],[]) ils)
+    | not (null ils)
        = Str "," : Space :
          if needsPer
             then ils
diff --git a/test/command/7394.md b/test/command/7394.md
new file mode 100644
index 000000000..d6eb769b9
--- /dev/null
+++ b/test/command/7394.md
@@ -0,0 +1,85 @@
+```
+% pandoc -f markdown -t plain --citeproc
+---
+csl: command/chicago-fullnote-bibliography.csl
+references:
+- author:
+  - family: Wandt
+    given: Manfred
+  edition: 6
+  id: wandt2014ges-sv
+  issued: 2014
+  publisher: Franz Vahlen
+  publisher-place: München
+  title: Gesetzliche schuldverhältnisse
+  title-short: Gesetzl SV
+  type: book
+- author:
+  - family: Smith
+    given: Zenda
+  edition: 6
+  id: smith2015
+  issued: 2015
+  publisher: Macmillan
+  publisher-place: New York
+  title: A verb and a noun
+  type: book
+---
+
+Hi^[@wandt2014ges-sv.].
+
+Hi^[[@wandt2014ges-sv].].
+
+Hi^[[See also @wandt2014ges-sv].].
+
+Hi^[See also @wandt2014ges-sv.].
+
+Hi^[@wandt2014ges-sv [@smith2015].].
+
+Hi^[[@wandt2014ges-sv; @smith2015].].
+
+Hi [@wandt2014ges-sv].
+
+Hi [see also @wandt2014ges-sv].
+
+^D
+Hi[1].
+
+Hi[2].
+
+Hi[3].
+
+Hi[4].
+
+Hi[5].
+
+Hi[6].
+
+Hi.[7]
+
+Hi.[8]
+
+Smith, Zenda. A Verb and a Noun. 6th ed. New York: Macmillan, 2015.
+
+Wandt, Manfred. Gesetzliche Schuldverhältnisse. 6th ed. München: Franz
+Vahlen, 2014.
+
+[1] Manfred Wandt, Gesetzliche Schuldverhältnisse, 6th ed. (München:
+Franz Vahlen, 2014).
+
+[2] Wandt.
+
+[3] See also Wandt.
+
+[4] See also Wandt.
+
+[5] Wandt, Zenda Smith, A Verb and a Noun, 6th ed. (New York: Macmillan,
+2015).
+
+[6] Wandt, Gesetzl SV; Smith, A Verb and a Noun.
+
+[7] Wandt, Gesetzl SV.
+
+[8] See also Wandt.
+```
+
-- 
cgit v1.2.3


From 3ed37f00771d20a1b7516f2a37b7b424b3b2f1d8 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 Jul 2021 10:21:59 -0700
Subject: HTML reader: add col, colgroup to 'closes' definitions

---
 src/Text/Pandoc/Readers/HTML/Parsing.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index 4c069e2c3..bd8d7c96c 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -161,10 +161,12 @@ _ `closes` "html" = False
 "li" `closes` "li" = True
 "th" `closes` t | t `elem` ["th","td"] = True
 "td" `closes` t | t `elem` ["th","td"] = True
-"tr" `closes` t | t `elem` ["th","td","tr"] = True
+"tr" `closes` t | t `elem` ["th","td","tr","colgroup"] = True
 "dd" `closes` t | t `elem` ["dt", "dd"] = True
 "dt" `closes` t | t `elem` ["dt","dd"] = True
 "rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True
+"col" `closes` "col" = True
+"colgroup" `closes` "col" = True
 "optgroup" `closes` "optgroup" = True
 "optgroup" `closes` "option" = True
 "option" `closes` "option" = True
-- 
cgit v1.2.3


From f88ebf3ebf49e00ffa12778caf6817cc34459e6a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 Jul 2021 10:22:07 -0700
Subject: Markdown reader:  don't try to read contents in self-closing HTML
 tag.

Previously we had problems parsing raw HTML with self-closing
tags like `<col/>`. The problem was that pandoc would look
for a closing tag to close the markdown contents, but the
closing tag had, in effect, already been parsed by `htmlTag`.

This fixes the issue described in
<https://groups.google.com/d/msgid/pandoc-discuss/297bc662-7841-4423-bcbb-534e99bbba09n%40googlegroups.com>.
---
 src/Text/Pandoc/Readers/Markdown.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1e9867d07..2dc7ddf52 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1121,6 +1121,7 @@ rawTeXBlock = do
 rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
 rawHtmlBlocks = do
   (TagOpen tagtype _, raw) <- htmlTag isBlockTag
+  let selfClosing = "/>" `T.isSuffixOf` raw
   -- we don't want '<td>    text' to be a code block:
   skipMany spaceChar
   indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
@@ -1134,7 +1135,9 @@ rawHtmlBlocks = do
                  gobbleAtMostSpaces indentlevel
                  notFollowedBy' closer
                  block
-  contents <- mconcat <$> many block'
+  contents <- if selfClosing
+                 then return mempty
+                 else mconcat <$> many block'
   result <-
     try
     (do gobbleAtMostSpaces indentlevel
-- 
cgit v1.2.3


From e7f8cc57866b61ed354c4c3812aaced33832a0e0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 6 Jul 2021 10:39:47 -0700
Subject: T.P.PDF, convertImage: normalize paths.

This will avoid paths on Windows with mixed path separators,
which may cause problems with SVG conversion.

See #7431.
---
 src/Text/Pandoc/PDF.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index f85ef5b1f..aae3f9806 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -215,9 +215,9 @@ convertImage opts tmpdir fname = do
                  E.catch (Right pngOut <$ JP.savePngImage pngOut img) $
                      \(e :: E.SomeException) -> return (Left (tshow e))
   where
-    pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir
-    pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir
-    svgIn = tmpdir </> fname
+    pngOut = normalise $ replaceDirectory (replaceExtension fname ".png") tmpdir
+    pdfOut = normalise $ replaceDirectory (replaceExtension fname ".pdf") tmpdir
+    svgIn = normalise $ tmpdir </> fname
     mime = getMimeType fname
     doNothing = return (Right fname)
 
-- 
cgit v1.2.3


From e56e2b0e0be9256ddef798d28f5d2af6e756508d Mon Sep 17 00:00:00 2001
From: Michael Hoffmann <brennan.brisad@gmail.com>
Date: Wed, 7 Jul 2021 01:06:29 +0200
Subject: Recognize data-external when reading HTML img tags (#7429)

Preserve all attributes in img tags.  If attributes have a `data-`
prefix, it will be stripped.  In particular, this preserves a
`data-external` attribute as an `external` attribute in the pandoc AST.
---
 src/Text/Pandoc/Readers/HTML.hs | 11 +++--------
 test/Tests/Readers/HTML.hs      |  6 ++++++
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fc4575f2d..fdf4f28e0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -722,17 +722,12 @@ pLink = try $ do
 
 pImage :: PandocMonad m => TagParser m Inlines
 pImage = do
-  tag <- pSelfClosing (=="img") (isJust . lookup "src")
+  tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src")
   url <- canonicalizeUrl $ fromAttrib "src" tag
   let title = fromAttrib "title" tag
   let alt = fromAttrib "alt" tag
-  let uid = fromAttrib "id" tag
-  let cls = T.words $ fromAttrib "class" tag
-  let getAtt k = case fromAttrib k tag of
-                   "" -> []
-                   v  -> [(k, v)]
-  let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
-  return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
+  let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr'
+  return $ B.imageWith attr (escapeURI url) title (B.text alt)
 
 pSvg :: PandocMonad m => TagParser m Inlines
 pSvg = do
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index 9bf567194..4ed1e44af 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -74,6 +74,12 @@ tests = [ testGroup "base tag"
           [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?>
             plain (spanWith ("anchor",[],[]) mempty)
           ]
+        , testGroup "img"
+          [ test html "data-external attribute" $ "<img data-external=\"1\" src=\"http://example.com/stickman.gif\">" =?>
+            plain (imageWith ("", [], [("external", "1")]) "http://example.com/stickman.gif" "" "")
+          , test html "title" $ "<img title=\"The title\" src=\"http://example.com/stickman.gif\">" =?>
+            plain (imageWith ("", [], []) "http://example.com/stickman.gif" "The title" "")
+          ]
         , testGroup "lang"
           [ test html "lang on <html>" $ "<html lang=\"es\">hola" =?>
             setMeta "lang" (text "es") (doc (plain (text "hola")))
-- 
cgit v1.2.3


From 565330033a623ed7bf4d0a3b57dd14710cf27703 Mon Sep 17 00:00:00 2001
From: Michael Hoffmann <brennan.brisad@gmail.com>
Date: Wed, 7 Jul 2021 18:26:37 +0200
Subject: Don't incorporate externally linked images in EPUB documents (#7430)

Just like it is possible to avoid incorporating an image in EPUB by
passing `data-external="1"` to a raw HTML snippet, this makes the same
possible for native Images, by looking for an associated `external`
attribute.
---
 MANUAL.txt                      | 6 ++++++
 src/Text/Pandoc/Writers/EPUB.hs | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/MANUAL.txt b/MANUAL.txt
index 52563a8fa..c9dc9e62b 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -6177,6 +6177,12 @@ with the `src` attribute.  For example:
       </source>
     </audio>
 
+If the input format already is HTML then `data-external="1"` will work
+as expected for `<img>` elements. Similarly, for Markdown, external
+images can be declared with `![img](url){external=1}`. Note that this
+only works for images; the other media elements have no native
+representation in pandoc's AST and requires the use of raw HTML.
+
 # Jupyter notebooks
 
 When creating a [Jupyter notebook], pandoc will try to infer the
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 3c092a2c1..508fb6a98 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -1131,7 +1131,8 @@ transformInline  :: PandocMonad m
                  => WriterOptions
                  -> Inline
                  -> E m Inline
-transformInline _opts (Image attr lab (src,tit)) = do
+transformInline _opts (Image attr@(_,_,kvs) lab (src,tit))
+  | isNothing (lookup "external" kvs) = do
     newsrc <- modifyMediaRef $ T.unpack src
     return $ Image attr lab ("../" <> newsrc, tit)
 transformInline opts x@(Math t m)
-- 
cgit v1.2.3


From ae22b1e977cfb1357bb21fabc227e76a6adb0599 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 9 Jul 2021 12:27:41 -0700
Subject: RST reader: fix regression with code includes.

With the recent changes to include infrastructure,
included code blocks were getting an extra newline.

Closes #7436.  Added regression test.
---
 pandoc.cabal                   |  1 +
 src/Text/Pandoc/Readers/RST.hs |  6 +++++-
 test/command/7436.md           | 14 ++++++++++++++
 test/command/three.txt         |  3 +++
 4 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 test/command/7436.md
 create mode 100644 test/command/three.txt

(limited to 'src')

diff --git a/pandoc.cabal b/pandoc.cabal
index 84a04c6b6..e3cd7e54f 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -213,6 +213,7 @@ extra-source-files:
                  test/command/B.txt
                  test/command/C.txt
                  test/command/D.txt
+                 test/command/three.txt
                  test/command/01.csv
                  test/command/chap1/spider.png
                  test/command/chap2/spider.png
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 35292d949..3990f0cb5 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -474,6 +474,7 @@ includeDirective top fields body = do
            case lookup "literal" fields of
              Just _  -> B.rawBlock "rst" . sourcesToText <$> getInput
              Nothing -> parseBlocks
+  let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields)
   let selectLines =
         (case trim <$> lookup "end-before" fields of
                          Just patt -> takeWhile (not . (patt `T.isInfixOf`))
@@ -482,8 +483,11 @@ includeDirective top fields body = do
                          Just patt -> drop 1 .
                                         dropWhile (not . (patt `T.isInfixOf`))
                          Nothing   -> id)
+
   let toStream t =
-        toSources [(f, T.unlines . selectLines . T.lines $ t)]
+        Sources [(initialPos f,
+                   (T.unlines . selectLines . T.lines $ t) <>
+                    if isLiteral then mempty else "\n")]  -- see #7436
   currentDir <- takeDirectory . sourceName <$> getPosition
   insertIncludedFile parser toStream [currentDir] f startLine endLine
 
diff --git a/test/command/7436.md b/test/command/7436.md
new file mode 100644
index 000000000..ad4cb8c2f
--- /dev/null
+++ b/test/command/7436.md
@@ -0,0 +1,14 @@
+```
+% pandoc -f rst -t native
+.. include:: command/three.txt
+   :code:
+
+.. include:: command/three.txt
+   :literal:
+
+.. include:: command/three.txt
+^D
+[CodeBlock ("",[""],[("code","")]) "1st line.\n2nd line.\n3rd line.\n"
+,RawBlock (Format "rst") "1st line.\n2nd line.\n3rd line.\n"
+,Para [Str "1st",Space,Str "line.",SoftBreak,Str "2nd",Space,Str "line.",SoftBreak,Str "3rd",Space,Str "line."]]
+```
diff --git a/test/command/three.txt b/test/command/three.txt
new file mode 100644
index 000000000..3ca3fdd4e
--- /dev/null
+++ b/test/command/three.txt
@@ -0,0 +1,3 @@
+1st line.
+2nd line.
+3rd line.
-- 
cgit v1.2.3


From 477a67061f06827b7e807319404cc277a417e9d0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 9 Jul 2021 14:14:19 -0700
Subject: Always use / when adding directory to image path with extractMedia.

Even on Windows.

May help with #7431.
---
 src/Text/Pandoc/Class/IO.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index 169074860..f4cfc8682 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -226,6 +226,6 @@ adjustImagePath dir mediabag (Image attr lab (src, tit)) =
   case lookupMedia (T.unpack src) mediabag of
     Nothing -> Image attr lab (src, tit)
     Just item ->
-      let fullpath = dir </> mediaPath item
+      let fullpath = dir <> "/" <> mediaPath item
       in  Image attr lab (T.pack fullpath, tit)
 adjustImagePath _ _ x = x
-- 
cgit v1.2.3


From ac0a9da6d85e9b7a73973a20019caa324b2c1aff Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 11 Jul 2021 13:01:45 -0700
Subject: Improved parsing of raw LaTeX from Text streams (rawLaTeXParser).

We now use source positions from the token stream to tell us
how much of the text stream to consume.  Getting this to
work required a few other changes to make token source positions
accurate.

Closes #7434.
---
 src/Text/Pandoc/Readers/LaTeX.hs         |  9 +++-----
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 39 ++++++++++++++++++++++++++++----
 test/command/7434.md                     | 15 ++++++++++++
 3 files changed, 52 insertions(+), 11 deletions(-)
 create mode 100644 test/command/7434.md

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9e14c159a..31c8d9095 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -136,8 +136,7 @@ rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
               => ParserT Sources s m Text
 rawLaTeXBlock = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
       <|> rawLaTeXParser toks True
              (do choice (map controlSeq
@@ -167,8 +166,7 @@ rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
                => ParserT Sources s m Text
 rawLaTeXInline = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   raw <- snd <$>
           (   rawLaTeXParser toks True
               (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced))
@@ -182,8 +180,7 @@ rawLaTeXInline = do
 inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines
 inlineCommand = do
   lookAhead (try (char '\\' >> letter))
-  inp <- getInput
-  let toks = tokenizeSources inp
+  toks <- getInputTokens
   fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
           inlines
 
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index a17b1f324..9dac4d6ef 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleInstances     #-}
+{-# LANGUAGE FlexibleContexts      #-}
 {-# LANGUAGE LambdaCase            #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE OverloadedStrings     #-}
@@ -28,6 +29,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
   , applyMacros
   , tokenize
   , tokenizeSources
+  , getInputTokens
   , untokenize
   , untoken
   , totoks
@@ -246,18 +248,23 @@ withVerbatimMode parser = do
        updateState $ \st -> st{ sVerbatimMode = False }
        return result
 
-rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
+rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a)
                => [Tok] -> Bool -> LP m a -> LP m a
                -> ParserT Sources s m (a, Text)
 rawLaTeXParser toks retokenize parser valParser = do
   pstate <- getState
   let lstate = def{ sOptions = extractReaderOptions pstate }
   let lstate' = lstate { sMacros = extractMacros pstate }
+  let setStartPos = case toks of
+                      Tok pos _ _ : _ -> setPosition pos
+                      _ -> return ()
+  let preparser = setStartPos >> parser
   let rawparser = (,) <$> withRaw valParser <*> getState
-  res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks
+  res' <- lift $ runParserT (withRaw (preparser >> getPosition))
+                            lstate "chunk" toks
   case res' of
        Left _    -> mzero
-       Right toks' -> do
+       Right (endpos, toks') -> do
          res <- lift $ runParserT (do when retokenize $ do
                                         -- retokenize, applying macros
                                         ts <- many (satisfyTok (const True))
@@ -268,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do
               Left _    -> mzero
               Right ((val, raw), st) -> do
                 updateState (updateMacros (sMacros st <>))
-                void $ count (T.length (untokenize toks')) anyChar
+                let skipTilPos stopPos = do
+                      anyChar
+                      pos <- getPosition
+                      if pos >= stopPos
+                         then return ()
+                         else skipTilPos stopPos
+                skipTilPos endpos
                 let result = untokenize raw
                 -- ensure we end with space if input did, see #4442
                 let result' =
@@ -306,6 +319,17 @@ tokenizeSources = concatMap tokenizeSource . unSources
  where
    tokenizeSource (pos, t) = totoks pos t
 
+-- Return tokens from input sources. Ensure that starting position is
+-- correct.
+getInputTokens :: PandocMonad m => ParserT Sources s m [Tok]
+getInputTokens = do
+  pos <- getPosition
+  ss <- getInput
+  return $
+    case ss of
+      Sources [] -> []
+      Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest)
+
 tokenize :: SourceName -> Text -> [Tok]
 tokenize sourcename = totoks (initialPos sourcename)
 
@@ -433,8 +457,13 @@ parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a
 parseFromToks parser toks = do
   oldInput <- getInput
   setInput toks
+  oldpos <- getPosition
+  case toks of
+     Tok pos _ _ : _ -> setPosition pos
+     _ -> return ()
   result <- disablingWithRaw parser
   setInput oldInput
+  setPosition oldpos
   return result
 
 disablingWithRaw :: PandocMonad m => LP m a -> LP m a
@@ -458,7 +487,7 @@ satisfyTok f = do
                   | otherwise = Nothing
         updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos
         updatePos _spos _ (Tok pos _ _ : _) = pos
-        updatePos spos _ []                 = incSourceColumn spos 1
+        updatePos spos (Tok _ _ t)  []      = incSourceColumn spos (T.length t)
 
 doMacros :: PandocMonad m => LP m ()
 doMacros = do
diff --git a/test/command/7434.md b/test/command/7434.md
new file mode 100644
index 000000000..be8732bc2
--- /dev/null
+++ b/test/command/7434.md
@@ -0,0 +1,15 @@
+```
+% pandoc -f markdown -t native
+\begin{proof}
+\newcommand{\x}{\left.\right.}
+\x
+\end{proof}
+
+1234567890abcdefghi
+
+[\*\a](x)
+^D
+[RawBlock (Format "tex") "\\begin{proof}\n\\newcommand{\\x}{\\left.\\right.}\n\\left.\\right.\n\\end{proof}"
+,Para [Str "1234567890abcdefghi"]
+,Para [Link ("",[],[]) [Str "*",RawInline (Format "tex") "\\a"] ("x","")]]
+```
-- 
cgit v1.2.3


From 06408d08e5ccf06a6a04c9b77470e6a67d98e52c Mon Sep 17 00:00:00 2001
From: Jan Tojnar <jtojnar@gmail.com>
Date: Mon, 12 Jul 2021 00:28:52 +0200
Subject: DocBook reader: add support for citerefentry (#7437)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Originally intended for referring to UNIX manual pages, either part of the same DocBook document as refentry element, or external – hence the manvolnum element.
These days, refentry is more general, for example the element documentation pages linked below are each a refentry.

As per the *Processing expectations* section of citerefentry, the element is supposed to be a hyperlink to a refentry (when in the same document) but pandoc does not support refentry tag at the moment so that is moot.

https://tdg.docbook.org/tdg/5.1/citerefentry.html
https://tdg.docbook.org/tdg/5.1/manvolnum.html
https://tdg.docbook.org/tdg/5.1/refentry.html

This roughly corresponds to a `manpage` role in rST syntax, which produces a `Code` AST node with attributes `.interpreted-text role=manpage` but that does not fit DocBook parser.

https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-manpage
---
 src/Text/Pandoc/Readers/DocBook.hs | 6 +++++-
 test/docbook-reader.docbook        | 3 +++
 test/docbook-reader.native         | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6ac1c99f9..a31819ece 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -97,7 +97,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] chapterinfo - Meta-information for a Chapter
 [ ] citation - An inline bibliographic reference to another published work
 [ ] citebiblioid - A citation of a bibliographic identifier
-[ ] citerefentry - A citation to a reference page
+[x] citerefentry - A citation to a reference page
 [ ] citetitle - The title of a cited work
 [ ] city - The name of a city in an address
 [x] classname - The name of a class, in the object-oriented programming sense
@@ -1112,6 +1112,10 @@ parseInline (Elem e) =
         "segmentedlist" -> segmentedList
         "classname" -> codeWithLang
         "code" -> codeWithLang
+        "citerefentry" -> do
+             let title = maybe mempty strContent $ filterChild (named "refentrytitle") e
+             let manvolnum = maybe mempty (\el -> "(" <> strContent el <> ")") $ filterChild (named "manvolnum") e
+             return $ codeWith ("",["citerefentry"],[]) (title <> manvolnum)
         "filename" -> codeWithLang
         "envar" -> codeWithLang
         "literal" -> codeWithLang
diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook
index 51e62942b..f021dc8be 100644
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@@ -725,6 +725,9 @@ These should not be escaped:  \$ \\ \&gt; \[ \{
   <para>
     More code: <classname>Class</classname> and <type>Type</type>
   </para>
+  <para>
+    Referencing a man page: <citerefentry><refentrytitle>nix.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+  </para>
   <para>
     <emphasis role="strikethrough">This is
     <emphasis>strikeout</emphasis>.</emphasis>
diff --git a/test/docbook-reader.native b/test/docbook-reader.native
index 1961949d9..b1f5fd085 100644
--- a/test/docbook-reader.native
+++ b/test/docbook-reader.native
@@ -188,6 +188,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Sof
 ,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."]
 ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",SoftBreak,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",SoftBreak,Code ("",[],[]) "<html>",Str "."]
 ,Para [Str "More",Space,Str "code:",Space,Code ("",[],[]) "Class",Space,Str "and",Space,Code ("",[],[]) "Type"]
+,Para [Str "Referencing",Space,Str "a",Space,Str "man",Space,Str "page:",Space,Code ("",["citerefentry"],[]) "nix.conf(5)"]
 ,Para [Strikeout [Str "This",Space,Str "is",SoftBreak,Emph [Str "strikeout"],Str "."]]
 ,Para [Str "Superscripts:",Space,Str "a",Superscript [Str "bc"],Str "d",SoftBreak,Str "a",Superscript [Emph [Str "hello"]],SoftBreak,Str "a",Superscript [Str "hello\160there"],Str "."]
 ,Para [Str "Subscripts:",Space,Str "H",Subscript [Str "2"],Str "O,",Space,Str "H",Subscript [Str "23"],Str "O,",SoftBreak,Str "H",Subscript [Str "many\160of\160them"],Str "O."]
-- 
cgit v1.2.3


From 18270c7a390b5cb101e16790ccb5e50dd9f34a7a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 16 Jul 2021 11:39:02 -0700
Subject: PDF: Fix svgIn path error.

We were duplicating the temp directory; this didn't show up
on macOS or linux because there we use absolute paths for
the temp directory.

Closes #7431.
---
 src/Text/Pandoc/PDF.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index aae3f9806..c4e30af34 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -217,7 +217,7 @@ convertImage opts tmpdir fname = do
   where
     pngOut = normalise $ replaceDirectory (replaceExtension fname ".png") tmpdir
     pdfOut = normalise $ replaceDirectory (replaceExtension fname ".pdf") tmpdir
-    svgIn = normalise $ tmpdir </> fname
+    svgIn = normalise fname
     mime = getMimeType fname
     doNothing = return (Right fname)
 
-- 
cgit v1.2.3


From 493522c5626464fb7a27216ccee381af4a199eef Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 16 Jul 2021 12:04:43 -0700
Subject: LaTeX reader: Support `\cline` in LaTeX tables.

Closes #7442.
---
 src/Text/Pandoc/Readers/LaTeX/Table.hs | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
index 7833da081..f56728fe1 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Table.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -41,6 +41,7 @@ hline :: PandocMonad m => LP m ()
 hline = try $ do
   spaces
   controlSeq "hline" <|>
+    (controlSeq "cline" <* braced) <|>
     -- booktabs rules:
     controlSeq "toprule" <|>
     controlSeq "bottomrule" <|>
-- 
cgit v1.2.3


From 46099e79defe662e541b12548200caf29063c1c6 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 16 Jul 2021 13:10:45 -0700
Subject: DocBook reader: handle images with imageobjectco elements.

Closes #7440.
---
 src/Text/Pandoc/Readers/DocBook.hs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index a31819ece..c49b82ccf 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -741,9 +741,9 @@ getMediaobject e = do
   figTitle <- gets dbFigureTitle
   ident <- gets dbFigureId
   (imageUrl, attr) <-
-    case filterChild (named "imageobject") e of
-      Nothing  -> return (mempty, nullAttr)
-      Just z   -> case filterChild (named "imagedata") z of
+    case filterElements (named "imageobject") e of
+      []  -> return (mempty, nullAttr)
+      (z:_) -> case filterChild (named "imagedata") z of
                     Nothing -> return (mempty, nullAttr)
                     Just i  -> let atVal a = attrValue a i
                                    w = case atVal "width" of
-- 
cgit v1.2.3