From 5bbd5a9e80c4f4356d71b54315cf75b5ae2f3650 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 3 Dec 2020 09:51:23 -0800
Subject: Docx writer: Support bold and italic in "complex script."

Previously bold and italics didn't work properly in RTL
text.  This commit causes the w:bCs and w:iCs attributes
to be used, in addition to w:b and w:i, for bold and
italics respectively.

Closes #6911.
---
 test/docx/golden/custom_style_preserve.docx | Bin 10573 -> 10578 bytes
 test/docx/golden/definition_list.docx       | Bin 9844 -> 9850 bytes
 test/docx/golden/inline_formatting.docx     | Bin 9959 -> 9969 bytes
 3 files changed, 0 insertions(+), 0 deletions(-)

(limited to 'test/docx/golden')

diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index ac595cdd4..17804bb81 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 005f5dbe3..21629e208 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index ce1d16daa..e12e3b38d 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
-- 
cgit v1.2.3


From 00031fc809117cb436397aba83a41ca1d4056f61 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sun, 13 Dec 2020 14:09:59 +0100
Subject: Docx writer: keep raw openxml strings verbatim.

Closes: #6933
---
 src/Text/Pandoc/Writers/Docx.hs     |   7 +++++--
 test/Tests/Writers/Docx.hs          |  10 ++++++++++
 test/docx/golden/raw-blocks.docx    | Bin 0 -> 9888 bytes
 test/docx/golden/raw-bookmarks.docx | Bin 0 -> 10023 bytes
 test/docx/raw-blocks.native         |   6 ++++++
 test/docx/raw-bookmarks.native      |   3 +++
 6 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 test/docx/golden/raw-blocks.docx
 create mode 100644 test/docx/golden/raw-bookmarks.docx
 create mode 100644 test/docx/raw-blocks.native
 create mode 100644 test/docx/raw-bookmarks.native

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 97048e980..0174a8501 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -978,7 +978,9 @@ blockToOpenXML' opts (Para lst)
       return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)]
 blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
 blockToOpenXML' _ b@(RawBlock format str)
-  | format == Format "openxml" = return (parseXML str)
+  | format == Format "openxml" = return [
+        Text (CData CDataRaw (T.unpack str) Nothing)
+      ]
   | otherwise                  = do
       report $ BlockNotRendered b
       return []
@@ -1312,7 +1314,8 @@ inlineToOpenXML' opts (Strikeout lst) =
   $ inlinesToOpenXML opts lst
 inlineToOpenXML' _ LineBreak = return [Elem br]
 inlineToOpenXML' _ il@(RawInline f str)
-  | f == Format "openxml" = return (parseXML str)
+  | f == Format "openxml" = return
+                            [Text (CData CDataRaw (T.unpack str) Nothing)]
   | otherwise             = do
       report $ InlineNotRendered il
       return []
diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs
index 8f051b4b7..66a5c3d36 100644
--- a/test/Tests/Writers/Docx.hs
+++ b/test/Tests/Writers/Docx.hs
@@ -128,6 +128,16 @@ tests = [ testGroup "inlines"
             def
             "docx/codeblock.native"
             "docx/golden/codeblock.docx"
+          , docxTest
+            "raw OOXML blocks"
+            def
+            "docx/raw-blocks.native"
+            "docx/golden/raw-blocks.docx"
+          , docxTest
+            "raw bookmark markers"
+            def
+            "docx/raw-bookmarks.native"
+            "docx/golden/raw-bookmarks.docx"
           ]
         , testGroup "track changes"
           [ docxTest
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
new file mode 100644
index 000000000..ae7f8f1f0
Binary files /dev/null and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
new file mode 100644
index 000000000..5e433b736
Binary files /dev/null and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/raw-blocks.native b/test/docx/raw-blocks.native
new file mode 100644
index 000000000..d7f985bc3
--- /dev/null
+++ b/test/docx/raw-blocks.native
@@ -0,0 +1,6 @@
+[Para [Str "Cell",Space,Str "compartments"]
+,RawBlock (Format "openxml") "<w:tbl>\n<w:tblPr>\n<w:tblW w:w=\"2000\" w:type=\"pct\"/>\n<w:tblBorders>\n<w:top w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:start w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:bottom w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:end w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n</w:tblBorders>\n</w:tblPr>\n<w:tblGrid>\n<w:gridCol w:w=\"1871\" />\n<w:gridCol w:w=\"1872\" />\n</w:tblGrid>\n<w:tr>\n<w:tc>"
+,Para [Str "Ribosome"]
+,RawBlock (Format "openxml") "</w:tc>\n<w:tc>"
+,Para [Str "Lysosome"]
+,RawBlock (Format "openxml") "</w:tc>\n</w:tr>\n</w:tbl>"]
diff --git a/test/docx/raw-bookmarks.native b/test/docx/raw-bookmarks.native
new file mode 100644
index 000000000..1e76655d6
--- /dev/null
+++ b/test/docx/raw-bookmarks.native
@@ -0,0 +1,3 @@
+[Para [Str "Manual",Space,Str "endnotes."]
+,Para [Str "Nullam",Space,Str "eu",Space,Str "ante",Space,Str "vel",Space,Str "est",Space,Str "convallis",Space,Str "dignissim.",Space,Str "Nunc",Space,Str "porta",Space,Str "vulputate",Space,Str "tellus.",Space,Str "Nunc",Space,Str "rutrum",Space,Str "turpis",Space,Str "sed",Space,Str "pede.",Space,Str "Sed",Space,Str "bibendum.",RawInline (Format "openxml") "<w:bookmarkStart w:id=\"0\" w:name=\"Aliquam\"/>",Str "Aliquam",Space,Str "posuere."]
+,Para [Str "Nunc",Space,Str "aliquet,",Space,Str "augue",Space,Str "nec",Space,Str "adipiscing",Space,Str "interdum,",Space,Str "lacus",Space,Str "tellus",Space,Str "malesuada",Space,Str "massa,",Space,Str "quis",Space,Str "varius",Space,Str "mi",Space,Str "purus",Space,Str "non",Space,Str "odio.",RawInline (Format "openxml") "<w:bookmarkEnd w:id=\"0\"/>",Str "Pellentesque",Space,Str "condimentum,",Space,Str "magna",Space,Str "ut",Space,Str "suscipit",Space,Str "hendrerit,",Space,Str "ipsum",Space,Str "augue",Space,Str "ornare",Space,Str "nulla,",Space,Str "non",Space,Str "luctus",Space,Str "diam",Space,Str "neque",Space,Str "sit",Space,Str "amet",Space,Str "urna.",Space,Str "Curabitur",Space,Str "vulputate",Space,Str "vestibulum",Space,Str "lorem."]]
-- 
cgit v1.2.3


From c451207b08edc36fa5c2f1af5556a8d211e023ed Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 12 Jan 2021 09:49:10 -0800
Subject: Docx writer: handle table header using styles.

Instead of hard-coding the border and header cell vertical alignment,
we now let this be determined by the Table style, making use of
Word's "conditional formatting" for the table's first row.
For headerless tables, we use the tblLook element to tell Word
not to apply conditional first-row formatting.

Closes #7008.
---
 data/docx/word/styles.xml                          |  15 +++++++++
 src/Text/Pandoc/Writers/Docx.hs                    |  37 +++++++++++----------
 test/docx/golden/block_quotes.docx                 | Bin 10001 -> 10092 bytes
 test/docx/golden/codeblock.docx                    | Bin 9853 -> 9944 bytes
 test/docx/golden/comments.docx                     | Bin 10188 -> 10279 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 9952 -> 10042 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10578 -> 10666 bytes
 test/docx/golden/definition_list.docx              | Bin 9850 -> 9941 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9856 -> 9947 bytes
 test/docx/golden/document-properties.docx          | Bin 10332 -> 10423 bytes
 test/docx/golden/headers.docx                      | Bin 9989 -> 10080 bytes
 test/docx/golden/image.docx                        | Bin 26667 -> 26758 bytes
 test/docx/golden/inline_code.docx                  | Bin 9789 -> 9880 bytes
 test/docx/golden/inline_formatting.docx            | Bin 9969 -> 10060 bytes
 test/docx/golden/inline_images.docx                | Bin 26725 -> 26816 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10010 -> 10101 bytes
 test/docx/golden/links.docx                        | Bin 10185 -> 10276 bytes
 test/docx/golden/lists.docx                        | Bin 10261 -> 10352 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10052 -> 10143 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10141 -> 10232 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10053 -> 10144 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10148 -> 10239 bytes
 test/docx/golden/notes.docx                        | Bin 9955 -> 10046 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9888 -> 9980 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10023 -> 10115 bytes
 test/docx/golden/table_one_row.docx                | Bin 9834 -> 9932 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10199 -> 10249 bytes
 test/docx/golden/tables.docx                       | Bin 10225 -> 10266 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9833 -> 9924 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9816 -> 9907 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9850 -> 9941 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 9962 -> 10053 bytes
 test/docx/golden/unicode.docx                      | Bin 9774 -> 9865 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9822 -> 9913 bytes
 34 files changed, 35 insertions(+), 17 deletions(-)

(limited to 'test/docx/golden')

diff --git a/data/docx/word/styles.xml b/data/docx/word/styles.xml
index 6bb5a3f52..832b1b25b 100644
--- a/data/docx/word/styles.xml
+++ b/data/docx/word/styles.xml
@@ -350,6 +350,21 @@
         <w:right w:w="108" w:type="dxa" />
       </w:tblCellMar>
     </w:tblPr>
+    <w:tblStylePr w:type="firstRow">
+      <w:tblPr>
+        <w:jc w:val="left"/>
+        <w:tblInd w:w="0" w:type="dxa"/>
+      </w:tblPr>
+      <w:trPr>
+        <w:jc w:val="left"/>
+      </w:trPr>
+      <w:tcPr>
+        <w:vAlign w:val="bottom"/>
+        <w:tcBorders>
+          <w:bottom w:val="single"/>
+        </w:tcBorders>
+      </w:tcPr>
+    </w:tblStylePr>
   </w:style>
   <w:style w:type="paragraph" w:customStyle="1" w:styleId="DefinitionTerm">
     <w:name w:val="Definition Term" />
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index a99e13a85..8f498775d 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1023,23 +1023,15 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
             _ -> es ++ [Elem $ mknode "w:p" [] ()]
   headers' <- mapM cellToOpenXML $ zip aligns headers
   rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
-  let borderProps = Elem $ mknode "w:tcPr" []
-                    [ mknode "w:tcBorders" []
-                      $ mknode "w:bottom" [("w:val","single")] ()
-                    , mknode "w:vAlign" [("w:val","bottom")] () ]
   compactStyle <- pStyleM "Compact"
   let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
-  let mkcell border contents = mknode "w:tc" []
-                            $ [ borderProps | border ] ++
-                            if null contents
-                               then emptyCell'
-                               else contents
-  let mkrow border cells =
+  let mkcell contents = mknode "w:tc" []
+                            $ if null contents
+                                 then emptyCell'
+                                 else contents
+  let mkrow cells =
          mknode "w:tr" [] $
-         [ mknode "w:trPr" []
-           [ mknode "w:cnfStyle" [("w:firstRow","1")] ()]
-         | border]
-         ++ map (mkcell border) cells
+           map mkcell cells
   let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
   let fullrow = 5000 -- 100% specified in pct
   let rowwidth = fullrow * sum widths
@@ -1047,6 +1039,15 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
                        [("w:w", show (floor (textwidth * w) :: Integer))] ()
   let hasHeader = not $ all null headers
   modify $ \s -> s { stInTable = False }
+  -- for compatibility with Word <= 2007, we include a val with a bitmask
+  -- 0x0020  Apply first row conditional formatting
+  -- 0x0040  Apply last row conditional formatting
+  -- 0x0080  Apply first column conditional formatting
+  -- 0x0100  Apply last column conditional formatting
+  -- 0x0200  Do not apply row banding conditional formatting
+  -- 0x0400  Do not apply column banding conditional formatting
+  let tblLookVal :: Int
+      tblLookVal = if hasHeader then 0x20 else 0
   return $
     caption' ++
     [Elem $
@@ -1059,15 +1060,17 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
                                ,("w:firstColumn","0")
                                ,("w:lastColumn","0")
                                ,("w:noHBand","0")
-                               ,("w:noVBand","0")] () :
+                               ,("w:noVBand","0")
+                               ,("w:val", printf "%04x" tblLookVal)
+                               ] () :
           [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] ()
           | not (null caption) ] )
       : mknode "w:tblGrid" []
         (if all (==0) widths
             then []
             else map mkgridcol widths)
-      : [ mkrow True headers' | hasHeader ] ++
-      map (mkrow False) rows'
+      : [ mkrow headers' | hasHeader ] ++
+      map mkrow rows'
       )]
 blockToOpenXML' opts el
   | BulletList lst <- el = addOpenXMLList BulletMarker lst
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index bbc8d8de9..3e1bf16e7 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index e20efcab4..66f055063 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index f1185da98..fb3a02a0a 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index 83243ab8c..bc6c2702a 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 17804bb81..8c555a5bd 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 21629e208..c21b3a5b3 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 5cf8db0b0..92ce144e9 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 14bfab6d4..d21b67309 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index 416743aa1..3558a47bf 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index ef2940f89..606df92a3 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 479ea65ec..759269cac 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index e12e3b38d..c37777080 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 8bd57bb8c..9450b1a73 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 2c6a638fc..6f0b830e6 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index 11e52c4b1..e53889cfb 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 7667990c4..5dbe298b7 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 3e8c6d2b2..194181288 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 05a7cf060..6e0b634f7 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index f5ae4a384..477178e77 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index d02c77271..51110356e 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index f7fdcbe11..b6206cdf5 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index ae7f8f1f0..07b576080 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index 5e433b736..d46095eb7 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index d404878c6..7caba4e93 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 79c395262..6aaa6da61 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index df9680773..5746c5ad0 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index bb73b82f6..5f22dccc6 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index 7df484aaa..ab5c4f56d 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index d717b93ab..085f33162 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 791182db2..1ac86d5c8 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index b64a7b58e..c2c443b19 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index b5116d1a8..5ea18d32e 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From 8ca191604dcd13af27c11d2da225da646ebce6fc Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Mon, 8 Feb 2021 23:35:19 -0800
Subject: Add new unexported module T.P.XMLParser.

This exports functions that use xml-conduit's parser to
produce an xml-light Element or [Content].  This allows
existing pandoc code to use a better parser without
much modification.

The new parser is used in all places where xml-light's
parser was previously used.  Benchmarks show a significant
performance improvement in parsing XML-based formats
(especially ODT and FB2).

Note that the xml-light types use String, so the
conversion from xml-conduit types involves a lot
of extra allocation.  It would be desirable to
avoid that in the future by gradually switching
to using xml-conduit directly. This can be done
module by module.

The new parser also reports errors, which we report
when possible.

A new constructor PandocXMLError has been added to
PandocError in T.P.Error [API change].

Closes #7091, which was the main stimulus.

These changes revealed the need for some changes
in the tests.  The docbook-reader.docbook test
lacked definitions for the entities it used; these
have been added. And the docx golden tests have been
updated, because the new parser does not preserve
the order of attributes.

Add entity defs to docbook-reader.docbook.

Update golden tests for docx.
---
 MANUAL.txt                                         |   1 +
 pandoc.cabal                                       |   2 +
 src/Text/Pandoc/Error.hs                           |   3 +
 src/Text/Pandoc/ImageSize.hs                       |   5 +-
 src/Text/Pandoc/Readers/DocBook.hs                 |  52 ++++++++++------
 src/Text/Pandoc/Readers/Docx/Parse.hs              |  21 ++++---
 src/Text/Pandoc/Readers/Docx/Parse/Styles.hs       |  28 +++++----
 src/Text/Pandoc/Readers/EPUB.hs                    |  17 ++++--
 src/Text/Pandoc/Readers/FB2.hs                     |  10 ++--
 src/Text/Pandoc/Readers/JATS.hs                    |   9 ++-
 src/Text/Pandoc/Readers/OPML.hs                    |  10 +++-
 src/Text/Pandoc/Readers/Odt.hs                     |  24 ++++----
 src/Text/Pandoc/Writers/EPUB.hs                    |  13 +++-
 src/Text/Pandoc/Writers/FB2.hs                     |  11 +++-
 src/Text/Pandoc/Writers/ODT.hs                     |  29 +++++----
 src/Text/Pandoc/Writers/OOXML.hs                   |   9 +--
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       |   4 +-
 src/Text/Pandoc/XMLParser.hs                       |  66 +++++++++++++++++++++
 test/Tests/Readers/JATS.hs                         |   1 +
 test/command/5321.md                               |   4 +-
 test/docbook-reader.docbook                        |   7 ++-
 test/docx/golden/block_quotes.docx                 | Bin 10092 -> 10098 bytes
 test/docx/golden/codeblock.docx                    | Bin 9944 -> 9950 bytes
 test/docx/golden/comments.docx                     | Bin 10279 -> 10285 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 10042 -> 10048 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10666 -> 10673 bytes
 test/docx/golden/custom_style_reference.docx       | Bin 12434 -> 12434 bytes
 test/docx/golden/definition_list.docx              | Bin 9941 -> 9947 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9947 -> 9953 bytes
 test/docx/golden/document-properties.docx          | Bin 10423 -> 10429 bytes
 test/docx/golden/headers.docx                      | Bin 10080 -> 10086 bytes
 test/docx/golden/image.docx                        | Bin 26758 -> 26764 bytes
 test/docx/golden/inline_code.docx                  | Bin 9880 -> 9886 bytes
 test/docx/golden/inline_formatting.docx            | Bin 10060 -> 10066 bytes
 test/docx/golden/inline_images.docx                | Bin 26816 -> 26822 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10101 -> 10107 bytes
 test/docx/golden/links.docx                        | Bin 10276 -> 10282 bytes
 test/docx/golden/lists.docx                        | Bin 10352 -> 10358 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10143 -> 10149 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10232 -> 10238 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10144 -> 10150 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10239 -> 10245 bytes
 test/docx/golden/notes.docx                        | Bin 10046 -> 10052 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9980 -> 9986 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10115 -> 10121 bytes
 test/docx/golden/table_one_row.docx                | Bin 9932 -> 9938 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10249 -> 10255 bytes
 test/docx/golden/tables.docx                       | Bin 10266 -> 10272 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9924 -> 9930 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9907 -> 9913 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9941 -> 9947 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 10053 -> 10059 bytes
 test/docx/golden/unicode.docx                      | Bin 9865 -> 9871 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9913 -> 9919 bytes
 test/jats-reader.native                            |   2 +-
 test/jats-reader.xml                               |   1 +
 test/pptx/code-custom.pptx                         | Bin 28230 -> 28221 bytes
 test/pptx/code-custom_templated.pptx               | Bin 395524 -> 395516 bytes
 test/pptx/code.pptx                                | Bin 28229 -> 28220 bytes
 test/pptx/code_templated.pptx                      | Bin 395522 -> 395514 bytes
 test/pptx/document-properties-short-desc.pptx      | Bin 27012 -> 27004 bytes
 .../document-properties-short-desc_templated.pptx  | Bin 394298 -> 394288 bytes
 test/pptx/document-properties.pptx                 | Bin 27417 -> 27408 bytes
 test/pptx/document-properties_templated.pptx       | Bin 394701 -> 394691 bytes
 test/pptx/endnotes.pptx                            | Bin 26969 -> 26962 bytes
 test/pptx/endnotes_templated.pptx                  | Bin 394262 -> 394253 bytes
 test/pptx/endnotes_toc.pptx                        | Bin 27892 -> 27789 bytes
 test/pptx/endnotes_toc_templated.pptx              | Bin 395186 -> 395083 bytes
 test/pptx/images.pptx                              | Bin 44626 -> 44619 bytes
 test/pptx/images_templated.pptx                    | Bin 411916 -> 411909 bytes
 test/pptx/inline_formatting.pptx                   | Bin 26156 -> 26148 bytes
 test/pptx/inline_formatting_templated.pptx         | Bin 393447 -> 393438 bytes
 test/pptx/lists.pptx                               | Bin 27056 -> 27049 bytes
 test/pptx/lists_templated.pptx                     | Bin 394349 -> 394340 bytes
 test/pptx/raw_ooxml.pptx                           | Bin 26948 -> 26940 bytes
 test/pptx/raw_ooxml_templated.pptx                 | Bin 394240 -> 394231 bytes
 test/pptx/remove_empty_slides.pptx                 | Bin 44073 -> 44065 bytes
 test/pptx/remove_empty_slides_templated.pptx       | Bin 411359 -> 411352 bytes
 test/pptx/slide_breaks.pptx                        | Bin 28582 -> 28575 bytes
 test/pptx/slide_breaks_slide_level_1.pptx          | Bin 27751 -> 27744 bytes
 .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395045 -> 395038 bytes
 test/pptx/slide_breaks_templated.pptx              | Bin 395875 -> 395868 bytes
 test/pptx/slide_breaks_toc.pptx                    | Bin 29539 -> 29532 bytes
 test/pptx/slide_breaks_toc_templated.pptx          | Bin 396833 -> 396826 bytes
 test/pptx/speaker_notes.pptx                       | Bin 35444 -> 35436 bytes
 test/pptx/speaker_notes_after_metadata.pptx        | Bin 31683 -> 31675 bytes
 .../speaker_notes_after_metadata_templated.pptx    | Bin 398964 -> 398955 bytes
 test/pptx/speaker_notes_afterheader.pptx           | Bin 30700 -> 30691 bytes
 test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397988 -> 397979 bytes
 test/pptx/speaker_notes_afterseps.pptx             | Bin 51612 -> 51604 bytes
 test/pptx/speaker_notes_afterseps_templated.pptx   | Bin 418903 -> 418896 bytes
 test/pptx/speaker_notes_templated.pptx             | Bin 402736 -> 402728 bytes
 test/pptx/start_numbering_at.pptx                  | Bin 27031 -> 27023 bytes
 test/pptx/start_numbering_at_templated.pptx        | Bin 394323 -> 394314 bytes
 test/pptx/tables.pptx                              | Bin 27573 -> 27566 bytes
 test/pptx/tables_templated.pptx                    | Bin 394868 -> 394859 bytes
 test/pptx/two_column.pptx                          | Bin 26075 -> 26065 bytes
 test/pptx/two_column_templated.pptx                | Bin 393366 -> 393355 bytes
 98 files changed, 238 insertions(+), 91 deletions(-)
 create mode 100644 src/Text/Pandoc/XMLParser.hs

(limited to 'test/docx/golden')

diff --git a/MANUAL.txt b/MANUAL.txt
index 5b90d039a..dc3b4ca77 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -1464,6 +1464,7 @@ Nonzero exit codes have the following meanings:
    24 PandocCiteprocError
    31 PandocEpubSubdirectoryError
    43 PandocPDFError
+   44 PandocXMLError
    47 PandocPDFProgramNotFoundError
    61 PandocHttpError
    62 PandocShouldNeverHappenError
diff --git a/pandoc.cabal b/pandoc.cabal
index 72e7c2da5..e56456c68 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -493,6 +493,7 @@ library
                  unicode-transforms    >= 0.3      && < 0.4,
                  unordered-containers  >= 0.2      && < 0.3,
                  xml                   >= 1.3.12   && < 1.4,
+                 xml-conduit           >= 1.7      && < 1.10,
                  zip-archive           >= 0.2.3.4  && < 0.5,
                  zlib                  >= 0.5      && < 0.7
   if os(windows) && arch(i386)
@@ -686,6 +687,7 @@ library
                    Text.Pandoc.Lua.PandocLua,
                    Text.Pandoc.Lua.Util,
                    Text.Pandoc.Lua.Walk,
+                   Text.Pandoc.XMLParser,
                    Text.Pandoc.CSS,
                    Text.Pandoc.CSV,
                    Text.Pandoc.RoffChar,
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 204cf15ca..831405f42 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -48,6 +48,7 @@ data PandocError = PandocIOError Text IOError
                  | PandocFailOnWarningError
                  | PandocPDFProgramNotFoundError Text
                  | PandocPDFError Text
+                 | PandocXMLError Text Text
                  | PandocFilterError Text Text
                  | PandocLuaError Text
                  | PandocCouldNotFindDataFileError Text
@@ -103,6 +104,8 @@ handleError (Left e) =
     PandocPDFProgramNotFoundError pdfprog -> err 47 $
         pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog
     PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg
+    PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <>
+        (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg
     PandocFilterError filtername msg -> err 83 $ "Error running filter " <>
         filtername <> ":\n" <> msg
     PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index e19958f6a..e0a1af8e8 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -45,7 +45,9 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Text.XML.Light as Xml
+import Text.Pandoc.XMLParser (parseXMLElement)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Control.Applicative
 import qualified Data.Attoparsec.ByteString.Char8 as A
@@ -327,7 +329,8 @@ getSize img =
 
 svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
 svgSize opts img = do
-  doc <- Xml.parseXMLDoc $ UTF8.toString img
+  doc <- either (const mzero) return $ parseXMLElement
+                                     $ TL.fromStrict $ UTF8.toText img
   let viewboxSize = do
         vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
         [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ada3e98ec..ad0108843 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.DocBook ( readDocBook ) where
 import Control.Monad.State.Strict
-import Data.Char (isSpace, toUpper)
+import Data.Char (isSpace, toUpper, isLetter)
 import Data.Default
 import Data.Either (rights)
 import Data.Foldable (asum)
@@ -21,7 +21,10 @@ import Data.List (intersperse,elemIndex)
 import Data.Maybe (fromMaybe,mapMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Control.Monad.Except (throwError)
 import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import Text.Pandoc.Options
@@ -29,6 +32,7 @@ import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
 
 {-
 
@@ -537,22 +541,25 @@ instance Default DBState where
 
 readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readDocBook _ inp = do
-  let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
+  tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $
+            parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp)
   (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
 
--- We treat <?asciidoc-br?> specially (issue #1236), converting it
--- to <br/>, since xml-light doesn't parse the instruction correctly.
--- Other xml instructions are simply removed from the input stream.
+-- The asciidoc-br and dbfo processing instructions are converted to empty
+-- tags named "pi-..."; all other instructions are passed through unchanged.
 handleInstructions :: Text -> Text
-handleInstructions = T.pack . handleInstructions' . T.unpack
-
-handleInstructions' :: String -> String
-handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
-handleInstructions' xs = case break (=='<') xs of
-                             (ys, [])     -> ys
-                             ([], '<':zs) -> '<' : handleInstructions' zs
-                             (ys, zs)     -> ys ++ handleInstructions' zs
+handleInstructions t =
+  let (x,y) = T.breakOn "<?" t
+   in if T.null y
+         then x
+         else
+           let (w,z) = T.breakOn "?>" y
+            in (if T.takeWhile (\c -> isLetter c || c == '-')
+                    (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"]
+                   then x <> "<pi-" <> T.drop 2 w <> "/>"
+                   else x <> w <> T.take 2 z) <>
+               handleInstructions (T.drop 2 z)
 
 getFigure :: PandocMonad m => Element -> DB m Blocks
 getFigure e = do
@@ -892,7 +899,11 @@ parseBlock (Elem e) =
         "subtitle" -> return mempty  -- handled in parent element
         _       -> skip >> getBlocks e
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = T.pack $ qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          codeBlockWithLang = do
@@ -964,7 +975,7 @@ parseBlock (Elem e) =
                                      cs -> map toAlignment cs
                       let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9')
                                                                    || x == '.') s)
-                      let textWidth = case filterChild (named "?dbfo") e of
+                      let textWidth = case filterChild (named "pi-dbfo") e of
                                         Just d  -> case attrValue "table-width" d of
                                                      "" -> 1.0
                                                      w  -> fromMaybe 100.0 (parseWidth w) / 100.0
@@ -1165,12 +1176,15 @@ parseInline (Elem e) =
         "title" -> return mempty
         "affiliation" -> skip
         -- Note: this isn't a real docbook tag; it's what we convert
-        -- <?asciidor-br?> to in handleInstructions, above.  A kludge to
-        -- work around xml-light's inability to parse an instruction.
-        "br" -> return linebreak
+        -- <?asciidoc-br?> to in handleInstructions, above.
+        "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
    where skip = do
-           lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
+           let qn = T.pack $ qName $ elName e
+           let name = if "pi-" `T.isPrefixOf` qn
+                         then "<?" <> qn <> "?>"
+                         else qn
+           lift $ report $ IgnoredElement name
            return mempty
 
          innerInlines f = extractSpaces f . mconcat <$>
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index fdcffcc3f..056dab6c2 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -74,6 +74,7 @@ import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
 import Text.XML.Light
 import qualified Text.XML.Light.Cursor as XMLC
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            , envComments      :: Comments
@@ -343,10 +344,16 @@ archiveToDocxWithWarnings archive = do
     Right doc -> Right (Docx doc, stateWarnings st)
     Left e    -> Left e
 
+parseXMLFromEntry :: Entry -> Maybe Element
+parseXMLFromEntry entry =
+  case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of
+    Left _   -> Nothing
+    Right el -> Just el
+
 getDocumentXmlPath :: Archive -> Maybe FilePath
 getDocumentXmlPath zf = do
   entry <- findEntryByPath "_rels/.rels" zf
-  relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  relsElem <- parseXMLFromEntry entry
   let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem
   rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e ==
                        Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")
@@ -362,7 +369,7 @@ archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
   docPath <- asks envDocXmlPath
   entry <- maybeToD $ findEntryByPath docPath zf
-  docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+  docElem <- maybeToD $ parseXMLFromEntry entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
   let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
@@ -401,9 +408,9 @@ constructBogusParStyleData stName = ParStyle
 archiveToNotes :: Archive -> Notes
 archiveToNotes zf =
   let fnElem = findEntryByPath "word/footnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       enElem = findEntryByPath "word/endnotes.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       fn_namespaces = maybe [] elemToNameSpaces fnElem
       en_namespaces = maybe [] elemToNameSpaces enElem
       ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
@@ -415,7 +422,7 @@ archiveToNotes zf =
 archiveToComments :: Archive -> Comments
 archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
-               >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+               >>= parseXMLFromEntry
       cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
       cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
   in
@@ -445,7 +452,7 @@ filePathToRelationships :: Archive -> FilePath -> FilePath ->  [Relationship]
 filePathToRelationships ar docXmlPath fp
   | Just relType <- filePathToRelType fp docXmlPath
   , Just entry <- findEntryByPath fp ar
-  , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
+  , Just relElems <- parseXMLFromEntry entry =
   mapMaybe (relElemToRelationship relType) $ elChildren relElems
 filePathToRelationships _ _ _ = []
 
@@ -527,7 +534,7 @@ archiveToNumbering' zf =
   case findEntryByPath "word/numbering.xml" zf of
     Nothing -> Just $ Numbering [] [] []
     Just entry -> do
-      numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
+      numberingElem <- parseXMLFromEntry entry
       let namespaces = elemToNameSpaces numberingElem
           numElems = findChildrenByName namespaces "w" "num" numberingElem
           absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index 236167187..edade8654 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -53,6 +53,7 @@ import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 newtype CharStyleId   = CharStyleId T.Text
   deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -135,19 +136,22 @@ defaultRunStyle = RunStyle { isBold = Nothing
                            , rParentStyle = Nothing
                            }
 
-archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) =>
-                    (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
+archiveToStyles'
+  :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2)
+  => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2)
 archiveToStyles' conv1 conv2 zf =
-  let stylesElem = findEntryByPath "word/styles.xml" zf >>=
-                   (parseXMLDoc . UTF8.toStringLazy . fromEntry)
-  in
-   case stylesElem of
-     Nothing -> (M.empty, M.empty)
-     Just styElem ->
-       let namespaces = elemToNameSpaces styElem
-       in
-        ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing,
-          M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing)
+  case findEntryByPath "word/styles.xml" zf of
+    Nothing -> (M.empty, M.empty)
+    Just entry ->
+      case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+        Left _ -> (M.empty, M.empty)
+        Right styElem ->
+          let namespaces = elemToNameSpaces styElem
+          in
+           ( M.fromList $ map (\r -> (conv1 r, r)) $
+               buildBasedOnList namespaces styElem Nothing,
+             M.fromList $ map (\p -> (conv2 p, p)) $
+               buildBasedOnList namespaces styElem Nothing)
 
 isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool
 isBasedOnStyle ns element parentStyle
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 5e3326e6d..369c4f0c9 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -17,7 +17,7 @@ module Text.Pandoc.Readers.EPUB
   (readEPUB)
   where
 
-import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry,
+import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                           toArchiveOrFail)
 import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
@@ -41,9 +41,10 @@ import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
-import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy)
+import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 type Items = M.Map String (FilePath, MimeType)
 
@@ -181,7 +182,7 @@ renameMeta s         = T.pack s
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
   metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive
-  docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry
+  docElem <- parseXMLDocE metaEntry
   let namespaces = mapMaybe attrToNSPair (elAttribs docElem)
   ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
   as <- fmap (map attrToPair . elAttribs)
@@ -190,7 +191,7 @@ getManifest archive = do
   let rootdir = dropFileName manifestFile
   --mime <- lookup "media-type" as
   manifest <- findEntryByPathE manifestFile archive
-  (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest)
+  (rootdir,) <$> parseXMLDocE manifest
 
 -- Fixup
 
@@ -284,8 +285,12 @@ findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
   mkE ("No entry on path: " ++ path) $ findEntryByPath path a
 
-parseXMLDocE :: PandocMonad m => String -> m Element
-parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc
+parseXMLDocE :: PandocMonad m => Entry -> m Element
+parseXMLDocE entry =
+  either (throwError . PandocXMLError fp) return $ parseXMLElement doc
+ where
+  doc = UTF8.toTextLazy . fromEntry $ entry
+  fp  = T.pack $ eRelativePath entry
 
 findElementE :: PandocMonad m => QName -> Element -> m Element
 findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b0d2f092b..b804eab4f 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -32,6 +32,7 @@ import Data.List (intersperse)
 import qualified Data.Map as M
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Data.Default
 import Data.Maybe
 import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -42,6 +43,7 @@ import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 type FB2 m = StateT FB2State m
 
@@ -64,10 +66,10 @@ instance HasMeta FB2State where
 
 readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readFB2 _ inp =
-  case parseXMLDoc $ crFilter inp of
-    Nothing -> throwError $ PandocParseError "Not an XML document"
-    Just e ->  do
-      (bs, st) <- runStateT (parseRootElement e) def
+  case parseXMLElement $ TL.fromStrict $ crFilter inp of
+    Left msg -> throwError $ PandocXMLError "" msg
+    Right el ->  do
+      (bs, st) <- runStateT (parseRootElement el) def
       let authors = if null $ fb2Authors st
                     then id
                     else setMeta "author" (map text $ reverse $ fb2Authors st)
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index c638da519..dfd343b7a 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -14,6 +14,8 @@ Conversion of JATS XML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
+import Control.Monad.Except (throwError)
+import Text.Pandoc.Error (PandocError(..))
 import Data.Char (isDigit, isSpace, toUpper)
 import Data.Default
 import Data.Generics
@@ -22,6 +24,7 @@ import qualified Data.Map as Map
 import Data.Maybe (maybeToList, fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -29,6 +32,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
 
@@ -51,8 +55,9 @@ instance Default JATSState where
 
 readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readJATS _ inp = do
-  let tree = normalizeTree . parseXML
-               $ T.unpack $ crFilter inp
+  tree <- either (throwError . PandocXMLError "")
+                 (return . normalizeTree) $
+            parseXMLContents (TL.fromStrict $ crFilter inp)
   (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree
   return $ Pandoc (jatsMeta st') (toList . mconcat $ bs)
 
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 5b8996025..bdadc4dd9 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -19,14 +19,18 @@ import Data.Generics
 import Data.Maybe (fromMaybe)
 import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import Text.HTML.TagSoup.Entity (lookupEntity)
 import Text.Pandoc.Builder
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.Pandoc.Shared (crFilter, blocksToInlines')
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLContents)
+import Control.Monad.Except (throwError)
 
 type OPML m = StateT OPMLState m
 
@@ -49,8 +53,10 @@ instance Default OPMLState where
 readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readOPML opts inp  = do
   (bs, st') <- runStateT
-                 (mapM parseBlock $ normalizeTree $
-                    parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
+                 (case parseXMLContents (TL.fromStrict (crFilter inp)) of
+                     Left msg -> throwError $ PandocXMLError "" msg
+                     Right ns -> mapM parseBlock $ normalizeTree ns)
+                 def{ opmlOptions = opts }
   return $
     setTitle (opmlDocTitle st') $
     setAuthors (opmlDocAuthors st') $
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 9943d3147..85308deb1 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -15,6 +15,7 @@ module Text.Pandoc.Readers.Odt ( readOdt ) where
 
 import Codec.Archive.Zip
 import qualified Text.XML.Light as XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 import qualified Data.ByteString.Lazy as B
 
@@ -66,18 +67,18 @@ bytesToOdt bytes = case toArchiveOrFail bytes of
 
 --
 archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
-archiveToOdt archive = either (Left. PandocParseError) Right $ do
-  let onFailure msg Nothing = Left msg
+archiveToOdt archive = do
+  let onFailure msg Nothing = Left $ PandocParseError msg
       onFailure _   (Just x) = Right x
   contentEntry <- onFailure "Could not find content.xml"
                    (findEntryByPath "content.xml" archive)
   stylesEntry <- onFailure "Could not find styles.xml"
                    (findEntryByPath "styles.xml" archive)
-  contentElem <- onFailure "Could not find content element"
-                   (entryToXmlElem contentEntry)
-  stylesElem <- onFailure "Could not find styles element"
-                   (entryToXmlElem stylesEntry)
-  styles <- either (\_ -> Left "Could not read styles") Right
+  contentElem <- entryToXmlElem contentEntry
+  stylesElem <- entryToXmlElem stylesEntry
+  styles <- either
+               (\_ -> Left $ PandocParseError "Could not read styles")
+               Right
                (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem))
   let filePathIsOdtMedia :: FilePath -> Bool
       filePathIsOdtMedia fp =
@@ -85,10 +86,13 @@ archiveToOdt archive = either (Left. PandocParseError) Right $ do
         in  (dir == "Pictures/") || (dir /= "./" && name == "content.xml")
   let media = filteredFilesFromArchive archive filePathIsOdtMedia
   let startState = readerState styles media
-  either (\_ -> Left "Could not convert opendocument") Right
+  either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right
     (runConverter' read_body startState contentElem)
 
 
 --
-entryToXmlElem :: Entry -> Maybe XML.Element
-entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry
+entryToXmlElem :: Entry -> Either PandocError XML.Element
+entryToXmlElem entry =
+  case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
+    Right x  -> Right x
+    Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 1f16f6772..e99fa2567 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -55,8 +55,9 @@ import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
 import Text.Printf (printf)
 import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
-                       add_attrs, lookupAttr, node, onlyElems, parseXML,
+                       add_attrs, lookupAttr, node, onlyElems,
                        ppElement, showElement, strContent, unode, unqual)
+import Text.Pandoc.XMLParser (parseXMLContents)
 import Text.Pandoc.XML (escapeStringForXML)
 import Text.DocTemplates (FromContext(lookupContext), Context(..),
                           ToContext(toVal), Val(..))
@@ -160,7 +161,12 @@ mkEntry path content = do
 getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
 getEPUBMetadata opts meta = do
   let md = metadataFromMeta opts meta
-  let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
+  elts <- case writerEpubMetadata opts of
+            Nothing -> return []
+            Just t -> case parseXMLContents (TL.fromStrict t) of
+                          Left msg -> throwError $
+                            PandocXMLError "epub metadata" msg
+                          Right ns -> return (onlyElems ns)
   let md' = foldr addMetadataFromXML md elts
   let addIdentifier m =
        if null (epubIdentifier m)
@@ -836,7 +842,8 @@ pandocToEPUB version opts doc = do
                                             : case subs of
                                                  []    -> []
                                                  (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
-          where titElements = parseXML titRendered
+          where titElements = either (const []) id $
+                                parseXMLContents (TL.fromStrict titRendered)
                 titRendered = case P.runPure
                                (writeHtmlStringForEPUB version
                                  opts{ writerTemplate = Nothing
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 25b1f28d1..9334d6e9a 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -19,7 +19,7 @@ FictionBook is an XML-based e-book format. For more information see:
 module Text.Pandoc.Writers.FB2 (writeFB2)  where
 
 import Control.Monad (zipWithM)
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify)
 import Data.ByteString.Base64 (encode)
 import Data.Char (isAscii, isControl, isSpace)
@@ -27,16 +27,18 @@ import Data.Either (lefts, rights)
 import Data.List (intercalate)
 import Data.Text (Text, pack)
 import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Network.HTTP (urlEncode)
 import Text.XML.Light
 import qualified Text.XML.Light as X
 import qualified Text.XML.Light.Cursor as XC
-import qualified Text.XML.Light.Input as XI
+import Text.Pandoc.XMLParser (parseXMLContents)
 
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Logging
 import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
 import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
@@ -307,7 +309,10 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
                              map (el "p" . el "code" . T.unpack) . T.lines $ s
 blockToXml (RawBlock f str) =
   if f == Format "fb2"
-    then return $ XI.parseXML str
+    then
+      case parseXMLContents (TL.fromStrict str) of
+         Left msg  -> throwError $ PandocXMLError "" msg
+         Right nds -> return nds
     else return []
 blockToXml (Div _ bs) = cMapM blockToXml bs
 blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 05dfad5eb..a32ff618c 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -13,7 +13,7 @@ Conversion of 'Pandoc' documents to ODT.
 -}
 module Text.Pandoc.Writers.ODT ( writeODT ) where
 import Codec.Archive.Zip
-import Control.Monad.Except (catchError)
+import Control.Monad.Except (catchError, throwError)
 import Control.Monad.State.Strict
 import qualified Data.ByteString.Lazy as B
 import Data.Generics (everywhere', mkT)
@@ -27,6 +27,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.ImageSize
 import Text.Pandoc.Logging
 import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
@@ -35,10 +36,11 @@ import Text.DocLayout
 import Text.Pandoc.Shared (stringify, pandocVersion, tshow)
 import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks,
                                    fixDisplayMath)
-import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy)
+import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 import Text.TeXMath
 import Text.XML.Light
 
@@ -172,17 +174,18 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive
 updateStyleWithLang Nothing arch = return arch
 updateStyleWithLang (Just lang) arch = do
   epochtime <- floor `fmap` lift P.getPOSIXTime
-  return arch{ zEntries = [if eRelativePath e == "styles.xml"
-                              then case parseXMLDoc
-                                      (toStringLazy (fromEntry e)) of
-                                      Nothing -> e
-                                      Just d ->
-                                        toEntry "styles.xml" epochtime
-                                        ( fromStringLazy
-                                        . ppTopElement
-                                        . addLang lang $ d )
-                              else e
-                            | e <- zEntries arch] }
+  entries <- mapM (\e -> if eRelativePath e == "styles.xml"
+                            then case parseXMLElement
+                                    (toTextLazy (fromEntry e)) of
+                                    Left msg -> throwError $
+                                        PandocXMLError "styles.xml" msg
+                                    Right d -> return $
+                                      toEntry "styles.xml" epochtime
+                                      ( fromStringLazy
+                                      . ppTopElement
+                                      . addLang lang $ d )
+                            else return e) (zEntries arch)
+  return arch{ zEntries = entries }
 
 addLang :: Lang -> Element -> Element
 addLang lang = everywhere' (mkT updateLangAttr)
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index 3ac007f4e..8f60e70d5 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -35,6 +35,7 @@ import qualified Data.Text as T
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.XML.Light as XML
+import Text.Pandoc.XMLParser (parseXMLElement)
 
 mknode :: Node t => String -> [(String,String)] -> t -> Element
 mknode s attrs =
@@ -62,10 +63,10 @@ parseXml refArchive distArchive relpath =
          findEntryByPath relpath distArchive of
             Nothing -> throwError $ PandocSomeError $
                         T.pack relpath <> " missing in reference file"
-            Just e  -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of
-                       Nothing -> throwError $ PandocSomeError $
-                                   T.pack relpath <> " corrupt in reference file"
-                       Just d  -> return d
+            Just e  -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of
+                       Left msg ->
+                         throwError $ PandocXMLError (T.pack relpath) msg
+                       Right d  -> return d
 
 -- Copied from Util
 
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 8554db622..cd092969b 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -29,6 +29,7 @@ import Data.Time.Clock (UTCTime)
 import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
 import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
 import Text.XML.Light
+import Text.Pandoc.XMLParser (parseXMLElement)
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -77,7 +78,8 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer)
 getPresentationSize refArchive distArchive = do
   entry <- findEntryByPath "ppt/presentation.xml" refArchive  `mplus`
            findEntryByPath "ppt/presentation.xml" distArchive
-  presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry
+  presElement <- either (const Nothing) return $
+                   parseXMLElement $ UTF8.toTextLazy $ fromEntry entry
   let ns = elemToNameSpaces presElement
   sldSize <- findChild (elemName ns "p" "sldSz") presElement
   cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs
new file mode 100644
index 000000000..8ad22a66a
--- /dev/null
+++ b/src/Text/Pandoc/XMLParser.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XMLParser
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Bridge to allow using xml-conduit's parser with xml-light's types.
+-}
+module Text.Pandoc.XMLParser
+  ( parseXMLElement
+  , parseXMLContents
+  , module Text.XML.Light.Types
+  ) where
+
+import qualified Control.Exception as E
+import qualified Text.XML as Conduit
+import Text.XML.Unresolved (InvalidEventStream(..))
+import qualified Text.XML.Light as Light
+import Text.XML.Light.Types
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Map as M
+import Data.Maybe (mapMaybe)
+
+-- Drop in replacement for parseXMLDoc in xml-light.
+parseXMLElement :: TL.Text -> Either T.Text Light.Element
+parseXMLElement t =
+  elementToElement .  Conduit.documentRoot <$>
+    either (Left . T.pack . E.displayException) Right
+    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
+
+parseXMLContents :: TL.Text -> Either T.Text [Light.Content]
+parseXMLContents t =
+  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
+    Left e ->
+      case E.fromException e of
+        Just (ContentAfterRoot _) ->
+          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
+        _ -> Left . T.pack . E.displayException $ e
+    Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x]
+
+elementToElement :: Conduit.Element -> Light.Element
+elementToElement (Conduit.Element name attribMap nodes) =
+  Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
+ where
+  attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $
+              M.toList attribMap
+  nameToQname (Conduit.Name localName mbns mbpref) =
+    case mbpref of
+      Nothing | "xmlns:" `T.isPrefixOf` localName ->
+           Light.QName (T.unpack $ T.drop 6 localName)  (T.unpack <$> mbns)
+                       (Just "xmlns")
+      _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns)
+                       (T.unpack <$> mbpref)
+
+nodeToContent :: Conduit.Node -> Maybe Light.Content
+nodeToContent (Conduit.NodeElement el) =
+  Just (Light.Elem (elementToElement el))
+nodeToContent (Conduit.NodeContent t) =
+  Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing))
+nodeToContent _ = Nothing
+
diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs
index 525499c86..a9c9a0586 100644
--- a/test/Tests/Readers/JATS.hs
+++ b/test/Tests/Readers/JATS.hs
@@ -88,6 +88,7 @@ tests = [ testGroup "inline code"
             "<p>\n\
             \  <inline-formula><alternatives>\n\
             \  <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\
+            \  </alternatives></inline-formula>\n\
             \</p>"
             =?> para (math "\\sigma|_{\\{x\\}}")
           , test jats "math ml only" $
diff --git a/test/command/5321.md b/test/command/5321.md
index 081abe2a0..83404632a 100644
--- a/test/command/5321.md
+++ b/test/command/5321.md
@@ -4,7 +4,7 @@
   <caption>
     <p>bar</p>
   </caption>
-  <graphic xlink:href="foo.png" xlink:alt-text="baz">
+  <graphic xlink:href="foo.png" xlink:alt-text="baz" />
 </fig>
 ^D
 [Para [Image ("fig-1",[],[]) [Str "bar"] ("foo.png","fig:")]]
@@ -17,7 +17,7 @@
     <title>foo</title>
     <p>bar</p>
   </caption>
-  <graphic xlink:href="foo.png" xlink:alt-text="baz">
+  <graphic xlink:href="foo.png" xlink:alt-text="baz" />
 </fig>
 ^D
 [Para [Image ("fig-1",[],[]) [Str "foo",LineBreak,Str "bar"] ("foo.png","fig:")]]
diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook
index 02568d8de..5717d78d0 100644
--- a/test/docbook-reader.docbook
+++ b/test/docbook-reader.docbook
@@ -1,6 +1,11 @@
 <?xml version="1.0" encoding="utf-8" ?>
 <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
-                  "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
+"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"
+[
+<!ENTITY GHC "GHC" >
+<!ENTITY let "LET" >
+<!ENTITY case "CASE" >
+]>
 <article>
   <articleinfo>
     <title>Pandoc Test Suite</title>
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index 3e1bf16e7..d3b16d0f2 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 66f055063..6293ef493 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index fb3a02a0a..4205a1516 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index bc6c2702a..adb3f23db 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 8c555a5bd..92c8137fe 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index 5f96cc911..f53470617 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index c21b3a5b3..d6af90a72 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 92ce144e9..e18dbe853 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index d21b67309..820299043 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index 3558a47bf..ae0f41d12 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 606df92a3..94cd35dfa 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 759269cac..879f2a25b 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index c37777080..93f86478f 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 9450b1a73..967d297f2 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 6f0b830e6..c5614e2fa 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index e53889cfb..0f39a831f 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 5dbe298b7..07046f223 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 194181288..3656618e6 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 6e0b634f7..8798253d5 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 477178e77..0a24d1840 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index 51110356e..52bb7a217 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index b6206cdf5..182c06c64 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 07b576080..7b69a56a3 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index d46095eb7..3d3a35701 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 7caba4e93..5ae37b406 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 6aaa6da61..c29aa6716 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 5746c5ad0..664493246 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 5f22dccc6..b6d15340e 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index ab5c4f56d..f8e1092d2 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 085f33162..b4cda82f2 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 1ac86d5c8..ee222efa0 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index c2c443b19..c6f8d9c96 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index 5ea18d32e..ea8146690 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
diff --git a/test/jats-reader.native b/test/jats-reader.native
index ab77dd1a0..0715ea8cc 100644
--- a/test/jats-reader.native
+++ b/test/jats-reader.native
@@ -1,4 +1,4 @@
-Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
+Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]})
 [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
 ,Header 1 ("headers",[],[]) [Str "Headers"]
 ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")]
diff --git a/test/jats-reader.xml b/test/jats-reader.xml
index f75b3e95a..f33cb9ab3 100644
--- a/test/jats-reader.xml
+++ b/test/jats-reader.xml
@@ -20,6 +20,7 @@
       <surname>MacFarlane</surname>
       <given-names>John</given-names>
     </name>
+  </contrib>
   <contrib contrib-type="author">
     <name>
       <surname>Anonymous</surname>
diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx
index aa9b7692a..58070eb3f 100644
Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ
diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx
index 9aaef4cb5..db9b7e371 100644
Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ
diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx
index 1737ec757..c7b1ed7d5 100644
Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ
diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx
index 87fb560ef..6944d92bf 100644
Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ
diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx
index 961c31020..ae0d28429 100644
Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ
diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx
index 894738ef7..37c74c69a 100644
Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ
diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx
index 188e8d826..324e443a1 100644
Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ
diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx
index 253e8c0a7..c81b983e3 100644
Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ
diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx
index e230420d2..30ce33db6 100644
Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ
diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx
index 49384fd65..d6c604968 100644
Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ
diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx
index cdf1be4ad..000e17ecd 100644
Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ
diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx
index c4fcbad45..fdcd2e29b 100644
Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ
diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx
index 4a13b5b7f..e73126376 100644
Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ
diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx
index 7a6e9700e..e3f968e9e 100644
Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ
diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx
index 926c8ff3f..eadb9372e 100644
Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ
diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx
index 16f48e182..8ca6bab2b 100644
Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ
diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx
index f47b17a74..ae188ee68 100644
Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ
diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx
index 88109a95e..60301fa50 100644
Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ
diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx
index 84020708f..17124a50d 100644
Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ
diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx
index a2f77e945..19ae7dd4e 100644
Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ
diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx
index 48bf7bc8a..b650b7585 100644
Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ
diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx
index 23b134a5f..0ab029614 100644
Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ
diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx
index d6eebeffb..2a6e35080 100644
Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx
index a6c76a187..a7bcf6a4b 100644
Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx
index 1fbde815b..21b018c25 100644
Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ
diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx
index cb3af4aa1..4ec4772a4 100644
Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ
diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx
index dff386885..5983657b6 100644
Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ
diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx
index 43b125f5e..dd54c7082 100644
Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ
diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx
index 3314a1c65..b3e5ed5b9 100644
Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx
index 27a136838..1078854bb 100644
Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx
index 7aa3b6a87..5116c6c4e 100644
Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx
index d43709ca7..0c8e49bd9 100644
Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx
index 793ea10f6..68695939d 100644
Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx
index 2f4d3b820..7ed9b946d 100644
Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx
index 94a221398..79fc82345 100644
Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ
diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx
index 22040c88c..9f943c279 100644
Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ
diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx
index 18477380b..ac72d8ced 100644
Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ
diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx
index 4b9d0ba4d..15c7b5469 100644
Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ
diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx
index 1c5b54185..926c5e699 100644
Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ
diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx
index 1314f4de4..a37e72d2c 100644
Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ
diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx
index 9018be36e..7f86533fe 100644
Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ
diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx
index 35e93af67..89e3db0ab 100644
Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ
-- 
cgit v1.2.3


From 967e7f5fb990b29de48b37be1db40fb149a8cf55 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 14 Feb 2021 22:29:21 -0800
Subject: Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light...

...and add new definitions isomorphic to xml-light's, but with
Text instead of String.  This allows us to keep most of the code in
existing readers that use xml-light, but avoid lots of unnecessary
allocation.

We also add versions of the functions from xml-light's
Text.XML.Light.Output and Text.XML.Light.Proc that operate
on our modified XML types, and functions that convert
xml-light types to our types (since some of our dependencies,
like texmath, use xml-light).

Update golden tests for docx and pptx.

OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`.

Docx: Do a manual traversal to unwrap sdt and smartTag.
This is faster, and needed to pass the tests.

Benchmarks:

A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8)
B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8)
C = this commit

| Reader  |  A    | B      | C     |
| ------- | ----- | ------ | ----- |
| docbook | 18 ms | 12 ms  | 10 ms |
| opml    | 65 ms | 62 ms  | 35 ms |
| jats    | 15 ms | 11 ms  |  9 ms |
| docx    | 72 ms | 69 ms  | 44 ms |
| odt     | 78 ms | 41 ms  | 28 ms |
| epub    | 64 ms | 61 ms  | 56 ms |
| fb2     | 14 ms |  5 ms  |  4 ms |
---
 .hlint.yaml                                        |   1 +
 pandoc.cabal                                       |   2 +-
 src/Text/Pandoc/ImageSize.hs                       |   9 +-
 src/Text/Pandoc/Readers/DocBook.hs                 |  76 +--
 src/Text/Pandoc/Readers/Docx/Parse.hs              | 163 +++---
 src/Text/Pandoc/Readers/Docx/Parse/Styles.hs       |  31 +-
 src/Text/Pandoc/Readers/Docx/Util.hs               |  27 +-
 src/Text/Pandoc/Readers/EPUB.hs                    |  65 +--
 src/Text/Pandoc/Readers/FB2.hs                     |  93 ++--
 src/Text/Pandoc/Readers/JATS.hs                    |  58 +-
 src/Text/Pandoc/Readers/OPML.hs                    |  29 +-
 src/Text/Pandoc/Readers/Odt.hs                     |   5 +-
 src/Text/Pandoc/Readers/Odt/ContentReader.hs       |  13 +-
 src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs  |   3 +-
 src/Text/Pandoc/Readers/Odt/Generic/Utils.hs       |  33 +-
 .../Pandoc/Readers/Odt/Generic/XMLConverter.hs     |  23 +-
 src/Text/Pandoc/Readers/Odt/Namespaces.hs          |  11 +-
 src/Text/Pandoc/Readers/Odt/StyleReader.hs         |  23 +-
 src/Text/Pandoc/Writers/Docx.hs                    | 263 +++++----
 src/Text/Pandoc/Writers/EPUB.hs                    | 356 ++++++-------
 src/Text/Pandoc/Writers/FB2.hs                     | 127 ++---
 src/Text/Pandoc/Writers/ODT.hs                     |  16 +-
 src/Text/Pandoc/Writers/OOXML.hs                   |  35 +-
 src/Text/Pandoc/Writers/Powerpoint/Output.hs       | 201 +++----
 src/Text/Pandoc/XML/Light.hs                       | 586 +++++++++++++++++++++
 src/Text/Pandoc/XMLParser.hs                       |  66 ---
 test/Tests/Writers/OOXML.hs                        |   3 +-
 test/docx/golden/block_quotes.docx                 | Bin 10098 -> 10071 bytes
 test/docx/golden/codeblock.docx                    | Bin 9950 -> 9920 bytes
 test/docx/golden/comments.docx                     | Bin 10285 -> 10258 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 10048 -> 10021 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10673 -> 10650 bytes
 test/docx/golden/custom_style_reference.docx       | Bin 12434 -> 12403 bytes
 test/docx/golden/definition_list.docx              | Bin 9947 -> 9920 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9953 -> 9925 bytes
 test/docx/golden/document-properties.docx          | Bin 10429 -> 10404 bytes
 test/docx/golden/headers.docx                      | Bin 10086 -> 10059 bytes
 test/docx/golden/image.docx                        | Bin 26764 -> 26736 bytes
 test/docx/golden/inline_code.docx                  | Bin 9886 -> 9859 bytes
 test/docx/golden/inline_formatting.docx            | Bin 10066 -> 10038 bytes
 test/docx/golden/inline_images.docx                | Bin 26822 -> 26793 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10107 -> 10081 bytes
 test/docx/golden/links.docx                        | Bin 10282 -> 10251 bytes
 test/docx/golden/lists.docx                        | Bin 10358 -> 10332 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10149 -> 10123 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10238 -> 10210 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10150 -> 10122 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10245 -> 10216 bytes
 test/docx/golden/notes.docx                        | Bin 10052 -> 10028 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9986 -> 9960 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10121 -> 10094 bytes
 test/docx/golden/table_one_row.docx                | Bin 9938 -> 9908 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10255 -> 10227 bytes
 test/docx/golden/tables.docx                       | Bin 10272 -> 10244 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9930 -> 9903 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9913 -> 9886 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9947 -> 9920 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 10059 -> 10032 bytes
 test/docx/golden/unicode.docx                      | Bin 9871 -> 9845 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9919 -> 9892 bytes
 test/pptx/code-custom.pptx                         | Bin 28221 -> 28184 bytes
 test/pptx/code-custom_templated.pptx               | Bin 395516 -> 395477 bytes
 test/pptx/code.pptx                                | Bin 28220 -> 28183 bytes
 test/pptx/code_templated.pptx                      | Bin 395514 -> 395477 bytes
 test/pptx/document-properties-short-desc.pptx      | Bin 27004 -> 26967 bytes
 .../document-properties-short-desc_templated.pptx  | Bin 394288 -> 394253 bytes
 test/pptx/document-properties.pptx                 | Bin 27408 -> 27375 bytes
 test/pptx/document-properties_templated.pptx       | Bin 394691 -> 394656 bytes
 test/pptx/endnotes.pptx                            | Bin 26962 -> 26928 bytes
 test/pptx/endnotes_templated.pptx                  | Bin 394253 -> 394219 bytes
 test/pptx/endnotes_toc.pptx                        | Bin 27789 -> 27747 bytes
 test/pptx/endnotes_toc_templated.pptx              | Bin 395083 -> 395041 bytes
 test/pptx/images.pptx                              | Bin 44619 -> 44579 bytes
 test/pptx/images_templated.pptx                    | Bin 411909 -> 411870 bytes
 test/pptx/inline_formatting.pptx                   | Bin 26148 -> 26121 bytes
 test/pptx/inline_formatting_templated.pptx         | Bin 393438 -> 393412 bytes
 test/pptx/lists.pptx                               | Bin 27049 -> 27015 bytes
 test/pptx/lists_templated.pptx                     | Bin 394340 -> 394307 bytes
 test/pptx/raw_ooxml.pptx                           | Bin 26940 -> 26908 bytes
 test/pptx/raw_ooxml_templated.pptx                 | Bin 394231 -> 394198 bytes
 test/pptx/remove_empty_slides.pptx                 | Bin 44065 -> 44025 bytes
 test/pptx/remove_empty_slides_templated.pptx       | Bin 411352 -> 411311 bytes
 test/pptx/slide_breaks.pptx                        | Bin 28575 -> 28531 bytes
 test/pptx/slide_breaks_slide_level_1.pptx          | Bin 27744 -> 27705 bytes
 .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395038 -> 395000 bytes
 test/pptx/slide_breaks_templated.pptx              | Bin 395868 -> 395825 bytes
 test/pptx/slide_breaks_toc.pptx                    | Bin 29532 -> 29481 bytes
 test/pptx/slide_breaks_toc_templated.pptx          | Bin 396826 -> 396776 bytes
 test/pptx/speaker_notes.pptx                       | Bin 35436 -> 35360 bytes
 test/pptx/speaker_notes_after_metadata.pptx        | Bin 31675 -> 31636 bytes
 .../speaker_notes_after_metadata_templated.pptx    | Bin 398955 -> 398915 bytes
 test/pptx/speaker_notes_afterheader.pptx           | Bin 30691 -> 30657 bytes
 test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397979 -> 397943 bytes
 test/pptx/speaker_notes_afterseps.pptx             | Bin 51604 -> 51548 bytes
 test/pptx/speaker_notes_afterseps_templated.pptx   | Bin 418896 -> 418834 bytes
 test/pptx/speaker_notes_templated.pptx             | Bin 402728 -> 402650 bytes
 test/pptx/start_numbering_at.pptx                  | Bin 27023 -> 26991 bytes
 test/pptx/start_numbering_at_templated.pptx        | Bin 394314 -> 394283 bytes
 test/pptx/tables.pptx                              | Bin 27566 -> 27532 bytes
 test/pptx/tables_templated.pptx                    | Bin 394859 -> 394827 bytes
 test/pptx/two_column.pptx                          | Bin 26065 -> 26038 bytes
 test/pptx/two_column_templated.pptx                | Bin 393355 -> 393327 bytes
 102 files changed, 1388 insertions(+), 930 deletions(-)
 create mode 100644 src/Text/Pandoc/XML/Light.hs
 delete mode 100644 src/Text/Pandoc/XMLParser.hs

(limited to 'test/docx/golden')

diff --git a/.hlint.yaml b/.hlint.yaml
index d5ebffd34..e482b2b37 100644
--- a/.hlint.yaml
+++ b/.hlint.yaml
@@ -14,6 +14,7 @@
 - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained
 - ignore: {name: "Use &&&"}
 - ignore: {name: "Use String"}
+- ignore: {name: "Use camelCase"}
 - ignore: {name: "Use fmap"}  # specific for GHC 7.8 compat
 - ignore: {name: "Use isDigit"}
 
diff --git a/pandoc.cabal b/pandoc.cabal
index 3c7063f6c..22aebd55e 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -687,7 +687,7 @@ library
                    Text.Pandoc.Lua.PandocLua,
                    Text.Pandoc.Lua.Util,
                    Text.Pandoc.Lua.Walk,
-                   Text.Pandoc.XMLParser,
+                   Text.Pandoc.XML.Light,
                    Text.Pandoc.CSS,
                    Text.Pandoc.CSV,
                    Text.Pandoc.RoffChar,
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
index e0a1af8e8..bb1aa6351 100644
--- a/src/Text/Pandoc/ImageSize.hs
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -44,8 +44,7 @@ import Numeric (showFFloat)
 import Text.Pandoc.Definition
 import Text.Pandoc.Options
 import qualified Text.Pandoc.UTF8 as UTF8
-import qualified Text.XML.Light as Xml
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light hiding (Attr)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
@@ -332,12 +331,12 @@ svgSize opts img = do
   doc <- either (const mzero) return $ parseXMLElement
                                      $ TL.fromStrict $ UTF8.toText img
   let viewboxSize = do
-        vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
-        [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
+        vb <- findAttrBy (== QName "viewBox" Nothing Nothing) doc
+        [_,_,w,h] <- mapM safeRead (T.words vb)
         return (w,h)
   let dpi = fromIntegral $ writerDpi opts
   let dirToInt dir = do
-        dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack
+        dim <- findAttrBy (== QName dir Nothing Nothing) doc >>= lengthToDim
         return $ inPixel opts dim
   w <- dirToInt "width" <|> (fst <$> viewboxSize)
   h <- dirToInt "height" <|> (snd <$> viewboxSize)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ad0108843..e201b54fe 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -31,8 +31,7 @@ import Text.Pandoc.Options
 import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 
 {-
 
@@ -578,26 +577,27 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <>
+                                    convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e)
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- convenience function
 named :: Text -> Element -> Bool
-named s e = qName (elName e) == T.unpack s
+named s e = qName (elName e) == s
 
 --
 
@@ -634,7 +634,7 @@ isBlockElement :: Content -> Bool
 isBlockElement (Elem e) = qName (elName e) `elem` blockTags
 isBlockElement _ = False
 
-blockTags :: [String]
+blockTags :: [Text]
 blockTags =
   [ "abstract"
   , "ackno"
@@ -721,7 +721,7 @@ blockTags =
   , "variablelist"
   ] ++ admonitionTags
 
-admonitionTags :: [String]
+admonitionTags :: [Text]
 admonitionTags = ["important","caution","note","tip","warning"]
 
 -- Trim leading and trailing newline characters
@@ -779,10 +779,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> DB m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "toc"   -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -837,7 +837,7 @@ parseBlock (Elem e) =
         "refsect2" -> sect 2
         "refsect3" -> sect 3
         "refsection" -> gets dbSectionLevel >>= sect . (+1)
-        l | l `elem` admonitionTags -> parseAdmonition $ T.pack l
+        l | l `elem` admonitionTags -> parseAdmonition l
         "area" -> skip
         "areaset" -> skip
         "areaspec" -> skip
@@ -899,7 +899,7 @@ parseBlock (Elem e) =
         "subtitle" -> return mempty  -- handled in parent element
         _       -> skip >> getBlocks e
    where skip = do
-           let qn = T.pack $ qName $ elName e
+           let qn = qName $ elName e
            let name = if "pi-" `T.isPrefixOf` qn
                          then "<?" <> qn <> "?>"
                          else qn
@@ -911,7 +911,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ T.pack $ strContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -965,7 +965,7 @@ parseBlock (Elem e) =
                             w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x ->
                                                      (x >= '0' && x <= '9')
-                                                      || x == '.') (T.pack w)
+                                                      || x == '.') w
                             if n > 0 then Just n else Nothing
                       let numrows = case bodyrows of
                                          [] -> 0
@@ -1048,12 +1048,12 @@ parseMixed container conts = do
       x <- parseMixed container rs
       return $ p <> b <> x
 
-parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell]
+parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell]
 parseRow cn = do
   let isEntry x  = named "entry" x || named "td" x || named "th" x
   mapM (parseEntry cn) . filterChildren isEntry
 
-parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell
+parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell
 parseEntry cn el = do
   let colDistance sa ea = do
         let iStrt = elemIndex sa cn
@@ -1075,7 +1075,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines
 getInlines e' = trimInlines . mconcat <$>
                  mapM parseInline (elContent e')
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -1084,9 +1084,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
+parseInline (Text (CData _ s _)) = return $ text s
 parseInline (CRef ref) =
-  return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+  return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref)
 parseInline (Elem e) =
   case qName (elName e) of
         "anchor" -> do
@@ -1138,7 +1138,7 @@ parseInline (Elem e) =
         "userinput" -> codeWithLang
         "systemitem" -> codeWithLang
         "varargs" -> return $ code "(...)"
-        "keycap" -> return (str $ T.pack $ strContent e)
+        "keycap" -> return (str $ strContent e)
         "keycombo" -> keycombo <$>
                          mapM parseInline (elContent e)
         "menuchoice" -> menuchoice <$>
@@ -1150,17 +1150,17 @@ parseInline (Elem e) =
             let title = case attrValue "endterm" e of
                             ""      -> maybe "???" xrefTitleByElem
                                          (findElementById linkend content)
-                            endterm -> maybe "???" (T.pack . strContent)
+                            endterm -> maybe "???" strContent
                                          (findElementById endterm content)
             return $ link ("#" <> linkend) "" (text title)
-        "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
-                          $ str $ T.pack $ strContent e
-        "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
+        "email" -> return $ link ("mailto:" <> strContent e) ""
+                          $ str $ strContent e
+        "uri" -> return $ link (strContent e) "" $ str $ strContent e
         "ulink" -> innerInlines (link (attrValue "url" e) "")
         "link" -> do
              ils <- innerInlines id
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "linkend" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
@@ -1180,7 +1180,7 @@ parseInline (Elem e) =
         "pi-asciidoc-br" -> return linebreak
         _          -> skip >> innerInlines id
    where skip = do
-           let qn = T.pack $ qName $ elName e
+           let qn = qName $ elName e
            let name = if "pi-" `T.isPrefixOf` qn
                          then "<?" <> qn <> "?>"
                          else qn
@@ -1193,7 +1193,7 @@ parseInline (Elem e) =
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
          simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines
                          (filterChildren (named "member") e)
          segmentedList = do
@@ -1234,10 +1234,10 @@ parseInline (Elem e) =
                   "sect5"        -> descendantContent "title" el
                   "cmdsynopsis"  -> descendantContent "command" el
                   "funcsynopsis" -> descendantContent "function" el
-                  _              -> T.pack $ qName (elName el) ++ "_title"
+                  _              -> qName (elName el) <> "_title"
           where
             xrefLabel = attrValue "xreflabel" el
-            descendantContent name = maybe "???" (T.pack . strContent)
+            descendantContent name = maybe "???" strContent
                                    . filterElementName (\n -> qName n == name)
 
 -- | Extract a math equation from an element
@@ -1258,7 +1258,7 @@ equation e constructor =
     mathMLEquations :: [Text]
     mathMLEquations = map writeTeX $ rights $ readMath
       (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
-      (readMathML . T.pack . showElement)
+      (readMathML . showElement)
 
     latexEquations :: [Text]
     latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
@@ -1272,8 +1272,8 @@ equation e constructor =
 -- | Get the actual text stored in a CData block. 'showContent'
 -- returns the text still surrounded by the [[CDATA]] tags.
 showVerbatimCData :: Content -> Text
-showVerbatimCData (Text (CData _ d _)) = T.pack d
-showVerbatimCData c = T.pack $ showContent c
+showVerbatimCData (Text (CData _ d _)) = d
+showVerbatimCData c = showContent c
 
 
 -- | Set the prefix of a name to 'Nothing'
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 056dab6c2..c76f3c171 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -63,6 +63,7 @@ import Data.Char (chr, ord, readLitChar)
 import Data.List
 import qualified Data.Map as M
 import qualified Data.Text as T
+import Data.Text (Text)
 import Data.Maybe
 import System.FilePath
 import Text.Pandoc.Readers.Docx.Util
@@ -72,9 +73,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
 import Text.TeXMath (Exp)
 import Text.TeXMath.Readers.OMML (readOMML)
 import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
-import Text.XML.Light
-import qualified Text.XML.Light.Cursor as XMLC
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 data ReaderEnv = ReaderEnv { envNotes         :: Notes
                            , envComments      :: Comments
@@ -128,37 +127,23 @@ mapD f xs =
   in
    concatMapM handler xs
 
-unwrap :: NameSpaces -> Content -> [Content]
-unwrap ns (Elem element)
+unwrapElement :: NameSpaces -> Element -> [Element]
+unwrapElement ns element
   | isElem ns "w" "sdt" element
   , Just sdtContent <- findChildByName ns "w" "sdtContent" element
-  = concatMap (unwrap ns . Elem) (elChildren sdtContent)
+  = concatMap (unwrapElement ns) (elChildren sdtContent)
   | isElem ns "w" "smartTag" element
-  = concatMap (unwrap ns . Elem) (elChildren element)
-unwrap _ content = [content]
+  = concatMap (unwrapElement ns) (elChildren element)
+  | otherwise
+  = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }]
 
-unwrapChild :: NameSpaces -> Content -> Content
-unwrapChild ns (Elem element) =
-  Elem $ element { elContent = concatMap (unwrap ns) (elContent element) }
-unwrapChild _ content = content
+unwrapContent :: NameSpaces -> Content -> [Content]
+unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element
+unwrapContent _ content = [content]
 
-walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
-walkDocument' ns cur =
-  let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur
-  in
-    case XMLC.nextDF modifiedCur of
-      Just cur' -> walkDocument' ns cur'
-      Nothing   -> XMLC.root modifiedCur
-
-walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument :: NameSpaces -> Element -> Element
 walkDocument ns element =
-  let cur = XMLC.fromContent (Elem element)
-      cur' = walkDocument' ns cur
-  in
-    case XMLC.toTree cur' of
-      Elem element' -> Just element'
-      _             -> Nothing
-
+  element{ elContent = concatMap (unwrapContent ns) (elContent element) }
 
 newtype Docx = Docx Document
           deriving Show
@@ -361,9 +346,9 @@ getDocumentXmlPath zf = do
   fp <- findAttr (QName "Target" Nothing Nothing) rel
   -- sometimes there will be a leading slash, which windows seems to
   -- have trouble with.
-  return $ case fp of
+  return $ case T.unpack fp of
     '/' : fp' -> fp'
-    _         -> fp
+    fp'       -> fp'
 
 archiveToDocument :: Archive -> D Document
 archiveToDocument zf = do
@@ -372,7 +357,7 @@ archiveToDocument zf = do
   docElem <- maybeToD $ parseXMLFromEntry entry
   let namespaces = elemToNameSpaces docElem
   bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
-  let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+  let bodyElem' = walkDocument namespaces bodyElem
   body <- elemToBody namespaces bodyElem'
   return $ Document namespaces body
 
@@ -414,8 +399,8 @@ archiveToNotes zf =
       fn_namespaces = maybe [] elemToNameSpaces fnElem
       en_namespaces = maybe [] elemToNameSpaces enElem
       ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
-      fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote"
-      en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote"
+      fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
+      en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
   in
    Notes ns fn en
 
@@ -424,7 +409,8 @@ archiveToComments zf =
   let cmtsElem = findEntryByPath "word/comments.xml" zf
                >>= parseXMLFromEntry
       cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
-      cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
+      cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
+               cmtsElem
   in
     case cmts of
       Just c  -> Comments cmts_namespaces c
@@ -443,8 +429,8 @@ filePathToRelType path docXmlPath =
 relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
 relElemToRelationship relType element | qName (elName element) == "Relationship" =
   do
-    relId <- findAttrText (QName "Id" Nothing Nothing) element
-    target <- findAttrText (QName "Target" Nothing Nothing) element
+    relId <- findAttr (QName "Id" Nothing Nothing) element
+    target <- findAttr (QName "Target" Nothing Nothing) element
     return $ Relationship relType relId target
 relElemToRelationship _ _ = Nothing
 
@@ -485,10 +471,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
 loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
 loElemToLevelOverride ns element
   | isElem ns "w" "lvlOverride" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       let startOverride = findChildByName ns "w" "startOverride" element
                           >>= findAttrByName ns "w" "val"
-                          >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                          >>= stringToInteger
           lvl = findChildByName ns "w" "lvl" element
                 >>= levelElemToLevel ns
       return $ LevelOverride ilvl startOverride lvl
@@ -497,9 +483,9 @@ loElemToLevelOverride _ _ = Nothing
 numElemToNum :: NameSpaces -> Element -> Maybe Numb
 numElemToNum ns element
   | isElem ns "w" "num" element = do
-      numId <- findAttrTextByName ns "w" "numId" element
+      numId <- findAttrByName ns "w" "numId" element
       absNumId <- findChildByName ns "w" "abstractNumId" element
-                  >>= findAttrTextByName ns "w" "val"
+                  >>= findAttrByName ns "w" "val"
       let lvlOverrides = mapMaybe
                          (loElemToLevelOverride ns)
                          (findChildrenByName ns "w" "lvlOverride" element)
@@ -509,7 +495,7 @@ numElemToNum _ _ = Nothing
 absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
 absNumElemToAbsNum ns element
   | isElem ns "w" "abstractNum" element = do
-      absNumId <- findAttrTextByName ns "w" "abstractNumId" element
+      absNumId <- findAttrByName ns "w" "abstractNumId" element
       let levelElems = findChildrenByName ns "w" "lvl" element
           levels = mapMaybe (levelElemToLevel ns) levelElems
       return $ AbstractNumb absNumId levels
@@ -518,14 +504,14 @@ absNumElemToAbsNum _ _ = Nothing
 levelElemToLevel :: NameSpaces -> Element -> Maybe Level
 levelElemToLevel ns element
   | isElem ns "w" "lvl" element = do
-      ilvl <- findAttrTextByName ns "w" "ilvl" element
+      ilvl <- findAttrByName ns "w" "ilvl" element
       fmt <- findChildByName ns "w" "numFmt" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       txt <- findChildByName ns "w" "lvlText" element
-             >>= findAttrTextByName ns "w" "val"
+             >>= findAttrByName ns "w" "val"
       let start = findChildByName ns "w" "start" element
                   >>= findAttrByName ns "w" "val"
-                  >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+                  >>= stringToInteger
       return (Level ilvl fmt txt start)
 levelElemToLevel _ _ = Nothing
 
@@ -546,11 +532,11 @@ archiveToNumbering :: Archive -> Numbering
 archiveToNumbering archive =
   fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
 
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
+elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
 elemToNotes ns notetype element
   | isElem ns "w" (notetype <> "s") element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" notetype element)
       in
@@ -562,7 +548,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
 elemToComments ns element
   | isElem ns "w" "comments" element =
       let pairs = mapMaybe
-                  (\e -> findAttrTextByName ns "w" "id" e >>=
+                  (\e -> findAttrByName ns "w" "id" e >>=
                          (\a -> Just (a, e)))
                   (findChildrenByName ns "w" "comment" element)
       in
@@ -622,12 +608,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
       stringToInteger
     , hangingParIndent =
       findAttrByName ns "w" "hanging" element >>=
-      stringToInteger}
+      stringToInteger }
 elemToParIndentation _ _ = Nothing
 
-testBitMask :: String -> Int -> Bool
+testBitMask :: Text -> Int -> Bool
 testBitMask bitMaskS n =
-  case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+  case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
     []            -> False
     ((n', _) : _) -> (n' .|. n) /= 0
 
@@ -642,7 +628,7 @@ elemToBodyPart ns element
   | isElem ns "w" "p" element
   , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
       do
-        expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
+        expsLst <- eitherToD $ readOMML $ showElement c
         return $ OMathPara expsLst
 elemToBodyPart ns element
   | isElem ns "w" "p" element
@@ -666,7 +652,7 @@ elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
     let caption' = findChildByName ns "w" "tblPr" element
                    >>= findChildByName ns "w" "tblCaption"
-                   >>= findAttrTextByName ns "w" "val"
+                   >>= findAttrByName ns "w" "val"
         caption = fromMaybe "" caption'
         grid' = case findChildByName ns "w" "tblGrid" element of
           Just g  -> elemToTblGrid ns g
@@ -705,8 +691,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
 getTitleAndAlt ns element =
   let mbDocPr = findChildByName ns "wp" "inline" element >>=
                 findChildByName ns "wp" "docPr"
-      title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
-      alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
+      title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+      alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
   in (title, alt)
 
 elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -718,7 +704,7 @@ elemToParPart ns element
   = let (title, alt) = getTitleAndAlt ns drawingElem
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrTextByName ns "r" "embed"
+                  >>= findAttrByName ns "r" "embed"
     in
      case drawing of
        Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -728,7 +714,7 @@ elemToParPart ns element
   | isElem ns "w" "r" element
   , Just _ <- findChildByName ns "w" "pict" element =
     let drawing = findElement (elemName ns "v" "imagedata") element
-                  >>= findAttrTextByName ns "r" "id"
+                  >>= findAttrByName ns "r" "id"
     in
      case drawing of
        -- Todo: check out title and attr for deprecated format.
@@ -797,7 +783,7 @@ elemToParPart ns element
       fldCharState <- gets stateFldCharState
       case fldCharState of
         FldCharOpen -> do
-          info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
+          info <- eitherToD $ parseFieldInfo $ strContent instrText
           modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
           return NullParPart
         _ -> return NullParPart
@@ -818,48 +804,48 @@ elemToParPart ns element
       return $ ChangedRuns change runs
 elemToParPart ns element
   | isElem ns "w" "bookmarkStart" element
-  , Just bmId <- findAttrTextByName ns "w" "id" element
-  , Just bmName <- findAttrTextByName ns "w" "name" element =
+  , Just bmId <- findAttrByName ns "w" "id" element
+  , Just bmName <- findAttrByName ns "w" "name" element =
     return $ BookMark bmId bmName
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just relId <- findAttrTextByName ns "r" "id" element = do
+  , Just relId <- findAttrByName ns "r" "id" element = do
     location <- asks envLocation
     runs <- mapD (elemToRun ns) (elChildren element)
     rels <- asks envRelationships
     case lookupRelationship location relId rels of
       Just target ->
-         case findAttrTextByName ns "w" "anchor" element of
+         case findAttrByName ns "w" "anchor" element of
              Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
              Nothing -> return $ ExternalHyperLink target runs
       Nothing     -> return $ ExternalHyperLink "" runs
 elemToParPart ns element
   | isElem ns "w" "hyperlink" element
-  , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
+  , Just anchor <- findAttrByName ns "w" "anchor" element = do
     runs <- mapD (elemToRun ns) (elChildren element)
     return $ InternalHyperLink anchor runs
 elemToParPart ns element
   | isElem ns "w" "commentRangeStart" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element = do
       (Comments _ commentMap) <- asks envComments
       case M.lookup cmtId commentMap of
         Just cmtElem -> elemToCommentStart ns cmtElem
         Nothing      -> throwError WrongElem
 elemToParPart ns element
   | isElem ns "w" "commentRangeEnd" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element =
+  , Just cmtId <- findAttrByName ns "w" "id" element =
     return $ CommentEnd cmtId
 elemToParPart ns element
   | isElem ns "m" "oMath" element =
-    fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
+    fmap PlainOMath (eitherToD $ readOMML $ showElement element)
 elemToParPart _ _ = throwError WrongElem
 
 elemToCommentStart :: NameSpaces -> Element -> D ParPart
 elemToCommentStart ns element
   | isElem ns "w" "comment" element
-  , Just cmtId <- findAttrTextByName ns "w" "id" element
-  , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
-  , cmtDate <- findAttrTextByName ns "w" "date" element = do
+  , Just cmtId <- findAttrByName ns "w" "id" element
+  , Just cmtAuthor <- findAttrByName ns "w" "author" element
+  , cmtDate <- findAttrByName ns "w" "date" element = do
       bps <- mapD (elemToBodyPart ns) (elChildren element)
       return $ CommentStart cmtId cmtAuthor cmtDate bps
 elemToCommentStart _ _ = throwError WrongElem
@@ -878,7 +864,7 @@ elemToExtent drawingElem =
     where
       wp_ns  = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
       getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
-                    >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
+                    >>= findAttr (QName at Nothing Nothing) >>= safeRead
 
 
 childElemToRun :: NameSpaces -> Element -> D Run
@@ -889,7 +875,7 @@ childElemToRun ns element
   = let (title, alt) = getTitleAndAlt ns element
         a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
         drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
-                  >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
+                  >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
     in
      case drawing of
        Just s -> expandDrawingId s >>=
@@ -902,7 +888,7 @@ childElemToRun ns element
   = return InlineChart
 childElemToRun ns element
   | isElem ns "w" "footnoteReference" element
-  , Just fnId <- findAttrTextByName ns "w" "id" element = do
+  , Just fnId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupFootnote fnId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -910,7 +896,7 @@ childElemToRun ns element
       Nothing  -> return $ Footnote []
 childElemToRun ns element
   | isElem ns "w" "endnoteReference" element
-  , Just enId <- findAttrTextByName ns "w" "id" element = do
+  , Just enId <- findAttrByName ns "w" "id" element = do
     notes <- asks envNotes
     case lookupEndnote enId notes of
       Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -963,15 +949,15 @@ getParStyleField _ _ = Nothing
 getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
 getTrackedChange ns element
   | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange ns element
   | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
-  , Just cId <- findAttrTextByName ns "w" "id" element
-  , Just cAuthor <- findAttrTextByName ns "w" "author" element
-  , mcDate <- findAttrTextByName ns "w" "date" element =
+  , Just cId <- findAttrByName ns "w" "id" element
+  , Just cAuthor <- findAttrByName ns "w" "author" element
+  , mcDate <- findAttrByName ns "w" "date" element =
       Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
 getTrackedChange _ _ = Nothing
 
@@ -980,7 +966,7 @@ elemToParagraphStyle ns element sty
   | Just pPr <- findChildByName ns "w" "pPr" element =
     let style =
           mapMaybe
-          (fmap ParaStyleId . findAttrTextByName ns "w" "val")
+          (fmap ParaStyleId . findAttrByName ns "w" "val")
           (findChildrenByName ns "w" "pStyle" pPr)
     in ParagraphStyle
       {pStyle = mapMaybe (`M.lookup` sty) style
@@ -1012,7 +998,7 @@ elemToRunStyleD ns element
     charStyles <- asks envCharStyles
     let parentSty =
           findChildByName ns "w" "rStyle" rPr >>=
-          findAttrTextByName ns "w" "val" >>=
+          findAttrByName ns "w" "val" >>=
           flip M.lookup charStyles . CharStyleId
     return $ elemToRunStyle ns element parentSty
 elemToRunStyleD _ _ = return defaultRunStyle
@@ -1022,7 +1008,7 @@ elemToRunElem ns element
   | isElem ns "w" "t" element
     || isElem ns "w" "delText" element
     || isElem ns "m" "t" element = do
-    let str = T.pack $ strContent element
+    let str = strContent element
     font <- asks envFont
     case font of
       Nothing -> return $ TextRun str
@@ -1044,14 +1030,14 @@ getSymChar :: NameSpaces -> Element -> RunElem
 getSymChar ns element
   | Just s <- lowerFromPrivate <$> getCodepoint
   , Just font <- getFont =
-    case readLitChar ("\\x" ++ s) of
+    case readLitChar ("\\x" ++ T.unpack s) of
          [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
          _           -> TextRun ""
   where
     getCodepoint = findAttrByName ns "w" "char" element
-    getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
-    lowerFromPrivate ('F':xs) = '0':xs
-    lowerFromPrivate xs       = xs
+    getFont = textToFont =<< findAttrByName ns "w" "font" element
+    lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t
+                       | otherwise             = t
 getSymChar _ _ = TextRun ""
 
 elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1061,8 +1047,9 @@ elemToRunElems ns element
        let qualName = elemName ns "w"
        let font = do
                     fontElem <- findElement (qualName "rFonts") element
-                    textToFont . T.pack =<<
-                       foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
+                    textToFont =<<
+                       foldr ((<|>) . (flip findAttr fontElem . qualName))
+                         Nothing ["ascii", "hAnsi"]
        local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
 elemToRunElems _ _ = throwError WrongElem
 
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index edade8654..0d7271d6a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -48,12 +48,13 @@ import Data.Function (on)
 import Data.String (IsString(..))
 import qualified Data.Map as M
 import qualified Data.Text as T
+import qualified Data.Text.Read
+import Data.Text (Text)
 import Data.Maybe
 import Data.Coerce
 import Text.Pandoc.Readers.Docx.Util
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 newtype CharStyleId   = CharStyleId T.Text
   deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -109,7 +110,7 @@ data RunStyle = RunStyle { isBold       :: Maybe Bool
                          , isRTL        :: Maybe Bool
                          , isForceCTL   :: Maybe Bool
                          , rVertAlign   :: Maybe VertAlign
-                         , rUnderline   :: Maybe String
+                         , rUnderline   :: Maybe Text
                          , rParentStyle :: Maybe CharStyle
                          }
                 deriving Show
@@ -159,7 +160,7 @@ isBasedOnStyle ns element parentStyle
   , Just styleType <- findAttrByName ns "w" "type" element
   , styleType == cStyleType parentStyle
   , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
-                       findAttrTextByName ns "w" "val"
+                       findAttrByName ns "w" "val"
   , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
   | isElem ns "w" "style" element
   , Just styleType <- findAttrByName ns "w" "type" element
@@ -169,7 +170,7 @@ isBasedOnStyle ns element parentStyle
   | otherwise = False
 
 class HasStyleId a => ElemToStyle a where
-  cStyleType  :: Maybe a -> String
+  cStyleType  :: Maybe a -> Text
   elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
 
 class FromStyleId (StyleId a) => HasStyleId a where
@@ -226,8 +227,10 @@ buildBasedOnList ns element rootStyle =
     stys -> stys ++
             concatMap (buildBasedOnList ns element . Just) stys
 
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+stringToInteger :: Text -> Maybe Integer
+stringToInteger s = case Data.Text.Read.decimal s of
+                      Right (x,_) -> Just x
+                      Left _      -> Nothing
 
 checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
 checkOnOff ns rPr tag
@@ -247,7 +250,7 @@ checkOnOff _ _ _ = Nothing
 elemToCharStyle :: NameSpaces
                 -> Element -> Maybe CharStyle -> Maybe CharStyle
 elemToCharStyle ns element parentStyle
-  = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
+  = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
               <*> getElementStyleName ns element
               <*> Just (elemToRunStyle ns element parentStyle)
 
@@ -281,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle
 getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
 getHeaderLevel ns element
   | Just styleName <- getElementStyleName ns element
-  , Just n <- stringToInteger . T.unpack =<<
+  , Just n <- stringToInteger =<<
               (T.stripPrefix "heading " . T.toLower $
                 fromStyleName styleName)
   , n > 0 = Just (styleName, fromInteger n)
@@ -289,8 +292,8 @@ getHeaderLevel _ _ = Nothing
 
 getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
 getElementStyleName ns el = coerce <$>
-  ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
-  <|> findAttrTextByName ns "w" "styleId" el)
+  ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+  <|> findAttrByName ns "w" "styleId" el)
 
 getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
 getNumInfo ns element = do
@@ -298,15 +301,15 @@ getNumInfo ns element = do
               findChildByName ns "w" "numPr"
       lvl = fromMaybe "0" (numPr >>=
                            findChildByName ns "w" "ilvl" >>=
-                           findAttrTextByName ns "w" "val")
+                           findAttrByName ns "w" "val")
   numId <- numPr >>=
            findChildByName ns "w" "numId" >>=
-           findAttrTextByName ns "w" "val"
+           findAttrByName ns "w" "val"
   return (numId, lvl)
 
 elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
 elemToParStyleData ns element parentStyle
-  | Just styleId <- findAttrTextByName ns "w" "styleId" element
+  | Just styleId <- findAttrByName ns "w" "styleId" element
   , Just styleName <- getElementStyleName ns element
   = Just $ ParStyle
       {
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index f9c9a8e26..21df03d9e 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Docx.StyleMaps
    Copyright   : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
@@ -18,51 +19,45 @@ module Text.Pandoc.Readers.Docx.Util (
                                       , elemToNameSpaces
                                       , findChildByName
                                       , findChildrenByName
-                                      , findAttrText
                                       , findAttrByName
-                                      , findAttrTextByName
                                       ) where
 
 import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
-import Text.XML.Light
+import Data.Text (Text)
+import Text.Pandoc.XML.Light
 
-type NameSpaces = [(String, String)]
+type NameSpaces = [(Text, Text)]
 
 elemToNameSpaces :: Element -> NameSpaces
 elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
 
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
 attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
 attrToNSPair _                                     = Nothing
 
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
   let ns' = ns ++ elemToNameSpaces element
   in qName (elName element) == name &&
      qURI (elName element) == lookup prefix ns'
 
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
 findChildByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findChild (elemName ns' pref name) el
 
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
 findChildrenByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findChildren (elemName ns' pref name) el
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText x = fmap T.pack . findAttr x
-
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
 findAttrByName ns pref name el =
   let ns' = ns ++ elemToNameSpaces el
   in  findAttr (elemName ns' pref name) el
 
-findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
-findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 369c4f0c9..eb8d2405d 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -23,8 +23,8 @@ import Control.DeepSeq (NFData, deepseq)
 import Control.Monad (guard, liftM, liftM2, mplus)
 import Control.Monad.Except (throwError)
 import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf)
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Map as M (Map, elems, fromList, lookup)
 import Data.Maybe (mapMaybe)
 import qualified Data.Text.Lazy as TL
@@ -40,13 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.Options (ReaderOptions (..))
 import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
+import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
 import Text.Pandoc.Walk (query, walk)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
-type Items = M.Map String (FilePath, MimeType)
+type Items = M.Map Text (FilePath, MimeType)
 
 readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
 readEPUB opts bytes = case toArchiveOrFail bytes of
@@ -126,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
 imageMimes :: [MimeType]
 imageMimes = ["image/gif", "image/jpeg", "image/png"]
 
-type CoverId = String
+type CoverId = Text
 
 type CoverImage = FilePath
 
-parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
+parseManifest :: (PandocMonad m)
+              => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
 parseManifest content coverId = do
   manifest <- findElementE (dfName "manifest") content
   let items = findChildren (dfName "item") manifest
   r <- mapM parseItem items
   let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
-  return (cover `mplus` coverId, M.fromList r)
+  return (T.unpack <$> (cover `mplus` coverId), M.fromList r)
   where
-    findCover e = maybe False (isInfixOf "cover-image")
+    findCover e = maybe False (T.isInfixOf "cover-image")
                   (findAttr (emptyName "properties") e)
                || Just True == liftM2 (==) coverId (findAttr (emptyName "id") e)
     parseItem e = do
       uid <- findAttrE (emptyName "id") e
       href <- findAttrE (emptyName "href") e
       mime <- findAttrE (emptyName "media-type") e
-      return (uid, (href, T.pack mime))
+      return (uid, (T.unpack href, mime))
 
 parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
 parseSpine is e = do
@@ -173,11 +173,11 @@ parseMeta content = do
 -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
 parseMetaItem :: Element -> Meta -> Meta
 parseMetaItem e@(stripNamespace . elName -> field) meta =
-  addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
+  addMetaField (renameMeta field) (B.str $ strContent e) meta
 
-renameMeta :: String -> T.Text
+renameMeta :: Text -> Text
 renameMeta "creator" = "author"
-renameMeta s         = T.pack s
+renameMeta s         = s
 
 getManifest :: PandocMonad m => Archive -> m (String, Element)
 getManifest archive = do
@@ -187,7 +187,7 @@ getManifest archive = do
   ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
   as <- fmap (map attrToPair . elAttribs)
     (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
-  manifestFile <- mkE "Root not found" (lookup "full-path" as)
+  manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
   let rootdir = dropFileName manifestFile
   --mime <- lookup "media-type" as
   manifest <- findEntryByPathE manifestFile archive
@@ -201,7 +201,8 @@ fixInternalReferences pathToFile =
   . walk (fixBlockIRs filename)
   . walk (fixInlineIRs filename)
   where
-    (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
+    (root, T.unpack . escapeURI . T.pack -> filename) =
+      splitFileName pathToFile
 
 fixInlineIRs :: String -> Inline -> Inline
 fixInlineIRs s (Span as v) =
@@ -214,7 +215,7 @@ fixInlineIRs s (Link as is t) =
   Link (fixAttrs s as) is t
 fixInlineIRs _ v = v
 
-prependHash :: [T.Text] -> Inline -> Inline
+prependHash :: [Text] -> Inline -> Inline
 prependHash ps l@(Link attr is (url, tit))
   | or [s `T.isPrefixOf` url | s <- ps] =
     Link attr is ("#" <> url, tit)
@@ -231,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) =
 fixBlockIRs _ b = b
 
 fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) =
+  (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
 
-addHash :: String -> T.Text -> T.Text
+addHash :: FilePath -> Text -> Text
 addHash _ ""    = ""
 addHash s ident = T.pack (takeFileName s) <> "#" <> ident
 
-removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
 removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
 
-isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr :: (Text, a) -> Bool
 isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
 
 -- Library
@@ -257,33 +259,33 @@ uncurry3 f (a, b, c) = f a b c
 
 -- Utility
 
-stripNamespace :: QName -> String
+stripNamespace :: QName -> Text
 stripNamespace (QName v _ _) = v
 
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
 attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
 attrToNSPair _                              = Nothing
 
-attrToPair :: Attr -> (String, String)
+attrToPair :: Attr -> (Text, Text)
 attrToPair (Attr (QName name _ _) val) = (name, val)
 
-defaultNameSpace :: Maybe String
+defaultNameSpace :: Maybe Text
 defaultNameSpace = Just "http://www.idpf.org/2007/opf"
 
-dfName :: String -> QName
+dfName :: Text -> QName
 dfName s = QName s defaultNameSpace Nothing
 
-emptyName :: String -> QName
+emptyName :: Text -> QName
 emptyName s = QName s Nothing Nothing
 
 -- Convert Maybe interface to Either
 
-findAttrE :: PandocMonad m => QName -> Element -> m String
+findAttrE :: PandocMonad m => QName -> Element -> m Text
 findAttrE q e = mkE "findAttr" $ findAttr q e
 
 findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
 findEntryByPathE (normalise . unEscapeString -> path) a =
-  mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+  mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a
 
 parseXMLDocE :: PandocMonad m => Entry -> m Element
 parseXMLDocE entry =
@@ -293,7 +295,8 @@ parseXMLDocE entry =
   fp  = T.pack $ eRelativePath entry
 
 findElementE :: PandocMonad m => QName -> Element -> m Element
-findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+findElementE e x =
+  mkE ("Unable to find element: " <> tshow e) $ findElement e x
 
-mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ T.pack s) return
+mkE :: PandocMonad m => Text -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b804eab4f..66e390bd7 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -25,7 +25,6 @@ TODO:
 module Text.Pandoc.Readers.FB2 ( readFB2 ) where
 import Control.Monad.Except (throwError)
 import Control.Monad.State.Strict
-import Data.ByteString.Lazy.Char8 ( pack )
 import Data.ByteString.Base64.Lazy
 import Data.Functor
 import Data.List (intersperse)
@@ -42,8 +41,8 @@ import Text.Pandoc.Error
 import Text.Pandoc.Logging
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
+import qualified Text.Pandoc.UTF8 as UTF8
 
 type FB2 m = StateT FB2State m
 
@@ -85,12 +84,12 @@ removeHash t = case T.uncons t of
   Just ('#', xs) -> xs
   _              -> t
 
-convertEntity :: String -> Text
-convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e)
 
 parseInline :: PandocMonad m => Content -> FB2 m Inlines
 parseInline (Elem e) =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -98,12 +97,12 @@ parseInline (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement name
       pure mempty
-parseInline (Text x) = pure $ text $ T.pack $ cdData x
+parseInline (Text x) = pure $ text $ cdData x
 parseInline (CRef r) = pure $ str $ convertEntity r
 
 parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -113,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
 
 parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
 parseRootElement e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "FictionBook" -> do
       -- Parse notes before parsing the rest of the content.
       case filterChild isNotesBody e of
@@ -146,7 +145,7 @@ parseNote e =
     Just sectionId -> do
       content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
       oldNotes <- gets fb2Notes
-      modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
+      modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes }
       pure ()
   where
     isTitle x = qName (elName x) == "title"
@@ -158,7 +157,7 @@ parseNote e =
 -- | Parse a child of @\<FictionBook>@ element.
 parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
 parseFictionBookChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "stylesheet" -> pure mempty -- stylesheet is ignored
     "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
     "body" -> if isNotesBody e
@@ -170,7 +169,7 @@ parseFictionBookChild e =
 -- | Parse a child of @\<description>@ element.
 parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
 parseDescriptionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title-info" -> mapM_ parseTitleInfoChild (elChildren e)
     "src-title-info" -> pure () -- ignore
     "document-info" -> pure ()
@@ -184,7 +183,7 @@ parseDescriptionChild e =
 -- | Parse a child of @\<body>@ element.
 parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
 parseBodyChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "image" -> parseImageElement e
     "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
     "epigraph" -> parseEpigraph e
@@ -198,7 +197,10 @@ parseBinaryElement e =
     (Nothing, _) -> report $ IgnoredElement "binary without id attribute"
     (Just _, Nothing) ->
       report $ IgnoredElement "binary without content-type attribute"
-    (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
+    (Just filename, contentType) ->
+      insertMedia (T.unpack filename) contentType
+                    (decodeLenient
+                      (UTF8.fromTextLazy . TL.fromStrict . strContent $ e))
 
 -- * Type parsers
 
@@ -208,13 +210,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
 
 parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
 parseAuthorChild e =
-  case T.pack $ qName $ elName e of
-    "first-name" -> pure $ Just $ T.pack $ strContent e
-    "middle-name" -> pure $ Just $ T.pack $ strContent e
-    "last-name" -> pure $ Just $ T.pack $ strContent e
-    "nickname" -> pure $ Just $ T.pack $ strContent e
-    "home-page" -> pure $ Just $ T.pack $ strContent e
-    "email" -> pure $ Just $ T.pack $ strContent e
+  case qName $ elName e of
+    "first-name" -> pure $ Just $ strContent e
+    "middle-name" -> pure $ Just $ strContent e
+    "last-name" -> pure $ Just $ strContent e
+    "nickname" -> pure $ Just $ strContent e
+    "home-page" -> pure $ Just $ strContent e
+    "email" -> pure $ Just $ strContent e
     name -> do
       report $ IgnoredElement $ name <> " in author"
       pure Nothing
@@ -238,13 +240,13 @@ parseTitleContent _ = pure Nothing
 parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
 parseImageElement e =
   case href of
-    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
+    Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
     Nothing -> do
       report $ IgnoredElement " image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        title = maybe "" T.pack $ findAttr (unqual "title") e
-        imgId = maybe "" T.pack $ findAttr (unqual "id") e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        title = fromMaybe "" $ findAttr (unqual "title") e
+        imgId = fromMaybe "" $ findAttr (unqual "id") e
         href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
 
 -- | Parse @pType@
@@ -258,7 +260,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
 -- | Parse @citeType@ child
 parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
 parseCiteChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "empty-line" -> pure horizontalRule
@@ -273,13 +275,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
 
 parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
 parsePoemChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "epigraph" -> parseEpigraph e
     "stanza" -> parseStanza e
     "text-author" -> para <$> parsePType e
-    "date" -> pure $ para $ text $ T.pack $ strContent e
+    "date" -> pure $ para $ text $ strContent e
     name -> report (UnexpectedXmlElement name "poem") $> mempty
 
 parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -292,7 +294,7 @@ joinLineBlocks [] = []
 
 parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
 parseStanzaChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseTitle e
     "subtitle" -> parseSubtitle e
     "v" -> lineBlock . (:[]) <$> parsePType e
@@ -302,11 +304,11 @@ parseStanzaChild e =
 parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraph e =
   divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
-  where divId = maybe "" T.pack $ findAttr (unqual "id") e
+  where divId = fromMaybe "" $ findAttr (unqual "id") e
 
 parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
 parseEpigraphChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -320,7 +322,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
 
 parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
 parseAnnotationChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "p" -> para <$> parsePType e
     "poem" -> parsePoem e
     "cite" -> parseCite e
@@ -334,14 +336,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
 parseSection e = do
   n <- gets fb2SectionLevel
   modify $ \st -> st{ fb2SectionLevel = n + 1 }
-  let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
+  let sectionId = fromMaybe "" $ findAttr (unqual "id") e
   bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
   modify $ \st -> st{ fb2SectionLevel = n }
   pure bs
 
 parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
 parseSectionChild e =
-  case T.pack $ qName $ elName e of
+  case qName $ elName e of
     "title" -> parseBodyChild e
     "epigraph" -> parseEpigraph e
     "image" -> parseImageElement e
@@ -363,16 +365,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
 parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
 parseNamedStyle e = do
   content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
-  let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
+  let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
   case findAttr (unqual "name") e of
-    Just name -> pure $ spanWith ("", [T.pack name], lang) content
+    Just name -> pure $ spanWith ("", [name], lang) content
     Nothing -> do
       report $ IgnoredElement "link without required name"
       pure mempty
 
 parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
 parseNamedStyleChild (Elem e) =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "strong" -> strong <$> parseStyleType e
     "emphasis" -> emph <$> parseStyleType e
     "style" -> parseNamedStyle e
@@ -380,7 +382,7 @@ parseNamedStyleChild (Elem e) =
     "strikethrough" -> strikeout <$> parseStyleType e
     "sub" -> subscript <$> parseStyleType e
     "sup" -> superscript <$> parseStyleType e
-    "code" -> pure $ code $ T.pack $ strContent e
+    "code" -> pure $ code $ strContent e
     "image" -> parseInlineImageElement e
     name -> do
       report $ IgnoredElement $ name <> " in style"
@@ -392,7 +394,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
 parseLinkType e = do
   content <- mconcat <$> mapM parseStyleLinkType (elContent e)
   notes <- gets fb2Notes
-  case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+  case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
     Just href -> case findAttr (QName "type" Nothing Nothing) e of
                    Just "note" -> case M.lookup href notes of
                                     Nothing -> pure $ link href "" content
@@ -419,15 +421,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
 -- | Parse @title-infoType@
 parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
 parseTitleInfoChild e =
-  case T.pack $ qName (elName e) of
+  case qName (elName e) of
     "genre" -> pure ()
     "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
-    "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
+    "book-title" -> modify (setMeta "title" (text $ strContent e))
     "annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
     "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
-                                                                      $ T.pack
                                                                       $ strContent e))
-    "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
+    "date" -> modify (setMeta "date" (text $ strContent e))
     "coverpage" -> parseCoverPage e
     "lang" -> pure ()
     "src-lang" -> pure ()
@@ -441,7 +442,7 @@ parseCoverPage e =
     Just img -> case href of
                   Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
                   Nothing -> pure ()
-                where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+                where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
     Nothing -> pure ()
 
 -- | Parse @inlineImageType@ element
@@ -454,5 +455,5 @@ parseInlineImageElement e =
     Nothing -> do
       report $ IgnoredElement "inline image without href"
       pure mempty
-  where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
-        href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+  where alt = maybe mempty str $ findAttr (unqual "alt") e
+        href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index dfd343b7a..5353f2001 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -16,7 +16,7 @@ module Text.Pandoc.Readers.JATS ( readJATS ) where
 import Control.Monad.State.Strict
 import Control.Monad.Except (throwError)
 import Text.Pandoc.Error (PandocError(..))
-import Data.Char (isDigit, isSpace, toUpper)
+import Data.Char (isDigit, isSpace)
 import Data.Default
 import Data.Generics
 import Data.List (foldl', intersperse)
@@ -31,8 +31,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Options
 import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
 import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import qualified Data.Set as S (fromList, member)
 import Data.Set ((\\))
 
@@ -67,29 +66,29 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text  -- unknown entity names fall back to upper case
+convertEntity ref = fromMaybe (T.toUpper ref) (T.pack <$> lookupEntity (T.unpack ref))
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr =
   fromMaybe "" . maybeAttrValue attr
 
-maybeAttrValue :: String -> Element -> Maybe Text
+maybeAttrValue :: Text -> Element -> Maybe Text
 maybeAttrValue attr elt =
-  T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+  lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
 
 -- convenience function
-named :: String -> Element -> Bool
+named :: Text -> Element -> Bool
 named s e = qName (elName e) == s
 
 --
@@ -155,10 +154,10 @@ getBlocks e =  mconcat <$>
 
 parseBlock :: PandocMonad m => Content -> JATS m Blocks
 parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
                                      then return mempty
-                                     else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+                                     else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
 parseBlock (Elem e) =
   case qName (elName e) of
         "p" -> parseMixed para (elContent e)
@@ -207,7 +206,7 @@ parseBlock (Elem e) =
                                 "" -> []
                                 x  -> [x]
            return $ codeBlockWith (attrValue "id" e, classes', [])
-                  $ trimNl $ textContentRecursive e
+                  $ trimNl $ strContentRecursive e
          parseBlockquote = do
             attrib <- case filterChild (named "attribution") e of
                              Nothing  -> return mempty
@@ -271,7 +270,7 @@ parseBlock (Elem e) =
                                                 Just "center" -> AlignCenter
                                                 _             -> AlignDefault
                       let toWidth c = do
-                            w <- findAttrText (unqual "colwidth") c
+                            w <- findAttr (unqual "colwidth") c
                             n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
                             if n > 0 then Just n else Nothing
                       let numrows = foldl' max 0 $ map length bodyrows
@@ -442,16 +441,10 @@ parseRef e = do
        Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
        -- TODO handle mixed-citation
 
-findAttrText :: QName -> Element -> Maybe Text
-findAttrText x = fmap T.pack . findAttr x
-
 textContent :: Element -> Text
-textContent = T.pack . strContent
-
-textContentRecursive :: Element -> Text
-textContentRecursive = T.pack . strContentRecursive
+textContent = strContent
 
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
 strContentRecursive = strContent .
   (\e' -> e'{ elContent = map elementToStr $ elContent e' })
 
@@ -460,9 +453,8 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
 elementToStr x = x
 
 parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
-parseInline (CRef ref) =
-  return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) = return . text . convertEntity $ ref
 parseInline (Elem e) =
   case qName (elName e) of
         "italic" -> innerInlines emph
@@ -507,9 +499,9 @@ parseInline (Elem e) =
                         else linkWith attr ("#" <> rid) "" ils
         "ext-link" -> do
              ils <- innerInlines id
-             let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+             let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
              let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
-                               Just h -> T.pack h
+                               Just h -> h
                                _      -> "#" <> attrValue "rid" e
              let ils' = if ils == mempty then str href else ils
              let attr = (attrValue "id" e, [], [])
@@ -529,7 +521,7 @@ parseInline (Elem e) =
    where innerInlines f = extractSpaces f . mconcat <$>
                           mapM parseInline (elContent e)
          mathML x =
-            case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
+            case readMathML . showElement $ everywhere (mkT removePrefix) x of
                 Left _ -> mempty
                 Right m -> writeTeX m
          formula constructor = do
@@ -547,4 +539,4 @@ parseInline (Elem e) =
            let classes' = case attrValue "language" e of
                                "" -> []
                                l  -> [l]
-           return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
+           return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index bdadc4dd9..184d5a63f 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -13,7 +13,6 @@ Conversion of OPML to 'Pandoc' document.
 
 module Text.Pandoc.Readers.OPML ( readOPML ) where
 import Control.Monad.State.Strict
-import Data.Char (toUpper)
 import Data.Default
 import Data.Generics
 import Data.Maybe (fromMaybe)
@@ -28,8 +27,7 @@ import Text.Pandoc.Error (PandocError(..))
 import Text.Pandoc.Readers.HTML (readHtml)
 import Text.Pandoc.Readers.Markdown (readMarkdown)
 import Text.Pandoc.Shared (crFilter, blocksToInlines')
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import Control.Monad.Except (throwError)
 
 type OPML m = StateT OPMLState m
@@ -69,25 +67,22 @@ normalizeTree = everywhere (mkT go)
   where go :: [Content] -> [Content]
         go (Text (CData CDataRaw _ _):xs) = xs
         go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
-           Text (CData CDataText (s1 ++ s2) z):xs
+           Text (CData CDataText (s1 <> s2) z):xs
         go (Text (CData CDataText s1 z):CRef r:xs) =
-           Text (CData CDataText (s1 ++ convertEntity r) z):xs
+           Text (CData CDataText (s1 <> convertEntity r) z):xs
         go (CRef r:Text (CData CDataText s1 z):xs) =
-             Text (CData CDataText (convertEntity r ++ s1) z):xs
+             Text (CData CDataText (convertEntity r <> s1) z):xs
         go (CRef r1:CRef r2:xs) =
-             Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+             Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
         go xs = xs
 
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text  -- unknown entity names fall back to upper case
+convertEntity ref = fromMaybe (T.toUpper ref) (T.pack <$> lookupEntity (T.unpack ref))
 
 -- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
 attrValue attr elt =
-  maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-
-textContent :: Element -> Text
-textContent = T.pack . strContent
+  fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
 
 -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
 -- exceptT = either throwError return
@@ -111,11 +106,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
 parseBlock (Elem e) =
   case qName (elName e) of
         "ownerName"    -> mempty <$ modify (\st ->
-                              st{opmlDocAuthors = [text $ textContent e]})
+                              st{opmlDocAuthors = [text $ strContent e]})
         "dateModified" -> mempty <$ modify (\st ->
-                              st{opmlDocDate = text $ textContent e})
+                              st{opmlDocDate = text $ strContent e})
         "title"        -> mempty <$ modify (\st ->
-                              st{opmlDocTitle = text $ textContent e})
+                              st{opmlDocTitle = text $ strContent e})
         "outline" -> gets opmlSectionLevel >>= sect . (+1)
         "?xml"  -> return mempty
         _       -> getBlocks e
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 85308deb1..c274b6fd4 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -14,8 +14,7 @@ Entry point to the odt reader.
 module Text.Pandoc.Readers.Odt ( readOdt ) where
 
 import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
 import qualified Data.ByteString.Lazy as B
 
@@ -91,7 +90,7 @@ archiveToOdt archive = do
 
 
 --
-entryToXmlElem :: Entry -> Either PandocError XML.Element
+entryToXmlElem :: Entry -> Either PandocError Element
 entryToXmlElem entry =
   case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
     Right x  -> Right x
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 43c44e7e9..df90880fa 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -29,14 +29,14 @@ import Control.Monad ((<=<))
 
 import qualified Data.ByteString.Lazy as B
 import Data.Foldable (fold)
-import Data.List (find, stripPrefix)
+import Data.List (find)
 import qualified Data.Map as M
 import qualified Data.Text as T
 import Data.Maybe
 import Data.Semigroup (First(..), Option(..))
 
 import Text.TeXMath (readMathML, writeTeX)
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import Text.Pandoc.Builder hiding (underline)
 import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -557,7 +557,7 @@ read_plain_text =  fst ^&&& read_plain_text' >>% recover
                        >>?% mappend
     --
     extractText     :: XML.Content -> Fallible T.Text
-    extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
+    extractText (XML.Text cData) = succeedWith (XML.cdData cData)
     extractText         _        = failEmpty
 
 read_text_seq :: InlineMatcher
@@ -777,14 +777,14 @@ read_frame_img =
       ""   -> returnV mempty -< ()
       src' -> do
         let exts = extensionsFromList [Ext_auto_identifiers]
-        resource   <- lookupResource                          -< src'
+        resource   <- lookupResource                          -< T.unpack src'
         _          <- updateMediaWithResource                 -< resource
         w          <- findAttrText' NsSVG "width"             -< ()
         h          <- findAttrText' NsSVG "height"            -< ()
         titleNodes <- matchChildContent' [ read_frame_title ] -< ()
         alt        <- matchChildContent [] read_plain_text    -< ()
         arr (firstMatch . uncurry4 imageWith)                 -<
-          (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
+          (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
 
 read_frame_title :: InlineMatcher
 read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
@@ -804,7 +804,8 @@ read_frame_mathml =
     case fold src of
       ""   -> returnV mempty -< ()
       src' -> do
-        let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
+        let path = T.unpack $
+                    fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml"
         (_, mathml) <- lookupResource -< path
         case readMathML (UTF8.toText $ B.toStrict mathml) of
           Left _     -> returnV mempty -< ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
 module Text.Pandoc.Readers.Odt.Generic.Namespaces where
 
 import qualified Data.Map as M
+import Data.Text (Text)
 
 --
-type NameSpaceIRI          = String
+type NameSpaceIRI          = Text
 
 --
 type NameSpaceIRIs     nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 , reverseComposition
 , tryToRead
 , Lookupable(..)
-, readLookupables
 , readLookupable
 , readPercent
 , findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
 
 import Control.Category (Category, (<<<), (>>>))
 import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
 import qualified Data.Foldable as F (Foldable, foldr)
 import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
 
 -- | Equivalent to
 -- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
 -- (nobody wants that) while the latter returns "to much" for simple purposes.
 -- This function instead applies 'reads' and returns the first match (if any)
 -- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead txt = fst <$> listToMaybe (reads (T.unpack txt))  -- first parse, if any
 
 -- | A version of 'reads' that requires a '%' sign after the number
 readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i   , r ) <- reads s
 -- | Data that can be looked up.
 -- This is mostly a utility to read data with kind *.
 class Lookupable a where
-  lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- >   readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
-                                 a <- maybeToList (lookup word lookupTable)
-                    ]
+  lookupTable :: [(Text, a)]
 
 -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
-                 $ map ((`lookup` lookupTable).fst)
-                 $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s = lookup firstWord lookupTable
+  where firstWord = T.takeWhile (not . isSpace) (T.dropWhile isSpace s)
 
 uncurry3 :: (a->b->c                -> z) -> (a,b,c          ) -> z
 uncurry4 :: (a->b->c->d             -> z) -> (a,b,c,d        ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..0d921e23b 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE TupleSections   #-}
 {-# LANGUAGE GADTs           #-}
 {-# LANGUAGE LambdaCase      #-}
@@ -60,11 +61,11 @@ import           Control.Arrow
 import           Data.Bool ( bool )
 import           Data.Either ( rights )
 import qualified Data.Map             as M
-import qualified Data.Text            as T
+import           Data.Text (Text)
 import           Data.Default
 import           Data.Maybe
 
-import qualified Text.XML.Light       as XML
+import qualified Text.Pandoc.XML.Light as XML
 
 import           Text.Pandoc.Readers.Odt.Arrows.State
 import           Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -78,13 +79,13 @@ import           Text.Pandoc.Readers.Odt.Generic.Fallible
 --------------------------------------------------------------------------------
 
 --
-type ElementName           = String
-type AttributeName         = String
-type AttributeValue        = String
-type TextAttributeValue    = T.Text
+type ElementName           = Text
+type AttributeName         = Text
+type AttributeValue        = Text
+type TextAttributeValue    = Text
 
 --
-type NameSpacePrefix       = String
+type NameSpacePrefix       = Text
 
 --
 type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -461,7 +462,7 @@ lookupDefaultingAttr     :: (NameSpaceID nsID, Lookupable a, Default a)
 lookupDefaultingAttr nsID attrName
                          = lookupAttrWithDefault nsID attrName def
 
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
 findAttr'               :: (NameSpaceID nsID)
                         => nsID -> AttributeName
                         -> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
                         =         qualifyName nsID attrName
                               &&& getCurrentElement
                           >>% XML.findAttr
-                          >>^ fmap T.pack
 
 -- | Return value as string or fail
 findAttr               :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText           :: (NameSpaceID nsID)
                        -> FallibleXMLConverter nsID extraState x TextAttributeValue
 findAttrText nsID attrName
                        = findAttr' nsID attrName
-                         >>^ fmap T.pack
                          >>> maybeToChoice
 
 -- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
                         -> XMLConverter nsID extraState x TextAttributeValue
 findAttrTextWithDefault nsID attrName deflt
                        = findAttr' nsID attrName
-                         >>^ maybe deflt T.pack
+                         >>^ fromMaybe deflt
 
 -- | Read and return value or fail
 readAttr               :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
 -- Internals
 --------------------------------------------------------------------------------
 
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
 stringToBool' val | val `elem` trueValues  = Just True
                   | val `elem` falseValues = Just False
                   | otherwise              = Nothing
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index 3a24a1162..70741c28d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Reader.Odt.Namespaces
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -13,10 +14,10 @@ Namespaces used in odt files.
 module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
                                           ) where
 
-import Data.List (isPrefixOf)
 import qualified Data.Map as M (empty, insert)
 import Data.Maybe (fromMaybe, listToMaybe)
-
+import Data.Text (Text)
+import qualified Data.Text as T
 import Text.Pandoc.Readers.Odt.Generic.Namespaces
 
 
@@ -30,7 +31,7 @@ instance NameSpaceID Namespace where
 
 
 findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri]
 
 nsIDmap :: NameSpaceIRIs Namespace
 nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
@@ -54,12 +55,12 @@ data Namespace = -- Open Document core
                  -- Core XML (basically only for the 'id'-attribute)
                | NsXML
                  -- Fallback
-               | NsOther String
+               | NsOther Text
   deriving ( Eq, Ord, Show )
 
 -- | Not the actual iri's, but large prefixes of them - this way there are
 -- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
+nsIDs :: [(Text, Namespace)]
 nsIDs = [
   ("urn:oasis:names:tc:opendocument:xmlns:animation"        , NsAnim         ),
   ("urn:oasis:names:tc:opendocument:xmlns:chart"            , NsChart        ),
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 46a777df1..5e10f896c 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -2,6 +2,7 @@
 {-# LANGUAGE Arrows          #-}
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE TupleSections   #-}
+{-# LANGUAGE OverloadedStrings #-}
 {- |
    Module      : Text.Pandoc.Readers.Odt.StyleReader
    Copyright   : Copyright (C) 2015 Martin Linnemann
@@ -46,11 +47,13 @@ import qualified Data.Foldable as F
 import Data.List (unfoldr)
 import qualified Data.Map as M
 import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
 import qualified Data.Set as S
 
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
 
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, tshow)
 
 import Text.Pandoc.Readers.Odt.Arrows.Utils
 
@@ -90,7 +93,7 @@ instance Default FontPitch where
 --
 -- Thus, we want
 
-type FontFaceName = String
+type FontFaceName = Text
 
 type FontPitches = M.Map FontFaceName FontPitch
 
@@ -151,7 +154,7 @@ findPitch =     ( lookupAttr NsStyle "font-pitch"
 -- Definitions of main data
 --------------------------------------------------------------------------------
 
-type StyleName        = String
+type StyleName        = Text
 
 -- | There are two types of styles: named styles with a style family and an
 -- optional style parent, and default styles for each style family,
@@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} =
   -- \^ simpler, but in general less efficient
 
 data ListLevelStyle = ListLevelStyle { listLevelType  :: ListLevelType
-                                     , listItemPrefix :: Maybe String
-                                     , listItemSuffix :: Maybe String
+                                     , listItemPrefix :: Maybe Text
+                                     , listItemSuffix :: Maybe Text
                                      , listItemFormat :: ListItemNumberFormat
                                      , listItemStart  :: Int
                                      }
@@ -366,9 +369,9 @@ instance Show ListLevelStyle where
   show ListLevelStyle{..} =    "<LLS|"
                             ++ show listLevelType
                             ++ "|"
-                            ++ maybeToString listItemPrefix
+                            ++ maybeToString (T.unpack <$> listItemPrefix)
                             ++ show listItemFormat
-                            ++ maybeToString listItemSuffix
+                            ++ maybeToString (T.unpack <$> listItemSuffix)
                             ++ ">"
     where maybeToString = fromMaybe ""
 
@@ -471,7 +474,7 @@ readTextProperties =
      )
   where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
         isFontBold = ("normal",False):("bold",True)
-                    :map ((,True).show) ([100,200..900]::[Int])
+                    :map ((,True) . tshow) ([100,200..900]::[Int])
 
 readUnderlineMode     :: StyleReaderSafe _x (Maybe UnderlineMode)
 readUnderlineMode     = readLineMode "text-underline-mode"
@@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
 readStrikeThroughMode = readLineMode "text-line-through-mode"
                                      "text-line-through-style"
 
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode)
 readLineMode modeAttr styleAttr = proc x -> do
   isUL <- searchAttr  NsStyle styleAttr False isLinePresent -< x
   mode <- lookupAttr' NsStyle  modeAttr                     -< x
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index da990e4d3..89c71d773 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -31,6 +31,7 @@ import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList)
 import qualified Data.Set as Set
 import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
@@ -57,19 +58,19 @@ import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
 import Text.Printf (printf)
 import Text.TeXMath
-import Text.XML.Light as XML
-import Text.XML.Light.Cursor as XMLC
 import Text.Pandoc.Writers.OOXML
+import Text.Pandoc.XML.Light as XML
+import Data.Generics (mkT, everywhere)
 
 data ListMarker = NoMarker
                 | BulletMarker
                 | NumberMarker ListNumberStyle ListNumberDelim Int
                 deriving (Show, Read, Eq, Ord)
 
-listMarkerToId :: ListMarker -> String
+listMarkerToId :: ListMarker -> Text
 listMarkerToId NoMarker = "990"
 listMarkerToId BulletMarker = "991"
-listMarkerToId (NumberMarker sty delim n) =
+listMarkerToId (NumberMarker sty delim n) = T.pack $
   '9' : '9' : styNum : delimNum : show n
   where styNum = case sty of
                       DefaultStyle -> '2'
@@ -106,8 +107,8 @@ data WriterEnv = WriterEnv{ envTextProperties :: EnvProps
                           , envListLevel      :: Int
                           , envListNumId      :: Int
                           , envInDel          :: Bool
-                          , envChangesAuthor  :: T.Text
-                          , envChangesDate    :: T.Text
+                          , envChangesAuthor  :: Text
+                          , envChangesDate    :: Text
                           , envPrintWidth     :: Integer
                           }
 
@@ -125,9 +126,9 @@ defaultWriterEnv = WriterEnv{ envTextProperties = mempty
 
 data WriterState = WriterState{
          stFootnotes      :: [Element]
-       , stComments       :: [([(T.Text, T.Text)], [Inline])]
-       , stSectionIds     :: Set.Set T.Text
-       , stExternalLinks  :: M.Map String String
+       , stComments       :: [([(Text, Text)], [Inline])]
+       , stSectionIds     :: Set.Set Text
+       , stExternalLinks  :: M.Map Text Text
        , stImages         :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString)
        , stLists          :: [ListMarker]
        , stInsId          :: Int
@@ -164,18 +165,18 @@ defaultWriterState = WriterState{
 
 type WS m = ReaderT WriterEnv (StateT WriterState m)
 
-renumIdMap :: Int -> [Element] -> M.Map String String
+renumIdMap :: Int -> [Element] -> M.Map Text Text
 renumIdMap _ [] = M.empty
 renumIdMap n (e:es)
   | Just oldId <- findAttr (QName "Id" Nothing Nothing) e =
-      M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es)
+      M.insert oldId ("rId" <> tshow n) (renumIdMap (n+1) es)
   | otherwise = renumIdMap n es
 
-replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr]
+replaceAttr :: (QName -> Bool) -> Text -> [XML.Attr] -> [XML.Attr]
 replaceAttr f val = map $
     \a -> if f (attrKey a) then XML.Attr (attrKey a) val else a
 
-renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element
+renumId :: (QName -> Bool) -> M.Map Text Text -> Element -> Element
 renumId f renumMap e
   | Just oldId <- findAttrBy f e
   , Just newId <- M.lookup oldId renumMap =
@@ -184,18 +185,12 @@ renumId f renumMap e
      e { elAttribs = attrs' }
   | otherwise = e
 
-renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element]
+renumIds :: (QName -> Bool) -> M.Map Text Text -> [Element] -> [Element]
 renumIds f renumMap = map (renumId f renumMap)
 
-findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text
-findAttrTextBy x = fmap T.pack . findAttrBy x
-
-lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text
-lookupAttrTextBy x = fmap T.pack . lookupAttrBy x
-
 -- | Certain characters are invalid in XML even if escaped.
 -- See #1992
-stripInvalidChars :: T.Text -> T.Text
+stripInvalidChars :: Text -> Text
 stripInvalidChars = T.filter isValidChar
 
 -- | See XML reference
@@ -234,11 +229,11 @@ writeDocx opts doc = do
 
   -- Gets the template size
   let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz"))
-  let mbAttrSzWidth = mbpgsz >>= lookupAttrTextBy ((=="w") . qName) . elAttribs
+  let mbAttrSzWidth = mbpgsz >>= lookupAttrBy ((=="w") . qName) . elAttribs
 
   let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar"))
-  let mbAttrMarLeft = mbpgmar >>= lookupAttrTextBy ((=="left") . qName) . elAttribs
-  let mbAttrMarRight = mbpgmar >>= lookupAttrTextBy ((=="right") . qName) . elAttribs
+  let mbAttrMarLeft = mbpgmar >>= lookupAttrBy ((=="left") . qName) . elAttribs
+  let mbAttrMarRight = mbpgmar >>= lookupAttrBy ((=="right") . qName) . elAttribs
 
   -- Get the available area (converting the size and the margins to int and
   -- doing the difference
@@ -250,24 +245,21 @@ writeDocx opts doc = do
 
   -- styles
   mblang <- toLang $ getLang opts meta
+  -- TODO FIXME avoid this generic traversal!
+  -- lang is in w:docDefaults /  w:rPr  /  w:lang
   let addLang :: Element -> Element
-      addLang e = case (\l -> XMLC.toTree . go (T.unpack $ renderLang l) $
-                                 XMLC.fromElement e) <$> mblang of
-                    Just (Elem e') -> e'
-                    _              -> e -- return original
-        where go :: String -> Cursor -> Cursor
-              go l cursor = case XMLC.findRec (isLangElt . current) cursor of
-                              Nothing -> cursor
-                              Just t  -> XMLC.modifyContent (setval l) t
-              setval :: String -> Content -> Content
-              setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $
-                                               elAttribs e' }
-              setval _ x         = x
-              setvalattr :: String -> XML.Attr -> XML.Attr
-              setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l
-              setvalattr _ x                                 = x
-              isLangElt (Elem e') = qName (elName e') == "lang"
-              isLangElt _         = False
+      addLang = maybe id (\l -> everywhere (mkT (setLang (renderLang l)))) mblang
+        where
+          -- Rewrite the attributes of every element whose name is "lang".
+          setLang :: Text -> Element -> Element
+          setLang langTag el
+            | qName (elName el) == "lang"
+                = el{ elAttribs = map (setVal langTag) (elAttribs el) }
+            | otherwise = el
+
+          -- Only the "val" attribute is overwritten; all others pass through.
+          setVal langTag (XML.Attr key@(QName "val" _ _) _) = XML.Attr key langTag
+          setVal _ attr                                     = attr
 
   let stylepath = "word/styles.xml"
   styledoc <- addLang <$> parseXml refArchive distArchive stylepath
@@ -337,12 +329,13 @@ writeDocx opts doc = do
 
   -- [Content_Types].xml
   let mkOverrideNode (part', contentType') = mknode "Override"
-               [("PartName",part'),("ContentType",contentType')] ()
+               [("PartName", T.pack part')
+               ,("ContentType", contentType')] ()
   let mkImageOverride (_, imgpath, mbMimeType, _) =
-          mkOverrideNode ("/word/" ++ imgpath,
-                          maybe "application/octet-stream" T.unpack mbMimeType)
+          mkOverrideNode ("/word/" <> imgpath,
+                          fromMaybe "application/octet-stream" mbMimeType)
   let mkMediaOverride imgpath =
-          mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath)
+          mkOverrideNode ("/" <> imgpath, getMimeTypeDef imgpath)
   let overrides = map mkOverrideNode (
                   [("/word/webSettings.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
@@ -369,13 +362,14 @@ writeDocx opts doc = do
                   ,("/word/footnotes.xml",
                     "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml")
                   ] ++
-                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                  map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x),
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++
-                  map (\x -> (maybe "" ("/word/" ++) $ extractTarget x,
+                  map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x),
                        "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++
                     map mkImageOverride imgs ++
-                    [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive
-                                        , "word/media/" `isPrefixOf` eRelativePath e ]
+                    [ mkMediaOverride (eRelativePath e)
+                        | e <- zEntries refArchive
+                        , "word/media/" `isPrefixOf` eRelativePath e ]
 
   let defaultnodes = [mknode "Default"
               [("Extension","xml"),("ContentType","application/xml")] (),
@@ -421,7 +415,7 @@ writeDocx opts doc = do
   let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers
   let renumFooters = renumIds (\q -> qName q == "Id") idMap footers
   let baserels = baserels' ++ renumHeaders ++ renumFooters
-  let toImgRel (ident,path,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] ()
+  let toImgRel (ident,path,_,_) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",T.pack ident),("Target",T.pack path)] ()
   let imgrels = map toImgRel imgs
   let toLinkRel (src,ident) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
   let linkrels = map toLinkRel $ M.toList $ stExternalLinks st
@@ -489,10 +483,10 @@ writeDocx opts doc = do
   numbering <- parseXml refArchive distArchive numpath
   let newNumElts = mkNumbering (stLists st)
   let pandocAdded e =
-       case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of
+       case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of
          Just numid -> numid >= (990 :: Int)
          Nothing    ->
-           case findAttrTextBy ((== "numId") . qName) e >>= safeRead of
+           case findAttrBy ((== "numId") . qName) e >>= safeRead of
              Just numid -> numid >= (1000 :: Int)
              Nothing    -> False
   let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering)
@@ -514,7 +508,7 @@ writeDocx opts doc = do
   let extraCoreProps = ["subject","lang","category","description"]
   let extraCorePropsMap = M.fromList $ zip extraCoreProps
                        ["dc:subject","dc:language","cp:category","dc:description"]
-  let lookupMetaString' :: T.Text -> Meta -> T.Text
+  let lookupMetaString' :: Text -> Meta -> Text
       lookupMetaString' key' meta' =
         case key' of
              "description"    -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta')
@@ -530,21 +524,21 @@ writeDocx opts doc = do
           : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta))
           : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta)
             | k <- M.keys (unMeta meta), k `elem` extraCoreProps]
-          ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords)
+          ++ mknode "cp:keywords" [] (T.intercalate ", " keywords)
           : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
                    , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
-                   ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+                   ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime)
   let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps
 
   -- docProps/custom.xml
-  let customProperties :: [(String, String)]
-      customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta)
+  let customProperties :: [(Text, Text)]
+      customProperties = [ (k, lookupMetaString k meta)
                          | k <- M.keys (unMeta meta)
                          , k `notElem` (["title", "author", "keywords"]
                                        ++ extraCoreProps)]
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
-         ,("pid", show pid)
+         ,("pid", tshow pid)
          ,("name", k)] $ mknode "vt:lpwstr" [] v
   let customPropsPath = "docProps/custom.xml"
   let customProps = mknode "Properties"
@@ -594,7 +588,8 @@ writeDocx opts doc = do
   fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml"
   webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml"
   headerFooterEntries <- mapM (entryFromArchive refArchive . ("word/" ++)) $
-                     mapMaybe extractTarget (headers ++ footers)
+                         mapMaybe (fmap T.unpack . extractTarget)
+                         (headers ++ footers)
   let miscRelEntries = [ e | e <- zEntries refArchive
                        , "word/_rels/" `isPrefixOf` eRelativePath e
                        , ".xml.rels" `isSuffixOf` eRelativePath e
@@ -620,8 +615,8 @@ newParaPropToOpenXml (fromStyleName -> s) =
   let styleId = T.filter (not . isSpace) s
   in mknode "w:style" [ ("w:type", "paragraph")
                       , ("w:customStyle", "1")
-                      , ("w:styleId", T.unpack styleId)]
-     [ mknode "w:name" [("w:val", T.unpack s)] ()
+                      , ("w:styleId", styleId)]
+     [ mknode "w:name" [("w:val", s)] ()
      , mknode "w:basedOn" [("w:val","BodyText")] ()
      , mknode "w:qFormat" [] ()
      ]
@@ -631,8 +626,8 @@ newTextPropToOpenXml (fromStyleName -> s) =
   let styleId = T.filter (not . isSpace) s
   in mknode "w:style" [ ("w:type", "character")
                       , ("w:customStyle", "1")
-                      , ("w:styleId", T.unpack styleId)]
-     [ mknode "w:name" [("w:val", T.unpack s)] ()
+                      , ("w:styleId", styleId)]
+     [ mknode "w:name" [("w:val", s)] ()
      , mknode "w:basedOn" [("w:val","BodyTextChar")] ()
      ]
 
@@ -643,13 +638,14 @@ styleToOpenXml sm style =
         toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing
                         | otherwise = Just $
                           mknode "w:style" [("w:type","character"),
-                           ("w:customStyle","1"),("w:styleId",show toktype)]
-                             [ mknode "w:name" [("w:val",show toktype)] ()
+                           ("w:customStyle","1"),("w:styleId", tshow toktype)]
+                             [ mknode "w:name" [("w:val", tshow toktype)] ()
                              , mknode "w:basedOn" [("w:val","VerbatimChar")] ()
                              , mknode "w:rPr" [] $
-                               [ mknode "w:color" [("w:val",tokCol toktype)] ()
+                               [ mknode "w:color" [("w:val", tokCol toktype)] ()
                                  | tokCol toktype /= "auto" ] ++
-                               [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
+                               [ mknode "w:shd" [("w:val","clear")
+                                                ,("w:fill",tokBg toktype)] ()
                                  | tokBg toktype /= "auto" ] ++
                                [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
                                [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
@@ -657,10 +653,10 @@ styleToOpenXml sm style =
                              ]
         tokStyles = tokenStyles style
         tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles
-        tokCol toktype = maybe "auto" (drop 1 . fromColor)
+        tokCol toktype = maybe "auto" (T.pack . drop 1 . fromColor)
                          $ (tokenColor =<< M.lookup toktype tokStyles)
                            `mplus` defaultColor style
-        tokBg toktype = maybe "auto" (drop 1 . fromColor)
+        tokBg toktype = maybe "auto" (T.pack . drop 1 . fromColor)
                          $ (tokenBackground =<< M.lookup toktype tokStyles)
                            `mplus` backgroundColor style
         parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing
@@ -673,10 +669,11 @@ styleToOpenXml sm style =
                              , mknode "w:pPr" []
                                $ mknode "w:wordWrap" [("w:val","off")] ()
                                :
-                         maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style)
+                         maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill", T.pack $ drop 1 $ fromColor col)] ()]) (backgroundColor style)
                              ]
 
-copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry
+copyChildren :: (PandocMonad m)
+             => Archive -> Archive -> String -> Integer -> [Text] -> m Entry
 copyChildren refArchive distArchive path timestamp elNames = do
   ref  <- parseXml refArchive distArchive path
   dist <- parseXml distArchive distArchive path
@@ -685,7 +682,7 @@ copyChildren refArchive distArchive path timestamp elNames = do
     }
   where
     strName QName{qName=name, qPrefix=prefix}
-      | Just p <- prefix = p++":"++name
+      | Just p <- prefix = p <> ":" <> name
       | otherwise        = name
     shouldCopy = (`elem` elNames) . strName
     cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
@@ -706,35 +703,35 @@ maxListLevel = 8
 
 mkNum :: ListMarker -> Int -> Element
 mkNum marker numid =
-  mknode "w:num" [("w:numId",show numid)]
+  mknode "w:num" [("w:numId",tshow numid)]
    $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] ()
    : case marker of
        NoMarker     -> []
        BulletMarker -> []
        NumberMarker _ _ start ->
-          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
-              $ mknode "w:startOverride" [("w:val",show start)] ())
+          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",tshow (lvl :: Int))]
+              $ mknode "w:startOverride" [("w:val",tshow start)] ())
                 [0..maxListLevel]
 
 mkAbstractNum :: ListMarker -> Integer -> Element
 mkAbstractNum marker nsid =
   mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
-    $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] ()
+    $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] ()
     : mknode "w:multiLevelType" [("w:val","multilevel")] ()
     : map (mkLvl marker)
       [0..maxListLevel]
 
 mkLvl :: ListMarker -> Int -> Element
 mkLvl marker lvl =
-  mknode "w:lvl" [("w:ilvl",show lvl)] $
+  mknode "w:lvl" [("w:ilvl",tshow lvl)] $
     [ mknode "w:start" [("w:val",start)] ()
       | marker /= NoMarker && marker /= BulletMarker ] ++
     [ mknode "w:numFmt" [("w:val",fmt)] ()
-    , mknode "w:lvlText" [("w:val",lvltxt)] ()
+    , mknode "w:lvlText" [("w:val", lvltxt)] ()
     , mknode "w:lvlJc" [("w:val","left")] ()
     , mknode "w:pPr" []
-      [ mknode "w:ind" [ ("w:left",show $ lvl * step + step)
-                       , ("w:hanging",show (hang :: Int))
+      [ mknode "w:ind" [ ("w:left",tshow $ lvl * step + step)
+                       , ("w:hanging",tshow (hang :: Int))
                        ] ()
       ]
     ]
@@ -743,8 +740,8 @@ mkLvl marker lvl =
                  NoMarker             -> ("bullet"," ","1")
                  BulletMarker         -> ("bullet",bulletFor lvl,"1")
                  NumberMarker st de n -> (styleFor st lvl
-                                         ,patternFor de ("%" ++ show (lvl + 1))
-                                         ,show n)
+                                         ,patternFor de ("%" <> tshow (lvl + 1))
+                                         ,tshow n)
           step = 720
           hang = 480
           bulletFor 0 = "\x2022"  -- filled circle
@@ -767,9 +764,9 @@ mkLvl marker lvl =
           styleFor DefaultStyle 5 = "lowerRoman"
           styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6)
           styleFor _ _            = "decimal"
-          patternFor OneParen s  = s ++ ")"
-          patternFor TwoParens s = "(" ++ s ++ ")"
-          patternFor _ s         = s ++ "."
+          patternFor OneParen s  = s <> ")"
+          patternFor TwoParens s = "(" <> s <> ")"
+          patternFor _ s         = s <> "."
 
 getNumId :: (PandocMonad m) => WS m Int
 getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
@@ -777,8 +774,8 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists
 
 makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element]
 makeTOC opts = do
-  let depth = "1-"++show (writerTOCDepth opts)
-  let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u"
+  let depth = "1-" <> tshow (writerTOCDepth opts)
+  let tocCmd = "TOC \\o \"" <> depth <> "\" \\h \\z \\u"
   tocTitle <- gets stTocTitle
   title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle])
   return
@@ -831,7 +828,7 @@ writeOpenXML opts (Pandoc meta blocks) = do
   let toComment (kvs, ils) = do
         annotation <- inlinesToOpenXML opts ils
         return $
-          mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs]
+          mknode "w:comment" [("w:" <> k, v) | (k,v) <- kvs]
             [ mknode "w:p" [] $
               map Elem
               [ mknode "w:pPr" []
@@ -867,24 +864,24 @@ pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element
 pStyleM styleName = do
   pStyleMap <- gets (smParaStyle . stStyleMaps)
   let sty' = getStyleIdFromName styleName pStyleMap
-  return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] ()
+  return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] ()
 
 rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element
 rStyleM styleName = do
   cStyleMap <- gets (smCharStyle . stStyleMaps)
   let sty' = getStyleIdFromName styleName cStyleMap
-  return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] ()
+  return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] ()
 
-getUniqueId :: (PandocMonad m) => WS m String
+getUniqueId :: (PandocMonad m) => WS m Text
 -- the + 20 is to ensure that there are no clashes with the rIds
 -- already in word/document.xml.rel
 getUniqueId = do
   n <- gets stCurId
   modify $ \st -> st{stCurId = n + 1}
-  return $ show n
+  return $ tshow n
 
 -- | Key for specifying user-defined docx styles.
-dynamicStyleKey :: T.Text
+dynamicStyleKey :: Text
 dynamicStyleKey = "custom-style"
 
 -- | Convert a Pandoc block element to OpenXML.
@@ -979,7 +976,7 @@ blockToOpenXML' opts (Para lst)
 blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns
 blockToOpenXML' _ b@(RawBlock format str)
   | format == Format "openxml" = return [
-        Text (CData CDataRaw (T.unpack str) Nothing)
+        Text (CData CDataRaw str Nothing)
       ]
   | otherwise                  = do
       report $ BlockNotRendered b
@@ -1036,7 +1033,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
   let fullrow = 5000 -- 100% specified in pct
   let rowwidth = fullrow * sum widths
   let mkgridcol w = mknode "w:gridCol"
-                       [("w:w", show (floor (textwidth * w) :: Integer))] ()
+                       [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
   let hasHeader = not $ all null headers
   modify $ \s -> s { stInTable = False }
   -- for compatibility with Word <= 2007, we include a val with a bitmask
@@ -1054,16 +1051,16 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
      mknode "w:tbl" []
       ( mknode "w:tblPr" []
         (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () :
+            mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
                                ,("w:lastRow","0")
                                ,("w:firstColumn","0")
                                ,("w:lastColumn","0")
                                ,("w:noHBand","0")
                                ,("w:noVBand","0")
-                               ,("w:val", printf "%04x" tblLookVal)
+                               ,("w:val", T.pack $ printf "%04x" tblLookVal)
                                ] () :
-          [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] ()
+          [ mknode "w:tblCaption" [("w:val", captionStr)] ()
           | not (null caption) ] )
       : mknode "w:tblGrid" []
         (if all (==0) widths
@@ -1126,7 +1123,7 @@ listItemToOpenXML opts numid (first:rest) = do
   modify $ \st -> st{ stInList = oldInList }
   return $ first'' ++ rest''
 
-alignmentToString :: Alignment -> [Char]
+alignmentToString :: Alignment -> Text
 alignmentToString alignment = case alignment of
                                  AlignLeft    -> "left"
                                  AlignRight   -> "right"
@@ -1169,8 +1166,8 @@ getParaProps displayMathPara = do
   listLevel <- asks envListLevel
   numid <- asks envListNumId
   let listPr = [mknode "w:numPr" []
-                [ mknode "w:ilvl" [("w:val",show listLevel)] ()
-                , mknode "w:numId" [("w:val",show numid)] () ] | listLevel >= 0 && not displayMathPara]
+                [ mknode "w:ilvl" [("w:val",tshow listLevel)] ()
+                , mknode "w:numId" [("w:val",tshow numid)] () ] | listLevel >= 0 && not displayMathPara]
   return $ case listPr ++ squashProps props of
                 [] -> []
                 ps -> [mknode "w:pPr" [] ps]
@@ -1185,7 +1182,7 @@ withParaPropM md p = do
   d <- md
   withParaProp d p
 
-formattedString :: PandocMonad m => T.Text -> WS m [Element]
+formattedString :: PandocMonad m => Text -> WS m [Element]
 formattedString str =
   -- properly handle soft hyphens
   case splitTextBy (=='\173') str of
@@ -1194,7 +1191,7 @@ formattedString str =
          sh <- formattedRun [mknode "w:softHyphen" [] ()]
          intercalate sh <$> mapM formattedString' ws
 
-formattedString' :: PandocMonad m => T.Text -> WS m [Element]
+formattedString' :: PandocMonad m => Text -> WS m [Element]
 formattedString' str = do
   inDel <- asks envInDel
   formattedRun [ mktnode (if inDel then "w:delText" else "w:t")
@@ -1226,7 +1223,7 @@ inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) =
      mknode "w:r" []
      (mknode "w:t"
        [("xml:space","preserve")]
-       ("\t" :: String))] ++)
+       ("\t" :: Text))] ++)
     <$> inlinesToOpenXML opts ils
 inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) =
   inlinesToOpenXML opts ils
@@ -1236,17 +1233,17 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
   let ident' = fromMaybe ident (lookup "id" kvs)
       kvs' = filter (("id" /=) . fst) kvs
   modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st }
-  return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ]
+  return [ Elem $ mknode "w:commentRangeStart" [("w:id", ident')] () ]
 inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) =
   -- prefer the "id" in kvs, since that is the one produced by the docx
   -- reader.
   let ident' = fromMaybe ident (lookup "id" kvs)
   in return . map Elem $
-     [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] ()
+     [ mknode "w:commentRangeEnd" [("w:id", ident')] ()
      , mknode "w:r" []
        [ mknode "w:rPr" []
          [ mknode "w:rStyle" [("w:val", "CommentReference")] () ]
-       , mknode "w:commentReference" [("w:id", T.unpack ident')] () ]
+       , mknode "w:commentReference" [("w:id", ident')] () ]
      ]
 inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
   stylemod <- case lookup dynamicStyleKey kvs of
@@ -1270,8 +1267,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
         defaultAuthor <- asks envChangesAuthor
         let author = fromMaybe defaultAuthor (lookup "author" kvs)
         let mdate = lookup "date" kvs
-        return $ ("w:author", T.unpack author) :
-                   maybe [] (\date -> [("w:date", T.unpack date)]) mdate
+        return $ ("w:author", author) :
+                   maybe [] (\date -> [("w:date", date)]) mdate
   insmod <- if "insertion" `elem` classes
                then do
                  changeAuthorDate <- getChangeAuthorDate
@@ -1281,7 +1278,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                    x <- f
                    return [Elem $
                            mknode "w:ins"
-                             (("w:id", show insId) : changeAuthorDate) x]
+                             (("w:id", tshow insId) : changeAuthorDate) x]
                else return id
   delmod <- if "deletion" `elem` classes
                then do
@@ -1291,7 +1288,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do
                  return $ \f -> local (\env->env{envInDel=True}) $ do
                    x <- f
                    return [Elem $ mknode "w:del"
-                             (("w:id", show delId) : changeAuthorDate) x]
+                             (("w:id", tshow delId) : changeAuthorDate) x]
                else return id
   contents <- insmod $ delmod $ dirmod $ stylemod $ pmod
                      $ inlinesToOpenXML opts ils
@@ -1322,7 +1319,7 @@ inlineToOpenXML' opts (Strikeout lst) =
 inlineToOpenXML' _ LineBreak = return [Elem br]
 inlineToOpenXML' _ il@(RawInline f str)
   | f == Format "openxml" = return
-                            [Text (CData CDataRaw (T.unpack str) Nothing)]
+                            [Text (CData CDataRaw str Nothing)]
   | otherwise             = do
       report $ InlineNotRendered il
       return []
@@ -1335,7 +1332,7 @@ inlineToOpenXML' opts (Math mathType str) = do
   when (mathType == DisplayMath) setFirstPara
   res <- (lift . lift) (convertMath writeOMML mathType str)
   case res of
-       Right r -> return [Elem r]
+       Right r -> return [Elem $ fromXLElement r]
        Left il -> inlineToOpenXML' opts il
 inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst
 inlineToOpenXML' opts (Code attrs str) = do
@@ -1348,7 +1345,7 @@ inlineToOpenXML' opts (Code attrs str) = do
         mknode "w:r" []
           [ mknode "w:rPr" [] $
             maybeToList (lookup toktype tokTypesMap)
-            , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ]
+            , mknode "w:t" [("xml:space","preserve")] tok ]
   withTextPropM (rStyleM "Verbatim Char")
     $ if isNothing (writerHighlightStyle opts)
           then unhighlighted
@@ -1365,7 +1362,7 @@ inlineToOpenXML' opts (Note bs) = do
   let notemarker = mknode "w:r" []
                    [ mknode "w:rPr" [] footnoteStyle
                    , mknode "w:footnoteRef" [] () ]
-  let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker
+  let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker
   let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs
       insertNoteRef (Para ils  : xs) = Para  (notemarkerXml : Space : ils) : xs
       insertNoteRef xs               = Para [notemarkerXml] : xs
@@ -1384,17 +1381,17 @@ inlineToOpenXML' opts (Note bs) = do
 inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   return
-    [ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ]
+    [ Elem $ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ]
 -- external link:
 inlineToOpenXML' opts (Link _ txt (src,_)) = do
   contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt
   extlinks <- gets stExternalLinks
-  id' <- case M.lookup (T.unpack src) extlinks of
+  id' <- case M.lookup src extlinks of
             Just i   -> return i
             Nothing  -> do
-              i <- ("rId"++) `fmap` getUniqueId
+              i <- ("rId" <>) <$> getUniqueId
               modify $ \st -> st{ stExternalLinks =
-                        M.insert (T.unpack src) i extlinks }
+                        M.insert src i extlinks }
               return i
   return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ]
 inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
@@ -1414,17 +1411,17 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
                                              ,("noChangeAspect","1")] ()
         nvPicPr  = mknode "pic:nvPicPr" []
                         [ mknode "pic:cNvPr"
-                            [("descr",T.unpack src),("id","0"),("name","Picture")] ()
+                            [("descr",src),("id","0"),("name","Picture")] ()
                         , cNvPicPr ]
         blipFill = mknode "pic:blipFill" []
-          [ mknode "a:blip" [("r:embed",ident)] ()
+          [ mknode "a:blip" [("r:embed",T.pack ident)] ()
           , mknode "a:stretch" [] $
               mknode "a:fillRect" [] ()
           ]
         xfrm =    mknode "a:xfrm" []
                         [ mknode "a:off" [("x","0"),("y","0")] ()
-                        , mknode "a:ext" [("cx",show xemu)
-                                         ,("cy",show yemu)] () ]
+                        , mknode "a:ext" [("cx",tshow xemu)
+                                         ,("cy",tshow yemu)] () ]
         prstGeom = mknode "a:prstGeom" [("prst","rect")] $
                          mknode "a:avLst" [] ()
         ln =      mknode "a:ln" [("w","9525")]
@@ -1445,12 +1442,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
         imgElt = mknode "w:r" [] $
           mknode "w:drawing" [] $
             mknode "wp:inline" []
-              [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
+              [ mknode "wp:extent" [("cx",tshow xemu),("cy",tshow yemu)] ()
               , mknode "wp:effectExtent"
                 [("b","0"),("l","0"),("r","0"),("t","0")] ()
               , mknode "wp:docPr"
-                [ ("descr", T.unpack $ stringify alt)
-                , ("title", T.unpack title)
+                [ ("descr", stringify alt)
+                , ("title", title)
                 , ("id","1")
                 , ("name","Picture")
                 ] ()
@@ -1463,7 +1460,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
     Just imgData -> return [Elem $ generateImgElt imgData]
     Nothing -> ( do --try
       (img, mt) <- P.fetchItem src
-      ident <- ("rId"++) `fmap` getUniqueId
+      ident <- ("rId" <>) <$> getUniqueId
 
       let
         imgext = case mt >>= extensionFromMimeType of
@@ -1477,10 +1474,10 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do
             Just Svg  -> ".svg"
             Just Emf  -> ".emf"
             Nothing   -> ""
-        imgpath = "media/" <> ident <> T.unpack imgext
-        mbMimeType = mt <|> getMimeType imgpath
+        imgpath = "media/" <> ident <> imgext
+        mbMimeType = mt <|> getMimeType (T.unpack imgpath)
 
-        imgData = (ident, imgpath, mbMimeType, img)
+        imgData = (T.unpack ident, T.unpack imgpath, mbMimeType, img)
 
       if T.null imgext
          then -- without an extension there is no rule for content type
@@ -1538,20 +1535,20 @@ withDirection x = do
                                     , envTextProperties = EnvProps textStyle textProps'
                                     }
 
-wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content]
+wrapBookmark :: (PandocMonad m) => Text -> [Content] -> WS m [Content]
 wrapBookmark "" contents = return contents
 wrapBookmark ident contents = do
   id' <- getUniqueId
   let bookmarkStart = mknode "w:bookmarkStart"
                        [("w:id", id')
-                       ,("w:name", T.unpack $ toBookmarkName ident)] ()
+                       ,("w:name", toBookmarkName ident)] ()
       bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
   return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd]
 
 -- Word imposes a 40 character limit on bookmark names and requires
 -- that they begin with a letter.  So we just use a hash of the
 -- identifier when otherwise we'd have an illegal bookmark name.
-toBookmarkName :: T.Text -> T.Text
+toBookmarkName :: Text -> Text
 toBookmarkName s
   | Just (c, _) <- T.uncons s
   , isLetter c
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 171ffe582..3f10cb437 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -24,12 +24,13 @@ import Control.Monad.State.Strict (StateT, evalState, evalStateT, get,
                                    gets, lift, modify)
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Char8 as B8
-import Data.Char (isAlphaNum, isAscii, isDigit, toLower)
+import Data.Char (isAlphaNum, isAscii, isDigit)
 import Data.List (isInfixOf, isPrefixOf)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust)
 import qualified Data.Set as Set
-import qualified Data.Text as TS
+import qualified Data.Text as T
+import Data.Text (Text)
 import qualified Data.Text.Lazy as TL
 import Network.HTTP (urlEncode)
 import System.FilePath (takeExtension, takeFileName, makeRelative)
@@ -48,16 +49,13 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..),
                             ObfuscationMethod (NoObfuscation), WrapOption (..),
                             WriterOptions (..))
 import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags',
-                           safeRead, stringify, trim, uniqueIdent, tshow)
+                           stringify, uniqueIdent, tshow)
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.UUID (getRandomUUID)
 import Text.Pandoc.Walk (query, walk, walkM)
 import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
 import Text.Printf (printf)
-import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
-                       add_attrs, lookupAttr, node, onlyElems,
-                       ppElement, showElement, strContent, unode, unqual)
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
 import Text.Pandoc.XML (escapeStringForXML)
 import Text.DocTemplates (FromContext(lookupContext), Context(..),
                           ToContext(toVal), Val(..))
@@ -69,7 +67,7 @@ newtype Chapter = Chapter [Block]
 data EPUBState = EPUBState {
         stMediaPaths  :: [(FilePath, (FilePath, Maybe Entry))]
       , stMediaNextId :: Int
-      , stEpubSubdir  :: String
+      , stEpubSubdir  :: FilePath
       }
 
 type E m = StateT EPUBState m
@@ -78,62 +76,63 @@ data EPUBMetadata = EPUBMetadata{
     epubIdentifier          :: [Identifier]
   , epubTitle               :: [Title]
   , epubDate                :: [Date]
-  , epubLanguage            :: String
+  , epubLanguage            :: Text
   , epubCreator             :: [Creator]
   , epubContributor         :: [Creator]
-  , epubSubject             :: [String]
-  , epubDescription         :: Maybe String
-  , epubType                :: Maybe String
-  , epubFormat              :: Maybe String
-  , epubPublisher           :: Maybe String
-  , epubSource              :: Maybe String
-  , epubRelation            :: Maybe String
-  , epubCoverage            :: Maybe String
-  , epubRights              :: Maybe String
-  , epubBelongsToCollection :: Maybe String
-  , epubGroupPosition       :: Maybe String
-  , epubCoverImage          :: Maybe String
+  , epubSubject             :: [Text]
+  , epubDescription         :: Maybe Text
+  , epubType                :: Maybe Text
+  , epubFormat              :: Maybe Text
+  , epubPublisher           :: Maybe Text
+  , epubSource              :: Maybe Text
+  , epubRelation            :: Maybe Text
+  , epubCoverage            :: Maybe Text
+  , epubRights              :: Maybe Text
+  , epubBelongsToCollection :: Maybe Text
+  , epubGroupPosition       :: Maybe Text
+  , epubCoverImage          :: Maybe FilePath
   , epubStylesheets         :: [FilePath]
   , epubPageDirection       :: Maybe ProgressionDirection
-  , epubIbooksFields        :: [(String, String)]
-  , epubCalibreFields       :: [(String, String)]
+  , epubIbooksFields        :: [(Text, Text)]
+  , epubCalibreFields       :: [(Text, Text)]
   } deriving Show
 
 data Date = Date{
-    dateText  :: String
-  , dateEvent :: Maybe String
+    dateText  :: Text
+  , dateEvent :: Maybe Text
   } deriving Show
 
 data Creator = Creator{
-    creatorText   :: String
-  , creatorRole   :: Maybe String
-  , creatorFileAs :: Maybe String
+    creatorText   :: Text
+  , creatorRole   :: Maybe Text
+  , creatorFileAs :: Maybe Text
   } deriving Show
 
 data Identifier = Identifier{
-    identifierText   :: String
-  , identifierScheme :: Maybe String
+    identifierText   :: Text
+  , identifierScheme :: Maybe Text
   } deriving Show
 
 data Title = Title{
-    titleText   :: String
-  , titleFileAs :: Maybe String
-  , titleType   :: Maybe String
+    titleText   :: Text
+  , titleFileAs :: Maybe Text
+  , titleType   :: Maybe Text
   } deriving Show
 
 data ProgressionDirection = LTR | RTL deriving Show
 
-dcName :: String -> QName
+dcName :: Text -> QName
 dcName n = QName n Nothing (Just "dc")
 
-dcNode :: Node t => String -> t -> Element
+dcNode :: Node t => Text -> t -> Element
 dcNode = node . dcName
 
-opfName :: String -> QName
+opfName :: Text -> QName
 opfName n = QName n Nothing (Just "opf")
 
-toId :: FilePath -> String
-toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
+toId :: FilePath -> Text
+toId = T.pack .
+       map (\x -> if isAlphaNum x || x == '-' || x == '_'
                      then x
                      else '_') . takeFileName
 
@@ -141,8 +140,8 @@ removeNote :: Inline -> Inline
 removeNote (Note _) = Str ""
 removeNote x        = x
 
-toVal' :: String -> Val TS.Text
-toVal' = toVal . TS.pack
+toVal' :: Text -> Val T.Text
+toVal' = toVal
 
 mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry
 mkEntry path content = do
@@ -172,21 +171,21 @@ getEPUBMetadata opts meta = do
        if null (epubIdentifier m)
           then do
             randomId <- getRandomUUID
-            return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] }
+            return $ m{ epubIdentifier = [Identifier (tshow randomId) Nothing] }
           else return m
   let addLanguage m =
-       if null (epubLanguage m)
+       if T.null (epubLanguage m)
           then case lookupContext "lang" (writerVariables opts) of
-                     Just x  -> return m{ epubLanguage = TS.unpack x }
+                     Just x  -> return m{ epubLanguage = x }
                      Nothing -> do
                        mLang <- lift $ P.lookupEnv "LANG"
                        let localeLang =
                              case mLang of
                                Just lang ->
-                                 TS.map (\c -> if c == '_' then '-' else c) $
-                                 TS.takeWhile (/='.') lang
+                                 T.map (\c -> if c == '_' then '-' else c) $
+                                 T.takeWhile (/='.') lang
                                Nothing -> "en-US"
-                       return m{ epubLanguage = TS.unpack localeLang }
+                       return m{ epubLanguage = localeLang }
           else return m
   let fixDate m =
        if null (epubDate m)
@@ -201,7 +200,7 @@ getEPUBMetadata opts meta = do
           then return m
           else do
             let authors' = map stringify $ docAuthors meta
-            let toAuthor name = Creator{ creatorText = TS.unpack name
+            let toAuthor name = Creator{ creatorText = name
                                        , creatorRole = Just "aut"
                                        , creatorFileAs = Nothing }
             return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
@@ -249,31 +248,31 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
   where getAttr n = lookupAttr (opfName n) attrs
 addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md =
   case getAttr "property" of
-       Just s | "ibooks:" `isPrefixOf` s ->
-                md{ epubIbooksFields = (drop 7 s, strContent e) :
+       Just s | "ibooks:" `T.isPrefixOf` s ->
+                md{ epubIbooksFields = (T.drop 7 s, strContent e) :
                        epubIbooksFields md }
        _ -> case getAttr "name" of
-                 Just s | "calibre:" `isPrefixOf` s ->
+                 Just s | "calibre:" `T.isPrefixOf` s ->
                    md{ epubCalibreFields =
-                         (drop 8 s, fromMaybe "" $ getAttr "content") :
+                         (T.drop 8 s, fromMaybe "" $ getAttr "content") :
                           epubCalibreFields md }
                  _ -> md
   where getAttr n = lookupAttr (unqual n) attrs
 addMetadataFromXML _ md = md
 
-metaValueToString :: MetaValue -> String
-metaValueToString (MetaString s)    = TS.unpack s
-metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils
-metaValueToString (MetaBlocks bs)   = TS.unpack $ stringify bs
+metaValueToString :: MetaValue -> Text
+metaValueToString (MetaString s)    = s
+metaValueToString (MetaInlines ils) = stringify ils
+metaValueToString (MetaBlocks bs)   = stringify bs
 metaValueToString (MetaBool True)   = "true"
 metaValueToString (MetaBool False)  = "false"
 metaValueToString _                 = ""
 
 metaValueToPaths :: MetaValue -> [FilePath]
-metaValueToPaths (MetaList xs) = map metaValueToString xs
-metaValueToPaths x             = [metaValueToString x]
+metaValueToPaths (MetaList xs) = map (T.unpack . metaValueToString) xs
+metaValueToPaths x             = [T.unpack $ metaValueToString x]
 
-getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a]
+getList :: T.Text -> Meta -> (MetaValue -> a) -> [a]
 getList s meta handleMetaValue =
   case lookupMeta s meta of
        Just (MetaList xs) -> map handleMetaValue xs
@@ -297,7 +296,7 @@ getTitle meta = getList "title" meta handleMetaValue
                 , titleType = metaValueToString <$> M.lookup "type" m }
         handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
 
-getCreator :: TS.Text -> Meta -> [Creator]
+getCreator :: T.Text -> Meta -> [Creator]
 getCreator s meta = getList s meta handleMetaValue
   where handleMetaValue (MetaMap m) =
            Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
@@ -305,7 +304,7 @@ getCreator s meta = getList s meta handleMetaValue
                   , creatorRole = metaValueToString <$> M.lookup "role" m }
         handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
 
-getDate :: TS.Text -> Meta -> [Date]
+getDate :: T.Text -> Meta -> [Date]
 getDate s meta = getList s meta handleMetaValue
   where handleMetaValue (MetaMap m) =
            Date{ dateText = fromMaybe "" $
@@ -314,7 +313,7 @@ getDate s meta = getList s meta handleMetaValue
         handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv
                                   , dateEvent = Nothing }
 
-simpleList :: TS.Text -> Meta -> [String]
+simpleList :: T.Text -> Meta -> [Text]
 simpleList s meta =
   case lookupMeta s meta of
        Just (MetaList xs) -> map metaValueToString xs
@@ -339,7 +338,7 @@ metadataFromMeta opts meta = EPUBMetadata{
     , epubCoverage             = coverage
     , epubRights               = rights
     , epubBelongsToCollection  = belongsToCollection
-    , epubGroupPosition        = groupPosition 
+    , epubGroupPosition        = groupPosition
     , epubCoverImage           = coverImage
     , epubStylesheets          = stylesheets
     , epubPageDirection        = pageDirection
@@ -363,31 +362,30 @@ metadataFromMeta opts meta = EPUBMetadata{
         coverage = metaValueToString <$> lookupMeta "coverage" meta
         rights = metaValueToString <$> lookupMeta "rights" meta
         belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta
-        groupPosition = metaValueToString <$> lookupMeta "group-position" meta       
-        coverImage =
-            (TS.unpack <$> lookupContext "epub-cover-image"
-                              (writerVariables opts))
+        groupPosition = metaValueToString <$> lookupMeta "group-position" meta
+        coverImage = T.unpack <$>
+            lookupContext "epub-cover-image" (writerVariables opts)
             `mplus` (metaValueToString <$> lookupMeta "cover-image" meta)
         mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta
         stylesheets = maybe [] metaValueToPaths mCss ++
                       case lookupContext "css" (writerVariables opts) of
-                         Just xs -> map TS.unpack xs
+                         Just xs -> map T.unpack xs
                          Nothing ->
                            case lookupContext "css" (writerVariables opts) of
-                             Just x  -> [TS.unpack x]
+                             Just x  -> [T.unpack x]
                              Nothing -> []
-        pageDirection = case map toLower . metaValueToString <$>
+        pageDirection = case T.toLower . metaValueToString <$>
                              lookupMeta "page-progression-direction" meta of
                               Just "ltr" -> Just LTR
                               Just "rtl" -> Just RTL
                               _          -> Nothing
         ibooksFields = case lookupMeta "ibooks" meta of
                             Just (MetaMap mp)
-                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
+                               -> M.toList $ M.map metaValueToString mp
                             _  -> []
         calibreFields = case lookupMeta "calibre" meta of
                             Just (MetaMap mp)
-                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
+                               -> M.toList $ M.map metaValueToString mp
                             _  -> []
 
 -- | Produce an EPUB2 file from a Pandoc document.
@@ -413,9 +411,11 @@ writeEPUB :: PandocMonad m
 writeEPUB epubVersion opts doc = do
   let epubSubdir = writerEpubSubdirectory opts
   -- sanity check on epubSubdir
-  unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
+  unless (T.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
     throwError $ PandocEpubSubdirectoryError epubSubdir
-  let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir }
+  let initState = EPUBState { stMediaPaths = []
+                            , stMediaNextId = 0
+                            , stEpubSubdir = T.unpack epubSubdir }
   evalStateT (pandocToEPUB epubVersion opts doc) initState
 
 pandocToEPUB :: PandocMonad m
@@ -439,7 +439,7 @@ pandocToEPUB version opts doc = do
                         [] -> case epubTitle metadata of
                                    []    -> "UNTITLED"
                                    (x:_) -> titleText x
-                        x  -> TS.unpack $ stringify x
+                        x  -> stringify x
 
   -- stylesheet
   stylesheets <- case epubStylesheets metadata of
@@ -461,7 +461,8 @@ pandocToEPUB version opts doc = do
                            (ListVal $ map
                              (\e -> toVal' $
                                 (if useprefix then "../" else "") <>
-                                makeRelative epubSubdir (eRelativePath e))
+                                T.pack
+                                 (makeRelative epubSubdir (eRelativePath e)))
                              stylesheetEntries)
                              mempty
 
@@ -490,18 +491,19 @@ pandocToEPUB version opts doc = do
                              case imageSize opts' (B.toStrict imgContent) of
                                Right sz  -> return $ sizeInPixels sz
                                Left err' -> (0, 0) <$ report
-                                 (CouldNotDetermineImageSize (TS.pack img) err')
+                                 (CouldNotDetermineImageSize (T.pack img) err')
                        cpContent <- lift $ writeHtml
                             opts'{ writerVariables =
                                    Context (M.fromList [
                                     ("coverpage", toVal' "true"),
                                     ("pagetitle", toVal $
-                                      escapeStringForXML $ TS.pack plainTitle),
-                                    ("cover-image", toVal' coverImageName),
+                                      escapeStringForXML plainTitle),
+                                    ("cover-image",
+                                       toVal' $ T.pack coverImageName),
                                     ("cover-image-width", toVal' $
-                                       show coverImageWidth),
+                                       tshow coverImageWidth),
                                     ("cover-image-height", toVal' $
-                                       show coverImageHeight)]) <>
+                                       tshow coverImageHeight)]) <>
                                      cssvars True <> vars }
                             (Pandoc meta [])
                        coverEntry <- mkEntry "text/cover.xhtml" cpContent
@@ -517,7 +519,7 @@ pandocToEPUB version opts doc = do
                                         ("titlepage", toVal' "true"),
                                         ("body-type",  toVal' "frontmatter"),
                                         ("pagetitle", toVal $
-                                            escapeStringForXML $ TS.pack plainTitle)])
+                                            escapeStringForXML plainTitle)])
                                       <> cssvars True <> vars }
                                (Pandoc meta [])
   tpEntry <- mkEntry "text/title_page.xhtml" tpContent
@@ -526,7 +528,7 @@ pandocToEPUB version opts doc = do
   let matchingGlob f = do
         xs <- lift $ P.glob f
         when (null xs) $
-          report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files"
+          report $ CouldNotFetchResource (T.pack f) "glob did not match any font files"
         return xs
   let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<<
                         lift (P.readFileLazy f)
@@ -573,13 +575,13 @@ pandocToEPUB version opts doc = do
 
   let chapters' = secsToChapters secs
 
-  let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)]
+  let extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)]
       extractLinkURL' num (Span (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (Link (ident, _, _) _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (Image (ident, _, _) _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL' num (RawInline fmt raw)
         | isHtmlFormat fmt
         = foldr (\tag ->
@@ -587,18 +589,18 @@ pandocToEPUB version opts doc = do
                      TagOpen{} ->
                        case fromAttrib "id" tag of
                          "" -> id
-                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
+                         x  -> ((x, showChapter num <> "#" <> x):)
                      _ -> id)
             [] (parseTags raw)
       extractLinkURL' _ _ = []
 
-  let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)]
+  let extractLinkURL :: Int -> Block -> [(T.Text, T.Text)]
       extractLinkURL num (Div (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (Header _ (ident, _, _) _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (Table (ident,_,_) _ _ _ _ _)
-        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
+        | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)]
       extractLinkURL num (RawBlock fmt raw)
         | isHtmlFormat fmt
         = foldr (\tag ->
@@ -606,7 +608,7 @@ pandocToEPUB version opts doc = do
                      TagOpen{} ->
                        case fromAttrib "id" tag of
                          "" -> id
-                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
+                         x  -> ((x, showChapter num <> "#" <> x):)
                      _ -> id)
             [] (parseTags raw)
       extractLinkURL num b = query (extractLinkURL' num) b
@@ -617,7 +619,7 @@ pandocToEPUB version opts doc = do
 
   let fixInternalReferences :: Inline -> Inline
       fixInternalReferences (Link attr lab (src, tit))
-        | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of
+        | Just ('#', xs) <- T.uncons src = case lookup xs reftable of
              Just ys -> Link attr lab (ys, tit)
              Nothing -> Link attr lab (src, tit)
       fixInternalReferences x = x
@@ -630,7 +632,7 @@ pandocToEPUB version opts doc = do
                  chapters'
 
   let chapToEntry num (Chapter bs) =
-        mkEntry ("text/" ++ showChapter num) =<<
+        mkEntry ("text/" ++ T.unpack (showChapter num)) =<<
         writeHtml opts'{ writerVariables =
                             Context (M.fromList
                                      [("body-type", toVal' bodyType),
@@ -677,12 +679,12 @@ pandocToEPUB version opts doc = do
   let chapterNode ent = unode "item" !
                            ([("id", toId $ makeRelative epubSubdir
                                          $ eRelativePath ent),
-                             ("href", makeRelative epubSubdir
+                             ("href", T.pack $ makeRelative epubSubdir
                                       $ eRelativePath ent),
                              ("media-type", "application/xhtml+xml")]
                             ++ case props ent of
                                     [] -> []
-                                    xs -> [("properties", unwords xs)])
+                                    xs -> [("properties", T.unwords xs)])
                         $ ()
 
   let chapterRefNode ent = unode "itemref" !
@@ -691,17 +693,17 @@ pandocToEPUB version opts doc = do
   let pictureNode ent = unode "item" !
                            [("id", toId $ makeRelative epubSubdir
                                         $ eRelativePath ent),
-                            ("href", makeRelative epubSubdir
+                            ("href", T.pack $ makeRelative epubSubdir
                                      $ eRelativePath ent),
                             ("media-type",
-                               maybe "application/octet-stream" TS.unpack
+                               fromMaybe "application/octet-stream"
                                $ mediaTypeOf $ eRelativePath ent)] $ ()
   let fontNode ent = unode "item" !
                            [("id", toId $ makeRelative epubSubdir
                                         $ eRelativePath ent),
-                            ("href", makeRelative epubSubdir
+                            ("href", T.pack $ makeRelative epubSubdir
                                      $ eRelativePath ent),
-                            ("media-type", maybe "" TS.unpack $
+                            ("media-type", fromMaybe "" $
                                   getMimeType $ eRelativePath ent)] $ ()
 
   let tocTitle = maybe plainTitle
@@ -710,7 +712,7 @@ pandocToEPUB version opts doc = do
             (x:_) -> return $ identifierText x  -- use first identifier as UUID
             []    -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null"  -- shouldn't happen
   currentTime <- lift P.getTimestamp
-  let contentsData = UTF8.fromStringLazy $ ppTopElement $
+  let contentsData = UTF8.fromTextLazy $ TL.fromStrict $ ppTopElement $
         unode "package" !
           ([("version", case version of
                              EPUB2 -> "2.0"
@@ -728,7 +730,8 @@ pandocToEPUB version opts doc = do
                                ,("media-type","application/xhtml+xml")] ++
                                [("properties","nav") | epub3 ]) $ ()
              ] ++
-             [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp)
+             [ unode "item" ! [("id","stylesheet" <> tshow n)
+                              , ("href", T.pack fp)
                               ,("media-type","text/css")] $ () |
                              (n :: Int, fp) <- zip [1..] (map
                                (makeRelative epubSubdir . eRelativePath)
@@ -773,7 +776,7 @@ pandocToEPUB version opts doc = do
   let tocLevel = writerTOCDepth opts
 
   let navPointNode :: PandocMonad m
-                   => (Int -> [Inline] -> TS.Text -> [Element] -> Element)
+                   => (Int -> [Inline] -> T.Text -> [Element] -> Element)
                    -> Block -> StateT Int m [Element]
       navPointNode formatter (Div (ident,_,_)
                                 (Header lvl (_,_,kvs) ils : children)) =
@@ -783,7 +786,7 @@ pandocToEPUB version opts doc = do
              n <- get
              modify (+1)
              let num = fromMaybe "" $ lookup "number" kvs
-             let tit = if writerNumberSections opts && not (TS.null num)
+             let tit = if writerNumberSections opts && not (T.null num)
                           then Span ("", ["section-header-number"], [])
                                 [Str num] : Space : ils
                           else ils
@@ -797,21 +800,21 @@ pandocToEPUB version opts doc = do
         concat <$> mapM (navPointNode formatter) bs
       navPointNode _ _ = return []
 
-  let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
+  let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
       navMapFormatter n tit src subs = unode "navPoint" !
-               [("id", "navPoint-" ++ show n)] $
-                  [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit
-                  , unode "content" ! [("src", "text/" <> TS.unpack src)] $ ()
+               [("id", "navPoint-" <> tshow n)] $
+                  [ unode "navLabel" $ unode "text" $ stringify tit
+                  , unode "content" ! [("src", "text/" <> src)] $ ()
                   ] ++ subs
 
   let tpNode = unode "navPoint" !  [("id", "navPoint-0")] $
-                  [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta)
+                  [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta)
                   , unode "content" ! [("src", "text/title_page.xhtml")]
                   $ () ]
 
   navMap <- lift $ evalStateT
              (concat <$> mapM (navPointNode navMapFormatter) secs) 1
-  let tocData = UTF8.fromStringLazy $ ppTopElement $
+  let tocData = B.fromStrict $ UTF8.fromText $ ppTopElement $
         unode "ncx" ! [("version","2005-1")
                        ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
           [ unode "head" $
@@ -833,11 +836,11 @@ pandocToEPUB version opts doc = do
           ]
   tocEntry <- mkEntry "toc.ncx" tocData
 
-  let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
+  let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element
       navXhtmlFormatter n tit src subs = unode "li" !
-                                       [("id", "toc-li-" ++ show n)] $
+                                       [("id", "toc-li-" <> tshow n)] $
                                             (unode "a" !
-                                                [("href", "text/" <> TS.unpack src)]
+                                                [("href", "text/" <> src)]
                                              $ titElements)
                                             : case subs of
                                                  []    -> []
@@ -850,7 +853,7 @@ pandocToEPUB version opts doc = do
                                      , writerVariables =
                                          Context (M.fromList
                                            [("pagetitle", toVal $
-                                             escapeStringForXML $ TS.pack plainTitle)])
+                                             escapeStringForXML plainTitle)])
                                        <> writerVariables opts}
                                  (Pandoc nullMeta
                                    [Plain $ walk clean tit])) of
@@ -865,7 +868,7 @@ pandocToEPUB version opts doc = do
   tocBlocks <- lift $ evalStateT
                  (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1
   let navBlocks = [RawBlock (Format "html")
-                  $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces
+                  $ showElement $ -- prettyprinting introduces bad spaces
                    unode navtag ! ([("epub:type","toc") | epub3] ++
                                    [("id","toc")]) $
                     [ unode "h1" ! [("id","toc-title")] $ tocTitle
@@ -875,21 +878,21 @@ pandocToEPUB version opts doc = do
                                 [ unode "a" ! [("href",
                                                   "text/title_page.xhtml")
                                                ,("epub:type", "titlepage")] $
-                                  ("Title Page" :: String) ] :
+                                  ("Title Page" :: Text) ] :
                               [ unode "li"
                                 [ unode "a" ! [("href", "text/cover.xhtml")
                                               ,("epub:type", "cover")] $
-                                  ("Cover" :: String)] |
+                                  ("Cover" :: Text)] |
                                   isJust (epubCoverImage metadata)
                               ] ++
                               [ unode "li"
                                 [ unode "a" ! [("href", "#toc")
                                               ,("epub:type", "toc")] $
-                                    ("Table of Contents" :: String)
+                                    ("Table of Contents" :: Text)
                                 ] | writerTableOfContents opts
                               ]
                          else []
-  let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $
+  let landmarks = [RawBlock (Format "html") $ ppElement $
                     unode "nav" ! [("epub:type","landmarks")
                                   ,("id","landmarks")
                                   ,("hidden","hidden")] $
@@ -910,22 +913,22 @@ pandocToEPUB version opts doc = do
                         UTF8.fromStringLazy "application/epub+zip"
 
   -- container.xml
-  let containerData = UTF8.fromStringLazy $ ppTopElement $
+  let containerData = B.fromStrict $ UTF8.fromText $ ppTopElement $
        unode "container" ! [("version","1.0")
               ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
          unode "rootfiles" $
            unode "rootfile" ! [("full-path",
                     (if null epubSubdir
                         then ""
-                        else epubSubdir ++ "/") ++ "content.opf")
+                        else T.pack epubSubdir <> "/") <> "content.opf")
                ,("media-type","application/oebps-package+xml")] $ ()
   containerEntry <- mkEntry "META-INF/container.xml" containerData
 
   -- com.apple.ibooks.display-options.xml
-  let apple = UTF8.fromStringLazy $ ppTopElement $
+  let apple = B.fromStrict $ UTF8.fromText $ ppTopElement $
         unode "display_options" $
           unode "platform" ! [("name","*")] $
-            unode "option" ! [("name","specified-fonts")] $ ("true" :: String)
+            unode "option" ! [("name","specified-fonts")] $ ("true" :: Text)
   appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
 
   -- construct archive
@@ -947,7 +950,8 @@ metadataElement version md currentTime =
                   ++ publisherNodes ++ sourceNodes ++ relationNodes
                   ++ coverageNodes ++ rightsNodes ++ coverImageNodes
                   ++ modifiedNodes ++ belongsToCollectionNodes
-        withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
+        withIds base f = concat . zipWith f (map (\x -> base <>
+                                                        T.cons '-' (tshow x))
                          ([1..] :: [Int]))
         identifierNodes = withIds "epub-id" toIdentifierNode $
                           epubIdentifier md
@@ -961,9 +965,9 @@ metadataElement version md currentTime =
                                  (x:_) -> [dcNode "date" ! [("id","epub-date")]
                                             $ dateText x]
         ibooksNodes = map ibooksNode (epubIbooksFields md)
-        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v
+        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" <> k)] $ v
         calibreNodes = map calibreNode (epubCalibreFields md)
-        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k),
+        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" <> k),
                                              ("content", v)] $ ()
         languageNodes = [dcTag "language" $ epubLanguage md]
         creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
@@ -989,12 +993,12 @@ metadataElement version md currentTime =
             maybe []
                 (\belongsToCollection -> (unode "meta" !  [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection )
                 :
-                [unode "meta" !  [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: String) ])
+                [unode "meta" !  [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: Text) ])
                 (epubBelongsToCollection md)++
             maybe []
                 (\groupPosition -> [unode "meta" !  [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ])
                 (epubGroupPosition md)
-        dcTag n s = unode ("dc:" ++ n) s
+        dcTag n s = unode ("dc:" <> n) s
         dcTag' n s = [dcTag n s]
         toIdentifierNode id' (Identifier txt scheme)
           | version == EPUB2 = [dcNode "identifier" !
@@ -1002,7 +1006,7 @@ metadataElement version md currentTime =
               txt]
           | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) :
               maybe [] ((\x -> [unode "meta" !
-                                [ ("refines",'#':id')
+                                [ ("refines","#" <> id')
                                 , ("property","identifier-type")
                                 , ("scheme","onix:codelist5")
                                 ]
@@ -1018,10 +1022,10 @@ metadataElement version md currentTime =
                (creatorRole creator >>= toRelator)) $ creatorText creator]
           | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","file-as")] $ x])
+                   [("refines","#" <> id'),("property","file-as")] $ x])
                    (creatorFileAs creator) ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","role"),
+                   [("refines","#" <> id'),("property","role"),
                      ("scheme","marc:relators")] $ x])
                    (creatorRole creator >>= toRelator)
         toTitleNode id' title
@@ -1033,16 +1037,16 @@ metadataElement version md currentTime =
           | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
               ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","file-as")] $ x])
+                   [("refines","#" <> id'),("property","file-as")] $ x])
                    (titleFileAs title) ++
               maybe [] (\x -> [unode "meta" !
-                   [("refines",'#':id'),("property","title-type")] $ x])
+                   [("refines","#" <> id'),("property","title-type")] $ x])
                    (titleType title)
         toDateNode id' date = [dcNode "date" !
              (("id",id') :
                 maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
                  dateText date]
-        schemeToOnix :: String -> String
+        schemeToOnix :: Text -> Text
         schemeToOnix "ISBN-10"              = "02"
         schemeToOnix "GTIN-13"              = "03"
         schemeToOnix "UPC"                  = "04"
@@ -1060,48 +1064,48 @@ metadataElement version md currentTime =
         schemeToOnix "OLCC"                 = "28"
         schemeToOnix _                      = "01"
 
-showDateTimeISO8601 :: UTCTime -> String
-showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
+showDateTimeISO8601 :: UTCTime -> Text
+showDateTimeISO8601 = T.pack . formatTime defaultTimeLocale "%FT%TZ"
 
 transformTag :: PandocMonad m
-             => Tag TS.Text
-             -> E m (Tag TS.Text)
+             => Tag T.Text
+             -> E m (Tag T.Text)
 transformTag tag@(TagOpen name attr)
   | name `elem` ["video", "source", "img", "audio"] &&
     isNothing (lookup "data-external" attr) = do
   let src = fromAttrib "src" tag
   let poster = fromAttrib "poster" tag
-  newsrc <- modifyMediaRef $ TS.unpack src
-  newposter <- modifyMediaRef $ TS.unpack poster
+  newsrc <- modifyMediaRef $ T.unpack src
+  newposter <- modifyMediaRef $ T.unpack poster
   let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
-              [("src", "../" <> newsrc) | not (TS.null newsrc)] ++
-              [("poster", "../" <> newposter) | not (TS.null newposter)]
+              [("src", "../" <> newsrc) | not (T.null newsrc)] ++
+              [("poster", "../" <> newposter) | not (T.null newposter)]
   return $ TagOpen name attr'
 transformTag tag = return tag
 
 modifyMediaRef :: PandocMonad m
                => FilePath
-               -> E m TS.Text
+               -> E m T.Text
 modifyMediaRef "" = return ""
 modifyMediaRef oldsrc = do
   media <- gets stMediaPaths
   case lookup oldsrc media of
-         Just (n,_) -> return $ TS.pack n
+         Just (n,_) -> return $ T.pack n
          Nothing    -> catchError
-           (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc
-               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack
+           (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc
+               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack
                          (("." <>) <$> (mbMime >>= extensionFromMimeType))
                newName <- getMediaNextNewName ext
                let newPath = "media/" ++ newName
                entry <- mkEntry newPath (B.fromChunks . (:[]) $ img)
                modify $ \st -> st{ stMediaPaths =
                             (oldsrc, (newPath, Just entry)):media}
-               return $ TS.pack newPath)
+               return $ T.pack newPath)
            (\e -> do
-                report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e)
-                return $ TS.pack oldsrc)
+                report $ CouldNotFetchResource (T.pack oldsrc) (tshow e)
+                return $ T.pack oldsrc)
 
-getMediaNextNewName :: PandocMonad m => String -> E m String
+getMediaNextNewName :: PandocMonad m => FilePath -> E m FilePath
 getMediaNextNewName ext = do
   nextId <- gets stMediaNextId
   modify $ \st -> st { stMediaNextId = nextId + 1 }
@@ -1128,11 +1132,11 @@ transformInline  :: PandocMonad m
                  -> Inline
                  -> E m Inline
 transformInline _opts (Image attr lab (src,tit)) = do
-    newsrc <- modifyMediaRef $ TS.unpack src
+    newsrc <- modifyMediaRef $ T.unpack src
     return $ Image attr lab ("../" <> newsrc, tit)
 transformInline opts x@(Math t m)
   | WebTeX url <- writerHTMLMathMethod opts = do
-    newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m))
+    newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m))
     let mathclass = if t == DisplayMath then "display" else "inline"
     return $ Span ("",["math",mathclass],[])
                 [Image nullAttr [x] ("../" <> newsrc, "")]
@@ -1143,40 +1147,26 @@ transformInline _opts (RawInline fmt raw)
   return $ RawInline fmt (renderTags' tags')
 transformInline _ x = return x
 
-(!) :: (t -> Element) -> [(String, String)] -> t -> Element
+(!) :: (t -> Element) -> [(Text, Text)] -> t -> Element
 (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
 
--- | Version of 'ppTopElement' that specifies UTF-8 encoding.
-ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
-  -- unEntity removes numeric  entities introduced by ppElement
-  -- (kindlegen seems to choke on these).
-  where unEntity [] = ""
-        unEntity ('&':'#':xs) =
-                   let (ds,ys) = break (==';') xs
-                       rest = drop 1 ys
-                   in  case safeRead (TS.pack $ "'\\" <> ds <> "'") of
-                          Just x  -> x : unEntity rest
-                          Nothing -> '&':'#':unEntity xs
-        unEntity (x:xs) = x : unEntity xs
-
 mediaTypeOf :: FilePath -> Maybe MimeType
 mediaTypeOf x =
   let mediaPrefixes = ["image", "video", "audio"] in
   case getMimeType x of
-    Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y
+    Just y | any (`T.isPrefixOf` y) mediaPrefixes -> Just y
     _      -> Nothing
 
 -- Returns filename for chapter number.
-showChapter :: Int -> String
-showChapter = printf "ch%03d.xhtml"
+showChapter :: Int -> Text
+showChapter = T.pack . printf "ch%03d.xhtml"
 
 -- Add identifiers to any headers without them.
 addIdentifiers :: WriterOptions -> [Block] -> [Block]
 addIdentifiers opts bs = evalState (mapM go bs) Set.empty
  where go (Header n (ident,classes,kvs) ils) = do
          ids <- get
-         let ident' = if TS.null ident
+         let ident' = if T.null ident
                          then uniqueIdent (writerExtensions opts) ils ids
                          else ident
          modify $ Set.insert ident'
@@ -1184,27 +1174,27 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty
        go x = return x
 
 -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
-normalizeDate' :: String -> Maybe String
-normalizeDate' = fmap TS.unpack . go . trim . TS.pack
+normalizeDate' :: Text -> Maybe Text
+normalizeDate' = go . T.strip
   where
     go xs
-      | TS.length xs == 4            -- YYY
-      , TS.all isDigit xs = Just xs
-      | (y, s) <- TS.splitAt 4 xs    -- YYY-MM
-      , Just ('-', m) <- TS.uncons s
-      , TS.length m == 2
-      , TS.all isDigit y && TS.all isDigit m = Just xs
+      | T.length xs == 4            -- YYYY
+      , T.all isDigit xs = Just xs
+      | (y, s) <- T.splitAt 4 xs    -- YYYY-MM
+      , Just ('-', m) <- T.uncons s
+      , T.length m == 2
+      , T.all isDigit y && T.all isDigit m = Just xs
       | otherwise = normalizeDate xs
 
-toRelator :: String -> Maybe String
+toRelator :: Text -> Maybe Text
 toRelator x
   | x `elem` relators = Just x
-  | otherwise         = lookup (map toLower x) relatorMap
+  | otherwise         = lookup (T.toLower x) relatorMap
 
-relators :: [String]
+relators :: [Text]
 relators = map snd relatorMap
 
-relatorMap :: [(String, String)]
+relatorMap :: [(Text, Text)]
 relatorMap =
            [("abridger", "abr")
            ,("actor", "act")
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 9334d6e9a..3b5d04427 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -25,15 +25,12 @@ import Data.ByteString.Base64 (encode)
 import Data.Char (isAscii, isControl, isSpace)
 import Data.Either (lefts, rights)
 import Data.List (intercalate)
-import Data.Text (Text, pack)
+import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Encoding as TE
 import Network.HTTP (urlEncode)
-import Text.XML.Light
-import qualified Text.XML.Light as X
-import qualified Text.XML.Light.Cursor as XC
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light as X
 
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
 import qualified Text.Pandoc.Class.PandocMonad as P
@@ -44,6 +41,7 @@ import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
 import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
                            makeSections, tshow, stringify)
 import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable)
+import Data.Generics (everywhere, mkT)
 
 -- | Data to be written at the end of the document:
 -- (foot)notes, URLs, references, images.
@@ -88,7 +86,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do
      (imgs,missing) <- get >>= (lift . fetchImages . imagesToFetch)
      let body' = replaceImagesWithAlt missing body
      let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
-     return $ pack $ xml_head ++ showContent fb2_xml ++ "\n"
+     return $ xml_head <> showContent fb2_xml <> "\n"
   where
   xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
   fb2_attrs =
@@ -100,8 +98,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do
 description :: PandocMonad m => Meta -> FBM m Content
 description meta' = do
   let genre = case lookupMetaString "genre" meta' of
-                "" -> el "genre" ("unrecognised" :: String)
-                s  -> el "genre" (T.unpack s)
+                "" -> el "genre" ("unrecognised" :: Text)
+                s  -> el "genre" s
   bt <- booktitle meta'
   let as = authors meta'
   dd <- docdate meta'
@@ -112,7 +110,7 @@ description meta' = do
                Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s]
                Just (MetaString s)        -> [el "lang" $ iso639 s]
                _                          -> []
-             where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639
+             where iso639 = T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639
   let coverimage url = do
         let img = Image nullAttr mempty (url, "")
         im <- insertImage InlineImage img
@@ -124,7 +122,7 @@ description meta' = do
   return $ el "description"
     [ el "title-info" (genre :
                       (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang))
-    , el "document-info" [el "program-used" ("pandoc" :: String)]
+    , el "document-info" [el "program-used" ("pandoc" :: Text)]
     ]
 
 booktitle :: PandocMonad m => Meta -> FBM m [Content]
@@ -137,15 +135,15 @@ authors meta' = cMap author (docAuthors meta')
 
 author :: [Inline] -> [Content]
 author ss =
-  let ws = words . cMap plain $ ss
-      email = el "email" <$> take 1 (filter ('@' `elem`) ws)
-      ws' = filter ('@' `notElem`) ws
+  let ws = T.words $ mconcat $ map plain ss
+      email = el "email" <$> take 1 (filter (T.any (=='@')) ws)
+      ws' = filter (not . T.any (== '@')) ws
       names = case ws' of
                 [nickname] -> [ el "nickname" nickname ]
                 [fname, lname] -> [ el "first-name" fname
                                     , el "last-name" lname ]
                 (fname:rest) -> [ el "first-name" fname
-                                , el "middle-name" (concat . init $ rest)
+                                , el "middle-name" (T.concat . init $ rest)
                                 , el "last-name" (last rest) ]
                 [] -> []
   in  list $ el "author" (names ++ email)
@@ -206,7 +204,7 @@ renderFootnotes = do
          el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
   where
     renderFN (n, idstr, cs) =
-        let fn_texts = el "title" (el "p" (show n)) : cs
+        let fn_texts = el "title" (el "p" (tshow n)) : cs
         in  el "section" ([uattr "id" idstr], fn_texts)
 
 -- | Fetch images and encode them for the FictionBook XML.
@@ -282,7 +280,7 @@ isMimeType s =
  where
    types =  ["text","image","audio","video","application","message","multipart"]
    valid c = isAscii c && not (isControl c) && not (isSpace c) &&
-             c `notElem` ("()<>@,;:\\\"/[]?=" :: String)
+             c `notElem` ("()<>@,;:\\\"/[]?=" :: [Char])
 
 footnoteID :: Int -> Text
 footnoteID i = "n" <> tshow i
@@ -306,7 +304,7 @@ blockToXml (Para [Image atr alt (src,tgt)])
   = insertImage NormalImage (Image atr alt (src,tit))
 blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss
 blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
-                             map (el "p" . el "code" . T.unpack) . T.lines $ s
+                             map (el "p" . el "code") . T.lines $ s
 blockToXml (RawBlock f str) =
   if f == Format "fb2"
     then
@@ -346,11 +344,11 @@ blockToXml (Table _ blkCapt specs thead tbody tfoot) = do
     c <- el "emphasis" <$> cMapM toXml caption
     return [el "table" (hd <> bd), el "p" c]
     where
-      mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content
+      mkrow :: PandocMonad m => Text -> [[Block]] -> [Alignment] -> FBM m Content
       mkrow tag cells aligns' =
         el "tr" <$> mapM (mkcell tag) (zip cells aligns')
       --
-      mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content
+      mkcell :: PandocMonad m => Text -> ([Block], Alignment) -> FBM m Content
       mkcell tag (cell, align) = do
         cblocks <- cMapM blockToXml cell
         return $ el tag ([align_attr align], cblocks)
@@ -424,7 +422,7 @@ toXml (Quoted DoubleQuote ss) = do
   inner <- cMapM toXml ss
   return $ [txt "“"] ++ inner ++ [txt "”"]
 toXml (Cite _ ss) = cMapM toXml ss  -- FIXME: support citation styles
-toXml (Code _ s) = return [el "code" $ T.unpack s]
+toXml (Code _ s) = return [el "code" s]
 toXml Space = return [txt " "]
 toXml SoftBreak = return [txt "\n"]
 toXml LineBreak = return [txt "\n"]
@@ -456,7 +454,7 @@ insertMath immode formula = do
        let imgurl = url <> T.pack (urlEncode $ T.unpack formula)
        let img = Image nullAttr alt (imgurl, "")
        insertImage immode img
-    _ -> return [el "code" $ T.unpack formula]
+    _ -> return [el "code" formula]
 
 insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content]
 insertImage immode (Image _ alt (url,ttl)) = do
@@ -471,31 +469,16 @@ insertImage immode (Image _ alt (url,ttl)) = do
          el "image" $
             [ attr ("l","href") ("#" <> fname)
             , attr ("l","type") (tshow immode)
-            , uattr "alt" (T.pack $ cMap plain alt) ]
+            , uattr "alt" (mconcat $ map plain alt) ]
             ++ ttlattr
 insertImage _ _ = error "unexpected inline instead of image"
 
 replaceImagesWithAlt :: [Text] -> Content -> Content
-replaceImagesWithAlt missingHrefs body =
-  let cur = XC.fromContent body
-      cur' = replaceAll cur
-  in  XC.toTree . XC.root $ cur'
+replaceImagesWithAlt missingHrefs = everywhere (mkT go)
   where
-  --
-    replaceAll :: XC.Cursor -> XC.Cursor
-    replaceAll c =
-        let n = XC.current c
-            c' = if isImage n && isMissing n
-                 then XC.modifyContent replaceNode c
-                 else c
-        in  case XC.nextDF c' of
-              (Just cnext) -> replaceAll cnext
-              Nothing      -> c'  -- end of document
-  --
-    isImage :: Content -> Bool
-    isImage (Elem e) = elName e == uname "image"
-    isImage _        = False
-  --
+    go c = if isMissing c
+              then replaceNode c
+              else c
     isMissing (Elem img@Element{}) =
         let imgAttrs = elAttribs img
             badAttrs = map (attr ("l","href")) missingHrefs
@@ -505,18 +488,18 @@ replaceImagesWithAlt missingHrefs body =
     replaceNode :: Content -> Content
     replaceNode n@(Elem img@Element{}) =
         let attrs = elAttribs img
-            alt = getAttrVal attrs (uname "alt")
+            alt = getAttrVal attrs (unqual "alt")
             imtype = getAttrVal attrs (qname "l" "type")
         in case (alt, imtype) of
              (Just alt', Just imtype') ->
-                 if imtype' == show NormalImage
+                 if imtype' == tshow NormalImage
                  then el "p" alt'
-                 else txt $ T.pack alt'
-             (Just alt', Nothing) -> txt $ T.pack alt'  -- no type attribute
+                 else txt alt'
+             (Just alt', Nothing) -> txt alt'  -- no type attribute
              _ -> n   -- don't replace if alt text is not found
     replaceNode n = n
   --
-    getAttrVal :: [X.Attr] -> QName -> Maybe String
+    getAttrVal :: [X.Attr] -> QName -> Maybe Text
     getAttrVal attrs name =
         case filter ((name ==) . attrKey) attrs of
            (a:_) -> Just (attrVal a)
@@ -524,7 +507,7 @@ replaceImagesWithAlt missingHrefs body =
 
 
 -- | Wrap all inlines with an XML tag (given its unqualified name).
-wrap :: PandocMonad m => String -> [Inline] -> FBM m Content
+wrap :: PandocMonad m => Text -> [Inline] -> FBM m Content
 wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
 
 -- " Create a singleton list.
@@ -532,31 +515,31 @@ list :: a -> [a]
 list = (:[])
 
 -- | Convert an 'Inline' to plaintext.
-plain :: Inline -> String
-plain (Str s)               = T.unpack s
-plain (Emph ss)             = cMap plain ss
-plain (Underline ss)        = cMap plain ss
-plain (Span _ ss)           = cMap plain ss
-plain (Strong ss)           = cMap plain ss
-plain (Strikeout ss)        = cMap plain ss
-plain (Superscript ss)      = cMap plain ss
-plain (Subscript ss)        = cMap plain ss
-plain (SmallCaps ss)        = cMap plain ss
-plain (Quoted _ ss)         = cMap plain ss
-plain (Cite _ ss)           = cMap plain ss  -- FIXME
-plain (Code _ s)            = T.unpack s
+plain :: Inline -> Text
+plain (Str s)               = s
+plain (Emph ss)             = mconcat $ map plain ss
+plain (Underline ss)        = mconcat $ map plain ss
+plain (Span _ ss)           = mconcat $ map plain ss
+plain (Strong ss)           = mconcat $ map plain ss
+plain (Strikeout ss)        = mconcat $ map plain ss
+plain (Superscript ss)      = mconcat $ map plain ss
+plain (Subscript ss)        = mconcat $ map plain ss
+plain (SmallCaps ss)        = mconcat $ map plain ss
+plain (Quoted _ ss)         = mconcat $ map plain ss
+plain (Cite _ ss)           = mconcat $ map plain ss  -- FIXME
+plain (Code _ s)            = s
 plain Space                 = " "
 plain SoftBreak             = " "
 plain LineBreak             = "\n"
-plain (Math _ s)            = T.unpack s
+plain (Math _ s)            = s
 plain (RawInline _ _)       = ""
-plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"])
-plain (Image _ alt _)       = cMap plain alt
+plain (Link _ text (url,_)) = mconcat (map plain text ++ [" <", url, ">"])
+plain (Image _ alt _)       = mconcat $ map plain alt
 plain (Note _)              = ""  -- FIXME
 
 -- | Create an XML element.
 el :: (Node t)
-   => String   -- ^ unqualified element name
+   => Text     -- ^ unqualified element name
    -> t        -- ^ node contents
    -> Content  -- ^ XML content
 el name cs = Elem $ unode name cs
@@ -569,22 +552,18 @@ spaceBeforeAfter cs =
 
 -- | Create a plain-text XML content.
 txt :: Text -> Content
-txt s = Text $ CData CDataText (T.unpack s) Nothing
+txt s = Text $ CData CDataText s Nothing
 
 -- | Create an XML attribute with an unqualified name.
-uattr :: String -> Text -> Text.XML.Light.Attr
-uattr name = Attr (uname name) . T.unpack
+uattr :: Text -> Text -> X.Attr
+uattr name = Attr (unqual name)
 
 -- | Create an XML attribute with a qualified name from given namespace.
-attr :: (String, String) -> Text -> Text.XML.Light.Attr
-attr (ns, name) = Attr (qname ns name) . T.unpack
-
--- | Unqualified name
-uname :: String -> QName
-uname name = QName name Nothing Nothing
+attr :: (Text, Text) -> Text -> X.Attr
+attr (ns, name) = Attr (qname ns name)
 
 -- | Qualified name
-qname :: String -> String -> QName
+qname :: Text -> Text -> QName
 qname ns name = QName name Nothing (Just ns)
 
 -- | Abbreviation for 'concatMap'.
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 06369b4db..101b236aa 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -40,9 +40,9 @@ import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy)
 import Text.Pandoc.Walk
 import Text.Pandoc.Writers.OpenDocument (writeOpenDocument)
 import Text.Pandoc.XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 import Text.TeXMath
-import Text.XML.Light
+import qualified Text.XML.Light as XL
 
 newtype ODTState = ODTState { stEntries :: [Entry]
                          }
@@ -181,18 +181,20 @@ updateStyleWithLang (Just lang) arch = do
                                         PandocXMLError "styles.xml" msg
                                     Right d -> return $
                                       toEntry "styles.xml" epochtime
-                                      ( fromStringLazy
+                                      ( fromTextLazy
+                                      . TL.fromStrict
                                       . ppTopElement
                                       . addLang lang $ d )
                             else return e) (zEntries arch)
   return arch{ zEntries = entries }
 
+-- TODO FIXME avoid this generic traversal!
 addLang :: Lang -> Element -> Element
 addLang lang = everywhere' (mkT updateLangAttr)
     where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
-                           = Attr n (T.unpack $ langLanguage lang)
+                           = Attr n (langLanguage lang)
           updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
-                           = Attr n (T.unpack $ langRegion lang)
+                           = Attr n (langRegion lang)
           updateLangAttr x = x
 
 -- | transform both Image and Math elements
@@ -238,8 +240,8 @@ transformPicMath _ (Math t math) = do
   case writeMathML dt <$> readTeX math of
        Left  _ -> return $ Math t math
        Right r -> do
-         let conf = useShortEmptyTags (const False) defaultConfigPP
-         let mathml = ppcTopElement conf r
+         let conf = XL.useShortEmptyTags (const False) XL.defaultConfigPP
+         let mathml = XL.ppcTopElement conf r
          epochtime <- floor `fmap` lift P.getPOSIXTime
          let dirname = "Formula-" ++ show (length entries) ++ "/"
          let fname = dirname ++ "content.xml"
diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs
index 8f60e70d5..0533d6c12 100644
--- a/src/Text/Pandoc/Writers/OOXML.hs
+++ b/src/Text/Pandoc/Writers/OOXML.hs
@@ -29,33 +29,32 @@ import Control.Monad.Except (throwError)
 import Text.Pandoc.Error
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Char8 as BL8
 import Data.Maybe (mapMaybe)
 import qualified Data.Text as T
+import Data.Text (Text)
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light as XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
 
-mknode :: Node t => String -> [(String,String)] -> t -> Element
+mknode :: Node t => Text -> [(Text,Text)] -> t -> Element
 mknode s attrs =
   add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) .  node (nodename s)
 
-mktnode :: String -> [(String,String)] -> T.Text -> Element
-mktnode s attrs = mknode s attrs . T.unpack
+mktnode :: Text -> [(Text,Text)] -> T.Text -> Element
+mktnode s attrs = mknode s attrs
 
-nodename :: String -> QName
+nodename :: Text -> QName
 nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix }
- where (name, prefix) = case break (==':') s of
-                             (xs,[])    -> (xs, Nothing)
-                             (ys, _:zs) -> (zs, Just ys)
+ where (name, prefix) = case T.break (==':') s of
+                          (xs,ys) -> case T.uncons ys of
+                                       Nothing     -> (xs, Nothing)
+                                       Just (_,zs) -> (zs, Just xs)
 
 toLazy :: B.ByteString -> BL.ByteString
 toLazy = BL.fromChunks . (:[])
 
 renderXml :: Element -> BL.ByteString
-renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <>
-  UTF8.fromStringLazy (showElement elt)
+renderXml elt = BL.fromStrict (UTF8.fromText (showTopElement elt))
 
 parseXml :: PandocMonad m => Archive -> Archive -> String -> m Element
 parseXml refArchive distArchive relpath =
@@ -70,25 +69,25 @@ parseXml refArchive distArchive relpath =
 
 -- Copied from Util
 
-attrToNSPair :: XML.Attr -> Maybe (String, String)
-attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
+attrToNSPair :: Attr -> Maybe (Text, Text)
+attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
 attrToNSPair _                                     = Nothing
 
 
 elemToNameSpaces :: Element -> NameSpaces
 elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
 
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
 elemName ns prefix name =
-  QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+  QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
 
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
 isElem ns prefix name element =
   let ns' = ns ++ elemToNameSpaces element
   in qName (elName element) == name &&
      qURI (elName element) == lookup prefix ns'
 
-type NameSpaces = [(String, String)]
+type NameSpaces = [(Text, Text)]
 
 -- | Scales the image to fit the page
 -- sizes are passed in emu
diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
index 0a7060895..5caeb0753 100644
--- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs
+++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs
@@ -20,16 +20,16 @@ import Control.Monad.Except (throwError, catchError)
 import Control.Monad.Reader
 import Control.Monad.State
 import Codec.Archive.Zip
-import Data.Char (toUpper)
 import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse)
 import Data.Default
+import Data.Text (Text)
 import qualified Data.Text as T
+import qualified Data.Text.Read
 import Data.Time (formatTime, defaultTimeLocale)
 import Data.Time.Clock (UTCTime)
 import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime)
 import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light as XML
 import Text.Pandoc.Definition
 import qualified Text.Pandoc.UTF8 as UTF8
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
@@ -48,6 +48,7 @@ import Text.DocTemplates (FromContext(lookupContext))
 import Text.TeXMath
 import Text.Pandoc.Writers.Math (convertMath)
 import Text.Pandoc.Writers.Powerpoint.Presentation
+import Text.Pandoc.Shared (tshow)
 import Skylighting (fromColor)
 
 -- |The 'EMU' type is used to specify sizes in English Metric Units.
@@ -84,10 +85,13 @@ getPresentationSize refArchive distArchive = do
   sldSize <- findChild (elemName ns "p" "sldSz") presElement
   cxS <- findAttr (QName "cx" Nothing Nothing) sldSize
   cyS <- findAttr (QName "cy" Nothing Nothing) sldSize
-  (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String)
-  (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String)
+  cx <- readTextAsInteger cxS
+  cy <- readTextAsInteger cyS
   return (cx `div` 12700, cy `div` 12700)
 
+readTextAsInteger :: Text -> Maybe Integer
+readTextAsInteger = either (const Nothing) (Just . fst) . Data.Text.Read.decimal
+
 data WriterEnv = WriterEnv { envRefArchive :: Archive
                            , envDistArchive :: Archive
                            , envUTCTime :: UTCTime
@@ -161,9 +165,6 @@ runP env st p = evalStateT (runReaderT p env) st
 
 --------------------------------------------------------------------
 
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText n = fmap T.pack . findAttr n
-
 monospaceFont :: Monad m => P m T.Text
 monospaceFont = do
   vars <- writerVariables <$> asks envOpts
@@ -171,10 +172,9 @@ monospaceFont = do
     Just s -> return s
     Nothing -> return "Courier"
 
--- Kept as string for XML.Light
-fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)]
+fontSizeAttributes :: Monad m => RunProps -> P m [(Text, Text)]
 fontSizeAttributes RunProps { rPropForceSize = Just sz } =
-  return [("sz", show $ sz * 100)]
+  return [("sz", tshow $ sz * 100)]
 fontSizeAttributes _ = return []
 
 copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive
@@ -365,7 +365,7 @@ shapeHasId :: NameSpaces -> T.Text -> Element -> Bool
 shapeHasId ns ident element
   | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element
   , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr
-  , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr =
+  , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr =
       nm == ident
   | otherwise = False
 
@@ -396,10 +396,10 @@ getShapeDimensions ns element
       ext <- findChild (elemName ns "a" "ext") xfrm
       cxS <- findAttr (QName "cx" Nothing Nothing) ext
       cyS <- findAttr (QName "cy" Nothing Nothing) ext
-      (x, _) <- listToMaybe $ reads xS
-      (y, _) <- listToMaybe $ reads yS
-      (cx, _) <- listToMaybe $ reads cxS
-      (cy, _) <- listToMaybe $ reads cyS
+      x <- readTextAsInteger xS
+      y <- readTextAsInteger yS
+      cx <- readTextAsInteger cxS
+      cy <- readTextAsInteger cyS
       return ((x `div` 12700, y `div` 12700),
               (cx `div` 12700, cy `div` 12700))
   | otherwise = Nothing
@@ -430,7 +430,7 @@ getContentShapeSize ns layout master
         Nothing -> do let mbSz =
                             findChild (elemName ns "p" "nvSpPr") sp >>=
                             findChild (elemName ns "p" "cNvPr") >>=
-                            findAttrText (QName "id" Nothing Nothing) >>=
+                            findAttr (QName "id" Nothing Nothing) >>=
                             flip getMasterShapeDimensionsById master
                       case mbSz of
                         Just sz' -> return sz'
@@ -450,8 +450,8 @@ buildSpTree ns spTreeElem newShapes =
         fn _        = True
 
 replaceNamedChildren :: NameSpaces
-                     -> String
-                     -> String
+                     -> Text
+                     -> Text
                      -> [Element]
                      -> Element
                      -> Element
@@ -654,10 +654,10 @@ createCaption contentShapeDimensions paraElements = do
                        ]
                      , mknode "p:spPr" []
                        [ mknode "a:xfrm" []
-                         [ mknode "a:off" [("x", show $ 12700 * x),
-                                           ("y", show $ 12700 * (y + cy - captionHeight))] ()
-                         , mknode "a:ext" [("cx", show $ 12700 * cx),
-                                           ("cy", show $ 12700 * captionHeight)] ()
+                         [ mknode "a:off" [("x", tshow $ 12700 * x),
+                                           ("y", tshow $ 12700 * (y + cy - captionHeight))] ()
+                         , mknode "a:ext" [("cx", tshow $ 12700 * cx),
+                                           ("cy", tshow $ 12700 * captionHeight)] ()
                          ]
                        , mknode "a:prstGeom" [("prst", "rect")]
                          [ mknode "a:avLst" [] ()
@@ -706,11 +706,13 @@ makePicElements layout picProps mInfo alt = do
                                      ,("noChangeAspect","1")] ()
   -- cNvPr will contain the link information so we do that separately,
   -- and register the link if necessary.
-  let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")]
+  let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo),
+                   ("id","0"),
+                   ("name","Picture 1")]
   cNvPr <- case picPropLink picProps of
     Just link -> do idNum <- registerLink link
                     return $ mknode "p:cNvPr" cNvPrAttr $
-                      mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] ()
+                      mknode "a:hlinkClick" [("r:id", "rId" <> tshow idNum)] ()
     Nothing   -> return $ mknode "p:cNvPr" cNvPrAttr ()
   let nvPicPr  = mknode "p:nvPicPr" []
                  [ cNvPr
@@ -718,13 +720,13 @@ makePicElements layout picProps mInfo alt = do
                  , mknode "p:nvPr" [] ()]
   let blipFill = mknode "p:blipFill" []
                  [ mknode "a:blip" [("r:embed", "rId" <>
-          show (mInfoLocalId mInfo))] ()
+                     tshow (mInfoLocalId mInfo))] ()
                  , mknode "a:stretch" [] $
                    mknode "a:fillRect" [] () ]
   let xfrm =    mknode "a:xfrm" []
-                [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] ()
-                , mknode "a:ext" [("cx",show dimX')
-                                 ,("cy",show dimY')] () ]
+                [ mknode "a:off" [("x", tshow xoff'), ("y", tshow yoff')] ()
+                , mknode "a:ext" [("cx", tshow dimX')
+                                 ,("cy", tshow dimY')] () ]
   let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
                  mknode "a:avLst" [] ()
   let ln =      mknode "a:ln" [("w","9525")]
@@ -763,7 +765,7 @@ paraElemToElements (Run rpr s) = do
             Just DoubleStrike -> [("strike", "dblStrike")]
             Nothing -> []) <>
         (case rBaseline rpr of
-            Just n -> [("baseline", show n)]
+            Just n -> [("baseline", tshow n)]
             Nothing -> []) <>
         (case rCap rpr of
             Just NoCapitals -> [("cap", "none")]
@@ -780,43 +782,44 @@ paraElemToElements (Run rpr s) = do
                    return $ case link of
                      InternalTarget _ ->
                        let linkAttrs =
-                             [ ("r:id", "rId" <> show idNum)
+                             [ ("r:id", "rId" <> tshow idNum)
                              , ("action", "ppaction://hlinksldjump")
                              ]
                        in [mknode "a:hlinkClick" linkAttrs ()]
                      -- external
                      ExternalTarget _ ->
                        let linkAttrs =
-                             [ ("r:id", "rId" <> show idNum)
+                             [ ("r:id", "rId" <> tshow idNum)
                              ]
                        in [mknode "a:hlinkClick" linkAttrs ()]
                  Nothing -> return []
   let colorContents = case rSolidFill rpr of
                         Just color ->
                           case fromColor color of
-                            '#':hx ->  [mknode "a:solidFill" []
-                                        [mknode "a:srgbClr" [("val", map toUpper hx)] ()]
-                                       ]
+                            '#':hx ->
+                              [mknode "a:solidFill" []
+                                [mknode "a:srgbClr"
+                                  [("val", T.toUpper $ T.pack hx)] ()]]
                             _ -> []
                         Nothing -> []
   codeFont <- monospaceFont
   let codeContents =
-        [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr]
+        [mknode "a:latin" [("typeface", codeFont)] () | rPropCode rpr]
   let propContents = linkProps <> colorContents <> codeContents
   return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents
-                                 , mknode "a:t" [] $ T.unpack s
+                                 , mknode "a:t" [] s
                                  ]]
 paraElemToElements (MathElem mathType texStr) = do
   isInSpkrNotes <- asks envInSpeakerNotes
   if isInSpkrNotes
     then paraElemToElements $ Run def $ unTeXString texStr
     else do res <- convertMath writeOMML mathType (unTeXString texStr)
-            case res of
+            case fromXLElement <$> res of
               Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r]
               Left (Str s) -> paraElemToElements (Run def s)
               Left _       -> throwError $ PandocShouldNeverHappenError "non-string math fallback"
 paraElemToElements (RawOOXMLParaElem str) = return
-  [Text (CData CDataRaw (T.unpack str) Nothing)]
+  [Text (CData CDataRaw str Nothing)]
 
 
 -- This is a bit of a kludge -- really requires adding an option to
@@ -824,9 +827,10 @@ paraElemToElements (RawOOXMLParaElem str) = return
 -- step at a time.
 addMathInfo :: Element -> Element
 addMathInfo element =
-  let mathspace = Attr { attrKey = QName "m" Nothing (Just "xmlns")
-                       , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
-                       }
+  let mathspace =
+        Attr { attrKey = QName "m" Nothing (Just "xmlns")
+             , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math"
+             }
   in add_attr mathspace element
 
 -- We look through the element to see if it contains an a14:m
@@ -849,13 +853,13 @@ surroundWithMathAlternate element =
 paragraphToElement :: PandocMonad m => Paragraph -> P m Element
 paragraphToElement par = do
   let
-    attrs = [("lvl", show $ pPropLevel $ paraProps par)] <>
+    attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <>
             (case pPropMarginLeft (paraProps par) of
-               Just px -> [("marL", show $ pixelsToEmu px)]
+               Just px -> [("marL", tshow $ pixelsToEmu px)]
                Nothing -> []
             ) <>
             (case pPropIndent (paraProps par) of
-               Just px -> [("indent", show $ pixelsToEmu px)]
+               Just px -> [("indent", tshow $ pixelsToEmu px)]
                Nothing -> []
             ) <>
             (case pPropAlign (paraProps par) of
@@ -867,7 +871,7 @@ paragraphToElement par = do
     props = [] <>
             (case pPropSpaceBefore $ paraProps par of
                Just px -> [mknode "a:spcBef" [] [
-                              mknode "a:spcPts" [("val", show $ 100 * px)] ()
+                              mknode "a:spcPts" [("val", tshow $ 100 * px)] ()
                               ]
                           ]
                Nothing -> []
@@ -910,7 +914,7 @@ shapeToElements layout (Pic picProps fp alt) = do
 shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$>
   graphicFrameToElements layout tbls cptn
 shapeToElements _ (RawOOXMLShape str) = return
-  [Text (CData CDataRaw (T.unpack str) Nothing)]
+  [Text (CData CDataRaw str Nothing)]
 shapeToElements layout shp = do
   element <- shapeToElement layout shp
   return [Elem element]
@@ -942,8 +946,10 @@ graphicFrameToElements layout tbls caption = do
             [mknode "p:ph" [("idx", "1")] ()]
           ]
         , mknode "p:xfrm" []
-          [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] ()
-          , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] ()
+          [ mknode "a:off" [("x", tshow $ 12700 * x),
+                            ("y", tshow $ 12700 * y)] ()
+          , mknode "a:ext" [("cx", tshow $ 12700 * cx),
+                            ("cy", tshow $ 12700 * cy)] ()
           ]
         ] <> elements
 
@@ -957,7 +963,7 @@ getDefaultTableStyle = do
   refArchive <- asks envRefArchive
   distArchive <- asks envDistArchive
   tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml"
-  return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst
+  return $ findAttr (QName "def" Nothing Nothing) tblStyleLst
 
 graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element
 graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
@@ -995,7 +1001,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
   let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells
 
   let mkgridcol w = mknode "a:gridCol"
-                       [("w", show ((12700 * w) :: Integer))] ()
+                       [("w", tshow ((12700 * w) :: Integer))] ()
   let hasHeader = not (all null hdrCells)
 
   mbDefTblStyle <- getDefaultTableStyle
@@ -1004,7 +1010,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do
                  , ("bandRow", if tblPrBandRow tblPr then "1" else "0")
                  ] (case mbDefTblStyle of
                       Nothing -> []
-                      Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty])
+                      Just sty -> [mknode "a:tableStyleId" [] sty])
 
   return $ mknode "a:graphic" []
     [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")]
@@ -1037,7 +1043,7 @@ findPHType ns spElem phType
         -- if it's a named PHType, we want to check that the attribute
         -- value matches.
         Just phElem | (PHType tp) <- phType ->
-                        case findAttrText (QName "type" Nothing Nothing) phElem of
+                        case findAttr (QName "type" Nothing Nothing) phElem of
                           Just tp' -> tp == tp'
                           Nothing -> False
         -- if it's an ObjType, we want to check that there is NO
@@ -1204,7 +1210,7 @@ getSlideNumberFieldId notesMaster
   , Just txBody <- findChild (elemName ns "p" "txBody") sp
   , Just p <- findChild (elemName ns "a" "p") txBody
   , Just fld <- findChild (elemName ns "a" "fld") p
-  , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld =
+  , Just fldId <- findAttr (QName "id" Nothing Nothing) fld =
       return fldId
   | otherwise = throwError $
                 PandocSomeError
@@ -1283,11 +1289,11 @@ speakerNotesSlideNumber pgNum fieldId =
     [ mknode "a:bodyPr" [] ()
     , mknode "a:lstStyle" [] ()
     , mknode "a:p" []
-      [ mknode "a:fld" [ ("id", T.unpack fieldId)
+      [ mknode "a:fld" [ ("id", fieldId)
                        , ("type", "slidenum")
                        ]
         [ mknode "a:rPr" [("lang", "en-US")] ()
-        , mknode "a:t" [] (show pgNum)
+        , mknode "a:t" [] (tshow pgNum)
         ]
       , mknode "a:endParaRPr" [("lang", "en-US")] ()
       ]
@@ -1339,7 +1345,7 @@ getSlideIdNum sldId = do
     Just n -> return n
     Nothing -> throwError $
                PandocShouldNeverHappenError $
-               "Slide Id " <> T.pack (show sldId) <> " not found."
+               "Slide Id " <> tshow sldId <> " not found."
 
 slideNum :: PandocMonad m => Slide -> P m Int
 slideNum slide = getSlideIdNum $ slideId slide
@@ -1356,7 +1362,7 @@ slideToRelId :: PandocMonad m => Slide -> P m T.Text
 slideToRelId slide = do
   n <- slideNum slide
   offset <- asks envSlideIdOffset
-  return $ "rId" <> T.pack (show $ n + offset)
+  return $ "rId" <> tshow (n + offset)
 
 
 data Relationship = Relationship { relId :: Int
@@ -1368,13 +1374,11 @@ elementToRel :: Element -> Maybe Relationship
 elementToRel element
   | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing =
       do rId <- findAttr (QName "Id" Nothing Nothing) element
-         numStr <- stripPrefix "rId" rId
-         num <- case reads numStr :: [(Int, String)] of
-           (n, _) : _ -> Just n
-           []         -> Nothing
-         type' <- findAttrText (QName "Type" Nothing Nothing) element
+         numStr <- T.stripPrefix "rId" rId
+         num <- fromIntegral <$> readTextAsInteger numStr
+         type' <- findAttr (QName "Type" Nothing Nothing) element
          target <- findAttr (QName "Target" Nothing Nothing) element
-         return $ Relationship num type' target
+         return $ Relationship num type' (T.unpack target)
   | otherwise = Nothing
 
 slideToPresRel :: PandocMonad m => Slide -> P m Relationship
@@ -1463,10 +1467,9 @@ topLevelRelsEntry :: PandocMonad m => P m Entry
 topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels
 
 relToElement :: Relationship -> Element
-relToElement rel = mknode "Relationship" [ ("Id", "rId" <>
-    show (relId rel))
-                                         , ("Type", T.unpack $ relType rel)
-                                         , ("Target", relTarget rel) ] ()
+relToElement rel = mknode "Relationship" [ ("Id", "rId" <> tshow (relId rel))
+                                         , ("Type", relType rel)
+                                         , ("Target", T.pack $ relTarget rel) ] ()
 
 relsToElement :: [Relationship] -> Element
 relsToElement rels = mknode "Relationships"
@@ -1501,7 +1504,8 @@ slideToSpeakerNotesEntry slide = do
       Just element | Just notesIdNum <- mbNotesIdNum ->
                        Just <$>
                        elemToEntry
-                       ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml")
+                       ("ppt/notesSlides/notesSlide" <> show notesIdNum <>
+                        ".xml")
                        element
       _ -> return Nothing
 
@@ -1514,7 +1518,7 @@ slideToSpeakerNotesRelElement slide@Slide{} = do
     [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")]
     [ mknode "Relationship" [ ("Id", "rId2")
                             , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
-                            , ("Target", "../slides/slide" <> show idNum <> ".xml")
+                            , ("Target", "../slides/slide" <> tshow idNum <> ".xml")
                             ] ()
     , mknode "Relationship" [ ("Id", "rId1")
                             , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster")
@@ -1547,15 +1551,15 @@ linkRelElement :: PandocMonad m => (Int, LinkTarget) -> P m Element
 linkRelElement (rIdNum, InternalTarget targetId) = do
   targetIdNum <- getSlideIdNum targetId
   return $
-    mknode "Relationship" [ ("Id", "rId" <> show rIdNum)
+    mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum)
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide")
-                          , ("Target", "slide" <> show targetIdNum <> ".xml")
+                          , ("Target", "slide" <> tshow targetIdNum <> ".xml")
                           ] ()
 linkRelElement (rIdNum, ExternalTarget (url, _)) =
   return $
-    mknode "Relationship" [ ("Id", "rId" <> show rIdNum)
+    mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum)
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink")
-                          , ("Target", T.unpack url)
+                          , ("Target", url)
                           , ("TargetMode", "External")
                           ] ()
 
@@ -1567,10 +1571,10 @@ mediaRelElement mInfo =
   let ext = fromMaybe "" (mInfoExt mInfo)
   in
     mknode "Relationship" [ ("Id", "rId" <>
-      show (mInfoLocalId mInfo))
+      tshow (mInfoLocalId mInfo))
                           , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image")
                           , ("Target", "../media/image" <>
-      show (mInfoGlobalId mInfo) <> T.unpack ext)
+      tshow (mInfoGlobalId mInfo) <> ext)
                           ] ()
 
 speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element)
@@ -1580,7 +1584,7 @@ speakerNotesSlideRelElement slide = do
   return $ case M.lookup idNum mp of
     Nothing -> Nothing
     Just n ->
-      let target = "../notesSlides/notesSlide" <> show n <> ".xml"
+      let target = "../notesSlides/notesSlide" <> tshow n <> ".xml"
       in Just $
          mknode "Relationship" [ ("Id", "rId2")
                                , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide")
@@ -1619,9 +1623,9 @@ slideToSlideRelElement slide = do
 slideToSldIdElement :: PandocMonad m => Slide -> P m Element
 slideToSldIdElement slide = do
   n <- slideNum slide
-  let id' = show $ n + 255
+  let id' = tshow $ n + 255
   rId <- slideToRelId slide
-  return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] ()
+  return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] ()
 
 presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element
 presentationToSldIdLst (Presentation _ slides) = do
@@ -1646,7 +1650,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do
       notesMasterElem =  mknode "p:notesMasterIdLst" []
                          [ mknode
                            "p:NotesMasterId"
-                           [("r:id", "rId" <> show notesMasterRId)]
+                           [("r:id", "rId" <> tshow notesMasterRId)]
                            ()
                          ]
 
@@ -1702,17 +1706,17 @@ docPropsElement docProps = do
     ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
     ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
     $
-      mknode "dc:title" [] (maybe "" T.unpack $ dcTitle docProps)
+      mknode "dc:title" [] (fromMaybe "" $ dcTitle docProps)
     :
-      mknode "dc:creator" [] (maybe "" T.unpack $ dcCreator docProps)
+      mknode "dc:creator" [] (fromMaybe "" $ dcCreator docProps)
     :
-      mknode "cp:keywords" [] (T.unpack keywords)
-    : ( [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps | isJust (dcSubject docProps)])
-    <> ( [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps | isJust (dcDescription docProps)])
-    <> ( [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps | isJust (cpCategory docProps)])
+      mknode "cp:keywords" [] keywords
+    : ( [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps | isJust (dcSubject docProps)])
+    <> ( [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps | isJust (dcDescription docProps)])
+    <> ( [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps | isJust (cpCategory docProps)])
     <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x
-             , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
-             ]) (formatTime defaultTimeLocale "%FT%XZ" utctime)
+              , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x
+              ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime)
 
 docPropsToEntry :: PandocMonad m => DocProps -> P m Entry
 docPropsToEntry docProps = docPropsElement docProps >>=
@@ -1723,8 +1727,8 @@ docCustomPropsElement :: PandocMonad m => DocProps -> P m Element
 docCustomPropsElement docProps = do
   let mkCustomProp (k, v) pid = mknode "property"
          [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
-         ,("pid", show pid)
-         ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v)
+         ,("pid", tshow pid)
+         ,("name", k)] $ mknode "vt:lpwstr" [] v
   return $ mknode "Properties"
           [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties")
           ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes")
@@ -1743,7 +1747,7 @@ viewPropsElement = do
   distArchive <- asks envDistArchive
   viewPrElement <- parseXml refArchive distArchive "ppt/viewProps.xml"
   -- remove  "lastView" if it exists:
-  let notLastView :: Text.XML.Light.Attr -> Bool
+  let notLastView :: XML.Attr -> Bool
       notLastView attr =
           qName (attrKey attr) /= "lastView"
   return $
@@ -1755,15 +1759,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml"
 defaultContentTypeToElem :: DefaultContentType -> Element
 defaultContentTypeToElem dct =
   mknode "Default"
-  [("Extension", T.unpack $ defContentTypesExt dct),
-    ("ContentType", T.unpack $ defContentTypesType dct)]
+  [("Extension", defContentTypesExt dct),
+    ("ContentType", defContentTypesType dct)]
   ()
 
 overrideContentTypeToElem :: OverrideContentType -> Element
 overrideContentTypeToElem oct =
   mknode "Override"
-  [("PartName", overrideContentTypesPart oct),
-    ("ContentType", T.unpack $ overrideContentTypesType oct)]
+  [("PartName", T.pack $ overrideContentTypesPart oct),
+   ("ContentType", overrideContentTypesType oct)]
   ()
 
 contentTypesToElement :: ContentTypes -> Element
@@ -1821,7 +1825,8 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath]
 getSpeakerNotesFilePaths = do
   mp <- asks envSpeakerNotesIdMap
   let notesIdNums = M.elems mp
-  return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums
+  return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml")
+               notesIdNums
 
 presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes
 presentationToContentTypes p@(Presentation _ slides) = do
@@ -1885,11 +1890,11 @@ getContentType fp
   | otherwise = Nothing
 
--- Kept as String for XML.Light
-autoNumAttrs :: ListAttributes -> [(String, String)]
+-- Attribute names and values are Text
+autoNumAttrs :: ListAttributes -> [(Text, Text)]
 autoNumAttrs (startNum, numStyle, numDelim) =
   numAttr <> typeAttr
   where
-    numAttr = [("startAt", show startNum) | startNum /= 1]
+    numAttr = [("startAt", tshow startNum) | startNum /= 1]
     typeAttr = [("type", typeString <> delimString)]
     typeString = case numStyle of
       Decimal -> "arabic"
diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs
new file mode 100644
index 000000000..38e4df218
--- /dev/null
+++ b/src/Text/Pandoc/XML/Light.hs
@@ -0,0 +1,586 @@
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+   Module      : Text.Pandoc.XML.Light
+   Copyright   : Copyright (C) 2021 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+xml-light, which we used in pandoc's XML-based readers, has
+some limitations: in particular, it produces nodes with String
+instead of Text, and the parser falls over on processing instructions
+(see #7091).
+
+This module exports much of the API of xml-light, but using Text instead
+of String. In addition, the xml-light parsers are replaced by xml-conduit's
+well-tested parser.  (The xml-conduit types are mapped to types
+isomorphic to xml-light's, to avoid the need for massive code modifications
+elsewhere.)  Bridge functions to map xml-light types to this module's
+types are also provided (since libraries like texmath still use xml-light).
+
+Another advantage of the xml-conduit parser is that it gives us
+detailed information on xml parse errors.
+
+In the future we may want to move to using xml-conduit or another
+xml library in the code base, but this change gives us
+better performance and accuracy without much change in the
+code that used xml-light.
+-}
+module Text.Pandoc.XML.Light
+  ( -- * Basic types, duplicating those from xml-light but with Text
+    -- instead of String
+    Line
+  , Content(..)
+  , Element(..)
+  , Attr(..)
+  , CData(..)
+  , CDataKind(..)
+  , QName(..)
+  , Node(..)
+  , unode
+  , unqual
+  , add_attr
+  , add_attrs
+    -- * Conversion functions from xml-light types
+  , fromXLQName
+  , fromXLCData
+  , fromXLElement
+  , fromXLAttr
+  , fromXLContent
+    -- * Replacement for xml-light's Text.XML.Proc
+  , strContent
+  , onlyElems
+  , elChildren
+  , onlyText
+  , findChildren
+  , filterChildren
+  , filterChildrenName
+  , findChild
+  , filterChild
+  , filterChildName
+  , findElement
+  , filterElement
+  , filterElementName
+  , findElements
+  , filterElements
+  , filterElementsName
+  , findAttr
+  , lookupAttr
+  , lookupAttrBy
+  , findAttrBy
+    -- * Replacement for xml-light's Text.XML.Output
+  , ppTopElement
+  , ppElement
+  , ppContent
+  , ppcElement
+  , ppcContent
+  , showTopElement
+  , showElement
+  , showContent
+  , useShortEmptyTags
+  , defaultConfigPP
+  , ConfigPP(..)
+    -- * Replacement for xml-light's Text.XML.Input
+  , parseXMLElement
+  , parseXMLContents
+  ) where
+
+import qualified Control.Exception as E
+import qualified Text.XML as Conduit
+import Text.XML.Unresolved (InvalidEventStream(..))
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText)
+import qualified Data.Map as M
+import Data.Data (Data)
+import Data.Typeable (Typeable)
+import Data.Maybe (mapMaybe, listToMaybe)
+import Data.List(find)
+import qualified Text.XML.Light as XL
+
+-- | Replacement for xml-light's @parseXMLDoc@; a parse failure is returned as @Left@.
+parseXMLElement :: TL.Text -> Either T.Text Element
+parseXMLElement t =
+  elementToElement .  Conduit.documentRoot <$>
+    either (Left . T.pack . E.displayException) Right
+    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
+
+parseXMLContents :: TL.Text -> Either T.Text [Content]
+parseXMLContents t =
+  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
+    Left e ->
+      case E.fromException e of
+        Just (ContentAfterRoot _) ->
+          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
+        _ -> Left . T.pack . E.displayException $ e
+    Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x]
+
+elementToElement :: Conduit.Element -> Element
+elementToElement (Conduit.Element name attribMap nodes) =
+  Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
+ where
+  attrs = map (\(n,v) -> Attr (nameToQname n) v) $
+              M.toList attribMap
+  nameToQname (Conduit.Name localName mbns mbpref) =
+    case mbpref of
+      Nothing ->
+        case T.stripPrefix "xmlns:" localName of
+          Just rest -> QName rest mbns (Just "xmlns")
+          Nothing   -> QName localName mbns mbpref
+      _ -> QName localName mbns mbpref
+
+nodeToContent :: Conduit.Node -> Maybe Content
+nodeToContent (Conduit.NodeElement el) =
+  Just (Elem (elementToElement el))
+nodeToContent (Conduit.NodeContent t) =
+  Just (Text (CData CDataText t Nothing))
+nodeToContent _ = Nothing
+
+unqual :: Text -> QName
+unqual x = QName x Nothing Nothing
+
+-- | Add an attribute to an element.
+add_attr :: Attr -> Element -> Element
+add_attr a e = add_attrs [a] e
+
+-- | Add some attributes to an element.
+add_attrs :: [Attr] -> Element -> Element
+add_attrs as e = e { elAttribs = as ++ elAttribs e }
+
+--
+-- type definitions lightly modified from xml-light
+--
+
+-- | A line is an Integer
+type Line     = Integer
+
+-- | XML content
+data Content  = Elem Element
+              | Text CData
+              | CRef Text
+                deriving (Show, Typeable, Data)
+
+-- | XML elements
+data Element  = Element {
+                  elName      :: QName,
+                  elAttribs   :: [Attr],
+                  elContent   :: [Content],
+                  elLine      :: Maybe Line
+                } deriving (Show, Typeable, Data)
+
+-- | XML attributes
+data Attr     = Attr {
+                  attrKey :: QName,
+                  attrVal :: Text
+                } deriving (Eq, Ord, Show, Typeable, Data)
+
+-- | XML CData
+data CData    = CData {
+                  cdVerbatim  :: CDataKind,
+                  cdData      :: Text,
+                  cdLine      :: Maybe Line
+                } deriving (Show, Typeable, Data)
+
+data CDataKind
+ = CDataText      -- ^ Ordinary character data; pretty printer escapes &, < etc.
+ | CDataVerbatim  -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[..
+ | CDataRaw       -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up.
+   deriving ( Eq, Show, Typeable, Data )
+
+-- | XML qualified names
+data QName    = QName {
+                  qName   :: Text,
+                  qURI    :: Maybe Text,
+                  qPrefix :: Maybe Text
+                } deriving (Show, Typeable, Data)
+
+
+instance Eq QName where
+  q1 == q2  = compare q1 q2 == EQ
+
+instance Ord QName where
+  compare q1 q2 =
+    case compare (qName q1) (qName q2) of
+      EQ  -> case (qURI q1, qURI q2) of
+               (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2)
+               (u1,u2)           -> compare u1 u2
+      x   -> x
+
+class Node t where
+ node :: QName -> t -> Element
+
+instance Node ([Attr],[Content]) where
+  node n (attrs,cont) = Element { elName     = n
+                                , elAttribs  = attrs
+                                , elContent  = cont
+                                , elLine     = Nothing
+                                }
+-- Attributes only, or nothing at all: the element gets no children.
+instance Node [Attr]             where node n as   = node n (as,[]::[Content])
+instance Node Attr               where node n a    = node n [a]
+instance Node ()                 where node n ()   = node n ([]::[Attr])
+-- Children given as 'Content'; missing attributes default to none.
+instance Node [Content]          where node n cs     = node n ([]::[Attr],cs)
+instance Node Content            where node n c      = node n [c]
+instance Node ([Attr],Content)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Content)     where node n (a,c)  = node n ([a],[c])
+-- Children given as 'Element's, each wrapped in 'Elem'.
+instance Node ([Attr],[Element]) where
+  node n (as,cs) = node n (as,map Elem cs)
+
+instance Node ([Attr],Element)   where node n (as,c) = node n (as,[c])
+instance Node (Attr,Element)     where node n (a,c)  = node n ([a],c)
+instance Node [Element]          where node n es     = node n ([]::[Attr],es)
+instance Node Element            where node n e      = node n [e]
+-- Children given as 'CData', each wrapped in 'Text'.
+instance Node ([Attr],[CData])   where
+  node n (as,cs) = node n (as,map Text cs)
+
+instance Node ([Attr],CData)     where node n (as,c) = node n (as,[c])
+instance Node (Attr,CData)       where node n (a,c)  = node n ([a],c)
+instance Node [CData]            where node n es     = node n ([]::[Attr],es)
+instance Node CData              where node n e      = node n [e]
+-- Children given as a single 'Text', stored as ordinary 'CDataText' data.
+instance Node ([Attr],Text)      where
+  node n (as,t) = node n (as, CData { cdVerbatim = CDataText
+                                    , cdData = t
+                                    , cdLine = Nothing })
+
+instance Node (Attr,Text )       where node n (a,t)  = node n ([a],t)
+instance Node Text               where node n t      = node n ([]::[Attr],t)
+
+-- | Build an element whose name is the given text, left unqualified.
+unode :: Node t => Text -> t -> Element
+unode name = node (unqual name)
+
+--
+-- conversion from xml-light
+--
+
+fromXLQName :: XL.QName -> QName
+fromXLQName qn = QName { qName   = T.pack (XL.qName qn)
+                       , qURI    = fmap T.pack (XL.qURI qn)
+                       , qPrefix = fmap T.pack (XL.qPrefix qn) }
+
+-- Each xml-light kind maps to the constructor of the same name.
+fromXLCData :: XL.CData -> CData
+fromXLCData cd = CData (kind (XL.cdVerbatim cd)) (T.pack (XL.cdData cd)) (XL.cdLine cd)
+  where kind XL.CDataText     = CDataText
+        kind XL.CDataVerbatim = CDataVerbatim
+        kind XL.CDataRaw      = CDataRaw
+
+fromXLElement :: XL.Element -> Element
+fromXLElement el =
+  Element { elName = fromXLQName (XL.elName el), elLine = XL.elLine el
+          , elAttribs = fromXLAttr <$> XL.elAttribs el
+          , elContent = fromXLContent <$> XL.elContent el }
+
+fromXLAttr :: XL.Attr -> Attr
+fromXLAttr (XL.Attr key val) = Attr { attrKey = fromXLQName key, attrVal = T.pack val }
+
+fromXLContent :: XL.Content -> Content
+fromXLContent content = case content of
+  XL.Elem el -> Elem (fromXLElement el)
+  XL.Text cd -> Text (fromXLCData cd)
+  XL.CRef s  -> CRef (T.pack s)
+
+--
+-- copied from xml-light Text.XML.Proc
+--
+
+-- | The concatenated character data found directly inside an element;
+-- child elements and references are skipped.
+strContent         :: Element -> Text
+strContent e        = mconcat [ cdData t | Text t <- elContent e ]
+
+-- | Keep only the elements of a content list.
+onlyElems          :: [Content] -> [Element]
+onlyElems           = concatMap (\c -> case c of { Elem x -> [x]; _ -> [] })
+
+-- | The child elements of a parent.
+elChildren         :: Element -> [Element]
+elChildren          = onlyElems . elContent
+
+-- | Keep only the character data of a content list.
+onlyText           :: [Content] -> [CData]
+onlyText            = concatMap (\c -> case c of { Text x -> [x]; _ -> [] })
+
+-- | All immediate child elements carrying the given name.
+findChildren       :: QName -> Element -> [Element]
+findChildren q      = filterChildren (\child -> q == elName child)
+
+-- | The immediate child elements that satisfy a predicate.
+filterChildren       :: (Element -> Bool) -> Element -> [Element]
+filterChildren p e    = [ child | Elem child <- elContent e, p child ]
+
+
+-- | The immediate child elements whose names satisfy a predicate.
+filterChildrenName      :: (QName -> Bool) -> Element -> [Element]
+filterChildrenName p e   = [ child | Elem child <- elContent e, p (elName child) ]
+
+
+-- | The first immediate child carrying the given name, if any.
+findChild          :: QName -> Element -> Maybe Element
+findChild q         = listToMaybe . findChildren q
+
+-- | The first immediate child satisfying a predicate, if any.
+filterChild          :: (Element -> Bool) -> Element -> Maybe Element
+filterChild p         = listToMaybe . filterChildren p
+
+-- | The first immediate child whose name satisfies a predicate, if any.
+filterChildName      :: (QName -> Bool) -> Element -> Maybe Element
+filterChildName p     = listToMaybe . filterChildrenName p
+
+-- | The left-most element carrying the given name, if any.
+findElement        :: QName -> Element -> Maybe Element
+findElement q       = listToMaybe . findElements q
+
+-- | The left-most element satisfying a predicate, if any.
+filterElement        :: (Element -> Bool) -> Element -> Maybe Element
+filterElement p       = listToMaybe . filterElements p
+
+-- | The left-most element whose name satisfies a predicate, if any.
+filterElementName     :: (QName -> Bool) -> Element -> Maybe Element
+filterElementName p    = listToMaybe . filterElementsName p
+
+-- | All non-nested occurrences of elements with the given name
+-- (i.e., once an element has matched, its children are not
+-- searched for further occurrences).
+findElements       :: QName -> Element -> [Element]
+findElements qn = filterElementsName (qn ==)
+
+-- | All non-nested occurrences of elements satisfying a predicate
+-- (i.e., once an element has matched, its children are not
+-- searched for further occurrences).  The start element is tested too.
+filterElements       :: (Element -> Bool) -> Element -> [Element]
+filterElements p e =
+  if p e then [e]
+         else concatMap (filterElements p) (onlyElems (elContent e))
+
+-- | All non-nested occurrences of elements whose names satisfy a predicate
+-- (i.e., once an element has matched, its children are not
+-- searched for further occurrences).
+filterElementsName       :: (QName -> Bool) -> Element -> [Element]
+filterElementsName p = filterElements (p . elName)
+
+-- | The value of the named attribute on an element, if present.
+findAttr          :: QName -> Element -> Maybe Text
+findAttr x         = lookupAttr x . elAttribs
+
+-- | The value of the named attribute in an attribute list, if present.
+lookupAttr        :: QName -> [Attr] -> Maybe Text
+lookupAttr x as    = lookupAttrBy (x ==) as
+
+-- | The value of the first attribute whose name satisfies the predicate.
+lookupAttrBy       :: (QName -> Bool) -> [Attr] -> Maybe Text
+lookupAttrBy p      = fmap attrVal . find (p . attrKey)
+
+-- | The value of the first attribute on an element whose name
+-- satisfies the predicate.
+findAttrBy         :: (QName -> Bool) -> Element -> Maybe Text
+findAttrBy p        = lookupAttrBy p . elAttribs
+
+
+--
+-- duplicates functions from Text.XML.Output
+--
+
+-- | The XML 1.0 declaration, announcing UTF-8 encoding.
+xmlHeader :: Text
+xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+
+
+--------------------------------------------------------------------------------
+data ConfigPP = ConfigPP
+  { shortEmptyTag :: QName -> Bool
+  , prettify      :: Bool
+  }
+
+-- | Default pretty printing configuration.
+--  * Always abbreviate empty tags; add no extra white-space.
+defaultConfigPP :: ConfigPP
+defaultConfigPP = ConfigPP { shortEmptyTag = const True
+                           , prettify      = False
+                           }
+
+-- | The predicate specifies for which empty tags we should use XML's
+-- abbreviated notation <TAG />.  This is useful if we are working with
+-- some XML-ish standards (such as certain versions of HTML) where some
+-- empty tags should always be displayed in the <TAG></TAG> form.
+useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP
+useShortEmptyTags p c = c { shortEmptyTag = p }
+
+
+-- | Specify if we should use extra white-space to make document more readable.
+-- WARNING: This adds additional white-space to text elements,
+-- and so it may change the meaning of the document.
+useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP
+useExtraWhiteSpace p c  = c { prettify = p }
+
+-- | A configuration that tries to make things pretty
+-- (possibly at the cost of changing the semantics a bit
+-- through adding white space.)
+prettyConfigPP     :: ConfigPP
+prettyConfigPP      = useExtraWhiteSpace True defaultConfigPP
+
+
+--------------------------------------------------------------------------------
+
+
+-- | Render a whole document (XML header plus root element) using
+-- 'prettyConfigPP'.  The output is faithful except that whitespace
+-- may be added in non-verbatim character data.
+ppTopElement       :: Element -> Text
+ppTopElement e      = ppcTopElement prettyConfigPP e
+
+-- | Render a single element using 'prettyConfigPP'.
+ppElement          :: Element -> Text
+ppElement e         = ppcElement prettyConfigPP e
+
+-- | Render a single piece of content using 'prettyConfigPP'.
+ppContent          :: Content -> Text
+ppContent x         = ppcContent prettyConfigPP x
+
+-- | Render a document under the given configuration: the XML header
+-- and the root element, each terminated by a newline.  Whitespace may
+-- be added in ordinary character data when the configuration prettifies.
+ppcTopElement      :: ConfigPP -> Element -> Text
+ppcTopElement c e   = T.unlines (xmlHeader : [ppcElement c e])
+
+-- | Render one element under the given configuration.
+ppcElement         :: ConfigPP -> Element -> Text
+ppcElement c e      = TL.toStrict (toLazyText (ppElementS c mempty e))
+
+-- | Render one piece of content under the given configuration.
+ppcContent         :: ConfigPP -> Content -> Text
+ppcContent c x      = TL.toStrict (toLazyText (ppContentS c mempty x))
+
+ppcCData           :: ConfigPP -> CData -> Text
+ppcCData c cd       = TL.toStrict (toLazyText (ppCDataS c mempty cd))
+
+type Indent = Builder
+
+-- | Render one piece of content into a 'Builder'; elements and
+-- character data receive the given indent, references do not.
+ppContentS         :: ConfigPP -> Indent -> Content -> Builder
+ppContentS c i (Elem e) = ppElementS c i e
+ppContentS c i (Text t) = ppCDataS c i t
+ppContentS _ _ (CRef r) = showCRefS r
+
+-- | Render an element at the given indent: start tag, content, end tag.
+ppElementS         :: ConfigPP -> Indent -> Element -> Builder
+ppElementS c i e = i <> tagStart name (elAttribs e) <> rest
+  where
+    name    = elName e
+    (nl,sp) = if prettify c then ("\n","  ") else ("","")
+    child x = ppContentS c (sp <> i) x <> nl
+    rest    = case elContent e of
+      [] | "?" `T.isPrefixOf` qName name -> fromText " ?>"
+         | shortEmptyTag c name  -> fromText " />"
+      [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name
+      cs -> singleton '>' <> nl <> foldMap child cs <> i <> tagEnd name
+
+-- | Character data; prettified ordinary text is re-indented after newlines.
+ppCDataS           :: ConfigPP -> Indent -> CData -> Builder
+ppCDataS c i t
+  | cdVerbatim t == CDataText && prettify c = i <> T.foldr step mempty (showCData t)
+  | otherwise                               = i <> showCDataS t
+  where step '\n' rest = singleton '\n' <> i <> rest
+        step ch   rest = singleton ch <> rest
+
+
+
+--------------------------------------------------------------------------------
+
+-- | Compact rendering preceded by the <?xml?> header.
+showTopElement     :: Element -> Text
+showTopElement e    = T.append xmlHeader (showElement e)
+
+showContent        :: Content -> Text
+showContent x       = ppcContent defaultConfigPP x
+
+showElement        :: Element -> Text
+showElement e       = ppcElement defaultConfigPP e
+
+showCData          :: CData -> Text
+showCData cd        = ppcCData defaultConfigPP cd
+
+-- Note: the reference name is emitted as-is, so it must not contain '&', ';', etc.
+showCRefS          :: Text -> Builder
+showCRefS r         = mconcat [singleton '&', fromText r, singleton ';']
+
+-- | Serialize character data as dictated by its 'CDataKind': escaped,
+-- wrapped in a CDATA section, or passed through untouched.
+showCDataS         :: CData -> Builder
+showCDataS (CData kind body _) =
+ case kind of
+   CDataText     -> escStr body
+   CDataVerbatim -> mconcat [fromText "<![CDATA[", escCData body, fromText "]]>"]
+   CDataRaw      -> fromText body
+
+--------------------------------------------------------------------------------
+-- | Escape text for inclusion in a CDATA section.  Every occurrence of
+-- the terminator @]]>@ is split across two adjacent sections, so the
+-- data can never close the enclosing section early.  All occurrences
+-- are rewritten, not just the first: a later unescaped @]]>@ would end
+-- the section prematurely and leave malformed XML behind it.
+escCData           :: Text -> Builder
+escCData            =
+  fromText . T.replace "]]>" "]]]]><![CDATA[>"
+
+-- | Escape a single character for use in text or attribute values.
+escChar            :: Char -> Builder
+escChar '<'  = fromText "&lt;"
+escChar '>'  = fromText "&gt;"
+escChar '&'  = fromText "&amp;"
+escChar '"'  = fromText "&quot;"
+-- we use &#39; instead of &apos; because IE apparently has difficulties
+-- rendering &apos; in xhtml.
+-- Reported by Rohan Drape <rohan.drape@gmail.com>.
+escChar '\'' = fromText "&#39;"
+escChar c    = singleton c
+
+  {- original xml-light version:
+  -- NOTE: We escape '\r' explicitly because otherwise they get lost
+  -- when parsed back in because of then end-of-line normalization rules.
+  _ | isPrint c || c == '\n' -> singleton c
+    | otherwise -> showText "&#" . showsT oc . singleton ';'
+      where oc = ord c
+  -}
+
+-- | Escape a whole string.  Text containing nothing that needs escaping
+-- is emitted as one chunk rather than character by character.
+escStr             :: Text -> Builder
+escStr cs
+  | T.any needsEscape cs = T.foldr (\ch rest -> escChar ch <> rest) mempty cs
+  | otherwise            = fromText cs
+ where
+  -- exactly the characters that 'escChar' rewrites
+  needsEscape :: Char -> Bool
+  needsEscape ch = ch `elem` specials
+  specials = ['<', '>', '&', '"', '\'']
+
+tagEnd             :: QName -> Builder
+tagEnd qn           = mconcat [fromText "</", showQName qn, singleton '>']
+
+-- | The opening of a tag, without its closing bracket: @<name@
+-- followed by every attribute as rendered by 'showAttr'.
+tagStart           :: QName -> [Attr] -> Builder
+tagStart qn as      =
+  singleton '<' <> showQName qn <> foldMap showAttr as
+
+showAttr           :: Attr -> Builder
+showAttr (Attr qn v) =
+  mconcat [ singleton ' ', showQName qn, fromText "=\""
+          , escStr v, singleton '"' ]
+
+showQName          :: QName -> Builder
+showQName q         = prefix <> fromText (qName q)
+  where
+    -- a prefix, when present, is written as @prefix:@ before the local name
+    prefix = maybe mempty (\p -> fromText p <> singleton ':') (qPrefix q)
diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs
deleted file mode 100644
index 8ad22a66a..000000000
--- a/src/Text/Pandoc/XMLParser.hs
+++ /dev/null
@@ -1,66 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{- |
-   Module      : Text.Pandoc.XMLParser
-   Copyright   : Copyright (C) 2021 John MacFarlane
-   License     : GNU GPL, version 2 or above
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Bridge to allow using xml-conduit's parser with xml-light's types.
--}
-module Text.Pandoc.XMLParser
-  ( parseXMLElement
-  , parseXMLContents
-  , module Text.XML.Light.Types
-  ) where
-
-import qualified Control.Exception as E
-import qualified Text.XML as Conduit
-import Text.XML.Unresolved (InvalidEventStream(..))
-import qualified Text.XML.Light as Light
-import Text.XML.Light.Types
-import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
-import qualified Data.Map as M
-import Data.Maybe (mapMaybe)
-
--- Drop in replacement for parseXMLDoc in xml-light.
-parseXMLElement :: TL.Text -> Either T.Text Light.Element
-parseXMLElement t =
-  elementToElement .  Conduit.documentRoot <$>
-    either (Left . T.pack . E.displayException) Right
-    (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t)
-
-parseXMLContents :: TL.Text -> Either T.Text [Light.Content]
-parseXMLContents t =
-  case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of
-    Left e ->
-      case E.fromException e of
-        Just (ContentAfterRoot _) ->
-          elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>")
-        _ -> Left . T.pack . E.displayException $ e
-    Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x]
-
-elementToElement :: Conduit.Element -> Light.Element
-elementToElement (Conduit.Element name attribMap nodes) =
-  Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing
- where
-  attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $
-              M.toList attribMap
-  nameToQname (Conduit.Name localName mbns mbpref) =
-    case mbpref of
-      Nothing | "xmlns:" `T.isPrefixOf` localName ->
-           Light.QName (T.unpack $ T.drop 6 localName)  (T.unpack <$> mbns)
-                       (Just "xmlns")
-      _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns)
-                       (T.unpack <$> mbpref)
-
-nodeToContent :: Conduit.Node -> Maybe Light.Content
-nodeToContent (Conduit.NodeElement el) =
-  Just (Light.Elem (elementToElement el))
-nodeToContent (Conduit.NodeContent t) =
-  Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing))
-nodeToContent _ = Nothing
-
diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs
index 376f02c55..c1e47622d 100644
--- a/test/Tests/Writers/OOXML.hs
+++ b/test/Tests/Writers/OOXML.hs
@@ -43,7 +43,8 @@ compareXMLBool _ _ = False
 
 displayDiff :: Content -> Content -> String
 displayDiff elemA elemB =
-  showDiff (1,1) $ getDiff (lines $ ppContent elemA) (lines $ ppContent elemB)
+  showDiff (1,1)
+    (getDiff (lines $ showContent elemA) (lines $ showContent elemB))
 
 goldenArchive :: FilePath -> IO Archive
 goldenArchive fp = toArchive . BL.fromStrict <$> BS.readFile fp
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index d3b16d0f2..ed7d1165c 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 6293ef493..07ae75676 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index 4205a1516..e5f034378 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index adb3f23db..174942135 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 92c8137fe..b5c31a851 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index f53470617..c42ca1b05 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index d6af90a72..1cb4c1fd7 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index e18dbe853..7122456ea 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 820299043..616ba0f81 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index ae0f41d12..c30dcdee9 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 94cd35dfa..8a704b41e 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 879f2a25b..b1906c8c4 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 93f86478f..8adf1cf75 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 967d297f2..584117503 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index c5614e2fa..8859fe55c 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index 0f39a831f..b80f3b3ba 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 07046f223..35beed68a 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 3656618e6..2c29fd674 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 8798253d5..10a948886 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 0a24d1840..5b90e74a0 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index 52bb7a217..cc81b46d1 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 182c06c64..1394dc442 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 7b69a56a3..0d1688694 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index 3d3a35701..be1caef2d 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 5ae37b406..a1d2323c2 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index c29aa6716..2f3a831a7 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 664493246..af066107c 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index b6d15340e..9cc7a075f 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index f8e1092d2..f8b8dcfde 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index b4cda82f2..1c3baf0bf 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index ee222efa0..28686970d 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index c6f8d9c96..7051cefbd 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index ea8146690..9df631640 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx
index 58070eb3f..5e9c2c630 100644
Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ
diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx
index db9b7e371..15232c32d 100644
Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ
diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx
index c7b1ed7d5..aab0cc6f5 100644
Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ
diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx
index 6944d92bf..fe5b675f3 100644
Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ
diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx
index ae0d28429..de5e68151 100644
Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ
diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx
index 37c74c69a..89e6fbdf2 100644
Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ
diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx
index 324e443a1..6bcbd1b9c 100644
Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ
diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx
index c81b983e3..79d48560b 100644
Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ
diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx
index 30ce33db6..9d46036fe 100644
Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ
diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx
index d6c604968..54ec7f305 100644
Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ
diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx
index 000e17ecd..a028b346f 100644
Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ
diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx
index fdcd2e29b..1158c16fc 100644
Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ
diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx
index e73126376..670a825de 100644
Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ
diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx
index e3f968e9e..6d297ef11 100644
Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ
diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx
index eadb9372e..473b9498d 100644
Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ
diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx
index 8ca6bab2b..2cdf54474 100644
Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ
diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx
index ae188ee68..ffc2eb9f7 100644
Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ
diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx
index 60301fa50..676954cb8 100644
Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ
diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx
index 17124a50d..29164af15 100644
Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ
diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx
index 19ae7dd4e..1742b3296 100644
Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ
diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx
index b650b7585..c6df8e18e 100644
Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ
diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx
index 0ab029614..cf6e52eef 100644
Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ
diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx
index 2a6e35080..e06d9079d 100644
Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx
index a7bcf6a4b..449339778 100644
Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ
diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx
index 21b018c25..258098082 100644
Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ
diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx
index 4ec4772a4..2f0213919 100644
Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ
diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx
index 5983657b6..9dbfa41a0 100644
Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ
diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx
index dd54c7082..f288dde14 100644
Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ
diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx
index b3e5ed5b9..0ab1302da 100644
Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx
index 1078854bb..6343bffe4 100644
Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ
diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx
index 5116c6c4e..5d4465f64 100644
Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx
index 0c8e49bd9..d581681aa 100644
Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ
diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx
index 68695939d..c922df3a8 100644
Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx
index 7ed9b946d..13f564bf0 100644
Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ
diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx
index 79fc82345..f0b302738 100644
Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ
diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx
index 9f943c279..2d8d00242 100644
Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ
diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx
index ac72d8ced..4320128b3 100644
Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ
diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx
index 15c7b5469..0a3f6e56d 100644
Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ
diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx
index 926c5e699..e41219844 100644
Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ
diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx
index a37e72d2c..82b8aa13d 100644
Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ
diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx
index 7f86533fe..270a7eeac 100644
Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ
diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx
index 89e3db0ab..44985d701 100644
Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ
-- 
cgit v1.2.3


From eed18d231cc706e27a1495d46e8c05dd18a0938f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 13 Mar 2021 12:05:52 -0800
Subject: Use integral values for w:tblW in docx.

Closes #7141.
---
 src/Text/Pandoc/Writers/Docx.hs            |   2 +-
 test/docx/golden/table_one_row.docx        | Bin 9908 -> 9906 bytes
 test/docx/golden/table_with_list_cell.docx | Bin 10227 -> 10225 bytes
 test/docx/golden/tables.docx               | Bin 10244 -> 10241 bytes
 4 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 89c71d773..c47bfb2ea 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1031,7 +1031,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do
            map mkcell cells
   let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
   let fullrow = 5000 -- 100% specified in pct
-  let rowwidth = fullrow * sum widths
+  let (rowwidth :: Int) = round $ fullrow * sum widths
   let mkgridcol w = mknode "w:gridCol"
                        [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
   let hasHeader = not $ all null headers
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index a1d2323c2..1178f7c6e 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 2f3a831a7..84be2720f 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index af066107c..140366d8b 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
-- 
cgit v1.2.3


From c3f9e8c12256d19ed6c89d15470945855ee16a94 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Wed, 17 Mar 2021 22:31:20 -0700
Subject: Docx writer: make nsid in abstractNum deterministic.

Previously we assigned a random number (though in a deterministic
way).  But changes in the random package mean we get different
results now on different architectures, even with the same random
seed. We don't need random values; so now we just assign a value
based on the list number id, which is guaranteed to be unique
to the list marker.
---
 src/Text/Pandoc/Writers/Docx.hs                     |  10 ++++------
 test/docx/golden/block_quotes.docx                  | Bin 10071 -> 10067 bytes
 test/docx/golden/codeblock.docx                     | Bin 9920 -> 9916 bytes
 test/docx/golden/comments.docx                      | Bin 10258 -> 10254 bytes
 test/docx/golden/custom_style_no_reference.docx     | Bin 10021 -> 10017 bytes
 test/docx/golden/custom_style_preserve.docx         | Bin 10650 -> 10646 bytes
 test/docx/golden/custom_style_reference.docx        | Bin 12403 -> 12400 bytes
 test/docx/golden/definition_list.docx               | Bin 9920 -> 9916 bytes
 .../docx/golden/document-properties-short-desc.docx | Bin 9925 -> 9922 bytes
 test/docx/golden/document-properties.docx           | Bin 10404 -> 10399 bytes
 test/docx/golden/headers.docx                       | Bin 10059 -> 10055 bytes
 test/docx/golden/image.docx                         | Bin 26736 -> 26733 bytes
 test/docx/golden/inline_code.docx                   | Bin 9859 -> 9855 bytes
 test/docx/golden/inline_formatting.docx             | Bin 10038 -> 10035 bytes
 test/docx/golden/inline_images.docx                 | Bin 26793 -> 26789 bytes
 test/docx/golden/link_in_notes.docx                 | Bin 10081 -> 10077 bytes
 test/docx/golden/links.docx                         | Bin 10251 -> 10248 bytes
 test/docx/golden/lists.docx                         | Bin 10332 -> 10314 bytes
 test/docx/golden/lists_continuing.docx              | Bin 10123 -> 10110 bytes
 test/docx/golden/lists_multiple_initial.docx        | Bin 10210 -> 10192 bytes
 test/docx/golden/lists_restarting.docx              | Bin 10122 -> 10108 bytes
 test/docx/golden/nested_anchors_in_header.docx      | Bin 10216 -> 10212 bytes
 test/docx/golden/notes.docx                         | Bin 10028 -> 10024 bytes
 test/docx/golden/raw-blocks.docx                    | Bin 9960 -> 9956 bytes
 test/docx/golden/raw-bookmarks.docx                 | Bin 10094 -> 10090 bytes
 test/docx/golden/table_one_row.docx                 | Bin 9906 -> 9903 bytes
 test/docx/golden/table_with_list_cell.docx          | Bin 10225 -> 10212 bytes
 test/docx/golden/tables.docx                        | Bin 10241 -> 10238 bytes
 test/docx/golden/track_changes_deletion.docx        | Bin 9903 -> 9899 bytes
 test/docx/golden/track_changes_insertion.docx       | Bin 9886 -> 9882 bytes
 test/docx/golden/track_changes_move.docx            | Bin 9920 -> 9916 bytes
 .../golden/track_changes_scrubbed_metadata.docx     | Bin 10032 -> 10028 bytes
 test/docx/golden/unicode.docx                       | Bin 9845 -> 9841 bytes
 test/docx/golden/verbatim_subsuper.docx             | Bin 9892 -> 9888 bytes
 34 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index c47bfb2ea..20bcd0324 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,6 @@ import qualified Data.Text.Lazy as TL
 import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
-import System.Random (randomRs, mkStdGen)
 import Text.Pandoc.BCP47 (getLang, renderLang)
 import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
 import qualified Text.Pandoc.Class.PandocMonad as P
@@ -695,8 +694,7 @@ baseListId = 1000
 mkNumbering :: [ListMarker] -> [Element]
 mkNumbering lists =
   elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)]
-    where elts = zipWith mkAbstractNum (ordNub lists) $
-                     randomRs (0x10000000, 0xFFFFFFFF) $ mkStdGen 1848
+    where elts = map mkAbstractNum (ordNub lists)
 
 maxListLevel :: Int
 maxListLevel = 8
@@ -713,10 +711,10 @@ mkNum marker numid =
               $ mknode "w:startOverride" [("w:val",tshow start)] ())
                 [0..maxListLevel]
 
-mkAbstractNum :: ListMarker -> Integer -> Element
-mkAbstractNum marker nsid =
+mkAbstractNum :: ListMarker -> Element
+mkAbstractNum marker =
   mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)]
-    $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] ()
+    $ mknode "w:nsid" [("w:val", "A" <> listMarkerToId marker)] ()
     : mknode "w:multiLevelType" [("w:val","multilevel")] ()
     : map (mkLvl marker)
       [0..maxListLevel]
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index ed7d1165c..af1b33ca2 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 07ae75676..f748f1f01 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index e5f034378..ac9d56680 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index 174942135..f27727edd 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index b5c31a851..1da499d6a 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index c42ca1b05..4d2fe245d 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 1cb4c1fd7..f386fcea3 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 7122456ea..debe9a3f6 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 616ba0f81..cd17400bf 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index c30dcdee9..d3af8a3dd 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 8a704b41e..1c4e738c0 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index b1906c8c4..35f43f19f 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 8adf1cf75..8de3f70f6 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 584117503..e76558be9 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 8859fe55c..88bae8142 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index b80f3b3ba..455adcfc7 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 35beed68a..081d9ddba 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 2c29fd674..fc9213fc5 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 10a948886..b636fd3f8 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 5b90e74a0..252623215 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index cc81b46d1..a8c3f5478 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 1394dc442..43e650ebd 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 0d1688694..fe4f7845b 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index be1caef2d..45e90608f 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 1178f7c6e..6eaea2ac2 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 84be2720f..45a97ccaa 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 140366d8b..115a16a48 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 9cc7a075f..247725aaa 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index f8b8dcfde..3863afef2 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 1c3baf0bf..5c848b63a 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 28686970d..e0c843713 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index 7051cefbd..78a773bdd 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index 9df631640..c66a45b74 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From 0921b82d98b6ec7fa80ffd522c129b3828b9c00b Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Mon, 26 Apr 2021 18:07:01 +0200
Subject: Docx writer: autoset table width if no column has an explicit width.

---
 src/Text/Pandoc/Writers/Docx/Table.hs      |  18 +++++++++++-------
 test/docx/golden/table_one_row.docx        | Bin 9903 -> 9904 bytes
 test/docx/golden/table_with_list_cell.docx | Bin 10212 -> 10212 bytes
 test/docx/golden/tables.docx               | Bin 10238 -> 10239 bytes
 4 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index a6b137fc4..349f3a4ce 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -1,7 +1,8 @@
+{-# LANGUAGE LambdaCase          #-}
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {- |
-Module      : Text.Pandoc.Writers.Docx
+Module      : Text.Pandoc.Writers.Docx.Table
 Copyright   : Copyright (C) 2012-2021 John MacFarlane
 License     : GNU GPL, version 2 or above
 Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -87,7 +88,10 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
      mknode "w:tbl" []
       ( mknode "w:tblPr" []
         (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () :
+            mknode "w:tblW" (if all (== 0) widths
+                              then [("w:type", "auto"), ("w:w", "0")]
+                              else [("w:type", "pct"), ("w:w", tshow rowwidth)])
+                            () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
                                ,("w:lastRow","0")
                                ,("w:firstColumn","0")
@@ -107,8 +111,8 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
       )]
 
 alignmentToString :: Alignment -> Text
-alignmentToString alignment = case alignment of
-                                 AlignLeft    -> "left"
-                                 AlignRight   -> "right"
-                                 AlignCenter  -> "center"
-                                 AlignDefault -> "left"
+alignmentToString = \case
+  AlignLeft    -> "left"
+  AlignRight   -> "right"
+  AlignCenter  -> "center"
+  AlignDefault -> "left"
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index 6eaea2ac2..cab3fc31c 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 45a97ccaa..9238c7e20 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 115a16a48..6f0379def 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
-- 
cgit v1.2.3


From ddbf83f62c8bb6516203c99acd894c404351b5ae Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Sat, 1 May 2021 18:52:24 +0200
Subject: Docx writer: support colspans and rowspans in tables

See: #6315
---
 src/Text/Pandoc/Writers/Docx.hs            |   6 +-
 src/Text/Pandoc/Writers/Docx/Table.hs      | 200 +++++++++++++++++++----------
 src/Text/Pandoc/Writers/GridTable.hs       |   4 +-
 test/docx/golden/table_one_row.docx        | Bin 9904 -> 9925 bytes
 test/docx/golden/table_with_list_cell.docx | Bin 10212 -> 10230 bytes
 test/docx/golden/tables.docx               | Bin 10239 -> 10271 bytes
 6 files changed, 140 insertions(+), 70 deletions(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 7064ded09..e11961bfd 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -53,6 +53,7 @@ import Text.Pandoc.Writers.Docx.Table
 import Text.Pandoc.Writers.Docx.Types
 import Text.Pandoc.Shared
 import Text.Pandoc.Walk
+import qualified Text.Pandoc.Writers.GridTable as Grid
 import Text.Pandoc.Writers.Math
 import Text.Pandoc.Writers.Shared
 import Text.TeXMath
@@ -889,8 +890,9 @@ blockToOpenXML' _ HorizontalRule = do
     $ mknode "v:rect" [("style","width:0;height:1.5pt"),
                        ("o:hralign","center"),
                        ("o:hrstd","t"),("o:hr","t")] () ]
-blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) =
-  tableToOpenXML (blocksToOpenXML opts) blkCapt specs thead tbody tfoot
+blockToOpenXML' opts (Table attr caption colspecs thead tbodies tfoot) =
+  tableToOpenXML (blocksToOpenXML opts)
+                 (Grid.toTable attr caption colspecs thead tbodies tfoot)
 blockToOpenXML' opts el
   | BulletList lst <- el = addOpenXMLList BulletMarker lst
   | OrderedList (start, numstyle, numdelim) lst <- el
diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index 349f3a4ce..bb931bf08 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -14,65 +14,39 @@ module Text.Pandoc.Writers.Docx.Table
   ) where
 
 import Control.Monad.State.Strict
+import Data.Array
 import Data.Text (Text)
 import Text.Pandoc.Definition
 import Text.Pandoc.Class.PandocMonad (PandocMonad)
 import Text.Pandoc.Writers.Docx.Types
 import Text.Pandoc.Shared
-import Text.Pandoc.Writers.Shared
 import Text.Printf (printf)
+import Text.Pandoc.Writers.GridTable hiding (Table)
 import Text.Pandoc.Writers.OOXML
-import Text.Pandoc.XML.Light as XML
+import Text.Pandoc.XML.Light as XML hiding (Attr)
 import qualified Data.Text as T
+import qualified Text.Pandoc.Writers.GridTable as Grid
 
 tableToOpenXML :: PandocMonad m
                => ([Block] -> WS m [Content])
-               -> Caption
-               -> [ColSpec]
-               -> TableHead
-               -> [TableBody]
-               -> TableFoot
+               -> Grid.Table
                -> WS m [Content]
-tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
-  let (caption, aligns, widths, headers, rows) =
-        toLegacyTable blkCapt specs thead tbody tfoot
+tableToOpenXML blocksToOpenXML gridTable = do
   setFirstPara
   modify $ \s -> s { stInTable = True }
-  let captionStr = stringify caption
-  caption' <- if null caption
-                 then return []
-                 else withParaPropM (pStyleM "Table Caption")
-                      $ blocksToOpenXML [Para caption]
-  let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
-  -- Table cells require a <w:p> element, even an empty one!
-  -- Not in the spec but in Word 2007, 2010. See #4953. And
-  -- apparently the last element must be a <w:p>, see #6983.
-  let cellToOpenXML (al, cell) = do
-        es <- withParaProp (alignmentFor al) $ blocksToOpenXML cell
-        return $
-          case reverse (onlyElems es) of
-            b:e:_ | qName (elName b) == "bookmarkEnd"
-                  , qName (elName e) == "p" -> es
-            e:_   | qName (elName e) == "p" -> es
-            _ -> es ++ [Elem $ mknode "w:p" [] ()]
-  headers' <- mapM cellToOpenXML $ zip aligns headers
-  rows' <- mapM (mapM cellToOpenXML . zip aligns) rows
-  compactStyle <- pStyleM "Compact"
-  let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
-  let mkcell contents = mknode "w:tc" []
-                            $ if null contents
-                                 then emptyCell'
-                                 else contents
-  let mkrow cells =
-         mknode "w:tr" [] $
-           map mkcell cells
-  let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
-  let fullrow = 5000 -- 100% specified in pct
-  let (rowwidth :: Int) = round $ fullrow * sum widths
-  let mkgridcol w = mknode "w:gridCol"
-                       [("w:w", tshow (floor (textwidth * w) :: Integer))] ()
-  let hasHeader = not $ all null headers
-  modify $ \s -> s { stInTable = False }
+  let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) =
+        gridTable
+  let (Caption _maybeShortCaption captionBlocks) = caption
+  let captionStr = stringify captionBlocks
+  captionXml <- if null captionBlocks
+                then return []
+                else withParaPropM (pStyleM "Table Caption")
+                     $ blocksToOpenXML captionBlocks
+  head' <- cellGridToOpenXML blocksToOpenXML thead
+  bodies <- mapM (cellGridToOpenXML blocksToOpenXML) tbodies
+  foot' <- cellGridToOpenXML blocksToOpenXML tfoot
+
+  let hasHeader = not . null . indices . partRowAttrs $ thead
   -- for compatibility with Word <= 2007, we include a val with a bitmask
   -- 0×0020  Apply first row conditional formatting
   -- 0×0040  Apply last row conditional formatting
@@ -80,18 +54,12 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
   -- 0×0100  Apply last column conditional formatting
   -- 0×0200  Do not apply row banding conditional formatting
   -- 0×0400  Do not apply column banding conditional formattin
-  let tblLookVal :: Int
-      tblLookVal = if hasHeader then 0x20 else 0
-  return $
-    caption' ++
-    [Elem $
-     mknode "w:tbl" []
-      ( mknode "w:tblPr" []
-        (   mknode "w:tblStyle" [("w:val","Table")] () :
-            mknode "w:tblW" (if all (== 0) widths
-                              then [("w:type", "auto"), ("w:w", "0")]
-                              else [("w:type", "pct"), ("w:w", tshow rowwidth)])
-                            () :
+  let tblLookVal = if hasHeader then (0x20 :: Int) else 0
+  let (gridCols, tblWattr) = tableLayout (elems colspecs)
+  let tbl = mknode "w:tbl" []
+        ( mknode "w:tblPr" []
+          ( mknode "w:tblStyle" [("w:val","Table")] () :
+            mknode "w:tblW" tblWattr () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
                                ,("w:lastRow","0")
                                ,("w:firstColumn","0")
@@ -100,15 +68,14 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do
                                ,("w:noVBand","0")
                                ,("w:val", T.pack $ printf "%04x" tblLookVal)
                                ] () :
-          [ mknode "w:tblCaption" [("w:val", captionStr)] ()
-          | not (null caption) ] )
-      : mknode "w:tblGrid" []
-        (if all (==0) widths
-            then []
-            else map mkgridcol widths)
-      : [ mkrow headers' | hasHeader ] ++
-      map mkrow rows'
-      )]
+            [ mknode "w:tblCaption" [("w:val", captionStr)] ()
+            | not (T.null captionStr) ]
+          )
+          : mknode "w:tblGrid" [] gridCols
+          : head' ++ mconcat bodies ++ foot'
+        )
+  modify $ \s -> s { stInTable = False }
+  return $ captionXml ++ [Elem tbl]
 
 alignmentToString :: Alignment -> Text
 alignmentToString = \case
@@ -116,3 +83,104 @@ alignmentToString = \case
   AlignRight   -> "right"
   AlignCenter  -> "center"
   AlignDefault -> "left"
+
+tableLayout :: [ColSpec] -> ([Element], [(Text, Text)])
+tableLayout specs =
+  let
+    textwidth = 7920  -- 5.5 in in twips       (1 twip == 1/20 pt)
+    fullrow   = 5000  -- 100% specified in pct (1 pct  == 1/50th of a percent)
+    ncols = length specs
+    getWidth = \case
+      ColWidth n -> n
+      _          -> 0
+    widths = map (getWidth . snd) specs
+    rowwidth  = round (fullrow * sum widths) :: Int
+    widthToTwips w = floor (textwidth * w)   :: Int
+    mkGridCol w = mknode "w:gridCol" [("w:w", tshow (widthToTwips w))] ()
+  in if all (== 0) widths
+     then ( replicate ncols $ mkGridCol (1.0 / fromIntegral ncols)
+          , [ ("w:type", "auto"), ("w:w", "0")])
+     else ( map mkGridCol widths
+          , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ])
+
+cellGridToOpenXML :: PandocMonad m
+                   => ([Block] -> WS m [Content])
+                   -> Part
+                   -> WS m [Element]
+cellGridToOpenXML blocksToOpenXML part@(Part _ _ rowAttrs) =
+  if null (indices rowAttrs)
+  then return mempty
+  else mapM (rowToOpenXML blocksToOpenXML) $ partToRows part
+
+data OOXMLCell
+  = OOXMLCell Attr Alignment RowSpan ColSpan [Block]
+  | OOXMLCellMerge ColSpan
+
+data OOXMLRow = OOXMLRow Attr [OOXMLCell]
+
+partToRows :: Part -> [OOXMLRow]
+partToRows part =
+  let
+    toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell]
+    toOOXMLCell ridx cidx = \case
+      ContentCell attr align rowspan colspan blocks ->
+        [OOXMLCell attr align rowspan colspan blocks]
+      ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' ->
+        case (partCellArray part)!idx' of
+          (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan]
+          x -> error $ "Content cell expected, got, " ++ show x ++
+                       " at index " ++ show idx'
+      _ -> mempty
+    mkRow :: (RowIndex, Attr) -> OOXMLRow
+    mkRow (ridx, attr) = OOXMLRow attr
+                       . concatMap (uncurry $ toOOXMLCell ridx)
+                       . assocs
+                       . rowArray ridx
+                       $ partCellArray part
+  in map mkRow $ assocs (partRowAttrs part)
+
+rowToOpenXML :: PandocMonad m
+             => ([Block] -> WS m [Content])
+             -> OOXMLRow
+             -> WS m Element
+rowToOpenXML blocksToOpenXML (OOXMLRow _attr cells) = do
+  xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells
+  -- let align' = case align of
+  --       AlignDefault -> colAlign
+  --       _            -> align
+  return $ mknode "w:tr" [] xmlcells
+
+ooxmlCellToOpenXML :: PandocMonad m
+                   => ([Block] -> WS m [Content])
+                   -> OOXMLCell
+                   -> WS m Element
+ooxmlCellToOpenXML blocksToOpenXML = \case
+  OOXMLCellMerge (ColSpan colspan) -> do
+    return $ mknode "w:tc" []
+      [ mknode "w:tcPr" [] [ mknode "w:gridSpan" [("w:val", tshow colspan)] ()
+                           , mknode "w:vMerge"   [("w:val", "continue")] () ]
+      , mknode "w:p" [] [mknode "w:pPr" [] ()]]
+  OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do
+    -- we handle rowspans via 'leftpad', so we can ignore those here
+
+    compactStyle <- pStyleM "Compact"
+    es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents
+    -- Table cells require a <w:p> element, even an empty one!
+    -- Not in the spec but in Word 2007, 2010. See #4953. And
+    -- apparently the last element must be a <w:p>, see #6983.
+    return . mknode "w:tc" [] $
+      Elem
+       (mknode "w:tcPr" [] ([ mknode "w:gridSpan" [("w:val", tshow colspan)] ()
+                            | colspan > 1] ++
+                            [ mknode "w:vMerge" [("w:val", "restart")] ()
+                            | rowspan > RowSpan 1 ])) :
+      if null contents
+      then [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]]
+      else case reverse (onlyElems es) of
+             b:e:_ | qName (elName b) == "bookmarkEnd"  -- y tho?
+                   , qName (elName e) == "p" -> es
+             e:_   | qName (elName e) == "p" -> es
+             _ -> es ++ [Elem $ mknode "w:p" [] ()]
+
+alignmentFor :: Alignment -> Element
+alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
diff --git a/src/Text/Pandoc/Writers/GridTable.hs b/src/Text/Pandoc/Writers/GridTable.hs
index c6f4cf456..bc468febc 100644
--- a/src/Text/Pandoc/Writers/GridTable.hs
+++ b/src/Text/Pandoc/Writers/GridTable.hs
@@ -87,8 +87,8 @@ toTable attr caption colSpecs  thead tbodies tfoot =
     tbGrids = map bodyToGrid tbodies
     tfGrid = let (TableFoot footAttr rows) = tfoot
              in rowsToPart footAttr rows
-    bodyToGrid (TableBody bodyAttr _rowHeadCols _headRows rows) =
-      rowsToPart bodyAttr rows
+    bodyToGrid (TableBody bodyAttr _rowHeadCols headRows rows) =
+      rowsToPart bodyAttr (headRows ++ rows)
 
 data BuilderCell
   = FilledCell GridCell
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index cab3fc31c..e60bb303f 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 9238c7e20..a4037cf32 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 6f0379def..bc1bc27f8 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
-- 
cgit v1.2.3


From 5eb7ad7d1ebbfe27a282a2d75f199bacf2052be3 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 11 May 2021 22:31:38 -0600
Subject: Improve integration of settings from reference.docx.

The settings we can carry over from a reference.docx are
autoHyphenation, consecutiveHyphenLimit, hyphenationZone,
doNotHyphenateCap, evenAndOddHeaders, and proofState.

Previously this was implemented in a buggy way, so that the
reference doc's values AND the new values were included.

This change allows users to create a reference.docx that
sets w:proofState for spelling or grammar to "dirty,"
so that spell/grammar checking will be triggered on the
generated docx.

Closes #1209.
---
 src/Text/Pandoc/Writers/Docx.hs                    |  27 +++++++++++----------
 test/docx/golden/block_quotes.docx                 | Bin 10067 -> 10061 bytes
 test/docx/golden/codeblock.docx                    | Bin 9916 -> 9910 bytes
 test/docx/golden/comments.docx                     | Bin 10254 -> 10248 bytes
 test/docx/golden/custom_style_no_reference.docx    | Bin 10017 -> 10011 bytes
 test/docx/golden/custom_style_preserve.docx        | Bin 10646 -> 10640 bytes
 test/docx/golden/custom_style_reference.docx       | Bin 12400 -> 12397 bytes
 test/docx/golden/definition_list.docx              | Bin 9916 -> 9910 bytes
 .../golden/document-properties-short-desc.docx     | Bin 9922 -> 9916 bytes
 test/docx/golden/document-properties.docx          | Bin 10399 -> 10393 bytes
 test/docx/golden/headers.docx                      | Bin 10055 -> 10049 bytes
 test/docx/golden/image.docx                        | Bin 26733 -> 26727 bytes
 test/docx/golden/inline_code.docx                  | Bin 9855 -> 9849 bytes
 test/docx/golden/inline_formatting.docx            | Bin 10035 -> 10029 bytes
 test/docx/golden/inline_images.docx                | Bin 26789 -> 26783 bytes
 test/docx/golden/link_in_notes.docx                | Bin 10077 -> 10071 bytes
 test/docx/golden/links.docx                        | Bin 10248 -> 10242 bytes
 test/docx/golden/lists.docx                        | Bin 10314 -> 10308 bytes
 test/docx/golden/lists_continuing.docx             | Bin 10110 -> 10104 bytes
 test/docx/golden/lists_multiple_initial.docx       | Bin 10192 -> 10186 bytes
 test/docx/golden/lists_restarting.docx             | Bin 10108 -> 10102 bytes
 test/docx/golden/nested_anchors_in_header.docx     | Bin 10212 -> 10206 bytes
 test/docx/golden/notes.docx                        | Bin 10024 -> 10018 bytes
 test/docx/golden/raw-blocks.docx                   | Bin 9956 -> 9950 bytes
 test/docx/golden/raw-bookmarks.docx                | Bin 10090 -> 10084 bytes
 test/docx/golden/table_one_row.docx                | Bin 9925 -> 9920 bytes
 test/docx/golden/table_with_list_cell.docx         | Bin 10230 -> 10225 bytes
 test/docx/golden/tables.docx                       | Bin 10271 -> 10266 bytes
 test/docx/golden/track_changes_deletion.docx       | Bin 9899 -> 9893 bytes
 test/docx/golden/track_changes_insertion.docx      | Bin 9882 -> 9876 bytes
 test/docx/golden/track_changes_move.docx           | Bin 9916 -> 9910 bytes
 .../golden/track_changes_scrubbed_metadata.docx    | Bin 10028 -> 10022 bytes
 test/docx/golden/unicode.docx                      | Bin 9841 -> 9835 bytes
 test/docx/golden/verbatim_subsuper.docx            | Bin 9888 -> 9882 bytes
 34 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index e11961bfd..75bed1595 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -469,12 +469,12 @@ writeDocx opts doc = do
   -- adds references to footnotes or endnotes we don't have...
   -- we do, however, copy some settings over from reference
   let settingsPath = "word/settings.xml"
-      settingsList = [ "w:autoHyphenation"
-                     , "w:consecutiveHyphenLimit"
-                     , "w:hyphenationZone"
-                     , "w:doNotHyphenateCap"
-                     , "w:evenAndOddHeaders"
-                     , "w:proofState"
+      settingsList = [ "autoHyphenation"
+                     , "consecutiveHyphenLimit"
+                     , "hyphenationZone"
+                     , "doNotHyphenateCap"
+                     , "evenAndOddHeaders"
+                     , "proofState"
                      ]
   settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
 
@@ -577,16 +577,17 @@ copyChildren :: (PandocMonad m)
 copyChildren refArchive distArchive path timestamp elNames = do
   ref  <- parseXml refArchive distArchive path
   dist <- parseXml distArchive distArchive path
+  let elsToCopy =
+        map cleanElem $ filterChildrenName (\e -> qName e `elem` elNames) ref
+  let elsToKeep =
+        [e | Elem e <- elContent dist, not (any (hasSameNameAs e) elsToCopy)]
   return $ toEntry path timestamp $ renderXml dist{
-      elContent = elContent dist ++ copyContent ref
+      elContent = map Elem elsToKeep ++ map Elem elsToCopy
     }
   where
-    strName QName{qName=name, qPrefix=prefix}
-      | Just p <- prefix = p <> ":" <> name
-      | otherwise        = name
-    shouldCopy = (`elem` elNames) . strName
-    cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}}
-    copyContent = map cleanElem . filterChildrenName shouldCopy
+    hasSameNameAs (Element {elName = n1}) (Element {elName = n2}) =
+      qName n1 == qName n2
+    cleanElem el@Element{elName=name} = el{elName=name{qURI=Nothing}}
 
 -- this is the lowest number used for a list numId
 baseListId :: Int
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index af1b33ca2..b6973fcfd 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index f748f1f01..f0d35d3ad 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index ac9d56680..fb1c15dec 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index f27727edd..43d536b65 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 1da499d6a..8865010d1 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index 4d2fe245d..10f7bf661 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index f386fcea3..2a7d81a34 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index debe9a3f6..2fcd14908 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index cd17400bf..39533f42d 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index d3af8a3dd..c8d67c45b 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 1c4e738c0..8a7aeec10 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 35f43f19f..969237cec 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 8de3f70f6..cda936a39 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index e76558be9..69991e791 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 88bae8142..40e848195 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index 455adcfc7..28237a30d 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 081d9ddba..bf075805e 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index fc9213fc5..e7d308e13 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index b636fd3f8..9763e347e 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index 252623215..b717ca619 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index a8c3f5478..873e731d2 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 43e650ebd..134cb2eaf 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index fe4f7845b..c49ed79c8 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index 45e90608f..1f2cbb214 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index e60bb303f..a0160cdb4 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index a4037cf32..6427d475c 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index bc1bc27f8..470eac2ae 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 247725aaa..3542b8f9c 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index 3863afef2..b36b4485e 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 5c848b63a..e30ab06ae 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index e0c843713..11597d578 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index 78a773bdd..c7bff82e5 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index c66a45b74..c70f6946e 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From 17d96404f5b6f5f080329e220a0784f10c364f2d Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <albert@zeitkraut.de>
Date: Fri, 14 May 2021 16:19:20 +0200
Subject: Docx writer: allow multirow table headers

---
 src/Text/Pandoc/Writers/Docx/Table.hs      |  42 +++++++++++++++++------------
 test/docx/golden/table_with_list_cell.docx | Bin 10225 -> 10238 bytes
 test/docx/golden/tables.docx               | Bin 10266 -> 10279 bytes
 3 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index bb931bf08..ccc325fbe 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -42,11 +42,12 @@ tableToOpenXML blocksToOpenXML gridTable = do
                 then return []
                 else withParaPropM (pStyleM "Table Caption")
                      $ blocksToOpenXML captionBlocks
-  head' <- cellGridToOpenXML blocksToOpenXML thead
-  bodies <- mapM (cellGridToOpenXML blocksToOpenXML) tbodies
-  foot' <- cellGridToOpenXML blocksToOpenXML tfoot
+  head' <- cellGridToOpenXML blocksToOpenXML HeadRow thead
+  bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow) tbodies
+  foot' <- cellGridToOpenXML blocksToOpenXML FootRow tfoot
 
   let hasHeader = not . null . indices . partRowAttrs $ thead
+  let hasFooter = not . null . indices . partRowAttrs $ tfoot
   -- for compatibility with Word <= 2007, we include a val with a bitmask
   -- 0×0020  Apply first row conditional formatting
   -- 0×0040  Apply last row conditional formatting
@@ -61,7 +62,7 @@ tableToOpenXML blocksToOpenXML gridTable = do
           ( mknode "w:tblStyle" [("w:val","Table")] () :
             mknode "w:tblW" tblWattr () :
             mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0")
-                               ,("w:lastRow","0")
+                               ,("w:lastRow",if hasFooter then "1" else "0")
                                ,("w:firstColumn","0")
                                ,("w:lastColumn","0")
                                ,("w:noHBand","0")
@@ -77,6 +78,9 @@ tableToOpenXML blocksToOpenXML gridTable = do
   modify $ \s -> s { stInTable = False }
   return $ captionXml ++ [Elem tbl]
 
+-- | Parts of a table
+data RowType = HeadRow | BodyRow | FootRow
+
 alignmentToString :: Alignment -> Text
 alignmentToString = \case
   AlignLeft    -> "left"
@@ -104,22 +108,23 @@ tableLayout specs =
           , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ])
 
 cellGridToOpenXML :: PandocMonad m
-                   => ([Block] -> WS m [Content])
-                   -> Part
-                   -> WS m [Element]
-cellGridToOpenXML blocksToOpenXML part@(Part _ _ rowAttrs) =
+                  => ([Block] -> WS m [Content])
+                  -> RowType
+                  -> Part
+                  -> WS m [Element]
+cellGridToOpenXML blocksToOpenXML rowType part@(Part _ _ rowAttrs) =
   if null (indices rowAttrs)
   then return mempty
-  else mapM (rowToOpenXML blocksToOpenXML) $ partToRows part
+  else mapM (rowToOpenXML blocksToOpenXML) $ partToRows rowType part
 
 data OOXMLCell
   = OOXMLCell Attr Alignment RowSpan ColSpan [Block]
   | OOXMLCellMerge ColSpan
 
-data OOXMLRow = OOXMLRow Attr [OOXMLCell]
+data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell]
 
-partToRows :: Part -> [OOXMLRow]
-partToRows part =
+partToRows :: RowType -> Part -> [OOXMLRow]
+partToRows rowType part =
   let
     toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell]
     toOOXMLCell ridx cidx = \case
@@ -132,7 +137,7 @@ partToRows part =
                        " at index " ++ show idx'
       _ -> mempty
     mkRow :: (RowIndex, Attr) -> OOXMLRow
-    mkRow (ridx, attr) = OOXMLRow attr
+    mkRow (ridx, attr) = OOXMLRow rowType attr
                        . concatMap (uncurry $ toOOXMLCell ridx)
                        . assocs
                        . rowArray ridx
@@ -143,12 +148,17 @@ rowToOpenXML :: PandocMonad m
              => ([Block] -> WS m [Content])
              -> OOXMLRow
              -> WS m Element
-rowToOpenXML blocksToOpenXML (OOXMLRow _attr cells) = do
+rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do
   xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells
+  let addTrPr = case rowType of
+        HeadRow -> (mknode "w:trPr" []
+                    [mknode "w:tblHeader" [("w:val", "true")] ()] :)
+        BodyRow -> id
+        FootRow -> id
   -- let align' = case align of
   --       AlignDefault -> colAlign
   --       _            -> align
-  return $ mknode "w:tr" [] xmlcells
+  return $ mknode "w:tr" [] (addTrPr xmlcells)
 
 ooxmlCellToOpenXML :: PandocMonad m
                    => ([Block] -> WS m [Content])
@@ -161,8 +171,6 @@ ooxmlCellToOpenXML blocksToOpenXML = \case
                            , mknode "w:vMerge"   [("w:val", "continue")] () ]
       , mknode "w:p" [] [mknode "w:pPr" [] ()]]
   OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do
-    -- we handle rowspans via 'leftpad', so we can ignore those here
-
     compactStyle <- pStyleM "Compact"
     es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents
     -- Table cells require a <w:p> element, even an empty one!
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index 6427d475c..e51910770 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 470eac2ae..8029774a9 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
-- 
cgit v1.2.3


From 2cf971cf56cbcdfcd6ee245df0d6e9811dc3573b Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 15 May 2021 10:54:05 -0700
Subject: docx writer: Remove rsids from settings.xml.

Word will add these when revisions are made.  But it's
pointless to start out with a set of them.
---
 data/docx/word/settings.xml                         |  12 +-----------
 test/docx/golden/block_quotes.docx                  | Bin 10061 -> 9981 bytes
 test/docx/golden/codeblock.docx                     | Bin 9910 -> 9830 bytes
 test/docx/golden/comments.docx                      | Bin 10248 -> 10168 bytes
 test/docx/golden/custom_style_no_reference.docx     | Bin 10011 -> 9931 bytes
 test/docx/golden/custom_style_preserve.docx         | Bin 10640 -> 10560 bytes
 test/docx/golden/custom_style_reference.docx        | Bin 12397 -> 12317 bytes
 test/docx/golden/definition_list.docx               | Bin 9910 -> 9830 bytes
 .../docx/golden/document-properties-short-desc.docx | Bin 9916 -> 9835 bytes
 test/docx/golden/document-properties.docx           | Bin 10393 -> 10314 bytes
 test/docx/golden/headers.docx                       | Bin 10049 -> 9969 bytes
 test/docx/golden/image.docx                         | Bin 26727 -> 26647 bytes
 test/docx/golden/inline_code.docx                   | Bin 9849 -> 9769 bytes
 test/docx/golden/inline_formatting.docx             | Bin 10029 -> 9949 bytes
 test/docx/golden/inline_images.docx                 | Bin 26783 -> 26703 bytes
 test/docx/golden/link_in_notes.docx                 | Bin 10071 -> 9991 bytes
 test/docx/golden/links.docx                         | Bin 10242 -> 10162 bytes
 test/docx/golden/lists.docx                         | Bin 10308 -> 10228 bytes
 test/docx/golden/lists_continuing.docx              | Bin 10104 -> 10024 bytes
 test/docx/golden/lists_multiple_initial.docx        | Bin 10186 -> 10106 bytes
 test/docx/golden/lists_restarting.docx              | Bin 10102 -> 10022 bytes
 test/docx/golden/nested_anchors_in_header.docx      | Bin 10206 -> 10126 bytes
 test/docx/golden/notes.docx                         | Bin 10018 -> 9938 bytes
 test/docx/golden/raw-blocks.docx                    | Bin 9950 -> 9870 bytes
 test/docx/golden/raw-bookmarks.docx                 | Bin 10084 -> 10004 bytes
 test/docx/golden/table_one_row.docx                 | Bin 9920 -> 9840 bytes
 test/docx/golden/table_with_list_cell.docx          | Bin 10238 -> 10159 bytes
 test/docx/golden/tables.docx                        | Bin 10279 -> 10200 bytes
 test/docx/golden/track_changes_deletion.docx        | Bin 9893 -> 9813 bytes
 test/docx/golden/track_changes_insertion.docx       | Bin 9876 -> 9796 bytes
 test/docx/golden/track_changes_move.docx            | Bin 9910 -> 9830 bytes
 .../golden/track_changes_scrubbed_metadata.docx     | Bin 10022 -> 9942 bytes
 test/docx/golden/unicode.docx                       | Bin 9835 -> 9755 bytes
 test/docx/golden/verbatim_subsuper.docx             | Bin 9882 -> 9802 bytes
 34 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'test/docx/golden')

diff --git a/data/docx/word/settings.xml b/data/docx/word/settings.xml
index afa0199c9..fca255b90 100644
--- a/data/docx/word/settings.xml
+++ b/data/docx/word/settings.xml
@@ -17,16 +17,6 @@
   <w:characterSpacingControl w:val="doNotCompress" />
   <w:savePreviewPicture />
   <w:rsids>
-    <w:rsidRoot w:val="00590D07" />
-    <w:rsid w:val="00011C8B" />
-    <w:rsid w:val="004E29B3" />
-    <w:rsid w:val="00590D07" />
-    <w:rsid w:val="00784D58" />
-    <w:rsid w:val="008D6863" />
-    <w:rsid w:val="00B86B75" />
-    <w:rsid w:val="00BC48D5" />
-    <w:rsid w:val="00C36279" />
-    <w:rsid w:val="00E315A3" />
   </w:rsids>
   <m:mathPr>
     <m:mathFont m:val="Lucida Grande" />
@@ -44,4 +34,4 @@
   <w:clrSchemeMapping w:bg1="light1" w:t1="dark1" w:bg2="light2" w:t2="dark2" w:accent1="accent1" w:accent2="accent2" w:accent3="accent3" w:accent4="accent4" w:accent5="accent5" w:accent6="accent6" w:hyperlink="hyperlink" w:followedHyperlink="followedHyperlink" />
   <w:decimalSymbol w:val="." />
   <w:listSeparator w:val="," />
-</w:settings>
\ No newline at end of file
+</w:settings>
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index b6973fcfd..f5e4b6428 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index f0d35d3ad..0bbe42bd7 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index fb1c15dec..6b759b522 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index 43d536b65..74e30f651 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 8865010d1..7ee99c87b 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index 10f7bf661..dfaf16e2b 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 2a7d81a34..02992c6bd 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 2fcd14908..9bfe56dca 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 39533f42d..2a37045f2 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index c8d67c45b..ca2c7a261 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 8a7aeec10..c4447d021 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 969237cec..1824c0aae 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index cda936a39..82565a5b1 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 69991e791..0416bcc15 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index 40e848195..b51ec5220 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index 28237a30d..ba0d100c8 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index bf075805e..9632c598b 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index e7d308e13..3269a0528 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 9763e347e..716129170 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index b717ca619..af6edfe86 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index 873e731d2..f141425f7 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 134cb2eaf..93b4222f6 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index c49ed79c8..9d1aa9853 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index 1f2cbb214..b57289fdd 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index a0160cdb4..edb23cc72 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index e51910770..f9cbed156 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index 8029774a9..b585c803d 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 3542b8f9c..313942750 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index b36b4485e..f18b3f85f 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index e30ab06ae..50bdab767 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 11597d578..230b27006 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index c7bff82e5..627276e4b 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index c70f6946e..790278108 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From 0a4c6925b6db433bdb8b9d57c94a7c36be3daea7 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 15 May 2021 15:40:49 -0700
Subject: Docx writer: copy over more settings from reference.docx.

From settings.xml in the reference-doc, we now include:
`zoom`, `embedSystemFonts`, `doNotTrackMoves`, `defaultTabStop`,
`drawingGridHorizontalSpacing`, `drawingGridVerticalSpacing`,
`displayHorizontalDrawingGridEvery`, `displayVerticalDrawingGridEvery`,
`characterSpacingControl`, `savePreviewPicture`, `mathPr`, `themeFontLang`,
`decimalSymbol`, `listSeparator`, `autoHyphenation`, `compat`.

Closes #7240.
---
 src/Text/Pandoc/Writers/Docx.hs                     |  17 ++++++++++++++++-
 test/docx/golden/block_quotes.docx                  | Bin 9981 -> 9981 bytes
 test/docx/golden/codeblock.docx                     | Bin 9830 -> 9830 bytes
 test/docx/golden/comments.docx                      | Bin 10168 -> 10168 bytes
 test/docx/golden/custom_style_no_reference.docx     | Bin 9931 -> 9931 bytes
 test/docx/golden/custom_style_preserve.docx         | Bin 10560 -> 10560 bytes
 test/docx/golden/custom_style_reference.docx        | Bin 12317 -> 12368 bytes
 test/docx/golden/definition_list.docx               | Bin 9830 -> 9830 bytes
 .../docx/golden/document-properties-short-desc.docx | Bin 9835 -> 9835 bytes
 test/docx/golden/document-properties.docx           | Bin 10314 -> 10314 bytes
 test/docx/golden/headers.docx                       | Bin 9969 -> 9969 bytes
 test/docx/golden/image.docx                         | Bin 26647 -> 26647 bytes
 test/docx/golden/inline_code.docx                   | Bin 9769 -> 9769 bytes
 test/docx/golden/inline_formatting.docx             | Bin 9949 -> 9949 bytes
 test/docx/golden/inline_images.docx                 | Bin 26703 -> 26703 bytes
 test/docx/golden/link_in_notes.docx                 | Bin 9991 -> 9991 bytes
 test/docx/golden/links.docx                         | Bin 10162 -> 10162 bytes
 test/docx/golden/lists.docx                         | Bin 10228 -> 10228 bytes
 test/docx/golden/lists_continuing.docx              | Bin 10024 -> 10024 bytes
 test/docx/golden/lists_multiple_initial.docx        | Bin 10106 -> 10106 bytes
 test/docx/golden/lists_restarting.docx              | Bin 10022 -> 10022 bytes
 test/docx/golden/nested_anchors_in_header.docx      | Bin 10126 -> 10126 bytes
 test/docx/golden/notes.docx                         | Bin 9938 -> 9938 bytes
 test/docx/golden/raw-blocks.docx                    | Bin 9870 -> 9870 bytes
 test/docx/golden/raw-bookmarks.docx                 | Bin 10004 -> 10004 bytes
 test/docx/golden/table_one_row.docx                 | Bin 9840 -> 9840 bytes
 test/docx/golden/table_with_list_cell.docx          | Bin 10159 -> 10159 bytes
 test/docx/golden/tables.docx                        | Bin 10200 -> 10200 bytes
 test/docx/golden/track_changes_deletion.docx        | Bin 9813 -> 9813 bytes
 test/docx/golden/track_changes_insertion.docx       | Bin 9796 -> 9796 bytes
 test/docx/golden/track_changes_move.docx            | Bin 9830 -> 9830 bytes
 .../golden/track_changes_scrubbed_metadata.docx     | Bin 9942 -> 9942 bytes
 test/docx/golden/unicode.docx                       | Bin 9755 -> 9755 bytes
 test/docx/golden/verbatim_subsuper.docx             | Bin 9802 -> 9802 bytes
 34 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 75bed1595..e7a49ba02 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -469,12 +469,27 @@ writeDocx opts doc = do
   -- adds references to footnotes or endnotes we don't have...
   -- we do, however, copy some settings over from reference
   let settingsPath = "word/settings.xml"
-      settingsList = [ "autoHyphenation"
+      settingsList = [ "zoom"
+                     , "embedSystemFonts"
+                     , "doNotTrackMoves"
+                     , "defaultTabStop"
+                     , "drawingGridHorizontalSpacing"
+                     , "drawingGridVerticalSpacing"
+                     , "displayHorizontalDrawingGridEvery"
+                     , "displayVerticalDrawingGridEvery"
+                     , "characterSpacingControl"
+                     , "savePreviewPicture"
+                     , "mathPr"
+                     , "themeFontLang"
+                     , "decimalSymbol"
+                     , "listSeparator"
+                     , "autoHyphenation"
                      , "consecutiveHyphenLimit"
                      , "hyphenationZone"
                      , "doNotHyphenateCap"
                      , "evenAndOddHeaders"
                      , "proofState"
+                     , "compat"
                      ]
   settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList
 
diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx
index f5e4b6428..d05020f82 100644
Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ
diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx
index 0bbe42bd7..616c9b1d0 100644
Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ
diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx
index 6b759b522..aa200c8d6 100644
Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ
diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx
index 74e30f651..49cf42f38 100644
Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ
diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx
index 7ee99c87b..e24940478 100644
Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ
diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx
index dfaf16e2b..e7da8f06d 100644
Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ
diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx
index 02992c6bd..df148cfed 100644
Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ
diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx
index 9bfe56dca..2d9e96b15 100644
Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ
diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx
index 2a37045f2..d8f091956 100644
Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ
diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx
index ca2c7a261..8c1b3e870 100644
Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index c4447d021..48b72e283 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx
index 1824c0aae..048ac8f15 100644
Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ
diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx
index 82565a5b1..cf1301c4b 100644
Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ
diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx
index 0416bcc15..662e70556 100644
Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ
diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx
index b51ec5220..d800a5fb4 100644
Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ
diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx
index ba0d100c8..bffdbbaf8 100644
Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ
diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx
index 9632c598b..2b201df28 100644
Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ
diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx
index 3269a0528..257053a78 100644
Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ
diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx
index 716129170..0a3bf1016 100644
Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ
diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx
index af6edfe86..0aa69805f 100644
Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ
diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx
index f141425f7..88dd21abd 100644
Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ
diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx
index 93b4222f6..f02d5951c 100644
Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ
diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx
index 9d1aa9853..58a101b3f 100644
Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ
diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx
index b57289fdd..484c363a2 100644
Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index edb23cc72..f75e567ab 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index f9cbed156..a49f70643 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index b585c803d..f24e27516 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx
index 313942750..de7c44bf4 100644
Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ
diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx
index f18b3f85f..958533459 100644
Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ
diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx
index 50bdab767..04fa05062 100644
Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ
diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx
index 230b27006..ef2dc96f8 100644
Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ
diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx
index 627276e4b..a9de2b367 100644
Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ
diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx
index 790278108..3ebadc59d 100644
Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ
-- 
cgit v1.2.3


From 44484d0dee1bd095240b9faf26f8d1dad8e560ea Mon Sep 17 00:00:00 2001
From: Emily Bourke <undergroundquizscene@gmail.com>
Date: Sun, 11 Apr 2021 21:42:53 +0100
Subject: Docx reader: Read table column widths.

---
 src/Text/Pandoc/Readers/Docx.hs             |   5 +-
 src/Text/Pandoc/Readers/Docx/Parse.hs       |   2 +-
 test/Tests/Writers/Docx.hs                  |   5 ++
 test/docx/0_level_headers.native            |   4 +-
 test/docx/golden/table_one_row.docx         | Bin 9840 -> 9840 bytes
 test/docx/golden/table_with_list_cell.docx  | Bin 10159 -> 10162 bytes
 test/docx/golden/tables-default-widths.docx | Bin 0 -> 10200 bytes
 test/docx/golden/tables.docx                | Bin 10200 -> 10202 bytes
 test/docx/sdt_elements.native               |   6 +-
 test/docx/table_one_row.native              |   8 +--
 test/docx/table_variable_width.native       |  12 ++--
 test/docx/table_with_list_cell.native       |   6 +-
 test/docx/tables-default-widths.native      |  92 ++++++++++++++++++++++++++++
 test/docx/tables.native                     |  18 +++---
 14 files changed, 128 insertions(+), 30 deletions(-)
 create mode 100644 test/docx/golden/tables-default-widths.docx
 create mode 100644 test/docx/tables-default-widths.native

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 22dd54193..375bb7338 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -639,7 +639,7 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
     bodyPartToBlocks $ Paragraph pPr' parparts
 bodyPartToBlocks (Tbl _ _ _ []) =
   return $ para mempty
-bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
+bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do
   let cap' = simpleCaption $ plain $ text cap
       (hdr, rows) = case firstRowFormatting look of
         True | null rs -> (Nothing, [r])
@@ -669,7 +669,8 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
       -- so should be possible. Alignment might be more difficult,
       -- since there doesn't seem to be a column entity in docx.
   let alignments = replicate width AlignDefault
-      widths = replicate width ColWidthDefault
+      totalWidth = sum grid
+      widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
 
   return $ table cap'
                  (zip alignments widths)
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 7325ff300..978d6ff3a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -563,7 +563,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid
 elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
   let cols = findChildrenByName ns "w" "gridCol" element
   in
-   mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
+   mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger))
    cols
 elemToTblGrid _ _ = throwError WrongElem
 
diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs
index 2e0f1e3fb..da25b95e0 100644
--- a/test/Tests/Writers/Docx.hs
+++ b/test/Tests/Writers/Docx.hs
@@ -111,6 +111,11 @@ tests = [ testGroup "inlines"
             def
             "docx/tables.native"
             "docx/golden/tables.docx"
+          , docxTest
+            "tables without explicit column widths"
+            def
+            "docx/tables-default-widths.native"
+            "docx/golden/tables-default-widths.docx"
           , docxTest
             "tables with lists in cells"
             def
diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native
index 7f875891e..ed589b029 100644
--- a/test/docx/0_level_headers.native
+++ b/test/docx/0_level_headers.native
@@ -1,6 +1,6 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 1.0)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -49,4 +49,4 @@
 ,Para [Strong [Str "Table",Space,Str "Page"]]
 ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
 ,Header 1 ("introduction",[],[]) [Str "Introduction"]
-,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
\ No newline at end of file
+,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]]
diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx
index f75e567ab..a7a8f2519 100644
Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ
diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx
index a49f70643..1362d4609 100644
Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ
diff --git a/test/docx/golden/tables-default-widths.docx b/test/docx/golden/tables-default-widths.docx
new file mode 100644
index 000000000..f24e27516
Binary files /dev/null and b/test/docx/golden/tables-default-widths.docx differ
diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx
index f24e27516..9dcbbc9d0 100644
Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ
diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native
index dca82f0a0..a072c0d39 100644
--- a/test/docx/sdt_elements.native
+++ b/test/docx/sdt_elements.native
@@ -1,8 +1,8 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.22069570301081556)
+ ,(AlignDefault,ColWidth 0.22069570301081556)
+ ,(AlignDefault,ColWidth 0.5586085939783689)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
diff --git a/test/docx/table_one_row.native b/test/docx/table_one_row.native
index e9188b145..88d5e3af5 100644
--- a/test/docx/table_one_row.native
+++ b/test/docx/table_one_row.native
@@ -1,8 +1,8 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.3333333333333333)
+ ,(AlignDefault,ColWidth 0.3333333333333333)
+ ,(AlignDefault,ColWidth 0.3333333333333333)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -15,4 +15,4 @@
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     [Plain [Str "Table"]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native
index 229cb83b1..43ac40cca 100644
--- a/test/docx/table_variable_width.native
+++ b/test/docx/table_variable_width.native
@@ -1,10 +1,10 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 2.0096205237840725e-2)
+ ,(AlignDefault,ColWidth 1.9882415820416888e-2)
+ ,(AlignDefault,ColWidth 0.22202030999465527)
+ ,(AlignDefault,ColWidth 0.4761090326028862)
+ ,(AlignDefault,ColWidth 1.0689470871191876e-4)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -42,4 +42,4 @@
    ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
     []]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/table_with_list_cell.native b/test/docx/table_with_list_cell.native
index 06d8606da..51a35184b 100644
--- a/test/docx/table_with_list_cell.native
+++ b/test/docx/table_with_list_cell.native
@@ -1,7 +1,7 @@
 [Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -22,4 +22,4 @@
      ,[Para [Str "A"]]
      ,[Para [Str "Numbered",Space,Str "list."]]]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
diff --git a/test/docx/tables-default-widths.native b/test/docx/tables-default-widths.native
new file mode 100644
index 000000000..e541e5a6e
--- /dev/null
+++ b/test/docx/tables-default-widths.native
@@ -0,0 +1,92 @@
+[Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"]
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [Row ("",[],[])
+  [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Name"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Game"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Fame"]]
+  ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+   [Plain [Str "Blame"]]]])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Lebron",Space,Str "James"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Basketball"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Very",Space,Str "High"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Leaving",Space,Str "Cleveland"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Ryan",Space,Str "Braun"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Baseball"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Moderate"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Steroids"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Russell",Space,Str "Wilson"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Football"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "High"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Tacky",Space,Str "uniform"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Sinple"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Table"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Without"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Plain [Str "Header"]]]])]
+ (TableFoot ("",[],[])
+ [])
+,Table ("",[],[]) (Caption Nothing
+ [])
+ [(AlignDefault,ColWidthDefault)
+ ,(AlignDefault,ColWidthDefault)]
+ (TableHead ("",[],[])
+ [])
+ [(TableBody ("",[],[]) (RowHeadColumns 0)
+  []
+  [Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Simple"]
+    ,Para [Str "Multiparagraph"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Table"]
+    ,Para [Str "Full"]]]
+  ,Row ("",[],[])
+   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "Of"]
+    ,Para [Str "Paragraphs"]]
+   ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+    [Para [Str "In",Space,Str "each"]
+    ,Para [Str "Cell."]]]])]
+ (TableFoot ("",[],[])
+ [])]
\ No newline at end of file
diff --git a/test/docx/tables.native b/test/docx/tables.native
index e541e5a6e..5a89496be 100644
--- a/test/docx/tables.native
+++ b/test/docx/tables.native
@@ -1,10 +1,10 @@
 [Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"]
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)
+ ,(AlignDefault,ColWidth 0.25)]
  (TableHead ("",[],[])
  [Row ("",[],[])
   [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -48,8 +48,8 @@
  [])
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -68,8 +68,8 @@
  [])
 ,Table ("",[],[]) (Caption Nothing
  [])
- [(AlignDefault,ColWidthDefault)
- ,(AlignDefault,ColWidthDefault)]
+ [(AlignDefault,ColWidth 0.5)
+ ,(AlignDefault,ColWidth 0.5)]
  (TableHead ("",[],[])
  [])
  [(TableBody ("",[],[]) (RowHeadColumns 0)
@@ -89,4 +89,4 @@
     [Para [Str "In",Space,Str "each"]
     ,Para [Str "Cell."]]]])]
  (TableFoot ("",[],[])
- [])]
\ No newline at end of file
+ [])]
-- 
cgit v1.2.3


From a3d745e48560a55d9a9ea9fa43ffdd5a8b84987f Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 29 Jun 2021 09:44:37 -0700
Subject: Docx writer: support figure numbers.

These are set up in such a way that they will work with Word's
automatic table of figures.

Closes #7392.
---
 src/Text/Pandoc/Writers/Docx.hs       |  22 +++++++++++++++++++---
 src/Text/Pandoc/Writers/Docx/Types.hs |   2 ++
 test/docx/golden/image.docx           | Bin 26647 -> 26774 bytes
 3 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index d1065eb7d..b3e008b8a 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -36,7 +36,8 @@ import Data.Time.Clock.POSIX
 import Data.Digest.Pure.SHA (sha1, showDigest)
 import Skylighting
 import Text.Collate.Lang (renderLang)
-import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang, translateTerm)
+import qualified Text.Pandoc.Translations as Term
 import qualified Text.Pandoc.Class.PandocMonad as P
 import Data.Time
 import Text.Pandoc.UTF8 (fromTextLazy)
@@ -854,14 +855,29 @@ blockToOpenXML' opts (Plain lst) = do
 -- title beginning with fig: indicates that the image is a figure
 blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do
   setFirstPara
+  fignum <- gets stNextFigureNum
+  modify $ \st -> st{ stNextFigureNum = fignum + 1 }
+  let figid = "fig" <> tshow fignum
+  figname <- translateTerm Term.Figure
   prop <- pStyleM $
         if null alt
         then "Figure"
         else "Captioned Figure"
   paraProps <- local (\env -> env { envParaProperties = EnvProps (Just prop) [] <> envParaProperties env }) (getParaProps False)
   contents <- inlinesToOpenXML opts [Image attr alt (src,tit)]
-  captionNode <- withParaPropM (pStyleM "Image Caption")
-                 $ blockToOpenXML opts (Para alt)
+  captionNode <- if null alt
+                    then return []
+                    else withParaPropM (pStyleM "Image Caption")
+                         $ blockToOpenXML opts
+                            (Para $ Span (figid,[],[])
+                               [Str "Figure\160",
+                                RawInline (Format "openxml")
+                                ("<w:fldSimple w:instr=\"SEQ "
+                                <> figname
+                                <> " \\* ARABIC \"><w:r><w:t>"
+                                <> tshow fignum
+                                <> "</w:t></w:r></w:fldSimple>"),
+                                Str ":", Space] : alt)
   return $
     Elem (mknode "w:p" [] (map Elem paraProps ++ contents))
     : captionNode
diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs
index 006584c30..36ac45ad2 100644
--- a/src/Text/Pandoc/Writers/Docx/Types.hs
+++ b/src/Text/Pandoc/Writers/Docx/Types.hs
@@ -117,6 +117,7 @@ data WriterState = WriterState{
        , stDynamicParaProps :: Set.Set ParaStyleName
        , stDynamicTextProps :: Set.Set CharStyleName
        , stCurId          :: Int
+       , stNextFigureNum  :: Int
        }
 
 defaultWriterState :: WriterState
@@ -137,6 +138,7 @@ defaultWriterState = WriterState{
       , stDynamicParaProps = Set.empty
       , stDynamicTextProps = Set.empty
       , stCurId          = 20
+      , stNextFigureNum  = 1
       }
 
 setFirstPara :: PandocMonad m => WS m ()
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 48b72e283..9fe65326f 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
-- 
cgit v1.2.3


From 0948af9cc549f0ea3b85fa760aa521b8deaad2c0 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Tue, 29 Jun 2021 11:15:40 -0700
Subject: Docx writer: Add table numbering for captioned tables.

The numbers are added using fields, so that Word can
create a list of tables that will update automatically.
---
 src/Text/Pandoc/Writers/Docx/Table.hs |  31 ++++++++++++++++++++++++++++---
 src/Text/Pandoc/Writers/Docx/Types.hs |   2 ++
 test/Tests/Writers/OOXML.hs           |   4 +++-
 test/docx/golden/image.docx           | Bin 26774 -> 26776 bytes
 4 files changed, 33 insertions(+), 4 deletions(-)

(limited to 'test/docx/golden')

diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs
index 49917e315..7a84c5278 100644
--- a/src/Text/Pandoc/Writers/Docx/Table.hs
+++ b/src/Text/Pandoc/Writers/Docx/Table.hs
@@ -17,7 +17,7 @@ import Control.Monad.State.Strict
 import Data.Array
 import Data.Text (Text)
 import Text.Pandoc.Definition
-import Text.Pandoc.Class.PandocMonad (PandocMonad)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm)
 import Text.Pandoc.Writers.Docx.Types
 import Text.Pandoc.Shared
 import Text.Printf (printf)
@@ -25,6 +25,7 @@ import Text.Pandoc.Writers.GridTable hiding (Table)
 import Text.Pandoc.Writers.OOXML
 import Text.Pandoc.XML.Light as XML hiding (Attr)
 import qualified Data.Text as T
+import qualified Text.Pandoc.Translations as Term
 import qualified Text.Pandoc.Writers.GridTable as Grid
 
 tableToOpenXML :: PandocMonad m
@@ -33,15 +34,23 @@ tableToOpenXML :: PandocMonad m
                -> WS m [Content]
 tableToOpenXML blocksToOpenXML gridTable = do
   setFirstPara
-  let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) =
+  let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) =
         gridTable
   let (Caption _maybeShortCaption captionBlocks) = caption
+  tablenum <- gets stNextTableNum
+  unless (null captionBlocks) $
+    modify $ \st -> st{ stNextTableNum = tablenum + 1 }
+  let tableid = if T.null ident
+                   then "table" <> tshow tablenum
+                   else ident
+  tablename <- translateTerm Term.Table
   let captionStr = stringify captionBlocks
   let aligns = map fst $ elems colspecs
   captionXml <- if null captionBlocks
                 then return []
                 else withParaPropM (pStyleM "Table Caption")
-                     $ blocksToOpenXML captionBlocks
+                     $ blocksToOpenXML
+                     $ addLabel tableid tablename tablenum captionBlocks
   -- We set "in table" after processing the caption, because we don't
   -- want the "Table Caption" style to be overwritten with "Compact".
   modify $ \s -> s { stInTable = True }
@@ -81,6 +90,22 @@ tableToOpenXML blocksToOpenXML gridTable = do
   modify $ \s -> s { stInTable = False }
   return $ captionXml ++ [Elem tbl]
 
+addLabel :: Text -> Text -> Int -> [Block] -> [Block]
+addLabel tableid tablename tablenum bs =
+  case bs of
+    (Para ils : rest)  -> Para (label : Space : ils) : rest
+    (Plain ils : rest) -> Plain (label : Space : ils) : rest
+    _ -> Para [label] : bs
+ where
+  label = Span (tableid,[],[])
+            [Str (tablename <> "\160"),
+             RawInline (Format "openxml")
+               ("<w:fldSimple w:instr=\"SEQ Table"
+               <> " \\* ARABIC \"><w:r><w:t>"
+               <> tshow tablenum
+               <> "</w:t></w:r></w:fldSimple>"),
+             Str ":"]
+
 -- | Parts of a table
 data RowType = HeadRow | BodyRow | FootRow
 
diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs
index 36ac45ad2..74b8d2753 100644
--- a/src/Text/Pandoc/Writers/Docx/Types.hs
+++ b/src/Text/Pandoc/Writers/Docx/Types.hs
@@ -118,6 +118,7 @@ data WriterState = WriterState{
        , stDynamicTextProps :: Set.Set CharStyleName
        , stCurId          :: Int
        , stNextFigureNum  :: Int
+       , stNextTableNum   :: Int
        }
 
 defaultWriterState :: WriterState
@@ -139,6 +140,7 @@ defaultWriterState = WriterState{
       , stDynamicTextProps = Set.empty
       , stCurId          = 20
       , stNextFigureNum  = 1
+      , stNextTableNum   = 1
       }
 
 setFirstPara :: PandocMonad m => WS m ()
diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs
index c1e47622d..83f05cfec 100644
--- a/test/Tests/Writers/OOXML.hs
+++ b/test/Tests/Writers/OOXML.hs
@@ -55,7 +55,9 @@ testArchive :: (WriterOptions -> Pandoc -> PandocIO BL.ByteString)
             -> IO Archive
 testArchive writerFn opts fp = do
   txt <- T.readFile fp
-  bs <- runIOorExplode $ readNative def txt >>= writerFn opts
+  bs <- runIOorExplode $ do
+    setTranslations "en-US"
+    readNative def txt >>= writerFn opts
   return $ toArchive bs
 
 compareFileList :: FilePath -> Archive -> Archive -> Maybe String
diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx
index 9fe65326f..7c2d8a9ac 100644
Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ
-- 
cgit v1.2.3