From 5bbd5a9e80c4f4356d71b54315cf75b5ae2f3650 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 3 Dec 2020 09:51:23 -0800 Subject: Docx writer: Support bold and italic in "complex script." Previously bold and italics didn't work properly in LTR text. This commit causes the w:bCs and w:iCs attributes to be used, in addition to w:b and w:i, for bold and italics respectively. Closes #6911. --- test/docx/golden/custom_style_preserve.docx | Bin 10573 -> 10578 bytes test/docx/golden/definition_list.docx | Bin 9844 -> 9850 bytes test/docx/golden/inline_formatting.docx | Bin 9959 -> 9969 bytes 3 files changed, 0 insertions(+), 0 deletions(-) (limited to 'test/docx/golden') diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index ac595cdd4..17804bb81 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 005f5dbe3..21629e208 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index ce1d16daa..e12e3b38d 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ -- cgit v1.2.3 From 00031fc809117cb436397aba83a41ca1d4056f61 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 13 Dec 2020 14:09:59 +0100 Subject: Docx writer: keep raw openxml strings verbatim. Closes: #6933 --- src/Text/Pandoc/Writers/Docx.hs | 7 +++++-- test/Tests/Writers/Docx.hs | 10 ++++++++++ test/docx/golden/raw-blocks.docx | Bin 0 -> 9888 bytes test/docx/golden/raw-bookmarks.docx | Bin 0 -> 10023 bytes test/docx/raw-blocks.native | 6 ++++++ test/docx/raw-bookmarks.native | 3 +++ 6 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 test/docx/golden/raw-blocks.docx create mode 100644 test/docx/golden/raw-bookmarks.docx create mode 100644 test/docx/raw-blocks.native create mode 100644 test/docx/raw-bookmarks.native (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 97048e980..0174a8501 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -978,7 +978,9 @@ blockToOpenXML' opts (Para lst) return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) - | format == Format "openxml" = return (parseXML str) + | format == Format "openxml" = return [ + Text (CData CDataRaw (T.unpack str) Nothing) + ] | otherwise = do report $ BlockNotRendered b return [] @@ -1312,7 +1314,8 @@ inlineToOpenXML' opts (Strikeout lst) = $ inlinesToOpenXML opts lst inlineToOpenXML' _ LineBreak = return [Elem br] inlineToOpenXML' _ il@(RawInline f str) - | f == Format "openxml" = return (parseXML str) + | f == Format "openxml" = return + [Text (CData CDataRaw (T.unpack str) Nothing)] | otherwise = do report $ InlineNotRendered il return [] diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 8f051b4b7..66a5c3d36 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -128,6 +128,16 @@ tests = [ testGroup "inlines" def "docx/codeblock.native" "docx/golden/codeblock.docx" + , docxTest + "raw OOXML blocks" + def + "docx/raw-blocks.native" + "docx/golden/raw-blocks.docx" + , docxTest + "raw bookmark markers" + def + "docx/raw-bookmarks.native" + "docx/golden/raw-bookmarks.docx" ] , testGroup "track changes" [ docxTest diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx new file mode 100644 index 000000000..ae7f8f1f0 Binary files /dev/null and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx new file mode 100644 index 000000000..5e433b736 Binary files /dev/null and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/raw-blocks.native b/test/docx/raw-blocks.native new file mode 100644 index 000000000..d7f985bc3 --- /dev/null +++ b/test/docx/raw-blocks.native @@ -0,0 +1,6 @@ +[Para [Str "Cell",Space,Str "compartments"] +,RawBlock (Format "openxml") "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" +,Para [Str "Ribosome"] +,RawBlock (Format "openxml") "\n" +,Para [Str "Lysosome"] +,RawBlock (Format "openxml") "\n\n"] diff --git a/test/docx/raw-bookmarks.native b/test/docx/raw-bookmarks.native new file mode 100644 index 000000000..1e76655d6 --- /dev/null +++ b/test/docx/raw-bookmarks.native @@ -0,0 +1,3 @@ +[Para [Str "Manual",Space,Str "endnotes."] +,Para [Str "Nullam",Space,Str "eu",Space,Str "ante",Space,Str "vel",Space,Str "est",Space,Str "convallis",Space,Str "dignissim.",Space,Str "Nunc",Space,Str "porta",Space,Str "vulputate",Space,Str "tellus.",Space,Str "Nunc",Space,Str "rutrum",Space,Str "turpis",Space,Str "sed",Space,Str "pede.",Space,Str "Sed",Space,Str "bibendum.",RawInline (Format "openxml") "",Str "Aliquam",Space,Str "posuere."] +,Para [Str "Nunc",Space,Str "aliquet,",Space,Str "augue",Space,Str "nec",Space,Str "adipiscing",Space,Str "interdum,",Space,Str "lacus",Space,Str "tellus",Space,Str "malesuada",Space,Str "massa,",Space,Str "quis",Space,Str "varius",Space,Str "mi",Space,Str "purus",Space,Str "non",Space,Str "odio.",RawInline (Format "openxml") "",Str "Pellentesque",Space,Str "condimentum,",Space,Str "magna",Space,Str "ut",Space,Str "suscipit",Space,Str "hendrerit,",Space,Str "ipsum",Space,Str "augue",Space,Str "ornare",Space,Str "nulla,",Space,Str "non",Space,Str "luctus",Space,Str "diam",Space,Str "neque",Space,Str "sit",Space,Str "amet",Space,Str "urna.",Space,Str "Curabitur",Space,Str "vulputate",Space,Str "vestibulum",Space,Str "lorem."]] -- cgit v1.2.3 From c451207b08edc36fa5c2f1af5556a8d211e023ed Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 12 Jan 2021 09:49:10 -0800 Subject: Docx writer: handle table header using styles. Instead of hard-coding the border and header cell vertical alignment, we now let this be determined by the Table style, making use of Word's "conditional formatting" for the table's first row. For headerless tables, we use the tblLook element to tell Word not to apply conditional first-row formatting. Closes #7008. --- data/docx/word/styles.xml | 15 +++++++++ src/Text/Pandoc/Writers/Docx.hs | 37 +++++++++++---------- test/docx/golden/block_quotes.docx | Bin 10001 -> 10092 bytes test/docx/golden/codeblock.docx | Bin 9853 -> 9944 bytes test/docx/golden/comments.docx | Bin 10188 -> 10279 bytes test/docx/golden/custom_style_no_reference.docx | Bin 9952 -> 10042 bytes test/docx/golden/custom_style_preserve.docx | Bin 10578 -> 10666 bytes test/docx/golden/definition_list.docx | Bin 9850 -> 9941 bytes .../golden/document-properties-short-desc.docx | Bin 9856 -> 9947 bytes test/docx/golden/document-properties.docx | Bin 10332 -> 10423 bytes test/docx/golden/headers.docx | Bin 9989 -> 10080 bytes test/docx/golden/image.docx | Bin 26667 -> 26758 bytes test/docx/golden/inline_code.docx | Bin 9789 -> 9880 bytes test/docx/golden/inline_formatting.docx | Bin 9969 -> 10060 bytes test/docx/golden/inline_images.docx | Bin 26725 -> 26816 bytes test/docx/golden/link_in_notes.docx | Bin 10010 -> 10101 bytes test/docx/golden/links.docx | Bin 10185 -> 10276 bytes test/docx/golden/lists.docx | Bin 10261 -> 10352 bytes test/docx/golden/lists_continuing.docx | Bin 10052 -> 10143 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10141 -> 10232 bytes test/docx/golden/lists_restarting.docx | Bin 10053 -> 10144 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10148 -> 10239 bytes test/docx/golden/notes.docx | Bin 9955 -> 10046 bytes test/docx/golden/raw-blocks.docx | Bin 9888 -> 9980 bytes test/docx/golden/raw-bookmarks.docx | Bin 10023 -> 10115 bytes test/docx/golden/table_one_row.docx | Bin 9834 -> 9932 bytes test/docx/golden/table_with_list_cell.docx | Bin 10199 -> 10249 bytes test/docx/golden/tables.docx | Bin 10225 -> 10266 bytes test/docx/golden/track_changes_deletion.docx | Bin 9833 -> 9924 bytes test/docx/golden/track_changes_insertion.docx | Bin 9816 -> 9907 bytes test/docx/golden/track_changes_move.docx | Bin 9850 -> 9941 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 9962 -> 10053 bytes test/docx/golden/unicode.docx | Bin 9774 -> 9865 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9822 -> 9913 bytes 34 files changed, 35 insertions(+), 17 deletions(-) (limited to 'test/docx/golden') diff --git a/data/docx/word/styles.xml b/data/docx/word/styles.xml index 6bb5a3f52..832b1b25b 100644 --- a/data/docx/word/styles.xml +++ b/data/docx/word/styles.xml @@ -350,6 +350,21 @@ + + + + + + + + + + + + + + + diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a99e13a85..8f498775d 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1023,23 +1023,15 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do _ -> es ++ [Elem $ mknode "w:p" [] ()] headers' <- mapM cellToOpenXML $ zip aligns headers rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - let borderProps = Elem $ mknode "w:tcPr" [] - [ mknode "w:tcBorders" [] - $ mknode "w:bottom" [("w:val","single")] () - , mknode "w:vAlign" [("w:val","bottom")] () ] compactStyle <- pStyleM "Compact" let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] - let mkcell border contents = mknode "w:tc" [] - $ [ borderProps | border ] ++ - if null contents - then emptyCell' - else contents - let mkrow border cells = + let mkcell contents = mknode "w:tc" [] + $ if null contents + then emptyCell' + else contents + let mkrow cells = mknode "w:tr" [] $ - [ mknode "w:trPr" [] - [ mknode "w:cnfStyle" [("w:firstRow","1")] ()] - | border] - ++ map (mkcell border) cells + map mkcell cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths @@ -1047,6 +1039,15 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do [("w:w", show (floor (textwidth * w) :: Integer))] () let hasHeader = not $ all null headers modify $ \s -> s { stInTable = False } + -- for compatibility with Word <= 2007, we include a val with a bitmask + -- 0×0020 Apply first row conditional formatting + -- 0×0040 Apply last row conditional formatting + -- 0×0080 Apply first column conditional formatting + -- 0×0100 Apply last column conditional formatting + -- 0×0200 Do not apply row banding conditional formatting + -- 0×0400 Do not apply column banding conditional formattin + let tblLookVal :: Int + tblLookVal = if hasHeader then 0x20 else 0 return $ caption' ++ [Elem $ @@ -1059,15 +1060,17 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do ,("w:firstColumn","0") ,("w:lastColumn","0") ,("w:noHBand","0") - ,("w:noVBand","0")] () : + ,("w:noVBand","0") + ,("w:val", printf "%04x" tblLookVal) + ] () : [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | hasHeader ] ++ - map (mkrow False) rows' + : [ mkrow headers' | hasHeader ] ++ + map mkrow rows' )] blockToOpenXML' opts el | BulletList lst <- el = addOpenXMLList BulletMarker lst diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index bbc8d8de9..3e1bf16e7 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index e20efcab4..66f055063 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index f1185da98..fb3a02a0a 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index 83243ab8c..bc6c2702a 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 17804bb81..8c555a5bd 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 21629e208..c21b3a5b3 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 5cf8db0b0..92ce144e9 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 14bfab6d4..d21b67309 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index 416743aa1..3558a47bf 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index ef2940f89..606df92a3 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 479ea65ec..759269cac 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index e12e3b38d..c37777080 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 8bd57bb8c..9450b1a73 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 2c6a638fc..6f0b830e6 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index 11e52c4b1..e53889cfb 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 7667990c4..5dbe298b7 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 3e8c6d2b2..194181288 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 05a7cf060..6e0b634f7 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index f5ae4a384..477178e77 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index d02c77271..51110356e 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index f7fdcbe11..b6206cdf5 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index ae7f8f1f0..07b576080 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index 5e433b736..d46095eb7 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index d404878c6..7caba4e93 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 79c395262..6aaa6da61 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index df9680773..5746c5ad0 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index bb73b82f6..5f22dccc6 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index 7df484aaa..ab5c4f56d 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index d717b93ab..085f33162 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 791182db2..1ac86d5c8 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index b64a7b58e..c2c443b19 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index b5116d1a8..5ea18d32e 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From 8ca191604dcd13af27c11d2da225da646ebce6fc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Feb 2021 23:35:19 -0800 Subject: Add new unexported module T.P.XMLParser. This exports functions that uses xml-conduit's parser to produce an xml-light Element or [Content]. This allows existing pandoc code to use a better parser without much modification. The new parser is used in all places where xml-light's parser was previously used. Benchmarks show a significant performance improvement in parsing XML-based formats (especially ODT and FB2). Note that the xml-light types use String, so the conversion from xml-conduit types involves a lot of extra allocation. It would be desirable to avoid that in the future by gradually switching to using xml-conduit directly. This can be done module by module. The new parser also reports errors, which we report when possible. A new constructor PandocXMLError has been added to PandocError in T.P.Error [API change]. Closes #7091, which was the main stimulus. These changes revealed the need for some changes in the tests. The docbook-reader.docbook test lacked definitions for the entities it used; these have been added. And the docx golden tests have been updated, because the new parser does not preserve the order of attributes. Add entity defs to docbook-reader.docbook. Update golden tests for docx. --- MANUAL.txt | 1 + pandoc.cabal | 2 + src/Text/Pandoc/Error.hs | 3 + src/Text/Pandoc/ImageSize.hs | 5 +- src/Text/Pandoc/Readers/DocBook.hs | 52 ++++++++++------ src/Text/Pandoc/Readers/Docx/Parse.hs | 21 ++++--- src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 28 +++++---- src/Text/Pandoc/Readers/EPUB.hs | 17 ++++-- src/Text/Pandoc/Readers/FB2.hs | 10 ++-- src/Text/Pandoc/Readers/JATS.hs | 9 ++- src/Text/Pandoc/Readers/OPML.hs | 10 +++- src/Text/Pandoc/Readers/Odt.hs | 24 ++++---- src/Text/Pandoc/Writers/EPUB.hs | 13 +++- src/Text/Pandoc/Writers/FB2.hs | 11 +++- src/Text/Pandoc/Writers/ODT.hs | 29 +++++---- src/Text/Pandoc/Writers/OOXML.hs | 9 +-- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 4 +- src/Text/Pandoc/XMLParser.hs | 66 +++++++++++++++++++++ test/Tests/Readers/JATS.hs | 1 + test/command/5321.md | 4 +- test/docbook-reader.docbook | 7 ++- test/docx/golden/block_quotes.docx | Bin 10092 -> 10098 bytes test/docx/golden/codeblock.docx | Bin 9944 -> 9950 bytes test/docx/golden/comments.docx | Bin 10279 -> 10285 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10042 -> 10048 bytes test/docx/golden/custom_style_preserve.docx | Bin 10666 -> 10673 bytes test/docx/golden/custom_style_reference.docx | Bin 12434 -> 12434 bytes test/docx/golden/definition_list.docx | Bin 9941 -> 9947 bytes .../golden/document-properties-short-desc.docx | Bin 9947 -> 9953 bytes test/docx/golden/document-properties.docx | Bin 10423 -> 10429 bytes test/docx/golden/headers.docx | Bin 10080 -> 10086 bytes test/docx/golden/image.docx | Bin 26758 -> 26764 bytes test/docx/golden/inline_code.docx | Bin 9880 -> 9886 bytes test/docx/golden/inline_formatting.docx | Bin 10060 -> 10066 bytes test/docx/golden/inline_images.docx | Bin 26816 -> 26822 bytes test/docx/golden/link_in_notes.docx | Bin 10101 -> 10107 bytes test/docx/golden/links.docx | Bin 10276 -> 10282 bytes test/docx/golden/lists.docx | Bin 10352 -> 10358 bytes test/docx/golden/lists_continuing.docx | Bin 10143 -> 10149 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10232 -> 10238 bytes test/docx/golden/lists_restarting.docx | Bin 10144 -> 10150 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10239 -> 10245 bytes test/docx/golden/notes.docx | Bin 10046 -> 10052 bytes test/docx/golden/raw-blocks.docx | Bin 9980 -> 9986 bytes test/docx/golden/raw-bookmarks.docx | Bin 10115 -> 10121 bytes test/docx/golden/table_one_row.docx | Bin 9932 -> 9938 bytes test/docx/golden/table_with_list_cell.docx | Bin 10249 -> 10255 bytes test/docx/golden/tables.docx | Bin 10266 -> 10272 bytes test/docx/golden/track_changes_deletion.docx | Bin 9924 -> 9930 bytes test/docx/golden/track_changes_insertion.docx | Bin 9907 -> 9913 bytes test/docx/golden/track_changes_move.docx | Bin 9941 -> 9947 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10053 -> 10059 bytes test/docx/golden/unicode.docx | Bin 9865 -> 9871 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9913 -> 9919 bytes test/jats-reader.native | 2 +- test/jats-reader.xml | 1 + test/pptx/code-custom.pptx | Bin 28230 -> 28221 bytes test/pptx/code-custom_templated.pptx | Bin 395524 -> 395516 bytes test/pptx/code.pptx | Bin 28229 -> 28220 bytes test/pptx/code_templated.pptx | Bin 395522 -> 395514 bytes test/pptx/document-properties-short-desc.pptx | Bin 27012 -> 27004 bytes .../document-properties-short-desc_templated.pptx | Bin 394298 -> 394288 bytes test/pptx/document-properties.pptx | Bin 27417 -> 27408 bytes test/pptx/document-properties_templated.pptx | Bin 394701 -> 394691 bytes test/pptx/endnotes.pptx | Bin 26969 -> 26962 bytes test/pptx/endnotes_templated.pptx | Bin 394262 -> 394253 bytes test/pptx/endnotes_toc.pptx | Bin 27892 -> 27789 bytes test/pptx/endnotes_toc_templated.pptx | Bin 395186 -> 395083 bytes test/pptx/images.pptx | Bin 44626 -> 44619 bytes test/pptx/images_templated.pptx | Bin 411916 -> 411909 bytes test/pptx/inline_formatting.pptx | Bin 26156 -> 26148 bytes test/pptx/inline_formatting_templated.pptx | Bin 393447 -> 393438 bytes test/pptx/lists.pptx | Bin 27056 -> 27049 bytes test/pptx/lists_templated.pptx | Bin 394349 -> 394340 bytes test/pptx/raw_ooxml.pptx | Bin 26948 -> 26940 bytes test/pptx/raw_ooxml_templated.pptx | Bin 394240 -> 394231 bytes test/pptx/remove_empty_slides.pptx | Bin 44073 -> 44065 bytes test/pptx/remove_empty_slides_templated.pptx | Bin 411359 -> 411352 bytes test/pptx/slide_breaks.pptx | Bin 28582 -> 28575 bytes test/pptx/slide_breaks_slide_level_1.pptx | Bin 27751 -> 27744 bytes .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395045 -> 395038 bytes test/pptx/slide_breaks_templated.pptx | Bin 395875 -> 395868 bytes test/pptx/slide_breaks_toc.pptx | Bin 29539 -> 29532 bytes test/pptx/slide_breaks_toc_templated.pptx | Bin 396833 -> 396826 bytes test/pptx/speaker_notes.pptx | Bin 35444 -> 35436 bytes test/pptx/speaker_notes_after_metadata.pptx | Bin 31683 -> 31675 bytes .../speaker_notes_after_metadata_templated.pptx | Bin 398964 -> 398955 bytes test/pptx/speaker_notes_afterheader.pptx | Bin 30700 -> 30691 bytes test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397988 -> 397979 bytes test/pptx/speaker_notes_afterseps.pptx | Bin 51612 -> 51604 bytes test/pptx/speaker_notes_afterseps_templated.pptx | Bin 418903 -> 418896 bytes test/pptx/speaker_notes_templated.pptx | Bin 402736 -> 402728 bytes test/pptx/start_numbering_at.pptx | Bin 27031 -> 27023 bytes test/pptx/start_numbering_at_templated.pptx | Bin 394323 -> 394314 bytes test/pptx/tables.pptx | Bin 27573 -> 27566 bytes test/pptx/tables_templated.pptx | Bin 394868 -> 394859 bytes test/pptx/two_column.pptx | Bin 26075 -> 26065 bytes test/pptx/two_column_templated.pptx | Bin 393366 -> 393355 bytes 98 files changed, 238 insertions(+), 91 deletions(-) create mode 100644 src/Text/Pandoc/XMLParser.hs (limited to 'test/docx/golden') diff --git a/MANUAL.txt b/MANUAL.txt index 5b90d039a..dc3b4ca77 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1464,6 +1464,7 @@ Nonzero exit codes have the following meanings: 24 PandocCiteprocError 31 PandocEpubSubdirectoryError 43 PandocPDFError + 44 PandocXMLError 47 PandocPDFProgramNotFoundError 61 PandocHttpError 62 PandocShouldNeverHappenError diff --git a/pandoc.cabal b/pandoc.cabal index 72e7c2da5..e56456c68 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -493,6 +493,7 @@ library unicode-transforms >= 0.3 && < 0.4, unordered-containers >= 0.2 && < 0.3, xml >= 1.3.12 && < 1.4, + xml-conduit >= 1.7 && < 1.10, zip-archive >= 0.2.3.4 && < 0.5, zlib >= 0.5 && < 0.7 if os(windows) && arch(i386) @@ -686,6 +687,7 @@ library Text.Pandoc.Lua.PandocLua, Text.Pandoc.Lua.Util, Text.Pandoc.Lua.Walk, + Text.Pandoc.XMLParser, Text.Pandoc.CSS, Text.Pandoc.CSV, Text.Pandoc.RoffChar, diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 204cf15ca..831405f42 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -48,6 +48,7 @@ data PandocError = PandocIOError Text IOError | PandocFailOnWarningError | PandocPDFProgramNotFoundError Text | PandocPDFError Text + | PandocXMLError Text Text | PandocFilterError Text Text | PandocLuaError Text | PandocCouldNotFindDataFileError Text @@ -103,6 +104,8 @@ handleError (Left e) = PandocPDFProgramNotFoundError pdfprog -> err 47 $ pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg + PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <> + (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg PandocFilterError filtername msg -> err 83 $ "Error running filter " <> filtername <> ":\n" <> msg PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e19958f6a..e0a1af8e8 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -45,7 +45,9 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.XML.Light as Xml +import Text.Pandoc.XMLParser (parseXMLElement) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Control.Applicative import qualified Data.Attoparsec.ByteString.Char8 as A @@ -327,7 +329,8 @@ getSize img = svgSize :: WriterOptions -> ByteString -> Maybe ImageSize svgSize opts img = do - doc <- Xml.parseXMLDoc $ UTF8.toString img + doc <- either (const mzero) return $ parseXMLElement + $ TL.fromStrict $ UTF8.toText img let viewboxSize = do vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc [_,_,w,h] <- mapM safeRead (T.words (T.pack vb)) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ada3e98ec..ad0108843 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document. -} module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Control.Monad.State.Strict -import Data.Char (isSpace, toUpper) +import Data.Char (isSpace, toUpper, isLetter) import Data.Default import Data.Either (rights) import Data.Foldable (asum) @@ -21,7 +21,10 @@ import Data.List (intersperse,elemIndex) import Data.Maybe (fromMaybe,mapMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import Control.Monad.Except (throwError) import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options @@ -29,6 +32,7 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) {- @@ -537,22 +541,25 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp + tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $ + parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp) (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) --- We treat specially (issue #1236), converting it --- to
, since xml-light doesn't parse the instruction correctly. --- Other xml instructions are simply removed from the input stream. +-- We treat certain processing instructions by converting them to tags +-- beginning "pi-". handleInstructions :: Text -> Text -handleInstructions = T.pack . handleInstructions' . T.unpack - -handleInstructions' :: String -> String -handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs -handleInstructions' xs = case break (=='<') xs of - (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions' zs - (ys, zs) -> ys ++ handleInstructions' zs +handleInstructions t = + let (x,y) = T.breakOn "" y + in (if T.takeWhile (\c -> isLetter c || c == '-') + (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"] + then x <> " T.drop 2 w <> "/>" + else x <> w <> T.take 2 z) <> + handleInstructions (T.drop 2 z) getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do @@ -892,7 +899,11 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - lift $ report $ IgnoredElement $ T.pack $ qName (elName e) + let qn = T.pack $ qName $ elName e + let name = if "pi-" `T.isPrefixOf` qn + then " qn <> "?>" + else qn + lift $ report $ IgnoredElement name return mempty codeBlockWithLang = do @@ -964,7 +975,7 @@ parseBlock (Elem e) = cs -> map toAlignment cs let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9') || x == '.') s) - let textWidth = case filterChild (named "?dbfo") e of + let textWidth = case filterChild (named "pi-dbfo") e of Just d -> case attrValue "table-width" d of "" -> 1.0 w -> fromMaybe 100.0 (parseWidth w) / 100.0 @@ -1165,12 +1176,15 @@ parseInline (Elem e) = "title" -> return mempty "affiliation" -> skip -- Note: this isn't a real docbook tag; it's what we convert - -- to in handleInstructions, above. A kludge to - -- work around xml-light's inability to parse an instruction. - "br" -> return linebreak + -- to in handleInstructions, above. + "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id where skip = do - lift $ report $ IgnoredElement $ T.pack $ qName (elName e) + let qn = T.pack $ qName $ elName e + let name = if "pi-" `T.isPrefixOf` qn + then " qn <> "?>" + else qn + lift $ report $ IgnoredElement name return mempty innerInlines f = extractSpaces f . mconcat <$> diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index fdcffcc3f..056dab6c2 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -74,6 +74,7 @@ import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC +import Text.Pandoc.XMLParser (parseXMLElement) data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -343,10 +344,16 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e +parseXMLFromEntry :: Entry -> Maybe Element +parseXMLFromEntry entry = + case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of + Left _ -> Nothing + Right el -> Just el + getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf - relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + relsElem <- parseXMLFromEntry entry let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") @@ -362,7 +369,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do docPath <- asks envDocXmlPath entry <- maybeToD $ findEntryByPath docPath zf - docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + docElem <- maybeToD $ parseXMLFromEntry entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem) @@ -401,9 +408,9 @@ constructBogusParStyleData stName = ParStyle archiveToNotes :: Archive -> Notes archiveToNotes zf = let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry fn_namespaces = maybe [] elemToNameSpaces fnElem en_namespaces = maybe [] elemToNameSpaces enElem ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces @@ -415,7 +422,7 @@ archiveToNotes zf = archiveToComments :: Archive -> Comments archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry cmts_namespaces = maybe [] elemToNameSpaces cmtsElem cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces) in @@ -445,7 +452,7 @@ filePathToRelationships :: Archive -> FilePath -> FilePath -> [Relationship] filePathToRelationships ar docXmlPath fp | Just relType <- filePathToRelType fp docXmlPath , Just entry <- findEntryByPath fp ar - , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + , Just relElems <- parseXMLFromEntry entry = mapMaybe (relElemToRelationship relType) $ elChildren relElems filePathToRelationships _ _ _ = [] @@ -527,7 +534,7 @@ archiveToNumbering' zf = case findEntryByPath "word/numbering.xml" zf of Nothing -> Just $ Numbering [] [] [] Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + numberingElem <- parseXMLFromEntry entry let namespaces = elemToNameSpaces numberingElem numElems = findChildrenByName namespaces "w" "num" numberingElem absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index 236167187..edade8654 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -53,6 +53,7 @@ import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) @@ -135,19 +136,22 @@ defaultRunStyle = RunStyle { isBold = Nothing , rParentStyle = Nothing } -archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => - (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) +archiveToStyles' + :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) + => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) archiveToStyles' conv1 conv2 zf = - let stylesElem = findEntryByPath "word/styles.xml" zf >>= - (parseXMLDoc . UTF8.toStringLazy . fromEntry) - in - case stylesElem of - Nothing -> (M.empty, M.empty) - Just styElem -> - let namespaces = elemToNameSpaces styElem - in - ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, - M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) + case findEntryByPath "word/styles.xml" zf of + Nothing -> (M.empty, M.empty) + Just entry -> + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Left _ -> (M.empty, M.empty) + Right styElem -> + let namespaces = elemToNameSpaces styElem + in + ( M.fromList $ map (\r -> (conv1 r, r)) $ + buildBasedOnList namespaces styElem Nothing, + M.fromList $ map (\p -> (conv2 p, p)) $ + buildBasedOnList namespaces styElem Nothing) isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 5e3326e6d..369c4f0c9 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -17,7 +17,7 @@ module Text.Pandoc.Readers.EPUB (readEPUB) where -import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry, +import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry, toArchiveOrFail) import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) @@ -41,9 +41,10 @@ import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI) -import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) +import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy) import Text.Pandoc.Walk (query, walk) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) type Items = M.Map String (FilePath, MimeType) @@ -181,7 +182,7 @@ renameMeta s = T.pack s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" "container.xml") archive - docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry + docElem <- parseXMLDocE metaEntry let namespaces = mapMaybe attrToNSPair (elAttribs docElem) ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) as <- fmap (map attrToPair . elAttribs) @@ -190,7 +191,7 @@ getManifest archive = do let rootdir = dropFileName manifestFile --mime <- lookup "media-type" as manifest <- findEntryByPathE manifestFile archive - (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest) + (rootdir,) <$> parseXMLDocE manifest -- Fixup @@ -284,8 +285,12 @@ findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise . unEscapeString -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: PandocMonad m => String -> m Element -parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc +parseXMLDocE :: PandocMonad m => Entry -> m Element +parseXMLDocE entry = + either (throwError . PandocXMLError fp) return $ parseXMLElement doc + where + doc = UTF8.toTextLazy . fromEntry $ entry + fp = T.pack $ eRelativePath entry findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index b0d2f092b..b804eab4f 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -32,6 +32,7 @@ import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Data.Default import Data.Maybe import Text.HTML.TagSoup.Entity (lookupEntity) @@ -42,6 +43,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) type FB2 m = StateT FB2State m @@ -64,10 +66,10 @@ instance HasMeta FB2State where readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readFB2 _ inp = - case parseXMLDoc $ crFilter inp of - Nothing -> throwError $ PandocParseError "Not an XML document" - Just e -> do - (bs, st) <- runStateT (parseRootElement e) def + case parseXMLElement $ TL.fromStrict $ crFilter inp of + Left msg -> throwError $ PandocXMLError "" msg + Right el -> do + (bs, st) <- runStateT (parseRootElement el) def let authors = if null $ fb2Authors st then id else setMeta "author" (map text $ reverse $ fb2Authors st) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index c638da519..dfd343b7a 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -14,6 +14,8 @@ Conversion of JATS XML to 'Pandoc' document. module Text.Pandoc.Readers.JATS ( readJATS ) where import Control.Monad.State.Strict +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(..)) import Data.Char (isDigit, isSpace, toUpper) import Data.Default import Data.Generics @@ -22,6 +24,7 @@ import qualified Data.Map as Map import Data.Maybe (maybeToList, fromMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -29,6 +32,7 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) @@ -51,8 +55,9 @@ instance Default JATSState where readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readJATS _ inp = do - let tree = normalizeTree . parseXML - $ T.unpack $ crFilter inp + tree <- either (throwError . PandocXMLError "") + (return . normalizeTree) $ + parseXMLContents (TL.fromStrict $ crFilter inp) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 5b8996025..bdadc4dd9 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -19,14 +19,18 @@ import Data.Generics import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.Pandoc.Shared (crFilter, blocksToInlines') import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) +import Control.Monad.Except (throwError) type OPML m = StateT OPMLState m @@ -49,8 +53,10 @@ instance Default OPMLState where readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML opts inp = do (bs, st') <- runStateT - (mapM parseBlock $ normalizeTree $ - parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts } + (case parseXMLContents (TL.fromStrict (crFilter inp)) of + Left msg -> throwError $ PandocXMLError "" msg + Right ns -> mapM parseBlock $ normalizeTree ns) + def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 9943d3147..85308deb1 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Readers.Odt ( readOdt ) where import Codec.Archive.Zip import qualified Text.XML.Light as XML +import Text.Pandoc.XMLParser (parseXMLElement) import qualified Data.ByteString.Lazy as B @@ -66,18 +67,18 @@ bytesToOdt bytes = case toArchiveOrFail bytes of -- archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -archiveToOdt archive = either (Left. PandocParseError) Right $ do - let onFailure msg Nothing = Left msg +archiveToOdt archive = do + let onFailure msg Nothing = Left $ PandocParseError msg onFailure _ (Just x) = Right x contentEntry <- onFailure "Could not find content.xml" (findEntryByPath "content.xml" archive) stylesEntry <- onFailure "Could not find styles.xml" (findEntryByPath "styles.xml" archive) - contentElem <- onFailure "Could not find content element" - (entryToXmlElem contentEntry) - stylesElem <- onFailure "Could not find styles element" - (entryToXmlElem stylesEntry) - styles <- either (\_ -> Left "Could not read styles") Right + contentElem <- entryToXmlElem contentEntry + stylesElem <- entryToXmlElem stylesEntry + styles <- either + (\_ -> Left $ PandocParseError "Could not read styles") + Right (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem)) let filePathIsOdtMedia :: FilePath -> Bool filePathIsOdtMedia fp = @@ -85,10 +86,13 @@ archiveToOdt archive = either (Left. PandocParseError) Right $ do in (dir == "Pictures/") || (dir /= "./" && name == "content.xml") let media = filteredFilesFromArchive archive filePathIsOdtMedia let startState = readerState styles media - either (\_ -> Left "Could not convert opendocument") Right + either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right (runConverter' read_body startState contentElem) -- -entryToXmlElem :: Entry -> Maybe XML.Element -entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry +entryToXmlElem :: Entry -> Either PandocError XML.Element +entryToXmlElem entry = + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Right x -> Right x + Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 1f16f6772..e99fa2567 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -55,8 +55,9 @@ import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) import Text.Printf (printf) import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), - add_attrs, lookupAttr, node, onlyElems, parseXML, + add_attrs, lookupAttr, node, onlyElems, ppElement, showElement, strContent, unode, unqual) +import Text.Pandoc.XMLParser (parseXMLContents) import Text.Pandoc.XML (escapeStringForXML) import Text.DocTemplates (FromContext(lookupContext), Context(..), ToContext(toVal), Val(..)) @@ -160,7 +161,12 @@ mkEntry path content = do getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta - let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts + elts <- case writerEpubMetadata opts of + Nothing -> return [] + Just t -> case parseXMLContents (TL.fromStrict t) of + Left msg -> throwError $ + PandocXMLError "epub metadata" msg + Right ns -> return (onlyElems ns) let md' = foldr addMetadataFromXML md elts let addIdentifier m = if null (epubIdentifier m) @@ -836,7 +842,8 @@ pandocToEPUB version opts doc = do : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] - where titElements = parseXML titRendered + where titElements = either (const []) id $ + parseXMLContents (TL.fromStrict titRendered) titRendered = case P.runPure (writeHtmlStringForEPUB version opts{ writerTemplate = Nothing diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 25b1f28d1..9334d6e9a 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -19,7 +19,7 @@ FictionBook is an XML-based e-book format. For more information see: module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad (zipWithM) -import Control.Monad.Except (catchError) +import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify) import Data.ByteString.Base64 (encode) import Data.Char (isAscii, isControl, isSpace) @@ -27,16 +27,18 @@ import Data.Either (lefts, rights) import Data.List (intercalate) import Data.Text (Text, pack) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) import Text.XML.Light import qualified Text.XML.Light as X import qualified Text.XML.Light.Cursor as XC -import qualified Text.XML.Light.Input as XI +import Text.Pandoc.XMLParser (parseXMLContents) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, @@ -307,7 +309,10 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code" . T.unpack) . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" - then return $ XI.parseXML str + then + case parseXMLContents (TL.fromStrict str) of + Left msg -> throwError $ PandocXMLError "" msg + Right nds -> return nds else return [] blockToXml (Div _ bs) = cMapM blockToXml bs blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 05dfad5eb..a32ff618c 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -13,7 +13,7 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Codec.Archive.Zip -import Control.Monad.Except (catchError) +import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Generics (everywhere', mkT) @@ -27,6 +27,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) @@ -35,10 +36,11 @@ import Text.DocLayout import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, fixDisplayMath) -import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.XML +import Text.Pandoc.XMLParser (parseXMLElement) import Text.TeXMath import Text.XML.Light @@ -172,17 +174,18 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive updateStyleWithLang Nothing arch = return arch updateStyleWithLang (Just lang) arch = do epochtime <- floor `fmap` lift P.getPOSIXTime - return arch{ zEntries = [if eRelativePath e == "styles.xml" - then case parseXMLDoc - (toStringLazy (fromEntry e)) of - Nothing -> e - Just d -> - toEntry "styles.xml" epochtime - ( fromStringLazy - . ppTopElement - . addLang lang $ d ) - else e - | e <- zEntries arch] } + entries <- mapM (\e -> if eRelativePath e == "styles.xml" + then case parseXMLElement + (toTextLazy (fromEntry e)) of + Left msg -> throwError $ + PandocXMLError "styles.xml" msg + Right d -> return $ + toEntry "styles.xml" epochtime + ( fromStringLazy + . ppTopElement + . addLang lang $ d ) + else return e) (zEntries arch) + return arch{ zEntries = entries } addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 3ac007f4e..8f60e70d5 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -35,6 +35,7 @@ import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light as XML +import Text.Pandoc.XMLParser (parseXMLElement) mknode :: Node t => String -> [(String,String)] -> t -> Element mknode s attrs = @@ -62,10 +63,10 @@ parseXml refArchive distArchive relpath = findEntryByPath relpath distArchive of Nothing -> throwError $ PandocSomeError $ T.pack relpath <> " missing in reference file" - Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of - Nothing -> throwError $ PandocSomeError $ - T.pack relpath <> " corrupt in reference file" - Just d -> return d + Just e -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of + Left msg -> + throwError $ PandocXMLError (T.pack relpath) msg + Right d -> return d -- Copied from Util diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 8554db622..cd092969b 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -29,6 +29,7 @@ import Data.Time.Clock (UTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -77,7 +78,8 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer) getPresentationSize refArchive distArchive = do entry <- findEntryByPath "ppt/presentation.xml" refArchive `mplus` findEntryByPath "ppt/presentation.xml" distArchive - presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry + presElement <- either (const Nothing) return $ + parseXMLElement $ UTF8.toTextLazy $ fromEntry entry let ns = elemToNameSpaces presElement sldSize <- findChild (elemName ns "p" "sldSz") presElement cxS <- findAttr (QName "cx" Nothing Nothing) sldSize diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs new file mode 100644 index 000000000..8ad22a66a --- /dev/null +++ b/src/Text/Pandoc/XMLParser.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XMLParser + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Bridge to allow using xml-conduit's parser with xml-light's types. +-} +module Text.Pandoc.XMLParser + ( parseXMLElement + , parseXMLContents + , module Text.XML.Light.Types + ) where + +import qualified Control.Exception as E +import qualified Text.XML as Conduit +import Text.XML.Unresolved (InvalidEventStream(..)) +import qualified Text.XML.Light as Light +import Text.XML.Light.Types +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import qualified Data.Map as M +import Data.Maybe (mapMaybe) + +-- Drop in replacement for parseXMLDoc in xml-light. +parseXMLElement :: TL.Text -> Either T.Text Light.Element +parseXMLElement t = + elementToElement . Conduit.documentRoot <$> + either (Left . T.pack . E.displayException) Right + (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) + +parseXMLContents :: TL.Text -> Either T.Text [Light.Content] +parseXMLContents t = + case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of + Left e -> + case E.fromException e of + Just (ContentAfterRoot _) -> + elContent <$> parseXMLElement ("" <> t <> "") + _ -> Left . T.pack . E.displayException $ e + Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x] + +elementToElement :: Conduit.Element -> Light.Element +elementToElement (Conduit.Element name attribMap nodes) = + Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing + where + attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $ + M.toList attribMap + nameToQname (Conduit.Name localName mbns mbpref) = + case mbpref of + Nothing | "xmlns:" `T.isPrefixOf` localName -> + Light.QName (T.unpack $ T.drop 6 localName) (T.unpack <$> mbns) + (Just "xmlns") + _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns) + (T.unpack <$> mbpref) + +nodeToContent :: Conduit.Node -> Maybe Light.Content +nodeToContent (Conduit.NodeElement el) = + Just (Light.Elem (elementToElement el)) +nodeToContent (Conduit.NodeContent t) = + Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing)) +nodeToContent _ = Nothing + diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs index 525499c86..a9c9a0586 100644 --- a/test/Tests/Readers/JATS.hs +++ b/test/Tests/Readers/JATS.hs @@ -88,6 +88,7 @@ tests = [ testGroup "inline code" "

\n\ \ \n\ \ \n\ + \ \n\ \

" =?> para (math "\\sigma|_{\\{x\\}}") , test jats "math ml only" $ diff --git a/test/command/5321.md b/test/command/5321.md index 081abe2a0..83404632a 100644 --- a/test/command/5321.md +++ b/test/command/5321.md @@ -4,7 +4,7 @@

bar

- + ^D [Para [Image ("fig-1",[],[]) [Str "bar"] ("foo.png","fig:")]] @@ -17,7 +17,7 @@ foo

bar

- + ^D [Para [Image ("fig-1",[],[]) [Str "foo",LineBreak,Str "bar"] ("foo.png","fig:")]] diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook index 02568d8de..5717d78d0 100644 --- a/test/docbook-reader.docbook +++ b/test/docbook-reader.docbook @@ -1,6 +1,11 @@ +"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd" +[ + + + +]>
Pandoc Test Suite diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index 3e1bf16e7..d3b16d0f2 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 66f055063..6293ef493 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index fb3a02a0a..4205a1516 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index bc6c2702a..adb3f23db 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 8c555a5bd..92c8137fe 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index 5f96cc911..f53470617 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index c21b3a5b3..d6af90a72 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 92ce144e9..e18dbe853 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index d21b67309..820299043 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index 3558a47bf..ae0f41d12 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 606df92a3..94cd35dfa 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 759269cac..879f2a25b 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index c37777080..93f86478f 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 9450b1a73..967d297f2 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 6f0b830e6..c5614e2fa 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index e53889cfb..0f39a831f 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 5dbe298b7..07046f223 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 194181288..3656618e6 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 6e0b634f7..8798253d5 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 477178e77..0a24d1840 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index 51110356e..52bb7a217 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index b6206cdf5..182c06c64 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 07b576080..7b69a56a3 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index d46095eb7..3d3a35701 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 7caba4e93..5ae37b406 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 6aaa6da61..c29aa6716 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 5746c5ad0..664493246 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 5f22dccc6..b6d15340e 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index ab5c4f56d..f8e1092d2 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index 085f33162..b4cda82f2 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 1ac86d5c8..ee222efa0 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index c2c443b19..c6f8d9c96 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index 5ea18d32e..ea8146690 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ diff --git a/test/jats-reader.native b/test/jats-reader.native index ab77dd1a0..0715ea8cc 100644 --- a/test/jats-reader.native +++ b/test/jats-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 1 ("headers",[],[]) [Str "Headers"] ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")] diff --git a/test/jats-reader.xml b/test/jats-reader.xml index f75b3e95a..f33cb9ab3 100644 --- a/test/jats-reader.xml +++ b/test/jats-reader.xml @@ -20,6 +20,7 @@ MacFarlane John + Anonymous diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx index aa9b7692a..58070eb3f 100644 Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx index 9aaef4cb5..db9b7e371 100644 Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx index 1737ec757..c7b1ed7d5 100644 Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx index 87fb560ef..6944d92bf 100644 Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx index 961c31020..ae0d28429 100644 Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx index 894738ef7..37c74c69a 100644 Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx index 188e8d826..324e443a1 100644 Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx index 253e8c0a7..c81b983e3 100644 Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx index e230420d2..30ce33db6 100644 Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx index 49384fd65..d6c604968 100644 Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx index cdf1be4ad..000e17ecd 100644 Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx index c4fcbad45..fdcd2e29b 100644 Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx index 4a13b5b7f..e73126376 100644 Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx index 7a6e9700e..e3f968e9e 100644 Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx index 926c8ff3f..eadb9372e 100644 Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx index 16f48e182..8ca6bab2b 100644 Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx index f47b17a74..ae188ee68 100644 Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx index 88109a95e..60301fa50 100644 Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx index 84020708f..17124a50d 100644 Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx index a2f77e945..19ae7dd4e 100644 Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx index 48bf7bc8a..b650b7585 100644 Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx index 23b134a5f..0ab029614 100644 Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx index d6eebeffb..2a6e35080 100644 Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx index a6c76a187..a7bcf6a4b 100644 Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx index 1fbde815b..21b018c25 100644 Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx index cb3af4aa1..4ec4772a4 100644 Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx index dff386885..5983657b6 100644 Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx index 43b125f5e..dd54c7082 100644 Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx index 3314a1c65..b3e5ed5b9 100644 Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx index 27a136838..1078854bb 100644 Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx index 7aa3b6a87..5116c6c4e 100644 Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx index d43709ca7..0c8e49bd9 100644 Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx index 793ea10f6..68695939d 100644 Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx index 2f4d3b820..7ed9b946d 100644 Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx index 94a221398..79fc82345 100644 Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx index 22040c88c..9f943c279 100644 Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx index 18477380b..ac72d8ced 100644 Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx index 4b9d0ba4d..15c7b5469 100644 Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx index 1c5b54185..926c5e699 100644 Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx index 1314f4de4..a37e72d2c 100644 Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx index 9018be36e..7f86533fe 100644 Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx index 35e93af67..89e3db0ab 100644 Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ -- cgit v1.2.3 From 967e7f5fb990b29de48b37be1db40fb149a8cf55 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 14 Feb 2021 22:29:21 -0800 Subject: Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light... ..and add new definitions isomorphic to xml-light's, but with Text instead of String. This allows us to keep most of the code in existing readers that use xml-light, but avoid lots of unnecessary allocation. We also add versions of the functions from xml-light's Text.XML.Light.Output and Text.XML.Light.Proc that operate on our modified XML types, and functions that convert xml-light types to our types (since some of our dependencies, like texmath, use xml-light). Update golden tests for docx and pptx. OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`. Docx: Do a manual traversal to unwrap sdt and smartTag. This is faster, and needed to pass the tests. Benchmarks: A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) C = this commit | Reader | A | B | C | | ------- | ----- | ------ | ----- | | docbook | 18 ms | 12 ms | 10 ms | | opml | 65 ms | 62 ms | 35 ms | | jats | 15 ms | 11 ms | 9 ms | | docx | 72 ms | 69 ms | 44 ms | | odt | 78 ms | 41 ms | 28 ms | | epub | 64 ms | 61 ms | 56 ms | | fb2 | 14 ms | 5 ms | 4 ms | --- .hlint.yaml | 1 + pandoc.cabal | 2 +- src/Text/Pandoc/ImageSize.hs | 9 +- src/Text/Pandoc/Readers/DocBook.hs | 76 +-- src/Text/Pandoc/Readers/Docx/Parse.hs | 163 +++--- src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 31 +- src/Text/Pandoc/Readers/Docx/Util.hs | 27 +- src/Text/Pandoc/Readers/EPUB.hs | 65 +-- src/Text/Pandoc/Readers/FB2.hs | 93 ++-- src/Text/Pandoc/Readers/JATS.hs | 58 +- src/Text/Pandoc/Readers/OPML.hs | 29 +- src/Text/Pandoc/Readers/Odt.hs | 5 +- src/Text/Pandoc/Readers/Odt/ContentReader.hs | 13 +- src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs | 3 +- src/Text/Pandoc/Readers/Odt/Generic/Utils.hs | 33 +- .../Pandoc/Readers/Odt/Generic/XMLConverter.hs | 23 +- src/Text/Pandoc/Readers/Odt/Namespaces.hs | 11 +- src/Text/Pandoc/Readers/Odt/StyleReader.hs | 23 +- src/Text/Pandoc/Writers/Docx.hs | 263 +++++---- src/Text/Pandoc/Writers/EPUB.hs | 356 ++++++------- src/Text/Pandoc/Writers/FB2.hs | 127 ++--- src/Text/Pandoc/Writers/ODT.hs | 16 +- src/Text/Pandoc/Writers/OOXML.hs | 35 +- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 201 +++---- src/Text/Pandoc/XML/Light.hs | 586 +++++++++++++++++++++ src/Text/Pandoc/XMLParser.hs | 66 --- test/Tests/Writers/OOXML.hs | 3 +- test/docx/golden/block_quotes.docx | Bin 10098 -> 10071 bytes test/docx/golden/codeblock.docx | Bin 9950 -> 9920 bytes test/docx/golden/comments.docx | Bin 10285 -> 10258 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10048 -> 10021 bytes test/docx/golden/custom_style_preserve.docx | Bin 10673 -> 10650 bytes test/docx/golden/custom_style_reference.docx | Bin 12434 -> 12403 bytes test/docx/golden/definition_list.docx | Bin 9947 -> 9920 bytes .../golden/document-properties-short-desc.docx | Bin 9953 -> 9925 bytes test/docx/golden/document-properties.docx | Bin 10429 -> 10404 bytes test/docx/golden/headers.docx | Bin 10086 -> 10059 bytes test/docx/golden/image.docx | Bin 26764 -> 26736 bytes test/docx/golden/inline_code.docx | Bin 9886 -> 9859 bytes test/docx/golden/inline_formatting.docx | Bin 10066 -> 10038 bytes test/docx/golden/inline_images.docx | Bin 26822 -> 26793 bytes test/docx/golden/link_in_notes.docx | Bin 10107 -> 10081 bytes test/docx/golden/links.docx | Bin 10282 -> 10251 bytes test/docx/golden/lists.docx | Bin 10358 -> 10332 bytes test/docx/golden/lists_continuing.docx | Bin 10149 -> 10123 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10238 -> 10210 bytes test/docx/golden/lists_restarting.docx | Bin 10150 -> 10122 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10245 -> 10216 bytes test/docx/golden/notes.docx | Bin 10052 -> 10028 bytes test/docx/golden/raw-blocks.docx | Bin 9986 -> 9960 bytes test/docx/golden/raw-bookmarks.docx | Bin 10121 -> 10094 bytes test/docx/golden/table_one_row.docx | Bin 9938 -> 9908 bytes test/docx/golden/table_with_list_cell.docx | Bin 10255 -> 10227 bytes test/docx/golden/tables.docx | Bin 10272 -> 10244 bytes test/docx/golden/track_changes_deletion.docx | Bin 9930 -> 9903 bytes test/docx/golden/track_changes_insertion.docx | Bin 9913 -> 9886 bytes test/docx/golden/track_changes_move.docx | Bin 9947 -> 9920 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10059 -> 10032 bytes test/docx/golden/unicode.docx | Bin 9871 -> 9845 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9919 -> 9892 bytes test/pptx/code-custom.pptx | Bin 28221 -> 28184 bytes test/pptx/code-custom_templated.pptx | Bin 395516 -> 395477 bytes test/pptx/code.pptx | Bin 28220 -> 28183 bytes test/pptx/code_templated.pptx | Bin 395514 -> 395477 bytes test/pptx/document-properties-short-desc.pptx | Bin 27004 -> 26967 bytes .../document-properties-short-desc_templated.pptx | Bin 394288 -> 394253 bytes test/pptx/document-properties.pptx | Bin 27408 -> 27375 bytes test/pptx/document-properties_templated.pptx | Bin 394691 -> 394656 bytes test/pptx/endnotes.pptx | Bin 26962 -> 26928 bytes test/pptx/endnotes_templated.pptx | Bin 394253 -> 394219 bytes test/pptx/endnotes_toc.pptx | Bin 27789 -> 27747 bytes test/pptx/endnotes_toc_templated.pptx | Bin 395083 -> 395041 bytes test/pptx/images.pptx | Bin 44619 -> 44579 bytes test/pptx/images_templated.pptx | Bin 411909 -> 411870 bytes test/pptx/inline_formatting.pptx | Bin 26148 -> 26121 bytes test/pptx/inline_formatting_templated.pptx | Bin 393438 -> 393412 bytes test/pptx/lists.pptx | Bin 27049 -> 27015 bytes test/pptx/lists_templated.pptx | Bin 394340 -> 394307 bytes test/pptx/raw_ooxml.pptx | Bin 26940 -> 26908 bytes test/pptx/raw_ooxml_templated.pptx | Bin 394231 -> 394198 bytes test/pptx/remove_empty_slides.pptx | Bin 44065 -> 44025 bytes test/pptx/remove_empty_slides_templated.pptx | Bin 411352 -> 411311 bytes test/pptx/slide_breaks.pptx | Bin 28575 -> 28531 bytes test/pptx/slide_breaks_slide_level_1.pptx | Bin 27744 -> 27705 bytes .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395038 -> 395000 bytes test/pptx/slide_breaks_templated.pptx | Bin 395868 -> 395825 bytes test/pptx/slide_breaks_toc.pptx | Bin 29532 -> 29481 bytes test/pptx/slide_breaks_toc_templated.pptx | Bin 396826 -> 396776 bytes test/pptx/speaker_notes.pptx | Bin 35436 -> 35360 bytes test/pptx/speaker_notes_after_metadata.pptx | Bin 31675 -> 31636 bytes .../speaker_notes_after_metadata_templated.pptx | Bin 398955 -> 398915 bytes test/pptx/speaker_notes_afterheader.pptx | Bin 30691 -> 30657 bytes test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397979 -> 397943 bytes test/pptx/speaker_notes_afterseps.pptx | Bin 51604 -> 51548 bytes test/pptx/speaker_notes_afterseps_templated.pptx | Bin 418896 -> 418834 bytes test/pptx/speaker_notes_templated.pptx | Bin 402728 -> 402650 bytes test/pptx/start_numbering_at.pptx | Bin 27023 -> 26991 bytes test/pptx/start_numbering_at_templated.pptx | Bin 394314 -> 394283 bytes test/pptx/tables.pptx | Bin 27566 -> 27532 bytes test/pptx/tables_templated.pptx | Bin 394859 -> 394827 bytes test/pptx/two_column.pptx | Bin 26065 -> 26038 bytes test/pptx/two_column_templated.pptx | Bin 393355 -> 393327 bytes 102 files changed, 1388 insertions(+), 930 deletions(-) create mode 100644 src/Text/Pandoc/XML/Light.hs delete mode 100644 src/Text/Pandoc/XMLParser.hs (limited to 'test/docx/golden') diff --git a/.hlint.yaml b/.hlint.yaml index d5ebffd34..e482b2b37 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -14,6 +14,7 @@ - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained - ignore: {name: "Use &&&"} - ignore: {name: "Use String"} +- ignore: {name: "Use camelCase"} - ignore: {name: "Use fmap"} # specific for GHC 7.8 compat - ignore: {name: "Use isDigit"} diff --git a/pandoc.cabal b/pandoc.cabal index 3c7063f6c..22aebd55e 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -687,7 +687,7 @@ library Text.Pandoc.Lua.PandocLua, Text.Pandoc.Lua.Util, Text.Pandoc.Lua.Walk, - Text.Pandoc.XMLParser, + Text.Pandoc.XML.Light, Text.Pandoc.CSS, Text.Pandoc.CSV, Text.Pandoc.RoffChar, diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e0a1af8e8..bb1aa6351 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -44,8 +44,7 @@ import Numeric (showFFloat) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Text.XML.Light as Xml -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light hiding (Attr) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE @@ -332,12 +331,12 @@ svgSize opts img = do doc <- either (const mzero) return $ parseXMLElement $ TL.fromStrict $ UTF8.toText img let viewboxSize = do - vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc - [_,_,w,h] <- mapM safeRead (T.words (T.pack vb)) + vb <- findAttrBy (== QName "viewBox" Nothing Nothing) doc + [_,_,w,h] <- mapM safeRead (T.words vb) return (w,h) let dpi = fromIntegral $ writerDpi opts let dirToInt dir = do - dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack + dim <- findAttrBy (== QName dir Nothing Nothing) doc >>= lengthToDim return $ inPixel opts dim w <- dirToInt "width" <|> (fst <$> viewboxSize) h <- dirToInt "height" <|> (snd <$> viewboxSize) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ad0108843..e201b54fe 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -31,8 +31,7 @@ import Text.Pandoc.Options import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light {- @@ -578,26 +577,27 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> + convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr elt = - maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function named :: Text -> Element -> Bool -named s e = qName (elName e) == T.unpack s +named s e = qName (elName e) == s -- @@ -634,7 +634,7 @@ isBlockElement :: Content -> Bool isBlockElement (Elem e) = qName (elName e) `elem` blockTags isBlockElement _ = False -blockTags :: [String] +blockTags :: [Text] blockTags = [ "abstract" , "ackno" @@ -721,7 +721,7 @@ blockTags = , "variablelist" ] ++ admonitionTags -admonitionTags :: [String] +admonitionTags :: [Text] admonitionTags = ["important","caution","note","tip","warning"] -- Trim leading and trailing newline characters @@ -779,10 +779,10 @@ getBlocks e = mconcat <$> parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE -parseBlock (Text (CData _ s _)) = if all isSpace s +parseBlock (Text (CData _ s _)) = if T.all isSpace s then return mempty - else return $ plain $ trimInlines $ text $ T.pack s -parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x + else return $ plain $ trimInlines $ text s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper x parseBlock (Elem e) = case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated @@ -837,7 +837,7 @@ parseBlock (Elem e) = "refsect2" -> sect 2 "refsect3" -> sect 3 "refsection" -> gets dbSectionLevel >>= sect . (+1) - l | l `elem` admonitionTags -> parseAdmonition $ T.pack l + l | l `elem` admonitionTags -> parseAdmonition l "area" -> skip "areaset" -> skip "areaspec" -> skip @@ -899,7 +899,7 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - let qn = T.pack $ qName $ elName e + let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn then " qn <> "?>" else qn @@ -911,7 +911,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ T.pack $ strContentRecursive e + $ trimNl $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -965,7 +965,7 @@ parseBlock (Elem e) = w <- findAttr (unqual "colwidth") c n <- safeRead $ "0" <> T.filter (\x -> (x >= '0' && x <= '9') - || x == '.') (T.pack w) + || x == '.') w if n > 0 then Just n else Nothing let numrows = case bodyrows of [] -> 0 @@ -1048,12 +1048,12 @@ parseMixed container conts = do x <- parseMixed container rs return $ p <> b <> x -parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell] +parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell] parseRow cn = do let isEntry x = named "entry" x || named "td" x || named "th" x mapM (parseEntry cn) . filterChildren isEntry -parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell +parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell parseEntry cn el = do let colDistance sa ea = do let iStrt = elemIndex sa cn @@ -1075,7 +1075,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines getInlines e' = trimInlines . mconcat <$> mapM parseInline (elContent e') -strContentRecursive :: Element -> String +strContentRecursive :: Element -> Text strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -1084,9 +1084,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text $ T.pack s +parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = - return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref + return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref) parseInline (Elem e) = case qName (elName e) of "anchor" -> do @@ -1138,7 +1138,7 @@ parseInline (Elem e) = "userinput" -> codeWithLang "systemitem" -> codeWithLang "varargs" -> return $ code "(...)" - "keycap" -> return (str $ T.pack $ strContent e) + "keycap" -> return (str $ strContent e) "keycombo" -> keycombo <$> mapM parseInline (elContent e) "menuchoice" -> menuchoice <$> @@ -1150,17 +1150,17 @@ parseInline (Elem e) = let title = case attrValue "endterm" e of "" -> maybe "???" xrefTitleByElem (findElementById linkend content) - endterm -> maybe "???" (T.pack . strContent) + endterm -> maybe "???" strContent (findElementById endterm content) return $ link ("#" <> linkend) "" (text title) - "email" -> return $ link ("mailto:" <> T.pack (strContent e)) "" - $ str $ T.pack $ strContent e - "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e + "email" -> return $ link ("mailto:" <> strContent e) "" + $ str $ strContent e + "uri" -> return $ link (strContent e) "" $ str $ strContent e "ulink" -> innerInlines (link (attrValue "url" e) "") "link" -> do ils <- innerInlines id let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> T.pack h + Just h -> h _ -> "#" <> attrValue "linkend" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, T.words $ attrValue "role" e, []) @@ -1180,7 +1180,7 @@ parseInline (Elem e) = "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id where skip = do - let qn = T.pack $ qName $ elName e + let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn then " qn <> "?>" else qn @@ -1193,7 +1193,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -1234,10 +1234,10 @@ parseInline (Elem e) = "sect5" -> descendantContent "title" el "cmdsynopsis" -> descendantContent "command" el "funcsynopsis" -> descendantContent "function" el - _ -> T.pack $ qName (elName el) ++ "_title" + _ -> qName (elName el) <> "_title" where xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" (T.pack . strContent) + descendantContent name = maybe "???" strContent . filterElementName (\n -> qName n == name) -- | Extract a math equation from an element @@ -1258,7 +1258,7 @@ equation e constructor = mathMLEquations :: [Text] mathMLEquations = map writeTeX $ rights $ readMath (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") - (readMathML . T.pack . showElement) + (readMathML . showElement) latexEquations :: [Text] latexEquations = readMath (\x -> qName (elName x) == "mathphrase") @@ -1272,8 +1272,8 @@ equation e constructor = -- | Get the actual text stored in a CData block. 'showContent' -- returns the text still surrounded by the [[CDATA]] tags. showVerbatimCData :: Content -> Text -showVerbatimCData (Text (CData _ d _)) = T.pack d -showVerbatimCData c = T.pack $ showContent c +showVerbatimCData (Text (CData _ d _)) = d +showVerbatimCData c = showContent c -- | Set the prefix of a name to 'Nothing' diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 056dab6c2..c76f3c171 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -63,6 +63,7 @@ import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M import qualified Data.Text as T +import Data.Text (Text) import Data.Maybe import System.FilePath import Text.Pandoc.Readers.Docx.Util @@ -72,9 +73,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) -import Text.XML.Light -import qualified Text.XML.Light.Cursor as XMLC -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -128,37 +127,23 @@ mapD f xs = in concatMapM handler xs -unwrap :: NameSpaces -> Content -> [Content] -unwrap ns (Elem element) +unwrapElement :: NameSpaces -> Element -> [Element] +unwrapElement ns element | isElem ns "w" "sdt" element , Just sdtContent <- findChildByName ns "w" "sdtContent" element - = concatMap (unwrap ns . Elem) (elChildren sdtContent) + = concatMap (unwrapElement ns) (elChildren sdtContent) | isElem ns "w" "smartTag" element - = concatMap (unwrap ns . Elem) (elChildren element) -unwrap _ content = [content] + = concatMap (unwrapElement ns) (elChildren element) + | otherwise + = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }] -unwrapChild :: NameSpaces -> Content -> Content -unwrapChild ns (Elem element) = - Elem $ element { elContent = concatMap (unwrap ns) (elContent element) } -unwrapChild _ content = content +unwrapContent :: NameSpaces -> Content -> [Content] +unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element +unwrapContent _ content = [content] -walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor -walkDocument' ns cur = - let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur - in - case XMLC.nextDF modifiedCur of - Just cur' -> walkDocument' ns cur' - Nothing -> XMLC.root modifiedCur - -walkDocument :: NameSpaces -> Element -> Maybe Element +walkDocument :: NameSpaces -> Element -> Element walkDocument ns element = - let cur = XMLC.fromContent (Elem element) - cur' = walkDocument' ns cur - in - case XMLC.toTree cur' of - Elem element' -> Just element' - _ -> Nothing - + element{ elContent = concatMap (unwrapContent ns) (elContent element) } newtype Docx = Docx Document deriving Show @@ -361,9 +346,9 @@ getDocumentXmlPath zf = do fp <- findAttr (QName "Target" Nothing Nothing) rel -- sometimes there will be a leading slash, which windows seems to -- have trouble with. - return $ case fp of + return $ case T.unpack fp of '/' : fp' -> fp' - _ -> fp + fp' -> fp' archiveToDocument :: Archive -> D Document archiveToDocument zf = do @@ -372,7 +357,7 @@ archiveToDocument zf = do docElem <- maybeToD $ parseXMLFromEntry entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem - let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem) + let bodyElem' = walkDocument namespaces bodyElem body <- elemToBody namespaces bodyElem' return $ Document namespaces body @@ -414,8 +399,8 @@ archiveToNotes zf = fn_namespaces = maybe [] elemToNameSpaces fnElem en_namespaces = maybe [] elemToNameSpaces enElem ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote" - en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote" + fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns + en = enElem >>= elemToNotes ns "endnote" . walkDocument ns in Notes ns fn en @@ -424,7 +409,8 @@ archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf >>= parseXMLFromEntry cmts_namespaces = maybe [] elemToNameSpaces cmtsElem - cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces) + cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$> + cmtsElem in case cmts of Just c -> Comments cmts_namespaces c @@ -443,8 +429,8 @@ filePathToRelType path docXmlPath = relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = do - relId <- findAttrText (QName "Id" Nothing Nothing) element - target <- findAttrText (QName "Target" Nothing Nothing) element + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element return $ Relationship relType relId target relElemToRelationship _ _ = Nothing @@ -485,10 +471,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride loElemToLevelOverride ns element | isElem ns "w" "lvlOverride" element = do - ilvl <- findAttrTextByName ns "w" "ilvl" element + ilvl <- findAttrByName ns "w" "ilvl" element let startOverride = findChildByName ns "w" "startOverride" element >>= findAttrByName ns "w" "val" - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + >>= stringToInteger lvl = findChildByName ns "w" "lvl" element >>= levelElemToLevel ns return $ LevelOverride ilvl startOverride lvl @@ -497,9 +483,9 @@ loElemToLevelOverride _ _ = Nothing numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttrTextByName ns "w" "numId" element + numId <- findAttrByName ns "w" "numId" element absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" let lvlOverrides = mapMaybe (loElemToLevelOverride ns) (findChildrenByName ns "w" "lvlOverride" element) @@ -509,7 +495,7 @@ numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrTextByName ns "w" "abstractNumId" element + absNumId <- findAttrByName ns "w" "abstractNumId" element let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels @@ -518,14 +504,14 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttrTextByName ns "w" "ilvl" element + ilvl <- findAttrByName ns "w" "ilvl" element fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" txt <- findChildByName ns "w" "lvlText" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" let start = findChildByName ns "w" "start" element >>= findAttrByName ns "w" "val" - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + >>= stringToInteger return (Level ilvl fmt txt start) levelElemToLevel _ _ = Nothing @@ -546,11 +532,11 @@ archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element) +elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element | isElem ns "w" (notetype <> "s") element = let pairs = mapMaybe - (\e -> findAttrTextByName ns "w" "id" e >>= + (\e -> findAttrByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" notetype element) in @@ -562,7 +548,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttrTextByName ns "w" "id" e >>= + (\e -> findAttrByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" "comment" element) in @@ -622,12 +608,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element = stringToInteger , hangingParIndent = findAttrByName ns "w" "hanging" element >>= - stringToInteger} + stringToInteger } elemToParIndentation _ _ = Nothing -testBitMask :: String -> Int -> Bool +testBitMask :: Text -> Int -> Bool testBitMask bitMaskS n = - case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of [] -> False ((n', _) : _) -> (n' .|. n) /= 0 @@ -642,7 +628,7 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do - expsLst <- eitherToD $ readOMML $ T.pack $ showElement c + expsLst <- eitherToD $ readOMML $ showElement c return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element @@ -666,7 +652,7 @@ elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g @@ -705,8 +691,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text) getTitleAndAlt ns element = let mbDocPr = findChildByName ns "wp" "inline" element >>= findChildByName ns "wp" "docPr" - title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title") - alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr") + title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart @@ -718,7 +704,7 @@ elemToParPart ns element = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrTextByName ns "r" "embed" + >>= findAttrByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -728,7 +714,7 @@ elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrTextByName ns "r" "id" + >>= findAttrByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -797,7 +783,7 @@ elemToParPart ns element fldCharState <- gets stateFldCharState case fldCharState of FldCharOpen -> do - info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText + info <- eitherToD $ parseFieldInfo $ strContent instrText modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} return NullParPart _ -> return NullParPart @@ -818,48 +804,48 @@ elemToParPart ns element return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrTextByName ns "w" "id" element - , Just bmName <- findAttrTextByName ns "w" "name" element = + , Just bmId <- findAttrByName ns "w" "id" element + , Just bmName <- findAttrByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttrTextByName ns "r" "id" element = do + , Just relId <- findAttrByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> - case findAttrTextByName ns "w" "anchor" element of + case findAttrByName ns "w" "anchor" element of Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrTextByName ns "w" "anchor" element = do + , Just anchor <- findAttrByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrTextByName ns "w" "id" element = do + , Just cmtId <- findAttrByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrTextByName ns "w" "id" element = + , Just cmtId <- findAttrByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element) + fmap PlainOMath (eitherToD $ readOMML $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttrTextByName ns "w" "id" element - , Just cmtAuthor <- findAttrTextByName ns "w" "author" element - , cmtDate <- findAttrTextByName ns "w" "date" element = do + , Just cmtId <- findAttrByName ns "w" "id" element + , Just cmtAuthor <- findAttrByName ns "w" "author" element + , cmtDate <- findAttrByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem @@ -878,7 +864,7 @@ elemToExtent drawingElem = where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack + >>= findAttr (QName at Nothing Nothing) >>= safeRead childElemToRun :: NameSpaces -> Element -> D Run @@ -889,7 +875,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= @@ -902,7 +888,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrTextByName ns "w" "id" element = do + , Just fnId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -910,7 +896,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrTextByName ns "w" "id" element = do + , Just enId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -963,15 +949,15 @@ getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrTextByName ns "w" "id" element - , Just cAuthor <- findAttrTextByName ns "w" "author" element - , mcDate <- findAttrTextByName ns "w" "date" element = + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , mcDate <- findAttrByName ns "w" "date" element = Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrTextByName ns "w" "id" element - , Just cAuthor <- findAttrTextByName ns "w" "author" element - , mcDate <- findAttrTextByName ns "w" "date" element = + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , mcDate <- findAttrByName ns "w" "date" element = Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate) getTrackedChange _ _ = Nothing @@ -980,7 +966,7 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (fmap ParaStyleId . findAttrTextByName ns "w" "val") + (fmap ParaStyleId . findAttrByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style @@ -1012,7 +998,7 @@ elemToRunStyleD ns element charStyles <- asks envCharStyles let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrTextByName ns "w" "val" >>= + findAttrByName ns "w" "val" >>= flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle @@ -1022,7 +1008,7 @@ elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element || isElem ns "m" "t" element = do - let str = T.pack $ strContent element + let str = strContent element font <- asks envFont case font of Nothing -> return $ TextRun str @@ -1044,14 +1030,14 @@ getSymChar :: NameSpaces -> Element -> RunElem getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = - case readLitChar ("\\x" ++ s) of + case readLitChar ("\\x" ++ T.unpack s) of [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element - getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element - lowerFromPrivate ('F':xs) = '0':xs - lowerFromPrivate xs = xs + getFont = textToFont =<< findAttrByName ns "w" "font" element + lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t + | otherwise = t getSymChar _ _ = TextRun "" elemToRunElems :: NameSpaces -> Element -> D [RunElem] @@ -1061,8 +1047,9 @@ elemToRunElems ns element let qualName = elemName ns "w" let font = do fontElem <- findElement (qualName "rFonts") element - textToFont . T.pack =<< - foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] + textToFont =<< + foldr ((<|>) . (flip findAttr fontElem . qualName)) + Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index edade8654..0d7271d6a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -48,12 +48,13 @@ import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.Text.Read +import Data.Text (Text) import Data.Maybe import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) @@ -109,7 +110,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool , isRTL :: Maybe Bool , isForceCTL :: Maybe Bool , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String + , rUnderline :: Maybe Text , rParentStyle :: Maybe CharStyle } deriving Show @@ -159,7 +160,7 @@ isBasedOnStyle ns element parentStyle , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrTextByName ns "w" "val" + findAttrByName ns "w" "val" , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element @@ -169,7 +170,7 @@ isBasedOnStyle ns element parentStyle | otherwise = False class HasStyleId a => ElemToStyle a where - cStyleType :: Maybe a -> String + cStyleType :: Maybe a -> Text elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a class FromStyleId (StyleId a) => HasStyleId a where @@ -226,8 +227,10 @@ buildBasedOnList ns element rootStyle = stys -> stys ++ concatMap (buildBasedOnList ns element . Just) stys -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) +stringToInteger :: Text -> Maybe Integer +stringToInteger s = case Data.Text.Read.decimal s of + Right (x,_) -> Just x + Left _ -> Nothing checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool checkOnOff ns rPr tag @@ -247,7 +250,7 @@ checkOnOff _ _ _ = Nothing elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) + = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) <*> getElementStyleName ns element <*> Just (elemToRunStyle ns element parentStyle) @@ -281,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger . T.unpack =<< + , Just n <- stringToInteger =<< (T.stripPrefix "heading " . T.toLower $ fromStyleName styleName) , n > 0 = Just (styleName, fromInteger n) @@ -289,8 +292,8 @@ getHeaderLevel _ _ = Nothing getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val") - <|> findAttrTextByName ns "w" "styleId" el) + ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") + <|> findAttrByName ns "w" "styleId" el) getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text) getNumInfo ns element = do @@ -298,15 +301,15 @@ getNumInfo ns element = do findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= findChildByName ns "w" "ilvl" >>= - findAttrTextByName ns "w" "val") + findAttrByName ns "w" "val") numId <- numPr >>= findChildByName ns "w" "numId" >>= - findAttrTextByName ns "w" "val" + findAttrByName ns "w" "val" return (numId, lvl) elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle elemToParStyleData ns element parentStyle - | Just styleId <- findAttrTextByName ns "w" "styleId" element + | Just styleId <- findAttrByName ns "w" "styleId" element , Just styleName <- getElementStyleName ns element = Just $ ParStyle { diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index f9c9a8e26..21df03d9e 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.StyleMaps Copyright : © 2014-2020 Jesse Rosenthal , @@ -18,51 +19,45 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName - , findAttrText , findAttrByName - , findAttrTextByName ) where import Data.Maybe (mapMaybe) import qualified Data.Text as T -import Text.XML.Light +import Data.Text (Text) +import Text.Pandoc.XML.Light -type NameSpaces = [(String, String)] +type NameSpaces = [(Text, Text)] elemToNameSpaces :: Element -> NameSpaces elemToNameSpaces = mapMaybe attrToNSPair . elAttribs -attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair :: Attr -> Maybe (Text, Text) attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) attrToNSPair _ = Nothing -elemName :: NameSpaces -> String -> String -> QName +elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) -isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = let ns' = ns ++ elemToNameSpaces element in qName (elName element) == name && qURI (elName element) == lookup prefix ns' -findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element +findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element findChildByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChild (elemName ns' pref name) el -findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element] +findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element] findChildrenByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChildren (elemName ns' pref name) el -findAttrText :: QName -> Element -> Maybe T.Text -findAttrText x = fmap T.pack . findAttr x - -findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String +findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text findAttrByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findAttr (elemName ns' pref name) el -findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text -findAttrTextByName a b c = fmap T.pack . findAttrByName a b c diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 369c4f0c9..eb8d2405d 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -23,8 +23,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) -import Data.List (isInfixOf) import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (mapMaybe) import qualified Data.Text.Lazy as TL @@ -40,13 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension) import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI) +import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow) import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy) import Text.Pandoc.Walk (query, walk) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light -type Items = M.Map String (FilePath, MimeType) +type Items = M.Map Text (FilePath, MimeType) readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc readEPUB opts bytes = case toArchiveOrFail bytes of @@ -126,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] -type CoverId = String +type CoverId = Text type CoverImage = FilePath -parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items) +parseManifest :: (PandocMonad m) + => Element -> Maybe CoverId -> m (Maybe CoverImage, Items) parseManifest content coverId = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest r <- mapM parseItem items let cover = findAttr (emptyName "href") =<< filterChild findCover manifest - return (cover `mplus` coverId, M.fromList r) + return (T.unpack <$> (cover `mplus` coverId), M.fromList r) where - findCover e = maybe False (isInfixOf "cover-image") + findCover e = maybe False (T.isInfixOf "cover-image") (findAttr (emptyName "properties") e) || Just True == liftM2 (==) coverId (findAttr (emptyName "id") e) parseItem e = do uid <- findAttrE (emptyName "id") e href <- findAttrE (emptyName "href") e mime <- findAttrE (emptyName "media-type") e - return (uid, (href, T.pack mime)) + return (uid, (T.unpack href, mime)) parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do @@ -173,11 +173,11 @@ parseMeta content = do -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem parseMetaItem :: Element -> Meta -> Meta parseMetaItem e@(stripNamespace . elName -> field) meta = - addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta + addMetaField (renameMeta field) (B.str $ strContent e) meta -renameMeta :: String -> T.Text +renameMeta :: Text -> Text renameMeta "creator" = "author" -renameMeta s = T.pack s +renameMeta s = s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do @@ -187,7 +187,7 @@ getManifest archive = do ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) as <- fmap (map attrToPair . elAttribs) (findElementE (QName "rootfile" (Just ns) Nothing) docElem) - manifestFile <- mkE "Root not found" (lookup "full-path" as) + manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as) let rootdir = dropFileName manifestFile --mime <- lookup "media-type" as manifest <- findEntryByPathE manifestFile archive @@ -201,7 +201,8 @@ fixInternalReferences pathToFile = . walk (fixBlockIRs filename) . walk (fixInlineIRs filename) where - (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile + (root, T.unpack . escapeURI . T.pack -> filename) = + splitFileName pathToFile fixInlineIRs :: String -> Inline -> Inline fixInlineIRs s (Span as v) = @@ -214,7 +215,7 @@ fixInlineIRs s (Link as is t) = Link (fixAttrs s as) is t fixInlineIRs _ v = v -prependHash :: [T.Text] -> Inline -> Inline +prependHash :: [Text] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) | or [s `T.isPrefixOf` url | s <- ps] = Link attr is ("#" <> url, tit) @@ -231,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) = fixBlockIRs _ b = b fixAttrs :: FilePath -> B.Attr -> B.Attr -fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) +fixAttrs s (ident, cs, kvs) = + (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) -addHash :: String -> T.Text -> T.Text +addHash :: FilePath -> Text -> Text addHash _ "" = "" addHash s ident = T.pack (takeFileName s) <> "#" <> ident -removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] +removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs -isEPUBAttr :: (T.Text, a) -> Bool +isEPUBAttr :: (Text, a) -> Bool isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k -- Library @@ -257,33 +259,33 @@ uncurry3 f (a, b, c) = f a b c -- Utility -stripNamespace :: QName -> String +stripNamespace :: QName -> Text stripNamespace (QName v _ _) = v -attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair :: Attr -> Maybe (Text, Text) attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val) attrToNSPair _ = Nothing -attrToPair :: Attr -> (String, String) +attrToPair :: Attr -> (Text, Text) attrToPair (Attr (QName name _ _) val) = (name, val) -defaultNameSpace :: Maybe String +defaultNameSpace :: Maybe Text defaultNameSpace = Just "http://www.idpf.org/2007/opf" -dfName :: String -> QName +dfName :: Text -> QName dfName s = QName s defaultNameSpace Nothing -emptyName :: String -> QName +emptyName :: Text -> QName emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: PandocMonad m => QName -> Element -> m String +findAttrE :: PandocMonad m => QName -> Element -> m Text findAttrE q e = mkE "findAttr" $ findAttr q e findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise . unEscapeString -> path) a = - mkE ("No entry on path: " ++ path) $ findEntryByPath path a + mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a parseXMLDocE :: PandocMonad m => Entry -> m Element parseXMLDocE entry = @@ -293,7 +295,8 @@ parseXMLDocE entry = fp = T.pack $ eRelativePath entry findElementE :: PandocMonad m => QName -> Element -> m Element -findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x +findElementE e x = + mkE ("Unable to find element: " <> tshow e) $ findElement e x -mkE :: PandocMonad m => String -> Maybe a -> m a -mkE s = maybe (throwError . PandocParseError $ T.pack s) return +mkE :: PandocMonad m => Text -> Maybe a -> m a +mkE s = maybe (throwError . PandocParseError $ s) return diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index b804eab4f..66e390bd7 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -25,7 +25,6 @@ TODO: module Text.Pandoc.Readers.FB2 ( readFB2 ) where import Control.Monad.Except (throwError) import Control.Monad.State.Strict -import Data.ByteString.Lazy.Char8 ( pack ) import Data.ByteString.Base64.Lazy import Data.Functor import Data.List (intersperse) @@ -42,8 +41,8 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light +import qualified Text.Pandoc.UTF8 as UTF8 type FB2 m = StateT FB2State m @@ -85,12 +84,12 @@ removeHash t = case T.uncons t of Just ('#', xs) -> xs _ -> t -convertEntity :: String -> Text -convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e) parseInline :: PandocMonad m => Content -> FB2 m Inlines parseInline (Elem e) = - case T.pack $ qName $ elName e of + case qName $ elName e of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -98,12 +97,12 @@ parseInline (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ T.pack $ strContent e + "code" -> pure $ code $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement name pure mempty -parseInline (Text x) = pure $ text $ T.pack $ cdData x +parseInline (Text x) = pure $ text $ cdData x parseInline (CRef r) = pure $ str $ convertEntity r parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks @@ -113,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel < parseRootElement :: PandocMonad m => Element -> FB2 m Blocks parseRootElement e = - case T.pack $ qName $ elName e of + case qName $ elName e of "FictionBook" -> do -- Parse notes before parsing the rest of the content. case filterChild isNotesBody e of @@ -146,7 +145,7 @@ parseNote e = Just sectionId -> do content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e) oldNotes <- gets fb2Notes - modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes } + modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes } pure () where isTitle x = qName (elName x) == "title" @@ -158,7 +157,7 @@ parseNote e = -- | Parse a child of @\@ element. parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks parseFictionBookChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "stylesheet" -> pure mempty -- stylesheet is ignored "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e) "body" -> if isNotesBody e @@ -170,7 +169,7 @@ parseFictionBookChild e = -- | Parse a child of @\@ element. parseDescriptionChild :: PandocMonad m => Element -> FB2 m () parseDescriptionChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title-info" -> mapM_ parseTitleInfoChild (elChildren e) "src-title-info" -> pure () -- ignore "document-info" -> pure () @@ -184,7 +183,7 @@ parseDescriptionChild e = -- | Parse a child of @\@ element. parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks parseBodyChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "image" -> parseImageElement e "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) "epigraph" -> parseEpigraph e @@ -198,7 +197,10 @@ parseBinaryElement e = (Nothing, _) -> report $ IgnoredElement "binary without id attribute" (Just _, Nothing) -> report $ IgnoredElement "binary without content-type attribute" - (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e))) + (Just filename, contentType) -> + insertMedia (T.unpack filename) contentType + (decodeLenient + (UTF8.fromTextLazy . TL.fromStrict . strContent $ e)) -- * Type parsers @@ -208,13 +210,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text) parseAuthorChild e = - case T.pack $ qName $ elName e of - "first-name" -> pure $ Just $ T.pack $ strContent e - "middle-name" -> pure $ Just $ T.pack $ strContent e - "last-name" -> pure $ Just $ T.pack $ strContent e - "nickname" -> pure $ Just $ T.pack $ strContent e - "home-page" -> pure $ Just $ T.pack $ strContent e - "email" -> pure $ Just $ T.pack $ strContent e + case qName $ elName e of + "first-name" -> pure $ Just $ strContent e + "middle-name" -> pure $ Just $ strContent e + "last-name" -> pure $ Just $ strContent e + "nickname" -> pure $ Just $ strContent e + "home-page" -> pure $ Just $ strContent e + "email" -> pure $ Just $ strContent e name -> do report $ IgnoredElement $ name <> " in author" pure Nothing @@ -238,13 +240,13 @@ parseTitleContent _ = pure Nothing parseImageElement :: PandocMonad m => Element -> FB2 m Blocks parseImageElement e = case href of - Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt + Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt Nothing -> do report $ IgnoredElement " image without href" pure mempty - where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e - title = maybe "" T.pack $ findAttr (unqual "title") e - imgId = maybe "" T.pack $ findAttr (unqual "id") e + where alt = maybe mempty str $ findAttr (unqual "alt") e + title = fromMaybe "" $ findAttr (unqual "title") e + imgId = fromMaybe "" $ findAttr (unqual "id") e href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e -- | Parse @pType@ @@ -258,7 +260,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e) -- | Parse @citeType@ child parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks parseCiteChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "empty-line" -> pure horizontalRule @@ -273,13 +275,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e) parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks parsePoemChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "epigraph" -> parseEpigraph e "stanza" -> parseStanza e "text-author" -> para <$> parsePType e - "date" -> pure $ para $ text $ T.pack $ strContent e + "date" -> pure $ para $ text $ strContent e name -> report (UnexpectedXmlElement name "poem") $> mempty parseStanza :: PandocMonad m => Element -> FB2 m Blocks @@ -292,7 +294,7 @@ joinLineBlocks [] = [] parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks parseStanzaChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "v" -> lineBlock . (:[]) <$> parsePType e @@ -302,11 +304,11 @@ parseStanzaChild e = parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks parseEpigraph e = divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e) - where divId = maybe "" T.pack $ findAttr (unqual "id") e + where divId = fromMaybe "" $ findAttr (unqual "id") e parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks parseEpigraphChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -320,7 +322,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e) parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks parseAnnotationChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -334,14 +336,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks parseSection e = do n <- gets fb2SectionLevel modify $ \st -> st{ fb2SectionLevel = n + 1 } - let sectionId = maybe "" T.pack $ findAttr (unqual "id") e + let sectionId = fromMaybe "" $ findAttr (unqual "id") e bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e) modify $ \st -> st{ fb2SectionLevel = n } pure bs parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks parseSectionChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseBodyChild e "epigraph" -> parseEpigraph e "image" -> parseImageElement e @@ -363,16 +365,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e) parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines parseNamedStyle e = do content <- mconcat <$> mapM parseNamedStyleChild (elContent e) - let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e + let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e case findAttr (unqual "name") e of - Just name -> pure $ spanWith ("", [T.pack name], lang) content + Just name -> pure $ spanWith ("", [name], lang) content Nothing -> do report $ IgnoredElement "link without required name" pure mempty parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines parseNamedStyleChild (Elem e) = - case T.pack $ qName (elName e) of + case qName (elName e) of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -380,7 +382,7 @@ parseNamedStyleChild (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ T.pack $ strContent e + "code" -> pure $ code $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement $ name <> " in style" @@ -392,7 +394,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines parseLinkType e = do content <- mconcat <$> mapM parseStyleLinkType (elContent e) notes <- gets fb2Notes - case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just href -> case findAttr (QName "type" Nothing Nothing) e of Just "note" -> case M.lookup href notes of Nothing -> pure $ link href "" content @@ -419,15 +421,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet -- | Parse @title-infoType@ parseTitleInfoChild :: PandocMonad m => Element -> FB2 m () parseTitleInfoChild e = - case T.pack $ qName (elName e) of + case qName (elName e) of "genre" -> pure () "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st}) - "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e)) + "book-title" -> modify (setMeta "title" (text $ strContent e)) "annotation" -> parseAnnotation e >>= modify . setMeta "abstract" "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn "," - $ T.pack $ strContent e)) - "date" -> modify (setMeta "date" (text $ T.pack $ strContent e)) + "date" -> modify (setMeta "date" (text $ strContent e)) "coverpage" -> parseCoverPage e "lang" -> pure () "src-lang" -> pure () @@ -441,7 +442,7 @@ parseCoverPage e = Just img -> case href of Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src)) Nothing -> pure () - where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img + where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img Nothing -> pure () -- | Parse @inlineImageType@ element @@ -454,5 +455,5 @@ parseInlineImageElement e = Nothing -> do report $ IgnoredElement "inline image without href" pure mempty - where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e - href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e + where alt = maybe mempty str $ findAttr (unqual "alt") e + href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index dfd343b7a..5353f2001 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -16,7 +16,7 @@ module Text.Pandoc.Readers.JATS ( readJATS ) where import Control.Monad.State.Strict import Control.Monad.Except (throwError) import Text.Pandoc.Error (PandocError(..)) -import Data.Char (isDigit, isSpace, toUpper) +import Data.Char (isDigit, isSpace) import Data.Default import Data.Generics import Data.List (foldl', intersperse) @@ -31,8 +31,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) @@ -67,29 +66,29 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr = fromMaybe "" . maybeAttrValue attr -maybeAttrValue :: String -> Element -> Maybe Text +maybeAttrValue :: Text -> Element -> Maybe Text maybeAttrValue attr elt = - T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + lookupAttrBy (\x -> qName x == attr) (elAttribs elt) -- convenience function -named :: String -> Element -> Bool +named :: Text -> Element -> Bool named s e = qName (elName e) == s -- @@ -155,10 +154,10 @@ getBlocks e = mconcat <$> parseBlock :: PandocMonad m => Content -> JATS m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE -parseBlock (Text (CData _ s _)) = if all isSpace s +parseBlock (Text (CData _ s _)) = if T.all isSpace s then return mempty - else return $ plain $ trimInlines $ text $ T.pack s -parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x + else return $ plain $ trimInlines $ text s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper x parseBlock (Elem e) = case qName (elName e) of "p" -> parseMixed para (elContent e) @@ -207,7 +206,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ textContentRecursive e + $ trimNl $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -271,7 +270,7 @@ parseBlock (Elem e) = Just "center" -> AlignCenter _ -> AlignDefault let toWidth c = do - w <- findAttrText (unqual "colwidth") c + w <- findAttr (unqual "colwidth") c n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w if n > 0 then Just n else Nothing let numrows = foldl' max 0 $ map length bodyrows @@ -442,16 +441,10 @@ parseRef e = do Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty -- TODO handle mixed-citation -findAttrText :: QName -> Element -> Maybe Text -findAttrText x = fmap T.pack . findAttr x - textContent :: Element -> Text -textContent = T.pack . strContent - -textContentRecursive :: Element -> Text -textContentRecursive = T.pack . strContentRecursive +textContent = strContent -strContentRecursive :: Element -> String +strContentRecursive :: Element -> Text strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -460,9 +453,8 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines -parseInline (Text (CData _ s _)) = return $ text $ T.pack s -parseInline (CRef ref) = - return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref +parseInline (Text (CData _ s _)) = return $ text s +parseInline (CRef ref) = return . text . convertEntity $ ref parseInline (Elem e) = case qName (elName e) of "italic" -> innerInlines emph @@ -507,9 +499,9 @@ parseInline (Elem e) = else linkWith attr ("#" <> rid) "" ils "ext-link" -> do ils <- innerInlines id - let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> T.pack h + Just h -> h _ -> "#" <> attrValue "rid" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, [], []) @@ -529,7 +521,7 @@ parseInline (Elem e) = where innerInlines f = extractSpaces f . mconcat <$> mapM parseInline (elContent e) mathML x = - case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of + case readMathML . showElement $ everywhere (mkT removePrefix) x of Left _ -> mempty Right m -> writeTeX m formula constructor = do @@ -547,4 +539,4 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index bdadc4dd9..184d5a63f 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -13,7 +13,6 @@ Conversion of OPML to 'Pandoc' document. module Text.Pandoc.Readers.OPML ( readOPML ) where import Control.Monad.State.Strict -import Data.Char (toUpper) import Data.Default import Data.Generics import Data.Maybe (fromMaybe) @@ -28,8 +27,7 @@ import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.Pandoc.Shared (crFilter, blocksToInlines') -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import Control.Monad.Except (throwError) type OPML m = StateT OPMLState m @@ -69,25 +67,22 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e)) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr elt = - maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) - -textContent :: Element -> Text -textContent = T.pack . strContent + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a -- exceptT = either throwError return @@ -111,11 +106,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> - st{opmlDocAuthors = [text $ textContent e]}) + st{opmlDocAuthors = [text $ strContent e]}) "dateModified" -> mempty <$ modify (\st -> - st{opmlDocDate = text $ textContent e}) + st{opmlDocDate = text $ strContent e}) "title" -> mempty <$ modify (\st -> - st{opmlDocTitle = text $ textContent e}) + st{opmlDocTitle = text $ strContent e}) "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 85308deb1..c274b6fd4 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -14,8 +14,7 @@ Entry point to the odt reader. module Text.Pandoc.Readers.Odt ( readOdt ) where import Codec.Archive.Zip -import qualified Text.XML.Light as XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light import qualified Data.ByteString.Lazy as B @@ -91,7 +90,7 @@ archiveToOdt archive = do -- -entryToXmlElem :: Entry -> Either PandocError XML.Element +entryToXmlElem :: Entry -> Either PandocError Element entryToXmlElem entry = case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of Right x -> Right x diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 43c44e7e9..df90880fa 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -29,14 +29,14 @@ import Control.Monad ((<=<)) import qualified Data.ByteString.Lazy as B import Data.Foldable (fold) -import Data.List (find, stripPrefix) +import Data.List (find) import qualified Data.Map as M import qualified Data.Text as T import Data.Maybe import Data.Semigroup (First(..), Option(..)) import Text.TeXMath (readMathML, writeTeX) -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Builder hiding (underline) import Text.Pandoc.MediaBag (MediaBag, insertMedia) @@ -557,7 +557,7 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover >>?% mappend -- extractText :: XML.Content -> Fallible T.Text - extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData) + extractText (XML.Text cData) = succeedWith (XML.cdData cData) extractText _ = failEmpty read_text_seq :: InlineMatcher @@ -777,14 +777,14 @@ read_frame_img = "" -> returnV mempty -< () src' -> do let exts = extensionsFromList [Ext_auto_identifiers] - resource <- lookupResource -< src' + resource <- lookupResource -< T.unpack src' _ <- updateMediaWithResource -< resource w <- findAttrText' NsSVG "width" -< () h <- findAttrText' NsSVG "height" -< () titleNodes <- matchChildContent' [ read_frame_title ] -< () alt <- matchChildContent [] read_plain_text -< () arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt) + (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) @@ -804,7 +804,8 @@ read_frame_mathml = case fold src of "" -> returnV mempty -< () src' -> do - let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml" + let path = T.unpack $ + fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml" (_, mathml) <- lookupResource -< path case readMathML (UTF8.toText $ B.toStrict mathml) of Left _ -> returnV mempty -< () diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs index 77174c793..78a7fc0b2 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs @@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces. module Text.Pandoc.Readers.Odt.Generic.Namespaces where import qualified Data.Map as M +import Data.Text (Text) -- -type NameSpaceIRI = String +type NameSpaceIRI = Text -- type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 6dc56a0d9..edefe3c70 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , reverseComposition , tryToRead , Lookupable(..) -, readLookupables , readLookupable , readPercent , findBy @@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils import Control.Category (Category, (<<<), (>>>)) import qualified Control.Category as Cat (id) -import Control.Monad (msum) - +import Data.Char (isSpace) import qualified Data.Foldable as F (Foldable, foldr) import Data.Maybe - +import Data.Text (Text) +import qualified Data.Text as T -- | Equivalent to -- > foldr (.) id @@ -76,8 +75,8 @@ swing = flip.(.flip id) -- (nobody wants that) while the latter returns "to much" for simple purposes. -- This function instead applies 'reads' and returns the first match (if any) -- in a 'Maybe'. -tryToRead :: (Read r) => String -> Maybe r -tryToRead = reads >>> listToMaybe >>> fmap fst +tryToRead :: (Read r) => Text -> Maybe r +tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst -- | A version of 'reads' that requires a '%' sign after the number readPercent :: ReadS Int @@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s -- | Data that can be looked up. -- This is mostly a utility to read data with kind *. class Lookupable a where - lookupTable :: [(String, a)] - --- | The idea is to use this function as if there was a declaration like --- --- > instance (Lookupable a) => (Read a) where --- > readsPrec _ = readLookupables --- . --- But including this code in this form would need UndecideableInstances. --- That is a bad idea. Luckily 'readLookupable' (without the s at the end) --- can be used directly in almost any case. -readLookupables :: (Lookupable a) => String -> [(a,String)] -readLookupables s = [ (a,rest) | (word,rest) <- lex s, - a <- maybeToList (lookup word lookupTable) - ] + lookupTable :: [(Text, a)] -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. -readLookupable :: (Lookupable a) => String -> Maybe a -readLookupable s = msum - $ map ((`lookup` lookupTable).fst) - $ lex s +readLookupable :: (Lookupable a) => Text -> Maybe a +readLookupable s = + lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 00c636a0d..0d921e23b 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} @@ -60,11 +61,11 @@ import Control.Arrow import Data.Bool ( bool ) import Data.Either ( rights ) import qualified Data.Map as M -import qualified Data.Text as T +import Data.Text (Text) import Data.Default import Data.Maybe -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils @@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible -------------------------------------------------------------------------------- -- -type ElementName = String -type AttributeName = String -type AttributeValue = String -type TextAttributeValue = T.Text +type ElementName = Text +type AttributeName = Text +type AttributeValue = Text +type TextAttributeValue = Text -- -type NameSpacePrefix = String +type NameSpacePrefix = Text -- type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix @@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) lookupDefaultingAttr nsID attrName = lookupAttrWithDefault nsID attrName def --- | Return value as a (Maybe String) +-- | Return value as a (Maybe Text) findAttr' :: (NameSpaceID nsID) => nsID -> AttributeName -> XMLConverter nsID extraState x (Maybe AttributeValue) @@ -477,7 +478,6 @@ findAttrText' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr - >>^ fmap T.pack -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID) -> FallibleXMLConverter nsID extraState x TextAttributeValue findAttrText nsID attrName = findAttr' nsID attrName - >>^ fmap T.pack >>> maybeToChoice -- | Return value as string or return provided default value @@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID) -> XMLConverter nsID extraState x TextAttributeValue findAttrTextWithDefault nsID attrName deflt = findAttr' nsID attrName - >>^ maybe deflt T.pack + >>^ fromMaybe deflt -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) @@ -748,7 +747,7 @@ matchContent lookups fallback -- Internals -------------------------------------------------------------------------------- -stringToBool' :: String -> Maybe Bool +stringToBool' :: Text -> Maybe Bool stringToBool' val | val `elem` trueValues = Just True | val `elem` falseValues = Just False | otherwise = Nothing diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index 3a24a1162..70741c28d 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt.Namespaces Copyright : Copyright (C) 2015 Martin Linnemann @@ -13,10 +14,10 @@ Namespaces used in odt files. module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) ) where -import Data.List (isPrefixOf) import qualified Data.Map as M (empty, insert) import Data.Maybe (fromMaybe, listToMaybe) - +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Odt.Generic.Namespaces @@ -30,7 +31,7 @@ instance NameSpaceID Namespace where findID :: NameSpaceIRI -> Maybe Namespace -findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri] +findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri] nsIDmap :: NameSpaceIRIs Namespace nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs @@ -54,12 +55,12 @@ data Namespace = -- Open Document core -- Core XML (basically only for the 'id'-attribute) | NsXML -- Fallback - | NsOther String + | NsOther Text deriving ( Eq, Ord, Show ) -- | Not the actual iri's, but large prefixes of them - this way there are -- less versioning problems and the like. -nsIDs :: [(String,Namespace)] +nsIDs :: [(Text, Namespace)] nsIDs = [ ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 46a777df1..5e10f896c 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -2,6 +2,7 @@ {-# LANGUAGE Arrows #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Odt.StyleReader Copyright : Copyright (C) 2015 Martin Linnemann @@ -46,11 +47,13 @@ import qualified Data.Foldable as F import Data.List (unfoldr) import qualified Data.Map as M import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T import qualified Data.Set as S -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, tshow) import Text.Pandoc.Readers.Odt.Arrows.Utils @@ -90,7 +93,7 @@ instance Default FontPitch where -- -- Thus, we want -type FontFaceName = String +type FontFaceName = Text type FontPitches = M.Map FontFaceName FontPitch @@ -151,7 +154,7 @@ findPitch = ( lookupAttr NsStyle "font-pitch" -- Definitions of main data -------------------------------------------------------------------------------- -type StyleName = String +type StyleName = Text -- | There are two types of styles: named styles with a style family and an -- optional style parent, and default styles for each style family, @@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} = -- \^ simpler, but in general less efficient data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType - , listItemPrefix :: Maybe String - , listItemSuffix :: Maybe String + , listItemPrefix :: Maybe Text + , listItemSuffix :: Maybe Text , listItemFormat :: ListItemNumberFormat , listItemStart :: Int } @@ -366,9 +369,9 @@ instance Show ListLevelStyle where show ListLevelStyle{..} = " listItemPrefix) ++ show listItemFormat - ++ maybeToString listItemSuffix + ++ maybeToString (T.unpack <$> listItemSuffix) ++ ">" where maybeToString = fromMaybe "" @@ -471,7 +474,7 @@ readTextProperties = ) where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] isFontBold = ("normal",False):("bold",True) - :map ((,True).show) ([100,200..900]::[Int]) + :map ((,True) . tshow) ([100,200..900]::[Int]) readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) readUnderlineMode = readLineMode "text-underline-mode" @@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) readStrikeThroughMode = readLineMode "text-line-through-mode" "text-line-through-style" -readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode) +readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode) readLineMode modeAttr styleAttr = proc x -> do isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x mode <- lookupAttr' NsStyle modeAttr -< x diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index da990e4d3..89c71d773 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -31,6 +31,7 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) @@ -57,19 +58,19 @@ import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.Printf (printf) import Text.TeXMath -import Text.XML.Light as XML -import Text.XML.Light.Cursor as XMLC import Text.Pandoc.Writers.OOXML +import Text.Pandoc.XML.Light as XML +import Data.Generics (mkT, everywhere) data ListMarker = NoMarker | BulletMarker | NumberMarker ListNumberStyle ListNumberDelim Int deriving (Show, Read, Eq, Ord) -listMarkerToId :: ListMarker -> String +listMarkerToId :: ListMarker -> Text listMarkerToId NoMarker = "990" listMarkerToId BulletMarker = "991" -listMarkerToId (NumberMarker sty delim n) = +listMarkerToId (NumberMarker sty delim n) = T.pack $ '9' : '9' : styNum : delimNum : show n where styNum = case sty of DefaultStyle -> '2' @@ -106,8 +107,8 @@ data WriterEnv = WriterEnv{ envTextProperties :: EnvProps , envListLevel :: Int , envListNumId :: Int , envInDel :: Bool - , envChangesAuthor :: T.Text - , envChangesDate :: T.Text + , envChangesAuthor :: Text + , envChangesDate :: Text , envPrintWidth :: Integer } @@ -125,9 +126,9 @@ defaultWriterEnv = WriterEnv{ envTextProperties = mempty data WriterState = WriterState{ stFootnotes :: [Element] - , stComments :: [([(T.Text, T.Text)], [Inline])] - , stSectionIds :: Set.Set T.Text - , stExternalLinks :: M.Map String String + , stComments :: [([(Text, Text)], [Inline])] + , stSectionIds :: Set.Set Text + , stExternalLinks :: M.Map Text Text , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) , stLists :: [ListMarker] , stInsId :: Int @@ -164,18 +165,18 @@ defaultWriterState = WriterState{ type WS m = ReaderT WriterEnv (StateT WriterState m) -renumIdMap :: Int -> [Element] -> M.Map String String +renumIdMap :: Int -> [Element] -> M.Map Text Text renumIdMap _ [] = M.empty renumIdMap n (e:es) | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = - M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es) + M.insert oldId ("rId" <> tshow n) (renumIdMap (n+1) es) | otherwise = renumIdMap n es -replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] +replaceAttr :: (QName -> Bool) -> Text -> [XML.Attr] -> [XML.Attr] replaceAttr f val = map $ \a -> if f (attrKey a) then XML.Attr (attrKey a) val else a -renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element +renumId :: (QName -> Bool) -> M.Map Text Text -> Element -> Element renumId f renumMap e | Just oldId <- findAttrBy f e , Just newId <- M.lookup oldId renumMap = @@ -184,18 +185,12 @@ renumId f renumMap e e { elAttribs = attrs' } | otherwise = e -renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element] +renumIds :: (QName -> Bool) -> M.Map Text Text -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) -findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text -findAttrTextBy x = fmap T.pack . findAttrBy x - -lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text -lookupAttrTextBy x = fmap T.pack . lookupAttrBy x - -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: T.Text -> T.Text +stripInvalidChars :: Text -> Text stripInvalidChars = T.filter isValidChar -- | See XML reference @@ -234,11 +229,11 @@ writeDocx opts doc = do -- Gets the template size let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz")) - let mbAttrSzWidth = mbpgsz >>= lookupAttrTextBy ((=="w") . qName) . elAttribs + let mbAttrSzWidth = mbpgsz >>= lookupAttrBy ((=="w") . qName) . elAttribs let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar")) - let mbAttrMarLeft = mbpgmar >>= lookupAttrTextBy ((=="left") . qName) . elAttribs - let mbAttrMarRight = mbpgmar >>= lookupAttrTextBy ((=="right") . qName) . elAttribs + let mbAttrMarLeft = mbpgmar >>= lookupAttrBy ((=="left") . qName) . elAttribs + let mbAttrMarRight = mbpgmar >>= lookupAttrBy ((=="right") . qName) . elAttribs -- Get the available area (converting the size and the margins to int and -- doing the difference @@ -250,24 +245,21 @@ writeDocx opts doc = do -- styles mblang <- toLang $ getLang opts meta + -- TODO FIXME avoid this generic traversal! + -- lang is in w:docDefaults / w:rPr / w:lang let addLang :: Element -> Element - addLang e = case (\l -> XMLC.toTree . go (T.unpack $ renderLang l) $ - XMLC.fromElement e) <$> mblang of - Just (Elem e') -> e' - _ -> e -- return original - where go :: String -> Cursor -> Cursor - go l cursor = case XMLC.findRec (isLangElt . current) cursor of - Nothing -> cursor - Just t -> XMLC.modifyContent (setval l) t - setval :: String -> Content -> Content - setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $ - elAttribs e' } - setval _ x = x - setvalattr :: String -> XML.Attr -> XML.Attr - setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l - setvalattr _ x = x - isLangElt (Elem e') = qName (elName e') == "lang" - isLangElt _ = False + addLang = case mblang of + Nothing -> id + Just l -> everywhere (mkT (go (renderLang l))) + where + go :: Text -> Element -> Element + go l e' + | qName (elName e') == "lang" + = e'{ elAttribs = map (setvalattr l) $ elAttribs e' } + | otherwise = e' + + setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l + setvalattr _ x = x let stylepath = "word/styles.xml" styledoc <- addLang <$> parseXml refArchive distArchive stylepath @@ -337,12 +329,13 @@ writeDocx opts doc = do -- [Content_Types].xml let mkOverrideNode (part', contentType') = mknode "Override" - [("PartName",part'),("ContentType",contentType')] () + [("PartName", T.pack part') + ,("ContentType", contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _) = - mkOverrideNode ("/word/" ++ imgpath, - maybe "application/octet-stream" T.unpack mbMimeType) + mkOverrideNode ("/word/" <> imgpath, + fromMaybe "application/octet-stream" mbMimeType) let mkMediaOverride imgpath = - mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath) + mkOverrideNode ("/" <> imgpath, getMimeTypeDef imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -369,13 +362,14 @@ writeDocx opts doc = do ,("/word/footnotes.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") ] ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x), "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x), "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ map mkImageOverride imgs ++ - [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive - , "word/media/" `isPrefixOf` eRelativePath e ] + [ mkMediaOverride (eRelativePath e) + | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), @@ -421,7 +415,7 @@ writeDocx opts doc = do let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers let renumFooters = renumIds (\q -> qName q == "Id") idMap footers let baserels = baserels' ++ renumHeaders ++ renumFooters - let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () + let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",T.pack ident),("Target",T.pack path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () let linkrels = map toLinkRel $ M.toList $ stExternalLinks st @@ -489,10 +483,10 @@ writeDocx opts doc = do numbering <- parseXml refArchive distArchive numpath let newNumElts = mkNumbering (stLists st) let pandocAdded e = - case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of + case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of Just numid -> numid >= (990 :: Int) Nothing -> - case findAttrTextBy ((== "numId") . qName) e >>= safeRead of + case findAttrBy ((== "numId") . qName) e >>= safeRead of Just numid -> numid >= (1000 :: Int) Nothing -> False let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering) @@ -514,7 +508,7 @@ writeDocx opts doc = do let extraCoreProps = ["subject","lang","category","description"] let extraCorePropsMap = M.fromList $ zip extraCoreProps ["dc:subject","dc:language","cp:category","dc:description"] - let lookupMetaString' :: T.Text -> Meta -> T.Text + let lookupMetaString' :: Text -> Meta -> Text lookupMetaString' key' meta' = case key' of "description" -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') @@ -530,21 +524,21 @@ writeDocx opts doc = do : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta)) : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) | k <- M.keys (unMeta meta), k `elem` extraCoreProps] - ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords) + ++ mknode "cp:keywords" [] (T.intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps -- docProps/custom.xml - let customProperties :: [(String, String)] - customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta) + let customProperties :: [(Text, Text)] + customProperties = [ (k, lookupMetaString k meta) | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords"] ++ extraCoreProps)] let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") - ,("pid", show pid) + ,("pid", tshow pid) ,("name", k)] $ mknode "vt:lpwstr" [] v let customPropsPath = "docProps/custom.xml" let customProps = mknode "Properties" @@ -594,7 +588,8 @@ writeDocx opts doc = do fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive . ("word/" ++)) $ - mapMaybe extractTarget (headers ++ footers) + mapMaybe (fmap T.unpack . extractTarget) + (headers ++ footers) let miscRelEntries = [ e | e <- zEntries refArchive , "word/_rels/" `isPrefixOf` eRelativePath e , ".xml.rels" `isSuffixOf` eRelativePath e @@ -620,8 +615,8 @@ newParaPropToOpenXml (fromStyleName -> s) = let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "paragraph") , ("w:customStyle", "1") - , ("w:styleId", T.unpack styleId)] - [ mknode "w:name" [("w:val", T.unpack s)] () + , ("w:styleId", styleId)] + [ mknode "w:name" [("w:val", s)] () , mknode "w:basedOn" [("w:val","BodyText")] () , mknode "w:qFormat" [] () ] @@ -631,8 +626,8 @@ newTextPropToOpenXml (fromStyleName -> s) = let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "character") , ("w:customStyle", "1") - , ("w:styleId", T.unpack styleId)] - [ mknode "w:name" [("w:val", T.unpack s)] () + , ("w:styleId", styleId)] + [ mknode "w:name" [("w:val", s)] () , mknode "w:basedOn" [("w:val","BodyTextChar")] () ] @@ -643,13 +638,14 @@ styleToOpenXml sm style = toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","character"), - ("w:customStyle","1"),("w:styleId",show toktype)] - [ mknode "w:name" [("w:val",show toktype)] () + ("w:customStyle","1"),("w:styleId", tshow toktype)] + [ mknode "w:name" [("w:val", tshow toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () , mknode "w:rPr" [] $ - [ mknode "w:color" [("w:val",tokCol toktype)] () + [ mknode "w:color" [("w:val", tokCol toktype)] () | tokCol toktype /= "auto" ] ++ - [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] () + [ mknode "w:shd" [("w:val","clear") + ,("w:fill",tokBg toktype)] () | tokBg toktype /= "auto" ] ++ [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++ [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++ @@ -657,10 +653,10 @@ styleToOpenXml sm style = ] tokStyles = tokenStyles style tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles - tokCol toktype = maybe "auto" (drop 1 . fromColor) + tokCol toktype = maybe "auto" (T.pack . drop 1 . fromColor) $ (tokenColor =<< M.lookup toktype tokStyles) `mplus` defaultColor style - tokBg toktype = maybe "auto" (drop 1 . fromColor) + tokBg toktype = maybe "auto" (T.pack . drop 1 . fromColor) $ (tokenBackground =<< M.lookup toktype tokStyles) `mplus` backgroundColor style parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing @@ -673,10 +669,11 @@ styleToOpenXml sm style = , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () : - maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style) + maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill", T.pack $ drop 1 $ fromColor col)] ()]) (backgroundColor style) ] -copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry +copyChildren :: (PandocMonad m) + => Archive -> Archive -> String -> Integer -> [Text] -> m Entry copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path @@ -685,7 +682,7 @@ copyChildren refArchive distArchive path timestamp elNames = do } where strName QName{qName=name, qPrefix=prefix} - | Just p <- prefix = p++":"++name + | Just p <- prefix = p <> ":" <> name | otherwise = name shouldCopy = (`elem` elNames) . strName cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} @@ -706,35 +703,35 @@ maxListLevel = 8 mkNum :: ListMarker -> Int -> Element mkNum marker numid = - mknode "w:num" [("w:numId",show numid)] + mknode "w:num" [("w:numId",tshow numid)] $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () : case marker of NoMarker -> [] BulletMarker -> [] NumberMarker _ _ start -> - map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) + map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",tshow (lvl :: Int))] + $ mknode "w:startOverride" [("w:val",tshow start)] ()) [0..maxListLevel] mkAbstractNum :: ListMarker -> Integer -> Element mkAbstractNum marker nsid = mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] - $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () + $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..maxListLevel] mkLvl :: ListMarker -> Int -> Element mkLvl marker lvl = - mknode "w:lvl" [("w:ilvl",show lvl)] $ + mknode "w:lvl" [("w:ilvl",tshow lvl)] $ [ mknode "w:start" [("w:val",start)] () | marker /= NoMarker && marker /= BulletMarker ] ++ [ mknode "w:numFmt" [("w:val",fmt)] () - , mknode "w:lvlText" [("w:val",lvltxt)] () + , mknode "w:lvlText" [("w:val", lvltxt)] () , mknode "w:lvlJc" [("w:val","left")] () , mknode "w:pPr" [] - [ mknode "w:ind" [ ("w:left",show $ lvl * step + step) - , ("w:hanging",show (hang :: Int)) + [ mknode "w:ind" [ ("w:left",tshow $ lvl * step + step) + , ("w:hanging",tshow (hang :: Int)) ] () ] ] @@ -743,8 +740,8 @@ mkLvl marker lvl = NoMarker -> ("bullet"," ","1") BulletMarker -> ("bullet",bulletFor lvl,"1") NumberMarker st de n -> (styleFor st lvl - ,patternFor de ("%" ++ show (lvl + 1)) - ,show n) + ,patternFor de ("%" <> tshow (lvl + 1)) + ,tshow n) step = 720 hang = 480 bulletFor 0 = "\x2022" -- filled circle @@ -767,9 +764,9 @@ mkLvl marker lvl = styleFor DefaultStyle 5 = "lowerRoman" styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6) styleFor _ _ = "decimal" - patternFor OneParen s = s ++ ")" - patternFor TwoParens s = "(" ++ s ++ ")" - patternFor _ s = s ++ "." + patternFor OneParen s = s <> ")" + patternFor TwoParens s = "(" <> s <> ")" + patternFor _ s = s <> "." getNumId :: (PandocMonad m) => WS m Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists @@ -777,8 +774,8 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] makeTOC opts = do - let depth = "1-"++show (writerTOCDepth opts) - let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + let depth = "1-" <> tshow (writerTOCDepth opts) + let tocCmd = "TOC \\o \"" <> depth <> "\" \\h \\z \\u" tocTitle <- gets stTocTitle title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) return @@ -831,7 +828,7 @@ writeOpenXML opts (Pandoc meta blocks) = do let toComment (kvs, ils) = do annotation <- inlinesToOpenXML opts ils return $ - mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs] + mknode "w:comment" [("w:" <> k, v) | (k,v) <- kvs] [ mknode "w:p" [] $ map Elem [ mknode "w:pPr" [] @@ -867,24 +864,24 @@ pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element pStyleM styleName = do pStyleMap <- gets (smParaStyle . stStyleMaps) let sty' = getStyleIdFromName styleName pStyleMap - return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] () + return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do cStyleMap <- gets (smCharStyle . stStyleMaps) let sty' = getStyleIdFromName styleName cStyleMap - return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] () + return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] () -getUniqueId :: (PandocMonad m) => WS m String +getUniqueId :: (PandocMonad m) => WS m Text -- the + 20 is to ensure that there are no clashes with the rIds -- already in word/document.xml.rel getUniqueId = do n <- gets stCurId modify $ \st -> st{stCurId = n + 1} - return $ show n + return $ tshow n -- | Key for specifying user-defined docx styles. -dynamicStyleKey :: T.Text +dynamicStyleKey :: Text dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. @@ -979,7 +976,7 @@ blockToOpenXML' opts (Para lst) blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) | format == Format "openxml" = return [ - Text (CData CDataRaw (T.unpack str) Nothing) + Text (CData CDataRaw str Nothing) ] | otherwise = do report $ BlockNotRendered b @@ -1036,7 +1033,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" - [("w:w", show (floor (textwidth * w) :: Integer))] () + [("w:w", tshow (floor (textwidth * w) :: Integer))] () let hasHeader = not $ all null headers modify $ \s -> s { stInTable = False } -- for compatibility with Word <= 2007, we include a val with a bitmask @@ -1054,16 +1051,16 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ,("w:lastRow","0") ,("w:firstColumn","0") ,("w:lastColumn","0") ,("w:noHBand","0") ,("w:noVBand","0") - ,("w:val", printf "%04x" tblLookVal) + ,("w:val", T.pack $ printf "%04x" tblLookVal) ] () : - [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] () + [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths @@ -1126,7 +1123,7 @@ listItemToOpenXML opts numid (first:rest) = do modify $ \st -> st{ stInList = oldInList } return $ first'' ++ rest'' -alignmentToString :: Alignment -> [Char] +alignmentToString :: Alignment -> Text alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" @@ -1169,8 +1166,8 @@ getParaProps displayMathPara = do listLevel <- asks envListLevel numid <- asks envListNumId let listPr = [mknode "w:numPr" [] - [ mknode "w:ilvl" [("w:val",show listLevel)] () - , mknode "w:numId" [("w:val",show numid)] () ] | listLevel >= 0 && not displayMathPara] + [ mknode "w:ilvl" [("w:val",tshow listLevel)] () + , mknode "w:numId" [("w:val",tshow numid)] () ] | listLevel >= 0 && not displayMathPara] return $ case listPr ++ squashProps props of [] -> [] ps -> [mknode "w:pPr" [] ps] @@ -1185,7 +1182,7 @@ withParaPropM md p = do d <- md withParaProp d p -formattedString :: PandocMonad m => T.Text -> WS m [Element] +formattedString :: PandocMonad m => Text -> WS m [Element] formattedString str = -- properly handle soft hyphens case splitTextBy (=='\173') str of @@ -1194,7 +1191,7 @@ formattedString str = sh <- formattedRun [mknode "w:softHyphen" [] ()] intercalate sh <$> mapM formattedString' ws -formattedString' :: PandocMonad m => T.Text -> WS m [Element] +formattedString' :: PandocMonad m => Text -> WS m [Element] formattedString' str = do inDel <- asks envInDel formattedRun [ mktnode (if inDel then "w:delText" else "w:t") @@ -1226,7 +1223,7 @@ inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) = mknode "w:r" [] (mknode "w:t" [("xml:space","preserve")] - ("\t" :: String))] ++) + ("\t" :: Text))] ++) <$> inlinesToOpenXML opts ils inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) = inlinesToOpenXML opts ils @@ -1236,17 +1233,17 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do let ident' = fromMaybe ident (lookup "id" kvs) kvs' = filter (("id" /=) . fst) kvs modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } - return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ] + return [ Elem $ mknode "w:commentRangeStart" [("w:id", ident')] () ] inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = -- prefer the "id" in kvs, since that is the one produced by the docx -- reader. let ident' = fromMaybe ident (lookup "id" kvs) in return . map Elem $ - [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] () + [ mknode "w:commentRangeEnd" [("w:id", ident')] () , mknode "w:r" [] [ mknode "w:rPr" [] [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] - , mknode "w:commentReference" [("w:id", T.unpack ident')] () ] + , mknode "w:commentReference" [("w:id", ident')] () ] ] inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do stylemod <- case lookup dynamicStyleKey kvs of @@ -1270,8 +1267,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do defaultAuthor <- asks envChangesAuthor let author = fromMaybe defaultAuthor (lookup "author" kvs) let mdate = lookup "date" kvs - return $ ("w:author", T.unpack author) : - maybe [] (\date -> [("w:date", T.unpack date)]) mdate + return $ ("w:author", author) : + maybe [] (\date -> [("w:date", date)]) mdate insmod <- if "insertion" `elem` classes then do changeAuthorDate <- getChangeAuthorDate @@ -1281,7 +1278,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do x <- f return [Elem $ mknode "w:ins" - (("w:id", show insId) : changeAuthorDate) x] + (("w:id", tshow insId) : changeAuthorDate) x] else return id delmod <- if "deletion" `elem` classes then do @@ -1291,7 +1288,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do return $ \f -> local (\env->env{envInDel=True}) $ do x <- f return [Elem $ mknode "w:del" - (("w:id", show delId) : changeAuthorDate) x] + (("w:id", tshow delId) : changeAuthorDate) x] else return id contents <- insmod $ delmod $ dirmod $ stylemod $ pmod $ inlinesToOpenXML opts ils @@ -1322,7 +1319,7 @@ inlineToOpenXML' opts (Strikeout lst) = inlineToOpenXML' _ LineBreak = return [Elem br] inlineToOpenXML' _ il@(RawInline f str) | f == Format "openxml" = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] | otherwise = do report $ InlineNotRendered il return [] @@ -1335,7 +1332,7 @@ inlineToOpenXML' opts (Math mathType str) = do when (mathType == DisplayMath) setFirstPara res <- (lift . lift) (convertMath writeOMML mathType str) case res of - Right r -> return [Elem r] + Right r -> return [Elem $ fromXLElement r] Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do @@ -1348,7 +1345,7 @@ inlineToOpenXML' opts (Code attrs str) = do mknode "w:r" [] [ mknode "w:rPr" [] $ maybeToList (lookup toktype tokTypesMap) - , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] + , mknode "w:t" [("xml:space","preserve")] tok ] withTextPropM (rStyleM "Verbatim Char") $ if isNothing (writerHighlightStyle opts) then unhighlighted @@ -1365,7 +1362,7 @@ inlineToOpenXML' opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs @@ -1384,17 +1381,17 @@ inlineToOpenXML' opts (Note bs) = do inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return - [ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ] + [ Elem $ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ] -- external link: inlineToOpenXML' opts (Link _ txt (src,_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks - id' <- case M.lookup (T.unpack src) extlinks of + id' <- case M.lookup src extlinks of Just i -> return i Nothing -> do - i <- ("rId"++) `fmap` getUniqueId + i <- ("rId" <>) <$> getUniqueId modify $ \st -> st{ stExternalLinks = - M.insert (T.unpack src) i extlinks } + M.insert src i extlinks } return i return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do @@ -1414,17 +1411,17 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do ,("noChangeAspect","1")] () nvPicPr = mknode "pic:nvPicPr" [] [ mknode "pic:cNvPr" - [("descr",T.unpack src),("id","0"),("name","Picture")] () + [("descr",src),("id","0"),("name","Picture")] () , cNvPicPr ] blipFill = mknode "pic:blipFill" [] - [ mknode "a:blip" [("r:embed",ident)] () + [ mknode "a:blip" [("r:embed",T.pack ident)] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] xfrm = mknode "a:xfrm" [] [ mknode "a:off" [("x","0"),("y","0")] () - , mknode "a:ext" [("cx",show xemu) - ,("cy",show yemu)] () ] + , mknode "a:ext" [("cx",tshow xemu) + ,("cy",tshow yemu)] () ] prstGeom = mknode "a:prstGeom" [("prst","rect")] $ mknode "a:avLst" [] () ln = mknode "a:ln" [("w","9525")] @@ -1445,12 +1442,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do imgElt = mknode "w:r" [] $ mknode "w:drawing" [] $ mknode "wp:inline" [] - [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () + [ mknode "wp:extent" [("cx",tshow xemu),("cy",tshow yemu)] () , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" - [ ("descr", T.unpack $ stringify alt) - , ("title", T.unpack title) + [ ("descr", stringify alt) + , ("title", title) , ("id","1") , ("name","Picture") ] () @@ -1463,7 +1460,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just imgData -> return [Elem $ generateImgElt imgData] Nothing -> ( do --try (img, mt) <- P.fetchItem src - ident <- ("rId"++) `fmap` getUniqueId + ident <- ("rId" <>) <$> getUniqueId let imgext = case mt >>= extensionFromMimeType of @@ -1477,10 +1474,10 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just Svg -> ".svg" Just Emf -> ".emf" Nothing -> "" - imgpath = "media/" <> ident <> T.unpack imgext - mbMimeType = mt <|> getMimeType imgpath + imgpath = "media/" <> ident <> imgext + mbMimeType = mt <|> getMimeType (T.unpack imgpath) - imgData = (ident, imgpath, mbMimeType, img) + imgData = (T.unpack ident, T.unpack imgpath, mbMimeType, img) if T.null imgext then -- without an extension there is no rule for content type @@ -1538,20 +1535,20 @@ withDirection x = do , envTextProperties = EnvProps textStyle textProps' } -wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content] +wrapBookmark :: (PandocMonad m) => Text -> [Content] -> WS m [Content] wrapBookmark "" contents = return contents wrapBookmark ident contents = do id' <- getUniqueId let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') - ,("w:name", T.unpack $ toBookmarkName ident)] () + ,("w:name", toBookmarkName ident)] () bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd] -- Word imposes a 40 character limit on bookmark names and requires -- that they begin with a letter. So we just use a hash of the -- identifier when otherwise we'd have an illegal bookmark name. -toBookmarkName :: T.Text -> T.Text +toBookmarkName :: Text -> Text toBookmarkName s | Just (c, _) <- T.uncons s , isLetter c diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 171ffe582..3f10cb437 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -24,12 +24,13 @@ import Control.Monad.State.Strict (StateT, evalState, evalStateT, get, gets, lift, modify) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Data.Char (isAlphaNum, isAscii, isDigit, toLower) +import Data.Char (isAlphaNum, isAscii, isDigit) import Data.List (isInfixOf, isPrefixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) import qualified Data.Set as Set -import qualified Data.Text as TS +import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Text.Lazy as TL import Network.HTTP (urlEncode) import System.FilePath (takeExtension, takeFileName, makeRelative) @@ -48,16 +49,13 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), ObfuscationMethod (NoObfuscation), WrapOption (..), WriterOptions (..)) import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags', - safeRead, stringify, trim, uniqueIdent, tshow) + stringify, uniqueIdent, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) import Text.Printf (printf) -import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), - add_attrs, lookupAttr, node, onlyElems, - ppElement, showElement, strContent, unode, unqual) -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import Text.Pandoc.XML (escapeStringForXML) import Text.DocTemplates (FromContext(lookupContext), Context(..), ToContext(toVal), Val(..)) @@ -69,7 +67,7 @@ newtype Chapter = Chapter [Block] data EPUBState = EPUBState { stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] , stMediaNextId :: Int - , stEpubSubdir :: String + , stEpubSubdir :: FilePath } type E m = StateT EPUBState m @@ -78,62 +76,63 @@ data EPUBMetadata = EPUBMetadata{ epubIdentifier :: [Identifier] , epubTitle :: [Title] , epubDate :: [Date] - , epubLanguage :: String + , epubLanguage :: Text , epubCreator :: [Creator] , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubBelongsToCollection :: Maybe String - , epubGroupPosition :: Maybe String - , epubCoverImage :: Maybe String + , epubSubject :: [Text] + , epubDescription :: Maybe Text + , epubType :: Maybe Text + , epubFormat :: Maybe Text + , epubPublisher :: Maybe Text + , epubSource :: Maybe Text + , epubRelation :: Maybe Text + , epubCoverage :: Maybe Text + , epubRights :: Maybe Text + , epubBelongsToCollection :: Maybe Text + , epubGroupPosition :: Maybe Text + , epubCoverImage :: Maybe FilePath , epubStylesheets :: [FilePath] , epubPageDirection :: Maybe ProgressionDirection - , epubIbooksFields :: [(String, String)] - , epubCalibreFields :: [(String, String)] + , epubIbooksFields :: [(Text, Text)] + , epubCalibreFields :: [(Text, Text)] } deriving Show data Date = Date{ - dateText :: String - , dateEvent :: Maybe String + dateText :: Text + , dateEvent :: Maybe Text } deriving Show data Creator = Creator{ - creatorText :: String - , creatorRole :: Maybe String - , creatorFileAs :: Maybe String + creatorText :: Text + , creatorRole :: Maybe Text + , creatorFileAs :: Maybe Text } deriving Show data Identifier = Identifier{ - identifierText :: String - , identifierScheme :: Maybe String + identifierText :: Text + , identifierScheme :: Maybe Text } deriving Show data Title = Title{ - titleText :: String - , titleFileAs :: Maybe String - , titleType :: Maybe String + titleText :: Text + , titleFileAs :: Maybe Text + , titleType :: Maybe Text } deriving Show data ProgressionDirection = LTR | RTL deriving Show -dcName :: String -> QName +dcName :: Text -> QName dcName n = QName n Nothing (Just "dc") -dcNode :: Node t => String -> t -> Element +dcNode :: Node t => Text -> t -> Element dcNode = node . dcName -opfName :: String -> QName +opfName :: Text -> QName opfName n = QName n Nothing (Just "opf") -toId :: FilePath -> String -toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' +toId :: FilePath -> Text +toId = T.pack . + map (\x -> if isAlphaNum x || x == '-' || x == '_' then x else '_') . takeFileName @@ -141,8 +140,8 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x -toVal' :: String -> Val TS.Text -toVal' = toVal . TS.pack +toVal' :: Text -> Val T.Text +toVal' = toVal mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry mkEntry path content = do @@ -172,21 +171,21 @@ getEPUBMetadata opts meta = do if null (epubIdentifier m) then do randomId <- getRandomUUID - return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] } + return $ m{ epubIdentifier = [Identifier (tshow randomId) Nothing] } else return m let addLanguage m = - if null (epubLanguage m) + if T.null (epubLanguage m) then case lookupContext "lang" (writerVariables opts) of - Just x -> return m{ epubLanguage = TS.unpack x } + Just x -> return m{ epubLanguage = x } Nothing -> do mLang <- lift $ P.lookupEnv "LANG" let localeLang = case mLang of Just lang -> - TS.map (\c -> if c == '_' then '-' else c) $ - TS.takeWhile (/='.') lang + T.map (\c -> if c == '_' then '-' else c) $ + T.takeWhile (/='.') lang Nothing -> "en-US" - return m{ epubLanguage = TS.unpack localeLang } + return m{ epubLanguage = localeLang } else return m let fixDate m = if null (epubDate m) @@ -201,7 +200,7 @@ getEPUBMetadata opts meta = do then return m else do let authors' = map stringify $ docAuthors meta - let toAuthor name = Creator{ creatorText = TS.unpack name + let toAuthor name = Creator{ creatorText = name , creatorRole = Just "aut" , creatorFileAs = Nothing } return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } @@ -249,31 +248,31 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md where getAttr n = lookupAttr (opfName n) attrs addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md = case getAttr "property" of - Just s | "ibooks:" `isPrefixOf` s -> - md{ epubIbooksFields = (drop 7 s, strContent e) : + Just s | "ibooks:" `T.isPrefixOf` s -> + md{ epubIbooksFields = (T.drop 7 s, strContent e) : epubIbooksFields md } _ -> case getAttr "name" of - Just s | "calibre:" `isPrefixOf` s -> + Just s | "calibre:" `T.isPrefixOf` s -> md{ epubCalibreFields = - (drop 8 s, fromMaybe "" $ getAttr "content") : + (T.drop 8 s, fromMaybe "" $ getAttr "content") : epubCalibreFields md } _ -> md where getAttr n = lookupAttr (unqual n) attrs addMetadataFromXML _ md = md -metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = TS.unpack s -metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils -metaValueToString (MetaBlocks bs) = TS.unpack $ stringify bs +metaValueToString :: MetaValue -> Text +metaValueToString (MetaString s) = s +metaValueToString (MetaInlines ils) = stringify ils +metaValueToString (MetaBlocks bs) = stringify bs metaValueToString (MetaBool True) = "true" metaValueToString (MetaBool False) = "false" metaValueToString _ = "" metaValueToPaths :: MetaValue -> [FilePath] -metaValueToPaths (MetaList xs) = map metaValueToString xs -metaValueToPaths x = [metaValueToString x] +metaValueToPaths (MetaList xs) = map (T.unpack . metaValueToString) xs +metaValueToPaths x = [T.unpack $ metaValueToString x] -getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a] +getList :: T.Text -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = case lookupMeta s meta of Just (MetaList xs) -> map handleMetaValue xs @@ -297,7 +296,7 @@ getTitle meta = getList "title" meta handleMetaValue , titleType = metaValueToString <$> M.lookup "type" m } handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing -getCreator :: TS.Text -> Meta -> [Creator] +getCreator :: T.Text -> Meta -> [Creator] getCreator s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m @@ -305,7 +304,7 @@ getCreator s meta = getList s meta handleMetaValue , creatorRole = metaValueToString <$> M.lookup "role" m } handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing -getDate :: TS.Text -> Meta -> [Date] +getDate :: T.Text -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Date{ dateText = fromMaybe "" $ @@ -314,7 +313,7 @@ getDate s meta = getList s meta handleMetaValue handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } -simpleList :: TS.Text -> Meta -> [String] +simpleList :: T.Text -> Meta -> [Text] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs @@ -339,7 +338,7 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverage = coverage , epubRights = rights , epubBelongsToCollection = belongsToCollection - , epubGroupPosition = groupPosition + , epubGroupPosition = groupPosition , epubCoverImage = coverImage , epubStylesheets = stylesheets , epubPageDirection = pageDirection @@ -363,31 +362,30 @@ metadataFromMeta opts meta = EPUBMetadata{ coverage = metaValueToString <$> lookupMeta "coverage" meta rights = metaValueToString <$> lookupMeta "rights" meta belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta - groupPosition = metaValueToString <$> lookupMeta "group-position" meta - coverImage = - (TS.unpack <$> lookupContext "epub-cover-image" - (writerVariables opts)) + groupPosition = metaValueToString <$> lookupMeta "group-position" meta + coverImage = T.unpack <$> + lookupContext "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta stylesheets = maybe [] metaValueToPaths mCss ++ case lookupContext "css" (writerVariables opts) of - Just xs -> map TS.unpack xs + Just xs -> map T.unpack xs Nothing -> case lookupContext "css" (writerVariables opts) of - Just x -> [TS.unpack x] + Just x -> [T.unpack x] Nothing -> [] - pageDirection = case map toLower . metaValueToString <$> + pageDirection = case T.toLower . metaValueToString <$> lookupMeta "page-progression-direction" meta of Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing ibooksFields = case lookupMeta "ibooks" meta of Just (MetaMap mp) - -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp + -> M.toList $ M.map metaValueToString mp _ -> [] calibreFields = case lookupMeta "calibre" meta of Just (MetaMap mp) - -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp + -> M.toList $ M.map metaValueToString mp _ -> [] -- | Produce an EPUB2 file from a Pandoc document. @@ -413,9 +411,11 @@ writeEPUB :: PandocMonad m writeEPUB epubVersion opts doc = do let epubSubdir = writerEpubSubdirectory opts -- sanity check on epubSubdir - unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + unless (T.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ throwError $ PandocEpubSubdirectoryError epubSubdir - let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir } + let initState = EPUBState { stMediaPaths = [] + , stMediaNextId = 0 + , stEpubSubdir = T.unpack epubSubdir } evalStateT (pandocToEPUB epubVersion opts doc) initState pandocToEPUB :: PandocMonad m @@ -439,7 +439,7 @@ pandocToEPUB version opts doc = do [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x - x -> TS.unpack $ stringify x + x -> stringify x -- stylesheet stylesheets <- case epubStylesheets metadata of @@ -461,7 +461,8 @@ pandocToEPUB version opts doc = do (ListVal $ map (\e -> toVal' $ (if useprefix then "../" else "") <> - makeRelative epubSubdir (eRelativePath e)) + T.pack + (makeRelative epubSubdir (eRelativePath e))) stylesheetEntries) mempty @@ -490,18 +491,19 @@ pandocToEPUB version opts doc = do case imageSize opts' (B.toStrict imgContent) of Right sz -> return $ sizeInPixels sz Left err' -> (0, 0) <$ report - (CouldNotDetermineImageSize (TS.pack img) err') + (CouldNotDetermineImageSize (T.pack img) err') cpContent <- lift $ writeHtml opts'{ writerVariables = Context (M.fromList [ ("coverpage", toVal' "true"), ("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle), - ("cover-image", toVal' coverImageName), + escapeStringForXML plainTitle), + ("cover-image", + toVal' $ T.pack coverImageName), ("cover-image-width", toVal' $ - show coverImageWidth), + tshow coverImageWidth), ("cover-image-height", toVal' $ - show coverImageHeight)]) <> + tshow coverImageHeight)]) <> cssvars True <> vars } (Pandoc meta []) coverEntry <- mkEntry "text/cover.xhtml" cpContent @@ -517,7 +519,7 @@ pandocToEPUB version opts doc = do ("titlepage", toVal' "true"), ("body-type", toVal' "frontmatter"), ("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle)]) + escapeStringForXML plainTitle)]) <> cssvars True <> vars } (Pandoc meta []) tpEntry <- mkEntry "text/title_page.xhtml" tpContent @@ -526,7 +528,7 @@ pandocToEPUB version opts doc = do let matchingGlob f = do xs <- lift $ P.glob f when (null xs) $ - report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files" + report $ CouldNotFetchResource (T.pack f) "glob did not match any font files" return xs let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< lift (P.readFileLazy f) @@ -573,13 +575,13 @@ pandocToEPUB version opts doc = do let chapters' = secsToChapters secs - let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)] + let extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)] extractLinkURL' num (Span (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (Link (ident, _, _) _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (Image (ident, _, _) _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (RawInline fmt raw) | isHtmlFormat fmt = foldr (\tag -> @@ -587,18 +589,18 @@ pandocToEPUB version opts doc = do TagOpen{} -> case fromAttrib "id" tag of "" -> id - x -> ((x, TS.pack (showChapter num) <> "#" <> x):) + x -> ((x, showChapter num <> "#" <> x):) _ -> id) [] (parseTags raw) extractLinkURL' _ _ = [] - let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] + let extractLinkURL :: Int -> Block -> [(T.Text, T.Text)] extractLinkURL num (Div (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (Header _ (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (Table (ident,_,_) _ _ _ _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (RawBlock fmt raw) | isHtmlFormat fmt = foldr (\tag -> @@ -606,7 +608,7 @@ pandocToEPUB version opts doc = do TagOpen{} -> case fromAttrib "id" tag of "" -> id - x -> ((x, TS.pack (showChapter num) <> "#" <> x):) + x -> ((x, showChapter num <> "#" <> x):) _ -> id) [] (parseTags raw) extractLinkURL num b = query (extractLinkURL' num) b @@ -617,7 +619,7 @@ pandocToEPUB version opts doc = do let fixInternalReferences :: Inline -> Inline fixInternalReferences (Link attr lab (src, tit)) - | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of + | Just ('#', xs) <- T.uncons src = case lookup xs reftable of Just ys -> Link attr lab (ys, tit) Nothing -> Link attr lab (src, tit) fixInternalReferences x = x @@ -630,7 +632,7 @@ pandocToEPUB version opts doc = do chapters' let chapToEntry num (Chapter bs) = - mkEntry ("text/" ++ showChapter num) =<< + mkEntry ("text/" ++ T.unpack (showChapter num)) =<< writeHtml opts'{ writerVariables = Context (M.fromList [("body-type", toVal' bodyType), @@ -677,12 +679,12 @@ pandocToEPUB version opts doc = do let chapterNode ent = unode "item" ! ([("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), ("media-type", "application/xhtml+xml")] ++ case props ent of [] -> [] - xs -> [("properties", unwords xs)]) + xs -> [("properties", T.unwords xs)]) $ () let chapterRefNode ent = unode "itemref" ! @@ -691,17 +693,17 @@ pandocToEPUB version opts doc = do let pictureNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), ("media-type", - maybe "application/octet-stream" TS.unpack + fromMaybe "application/octet-stream" $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), - ("media-type", maybe "" TS.unpack $ + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () let tocTitle = maybe plainTitle @@ -710,7 +712,7 @@ pandocToEPUB version opts doc = do (x:_) -> return $ identifierText x -- use first identifier as UUID [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen currentTime <- lift P.getTimestamp - let contentsData = UTF8.fromStringLazy $ ppTopElement $ + let contentsData = UTF8.fromTextLazy $ TL.fromStrict $ ppTopElement $ unode "package" ! ([("version", case version of EPUB2 -> "2.0" @@ -728,7 +730,8 @@ pandocToEPUB version opts doc = do ,("media-type","application/xhtml+xml")] ++ [("properties","nav") | epub3 ]) $ () ] ++ - [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp) + [ unode "item" ! [("id","stylesheet" <> tshow n) + , ("href", T.pack fp) ,("media-type","text/css")] $ () | (n :: Int, fp) <- zip [1..] (map (makeRelative epubSubdir . eRelativePath) @@ -773,7 +776,7 @@ pandocToEPUB version opts doc = do let tocLevel = writerTOCDepth opts let navPointNode :: PandocMonad m - => (Int -> [Inline] -> TS.Text -> [Element] -> Element) + => (Int -> [Inline] -> T.Text -> [Element] -> Element) -> Block -> StateT Int m [Element] navPointNode formatter (Div (ident,_,_) (Header lvl (_,_,kvs) ils : children)) = @@ -783,7 +786,7 @@ pandocToEPUB version opts doc = do n <- get modify (+1) let num = fromMaybe "" $ lookup "number" kvs - let tit = if writerNumberSections opts && not (TS.null num) + let tit = if writerNumberSections opts && not (T.null num) then Span ("", ["section-header-number"], []) [Str num] : Space : ils else ils @@ -797,21 +800,21 @@ pandocToEPUB version opts doc = do concat <$> mapM (navPointNode formatter) bs navPointNode _ _ = return [] - let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit - , unode "content" ! [("src", "text/" <> TS.unpack src)] $ () + [("id", "navPoint-" <> tshow n)] $ + [ unode "navLabel" $ unode "text" $ stringify tit + , unode "content" ! [("src", "text/" <> src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta) + [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) , unode "content" ! [("src", "text/title_page.xhtml")] $ () ] navMap <- lift $ evalStateT (concat <$> mapM (navPointNode navMapFormatter) secs) 1 - let tocData = UTF8.fromStringLazy $ ppTopElement $ + let tocData = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ @@ -833,11 +836,11 @@ pandocToEPUB version opts doc = do ] tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! - [("id", "toc-li-" ++ show n)] $ + [("id", "toc-li-" <> tshow n)] $ (unode "a" ! - [("href", "text/" <> TS.unpack src)] + [("href", "text/" <> src)] $ titElements) : case subs of [] -> [] @@ -850,7 +853,7 @@ pandocToEPUB version opts doc = do , writerVariables = Context (M.fromList [("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle)]) + escapeStringForXML plainTitle)]) <> writerVariables opts} (Pandoc nullMeta [Plain $ walk clean tit])) of @@ -865,7 +868,7 @@ pandocToEPUB version opts doc = do tocBlocks <- lift $ evalStateT (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1 let navBlocks = [RawBlock (Format "html") - $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces + $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle @@ -875,21 +878,21 @@ pandocToEPUB version opts doc = do [ unode "a" ! [("href", "text/title_page.xhtml") ,("epub:type", "titlepage")] $ - ("Title Page" :: String) ] : + ("Title Page" :: Text) ] : [ unode "li" [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ - ("Cover" :: String)] | + ("Cover" :: Text)] | isJust (epubCoverImage metadata) ] ++ [ unode "li" [ unode "a" ! [("href", "#toc") ,("epub:type", "toc")] $ - ("Table of Contents" :: String) + ("Table of Contents" :: Text) ] | writerTableOfContents opts ] else [] - let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $ + let landmarks = [RawBlock (Format "html") $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("id","landmarks") ,("hidden","hidden")] $ @@ -910,22 +913,22 @@ pandocToEPUB version opts doc = do UTF8.fromStringLazy "application/epub+zip" -- container.xml - let containerData = UTF8.fromStringLazy $ ppTopElement $ + let containerData = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ unode "rootfile" ! [("full-path", (if null epubSubdir then "" - else epubSubdir ++ "/") ++ "content.opf") + else T.pack epubSubdir <> "/") <> "content.opf") ,("media-type","application/oebps-package+xml")] $ () containerEntry <- mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = UTF8.fromStringLazy $ ppTopElement $ + let apple = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ - unode "option" ! [("name","specified-fonts")] $ ("true" :: String) + unode "option" ! [("name","specified-fonts")] $ ("true" :: Text) appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- construct archive @@ -947,7 +950,8 @@ metadataElement version md currentTime = ++ publisherNodes ++ sourceNodes ++ relationNodes ++ coverageNodes ++ rightsNodes ++ coverImageNodes ++ modifiedNodes ++ belongsToCollectionNodes - withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) + withIds base f = concat . zipWith f (map (\x -> base <> + T.cons '-' (tshow x)) ([1..] :: [Int])) identifierNodes = withIds "epub-id" toIdentifierNode $ epubIdentifier md @@ -961,9 +965,9 @@ metadataElement version md currentTime = (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] ibooksNodes = map ibooksNode (epubIbooksFields md) - ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" <> k)] $ v calibreNodes = map calibreNode (epubCalibreFields md) - calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k), + calibreNode (k, v) = unode "meta" ! [("name", "calibre:" <> k), ("content", v)] $ () languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ @@ -989,12 +993,12 @@ metadataElement version md currentTime = maybe [] (\belongsToCollection -> (unode "meta" ! [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection ) : - [unode "meta" ! [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: String) ]) + [unode "meta" ! [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: Text) ]) (epubBelongsToCollection md)++ maybe [] (\groupPosition -> [unode "meta" ! [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ]) (epubGroupPosition md) - dcTag n s = unode ("dc:" ++ n) s + dcTag n s = unode ("dc:" <> n) s dcTag' n s = [dcTag n s] toIdentifierNode id' (Identifier txt scheme) | version == EPUB2 = [dcNode "identifier" ! @@ -1002,7 +1006,7 @@ metadataElement version md currentTime = txt] | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) : maybe [] ((\x -> [unode "meta" ! - [ ("refines",'#':id') + [ ("refines","#" <> id') , ("property","identifier-type") , ("scheme","onix:codelist5") ] @@ -1018,10 +1022,10 @@ metadataElement version md currentTime = (creatorRole creator >>= toRelator)) $ creatorText creator] | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) + [("refines","#" <> id'),("property","file-as")] $ x]) (creatorFileAs creator) ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","role"), + [("refines","#" <> id'),("property","role"), ("scheme","marc:relators")] $ x]) (creatorRole creator >>= toRelator) toTitleNode id' title @@ -1033,16 +1037,16 @@ metadataElement version md currentTime = | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) + [("refines","#" <> id'),("property","file-as")] $ x]) (titleFileAs title) ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","title-type")] $ x]) + [("refines","#" <> id'),("property","title-type")] $ x]) (titleType title) toDateNode id' date = [dcNode "date" ! (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] - schemeToOnix :: String -> String + schemeToOnix :: Text -> Text schemeToOnix "ISBN-10" = "02" schemeToOnix "GTIN-13" = "03" schemeToOnix "UPC" = "04" @@ -1060,48 +1064,48 @@ metadataElement version md currentTime = schemeToOnix "OLCC" = "28" schemeToOnix _ = "01" -showDateTimeISO8601 :: UTCTime -> String -showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" +showDateTimeISO8601 :: UTCTime -> Text +showDateTimeISO8601 = T.pack . formatTime defaultTimeLocale "%FT%TZ" transformTag :: PandocMonad m - => Tag TS.Text - -> E m (Tag TS.Text) + => Tag T.Text + -> E m (Tag T.Text) transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef $ TS.unpack src - newposter <- modifyMediaRef $ TS.unpack poster + newsrc <- modifyMediaRef $ T.unpack src + newposter <- modifyMediaRef $ T.unpack poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", "../" <> newsrc) | not (TS.null newsrc)] ++ - [("poster", "../" <> newposter) | not (TS.null newposter)] + [("src", "../" <> newsrc) | not (T.null newsrc)] ++ + [("poster", "../" <> newposter) | not (T.null newposter)] return $ TagOpen name attr' transformTag tag = return tag modifyMediaRef :: PandocMonad m => FilePath - -> E m TS.Text + -> E m T.Text modifyMediaRef "" = return "" modifyMediaRef oldsrc = do media <- gets stMediaPaths case lookup oldsrc media of - Just (n,_) -> return $ TS.pack n + Just (n,_) -> return $ T.pack n Nothing -> catchError - (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc - let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack + (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc + let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack (("." <>) <$> (mbMime >>= extensionFromMimeType)) newName <- getMediaNextNewName ext let newPath = "media/" ++ newName entry <- mkEntry newPath (B.fromChunks . (:[]) $ img) modify $ \st -> st{ stMediaPaths = (oldsrc, (newPath, Just entry)):media} - return $ TS.pack newPath) + return $ T.pack newPath) (\e -> do - report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e) - return $ TS.pack oldsrc) + report $ CouldNotFetchResource (T.pack oldsrc) (tshow e) + return $ T.pack oldsrc) -getMediaNextNewName :: PandocMonad m => String -> E m String +getMediaNextNewName :: PandocMonad m => FilePath -> E m FilePath getMediaNextNewName ext = do nextId <- gets stMediaNextId modify $ \st -> st { stMediaNextId = nextId + 1 } @@ -1128,11 +1132,11 @@ transformInline :: PandocMonad m -> Inline -> E m Inline transformInline _opts (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef $ TS.unpack src + newsrc <- modifyMediaRef $ T.unpack src return $ Image attr lab ("../" <> newsrc, tit) transformInline opts x@(Math t m) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m)) + newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] ("../" <> newsrc, "")] @@ -1143,40 +1147,26 @@ transformInline _opts (RawInline fmt raw) return $ RawInline fmt (renderTags' tags') transformInline _ x = return x -(!) :: (t -> Element) -> [(String, String)] -> t -> Element +(!) :: (t -> Element) -> [(Text, Text)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) --- | Version of 'ppTopElement' that specifies UTF-8 encoding. -ppTopElement :: Element -> String -ppTopElement = ("\n" ++) . unEntity . ppElement - -- unEntity removes numeric entities introduced by ppElement - -- (kindlegen seems to choke on these). - where unEntity [] = "" - unEntity ('&':'#':xs) = - let (ds,ys) = break (==';') xs - rest = drop 1 ys - in case safeRead (TS.pack $ "'\\" <> ds <> "'") of - Just x -> x : unEntity rest - Nothing -> '&':'#':unEntity xs - unEntity (x:xs) = x : unEntity xs - mediaTypeOf :: FilePath -> Maybe MimeType mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of - Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y + Just y | any (`T.isPrefixOf` y) mediaPrefixes -> Just y _ -> Nothing -- Returns filename for chapter number. -showChapter :: Int -> String -showChapter = printf "ch%03d.xhtml" +showChapter :: Int -> Text +showChapter = T.pack . printf "ch%03d.xhtml" -- Add identifiers to any headers without them. addIdentifiers :: WriterOptions -> [Block] -> [Block] addIdentifiers opts bs = evalState (mapM go bs) Set.empty where go (Header n (ident,classes,kvs) ils) = do ids <- get - let ident' = if TS.null ident + let ident' = if T.null ident then uniqueIdent (writerExtensions opts) ils ids else ident modify $ Set.insert ident' @@ -1184,27 +1174,27 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty go x = return x -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM -normalizeDate' :: String -> Maybe String -normalizeDate' = fmap TS.unpack . go . trim . TS.pack +normalizeDate' :: Text -> Maybe Text +normalizeDate' = go . T.strip where go xs - | TS.length xs == 4 -- YYY - , TS.all isDigit xs = Just xs - | (y, s) <- TS.splitAt 4 xs -- YYY-MM - , Just ('-', m) <- TS.uncons s - , TS.length m == 2 - , TS.all isDigit y && TS.all isDigit m = Just xs + | T.length xs == 4 -- YYY + , T.all isDigit xs = Just xs + | (y, s) <- T.splitAt 4 xs -- YYY-MM + , Just ('-', m) <- T.uncons s + , T.length m == 2 + , T.all isDigit y && T.all isDigit m = Just xs | otherwise = normalizeDate xs -toRelator :: String -> Maybe String +toRelator :: Text -> Maybe Text toRelator x | x `elem` relators = Just x - | otherwise = lookup (map toLower x) relatorMap + | otherwise = lookup (T.toLower x) relatorMap -relators :: [String] +relators :: [Text] relators = map snd relatorMap -relatorMap :: [(String, String)] +relatorMap :: [(Text, Text)] relatorMap = [("abridger", "abr") ,("actor", "act") diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 9334d6e9a..3b5d04427 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -25,15 +25,12 @@ import Data.ByteString.Base64 (encode) import Data.Char (isAscii, isControl, isSpace) import Data.Either (lefts, rights) import Data.List (intercalate) -import Data.Text (Text, pack) +import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) -import Text.XML.Light -import qualified Text.XML.Light as X -import qualified Text.XML.Light.Cursor as XC -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light as X import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import qualified Text.Pandoc.Class.PandocMonad as P @@ -44,6 +41,7 @@ import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, makeSections, tshow, stringify) import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable) +import Data.Generics (everywhere, mkT) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -88,7 +86,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do (imgs,missing) <- get >>= (lift . fetchImages . imagesToFetch) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ pack $ xml_head ++ showContent fb2_xml ++ "\n" + return $ xml_head <> showContent fb2_xml <> "\n" where xml_head = "\n" fb2_attrs = @@ -100,8 +98,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do description :: PandocMonad m => Meta -> FBM m Content description meta' = do let genre = case lookupMetaString "genre" meta' of - "" -> el "genre" ("unrecognised" :: String) - s -> el "genre" (T.unpack s) + "" -> el "genre" ("unrecognised" :: Text) + s -> el "genre" s bt <- booktitle meta' let as = authors meta' dd <- docdate meta' @@ -112,7 +110,7 @@ description meta' = do Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] Just (MetaString s) -> [el "lang" $ iso639 s] _ -> [] - where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + where iso639 = T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 let coverimage url = do let img = Image nullAttr mempty (url, "") im <- insertImage InlineImage img @@ -124,7 +122,7 @@ description meta' = do return $ el "description" [ el "title-info" (genre : (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang)) - , el "document-info" [el "program-used" ("pandoc" :: String)] + , el "document-info" [el "program-used" ("pandoc" :: Text)] ] booktitle :: PandocMonad m => Meta -> FBM m [Content] @@ -137,15 +135,15 @@ authors meta' = cMap author (docAuthors meta') author :: [Inline] -> [Content] author ss = - let ws = words . cMap plain $ ss - email = el "email" <$> take 1 (filter ('@' `elem`) ws) - ws' = filter ('@' `notElem`) ws + let ws = T.words $ mconcat $ map plain ss + email = el "email" <$> take 1 (filter (T.any (=='@')) ws) + ws' = filter (not . T.any (== '@')) ws names = case ws' of [nickname] -> [ el "nickname" nickname ] [fname, lname] -> [ el "first-name" fname , el "last-name" lname ] (fname:rest) -> [ el "first-name" fname - , el "middle-name" (concat . init $ rest) + , el "middle-name" (T.concat . init $ rest) , el "last-name" (last rest) ] [] -> [] in list $ el "author" (names ++ email) @@ -206,7 +204,7 @@ renderFootnotes = do el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) where renderFN (n, idstr, cs) = - let fn_texts = el "title" (el "p" (show n)) : cs + let fn_texts = el "title" (el "p" (tshow n)) : cs in el "section" ([uattr "id" idstr], fn_texts) -- | Fetch images and encode them for the FictionBook XML. @@ -282,7 +280,7 @@ isMimeType s = where types = ["text","image","audio","video","application","message","multipart"] valid c = isAscii c && not (isControl c) && not (isSpace c) && - c `notElem` ("()<>@,;:\\\"/[]?=" :: String) + c `notElem` ("()<>@,;:\\\"/[]?=" :: [Char]) footnoteID :: Int -> Text footnoteID i = "n" <> tshow i @@ -306,7 +304,7 @@ blockToXml (Para [Image atr alt (src,tgt)]) = insertImage NormalImage (Image atr alt (src,tit)) blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code" . T.unpack) . T.lines $ s + map (el "p" . el "code") . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" then @@ -346,11 +344,11 @@ blockToXml (Table _ blkCapt specs thead tbody tfoot) = do c <- el "emphasis" <$> cMapM toXml caption return [el "table" (hd <> bd), el "p" c] where - mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content + mkrow :: PandocMonad m => Text -> [[Block]] -> [Alignment] -> FBM m Content mkrow tag cells aligns' = el "tr" <$> mapM (mkcell tag) (zip cells aligns') -- - mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content + mkcell :: PandocMonad m => Text -> ([Block], Alignment) -> FBM m Content mkcell tag (cell, align) = do cblocks <- cMapM blockToXml cell return $ el tag ([align_attr align], cblocks) @@ -424,7 +422,7 @@ toXml (Quoted DoubleQuote ss) = do inner <- cMapM toXml ss return $ [txt "“"] ++ inner ++ [txt "”"] toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles -toXml (Code _ s) = return [el "code" $ T.unpack s] +toXml (Code _ s) = return [el "code" s] toXml Space = return [txt " "] toXml SoftBreak = return [txt "\n"] toXml LineBreak = return [txt "\n"] @@ -456,7 +454,7 @@ insertMath immode formula = do let imgurl = url <> T.pack (urlEncode $ T.unpack formula) let img = Image nullAttr alt (imgurl, "") insertImage immode img - _ -> return [el "code" $ T.unpack formula] + _ -> return [el "code" formula] insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do @@ -471,31 +469,16 @@ insertImage immode (Image _ alt (url,ttl)) = do el "image" $ [ attr ("l","href") ("#" <> fname) , attr ("l","type") (tshow immode) - , uattr "alt" (T.pack $ cMap plain alt) ] + , uattr "alt" (mconcat $ map plain alt) ] ++ ttlattr insertImage _ _ = error "unexpected inline instead of image" replaceImagesWithAlt :: [Text] -> Content -> Content -replaceImagesWithAlt missingHrefs body = - let cur = XC.fromContent body - cur' = replaceAll cur - in XC.toTree . XC.root $ cur' +replaceImagesWithAlt missingHrefs = everywhere (mkT go) where - -- - replaceAll :: XC.Cursor -> XC.Cursor - replaceAll c = - let n = XC.current c - c' = if isImage n && isMissing n - then XC.modifyContent replaceNode c - else c - in case XC.nextDF c' of - (Just cnext) -> replaceAll cnext - Nothing -> c' -- end of document - -- - isImage :: Content -> Bool - isImage (Elem e) = elName e == uname "image" - isImage _ = False - -- + go c = if isMissing c + then replaceNode c + else c isMissing (Elem img@Element{}) = let imgAttrs = elAttribs img badAttrs = map (attr ("l","href")) missingHrefs @@ -505,18 +488,18 @@ replaceImagesWithAlt missingHrefs body = replaceNode :: Content -> Content replaceNode n@(Elem img@Element{}) = let attrs = elAttribs img - alt = getAttrVal attrs (uname "alt") + alt = getAttrVal attrs (unqual "alt") imtype = getAttrVal attrs (qname "l" "type") in case (alt, imtype) of (Just alt', Just imtype') -> - if imtype' == show NormalImage + if imtype' == tshow NormalImage then el "p" alt' - else txt $ T.pack alt' - (Just alt', Nothing) -> txt $ T.pack alt' -- no type attribute + else txt alt' + (Just alt', Nothing) -> txt alt' -- no type attribute _ -> n -- don't replace if alt text is not found replaceNode n = n -- - getAttrVal :: [X.Attr] -> QName -> Maybe String + getAttrVal :: [X.Attr] -> QName -> Maybe Text getAttrVal attrs name = case filter ((name ==) . attrKey) attrs of (a:_) -> Just (attrVal a) @@ -524,7 +507,7 @@ replaceImagesWithAlt missingHrefs body = -- | Wrap all inlines with an XML tag (given its unqualified name). -wrap :: PandocMonad m => String -> [Inline] -> FBM m Content +wrap :: PandocMonad m => Text -> [Inline] -> FBM m Content wrap tagname inlines = el tagname `liftM` cMapM toXml inlines -- " Create a singleton list. @@ -532,31 +515,31 @@ list :: a -> [a] list = (:[]) -- | Convert an 'Inline' to plaintext. -plain :: Inline -> String -plain (Str s) = T.unpack s -plain (Emph ss) = cMap plain ss -plain (Underline ss) = cMap plain ss -plain (Span _ ss) = cMap plain ss -plain (Strong ss) = cMap plain ss -plain (Strikeout ss) = cMap plain ss -plain (Superscript ss) = cMap plain ss -plain (Subscript ss) = cMap plain ss -plain (SmallCaps ss) = cMap plain ss -plain (Quoted _ ss) = cMap plain ss -plain (Cite _ ss) = cMap plain ss -- FIXME -plain (Code _ s) = T.unpack s +plain :: Inline -> Text +plain (Str s) = s +plain (Emph ss) = mconcat $ map plain ss +plain (Underline ss) = mconcat $ map plain ss +plain (Span _ ss) = mconcat $ map plain ss +plain (Strong ss) = mconcat $ map plain ss +plain (Strikeout ss) = mconcat $ map plain ss +plain (Superscript ss) = mconcat $ map plain ss +plain (Subscript ss) = mconcat $ map plain ss +plain (SmallCaps ss) = mconcat $ map plain ss +plain (Quoted _ ss) = mconcat $ map plain ss +plain (Cite _ ss) = mconcat $ map plain ss -- FIXME +plain (Code _ s) = s plain Space = " " plain SoftBreak = " " plain LineBreak = "\n" -plain (Math _ s) = T.unpack s +plain (Math _ s) = s plain (RawInline _ _) = "" -plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"]) -plain (Image _ alt _) = cMap plain alt +plain (Link _ text (url,_)) = mconcat (map plain text ++ [" <", url, ">"]) +plain (Image _ alt _) = mconcat $ map plain alt plain (Note _) = "" -- FIXME -- | Create an XML element. el :: (Node t) - => String -- ^ unqualified element name + => Text -- ^ unqualified element name -> t -- ^ node contents -> Content -- ^ XML content el name cs = Elem $ unode name cs @@ -569,22 +552,18 @@ spaceBeforeAfter cs = -- | Create a plain-text XML content. txt :: Text -> Content -txt s = Text $ CData CDataText (T.unpack s) Nothing +txt s = Text $ CData CDataText s Nothing -- | Create an XML attribute with an unqualified name. -uattr :: String -> Text -> Text.XML.Light.Attr -uattr name = Attr (uname name) . T.unpack +uattr :: Text -> Text -> X.Attr +uattr name = Attr (unqual name) -- | Create an XML attribute with a qualified name from given namespace. -attr :: (String, String) -> Text -> Text.XML.Light.Attr -attr (ns, name) = Attr (qname ns name) . T.unpack - --- | Unqualified name -uname :: String -> QName -uname name = QName name Nothing Nothing +attr :: (Text, Text) -> Text -> X.Attr +attr (ns, name) = Attr (qname ns name) -- | Qualified name -qname :: String -> String -> QName +qname :: Text -> Text -> QName qname ns name = QName name Nothing (Just ns) -- | Abbreviation for 'concatMap'. diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 06369b4db..101b236aa 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -40,9 +40,9 @@ import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light import Text.TeXMath -import Text.XML.Light +import qualified Text.XML.Light as XL newtype ODTState = ODTState { stEntries :: [Entry] } @@ -181,18 +181,20 @@ updateStyleWithLang (Just lang) arch = do PandocXMLError "styles.xml" msg Right d -> return $ toEntry "styles.xml" epochtime - ( fromStringLazy + ( fromTextLazy + . TL.fromStrict . ppTopElement . addLang lang $ d ) else return e) (zEntries arch) return arch{ zEntries = entries } +-- TODO FIXME avoid this generic traversal! addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) - = Attr n (T.unpack $ langLanguage lang) + = Attr n (langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (T.unpack $ langRegion lang) + = Attr n (langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements @@ -238,8 +240,8 @@ transformPicMath _ (Math t math) = do case writeMathML dt <$> readTeX math of Left _ -> return $ Math t math Right r -> do - let conf = useShortEmptyTags (const False) defaultConfigPP - let mathml = ppcTopElement conf r + let conf = XL.useShortEmptyTags (const False) XL.defaultConfigPP + let mathml = XL.ppcTopElement conf r epochtime <- floor `fmap` lift P.getPOSIXTime let dirname = "Formula-" ++ show (length entries) ++ "/" let fname = dirname ++ "content.xml" diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 8f60e70d5..0533d6c12 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -29,33 +29,32 @@ import Control.Monad.Except (throwError) import Text.Pandoc.Error import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString.Lazy.Char8 as BL8 import Data.Maybe (mapMaybe) import qualified Data.Text as T +import Data.Text (Text) import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.XML.Light as XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light -mknode :: Node t => String -> [(String,String)] -> t -> Element +mknode :: Node t => Text -> [(Text,Text)] -> t -> Element mknode s attrs = add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) -mktnode :: String -> [(String,String)] -> T.Text -> Element -mktnode s attrs = mknode s attrs . T.unpack +mktnode :: Text -> [(Text,Text)] -> T.Text -> Element +mktnode s attrs = mknode s attrs -nodename :: String -> QName +nodename :: Text -> QName nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } - where (name, prefix) = case break (==':') s of - (xs,[]) -> (xs, Nothing) - (ys, _:zs) -> (zs, Just ys) + where (name, prefix) = case T.break (==':') s of + (xs,ys) -> case T.uncons ys of + Nothing -> (xs, Nothing) + Just (_,zs) -> (zs, Just xs) toLazy :: B.ByteString -> BL.ByteString toLazy = BL.fromChunks . (:[]) renderXml :: Element -> BL.ByteString -renderXml elt = BL8.pack "\n" <> - UTF8.fromStringLazy (showElement elt) +renderXml elt = BL.fromStrict (UTF8.fromText (showTopElement elt)) parseXml :: PandocMonad m => Archive -> Archive -> String -> m Element parseXml refArchive distArchive relpath = @@ -70,25 +69,25 @@ parseXml refArchive distArchive relpath = -- Copied from Util -attrToNSPair :: XML.Attr -> Maybe (String, String) -attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair :: Attr -> Maybe (Text, Text) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) attrToNSPair _ = Nothing elemToNameSpaces :: Element -> NameSpaces elemToNameSpaces = mapMaybe attrToNSPair . elAttribs -elemName :: NameSpaces -> String -> String -> QName +elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) -isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = let ns' = ns ++ elemToNameSpaces element in qName (elName element) == name && qURI (elName element) == lookup prefix ns' -type NameSpaces = [(String, String)] +type NameSpaces = [(Text, Text)] -- | Scales the image to fit the page -- sizes are passed in emu diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 0a7060895..5caeb0753 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -20,16 +20,16 @@ import Control.Monad.Except (throwError, catchError) import Control.Monad.Reader import Control.Monad.State import Codec.Archive.Zip -import Data.Char (toUpper) import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) import Data.Default +import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Read import Data.Time (formatTime, defaultTimeLocale) import Data.Time.Clock (UTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light as XML import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -48,6 +48,7 @@ import Text.DocTemplates (FromContext(lookupContext)) import Text.TeXMath import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Powerpoint.Presentation +import Text.Pandoc.Shared (tshow) import Skylighting (fromColor) -- |The 'EMU' type is used to specify sizes in English Metric Units. @@ -84,10 +85,13 @@ getPresentationSize refArchive distArchive = do sldSize <- findChild (elemName ns "p" "sldSz") presElement cxS <- findAttr (QName "cx" Nothing Nothing) sldSize cyS <- findAttr (QName "cy" Nothing Nothing) sldSize - (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String) - (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String) + cx <- readTextAsInteger cxS + cy <- readTextAsInteger cyS return (cx `div` 12700, cy `div` 12700) +readTextAsInteger :: Text -> Maybe Integer +readTextAsInteger = either (const Nothing) (Just . fst) . Data.Text.Read.decimal + data WriterEnv = WriterEnv { envRefArchive :: Archive , envDistArchive :: Archive , envUTCTime :: UTCTime @@ -161,9 +165,6 @@ runP env st p = evalStateT (runReaderT p env) st -------------------------------------------------------------------- -findAttrText :: QName -> Element -> Maybe T.Text -findAttrText n = fmap T.pack . findAttr n - monospaceFont :: Monad m => P m T.Text monospaceFont = do vars <- writerVariables <$> asks envOpts @@ -171,10 +172,9 @@ monospaceFont = do Just s -> return s Nothing -> return "Courier" --- Kept as string for XML.Light -fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)] +fontSizeAttributes :: Monad m => RunProps -> P m [(Text, Text)] fontSizeAttributes RunProps { rPropForceSize = Just sz } = - return [("sz", show $ sz * 100)] + return [("sz", tshow $ sz * 100)] fontSizeAttributes _ = return [] copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive @@ -365,7 +365,7 @@ shapeHasId :: NameSpaces -> T.Text -> Element -> Bool shapeHasId ns ident element | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr - , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr = + , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = nm == ident | otherwise = False @@ -396,10 +396,10 @@ getShapeDimensions ns element ext <- findChild (elemName ns "a" "ext") xfrm cxS <- findAttr (QName "cx" Nothing Nothing) ext cyS <- findAttr (QName "cy" Nothing Nothing) ext - (x, _) <- listToMaybe $ reads xS - (y, _) <- listToMaybe $ reads yS - (cx, _) <- listToMaybe $ reads cxS - (cy, _) <- listToMaybe $ reads cyS + x <- readTextAsInteger xS + y <- readTextAsInteger yS + cx <- readTextAsInteger cxS + cy <- readTextAsInteger cyS return ((x `div` 12700, y `div` 12700), (cx `div` 12700, cy `div` 12700)) | otherwise = Nothing @@ -430,7 +430,7 @@ getContentShapeSize ns layout master Nothing -> do let mbSz = findChild (elemName ns "p" "nvSpPr") sp >>= findChild (elemName ns "p" "cNvPr") >>= - findAttrText (QName "id" Nothing Nothing) >>= + findAttr (QName "id" Nothing Nothing) >>= flip getMasterShapeDimensionsById master case mbSz of Just sz' -> return sz' @@ -450,8 +450,8 @@ buildSpTree ns spTreeElem newShapes = fn _ = True replaceNamedChildren :: NameSpaces - -> String - -> String + -> Text + -> Text -> [Element] -> Element -> Element @@ -654,10 +654,10 @@ createCaption contentShapeDimensions paraElements = do ] , mknode "p:spPr" [] [ mknode "a:xfrm" [] - [ mknode "a:off" [("x", show $ 12700 * x), - ("y", show $ 12700 * (y + cy - captionHeight))] () - , mknode "a:ext" [("cx", show $ 12700 * cx), - ("cy", show $ 12700 * captionHeight)] () + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * (y + cy - captionHeight))] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * captionHeight)] () ] , mknode "a:prstGeom" [("prst", "rect")] [ mknode "a:avLst" [] () @@ -706,11 +706,13 @@ makePicElements layout picProps mInfo alt = do ,("noChangeAspect","1")] () -- cNvPr will contain the link information so we do that separately, -- and register the link if necessary. - let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")] + let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo), + ("id","0"), + ("name","Picture 1")] cNvPr <- case picPropLink picProps of Just link -> do idNum <- registerLink link return $ mknode "p:cNvPr" cNvPrAttr $ - mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] () + mknode "a:hlinkClick" [("r:id", "rId" <> tshow idNum)] () Nothing -> return $ mknode "p:cNvPr" cNvPrAttr () let nvPicPr = mknode "p:nvPicPr" [] [ cNvPr @@ -718,13 +720,13 @@ makePicElements layout picProps mInfo alt = do , mknode "p:nvPr" [] ()] let blipFill = mknode "p:blipFill" [] [ mknode "a:blip" [("r:embed", "rId" <> - show (mInfoLocalId mInfo))] () + tshow (mInfoLocalId mInfo))] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] let xfrm = mknode "a:xfrm" [] - [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] () - , mknode "a:ext" [("cx",show dimX') - ,("cy",show dimY')] () ] + [ mknode "a:off" [("x", tshow xoff'), ("y", tshow yoff')] () + , mknode "a:ext" [("cx", tshow dimX') + ,("cy", tshow dimY')] () ] let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ mknode "a:avLst" [] () let ln = mknode "a:ln" [("w","9525")] @@ -763,7 +765,7 @@ paraElemToElements (Run rpr s) = do Just DoubleStrike -> [("strike", "dblStrike")] Nothing -> []) <> (case rBaseline rpr of - Just n -> [("baseline", show n)] + Just n -> [("baseline", tshow n)] Nothing -> []) <> (case rCap rpr of Just NoCapitals -> [("cap", "none")] @@ -780,43 +782,44 @@ paraElemToElements (Run rpr s) = do return $ case link of InternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" <> show idNum) + [ ("r:id", "rId" <> tshow idNum) , ("action", "ppaction://hlinksldjump") ] in [mknode "a:hlinkClick" linkAttrs ()] -- external ExternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" <> show idNum) + [ ("r:id", "rId" <> tshow idNum) ] in [mknode "a:hlinkClick" linkAttrs ()] Nothing -> return [] let colorContents = case rSolidFill rpr of Just color -> case fromColor color of - '#':hx -> [mknode "a:solidFill" [] - [mknode "a:srgbClr" [("val", map toUpper hx)] ()] - ] + '#':hx -> + [mknode "a:solidFill" [] + [mknode "a:srgbClr" + [("val", T.toUpper $ T.pack hx)] ()]] _ -> [] Nothing -> [] codeFont <- monospaceFont let codeContents = - [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr] + [mknode "a:latin" [("typeface", codeFont)] () | rPropCode rpr] let propContents = linkProps <> colorContents <> codeContents return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents - , mknode "a:t" [] $ T.unpack s + , mknode "a:t" [] s ]] paraElemToElements (MathElem mathType texStr) = do isInSpkrNotes <- asks envInSpeakerNotes if isInSpkrNotes then paraElemToElements $ Run def $ unTeXString texStr else do res <- convertMath writeOMML mathType (unTeXString texStr) - case res of + case fromXLElement <$> res of Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r] Left (Str s) -> paraElemToElements (Run def s) Left _ -> throwError $ PandocShouldNeverHappenError "non-string math fallback" paraElemToElements (RawOOXMLParaElem str) = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] -- This is a bit of a kludge -- really requires adding an option to @@ -824,9 +827,10 @@ paraElemToElements (RawOOXMLParaElem str) = return -- step at a time. addMathInfo :: Element -> Element addMathInfo element = - let mathspace = Attr { attrKey = QName "m" Nothing (Just "xmlns") - , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" - } + let mathspace = + Attr { attrKey = QName "m" Nothing (Just "xmlns") + , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" + } in add_attr mathspace element -- We look through the element to see if it contains an a14:m @@ -849,13 +853,13 @@ surroundWithMathAlternate element = paragraphToElement :: PandocMonad m => Paragraph -> P m Element paragraphToElement par = do let - attrs = [("lvl", show $ pPropLevel $ paraProps par)] <> + attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <> (case pPropMarginLeft (paraProps par) of - Just px -> [("marL", show $ pixelsToEmu px)] + Just px -> [("marL", tshow $ pixelsToEmu px)] Nothing -> [] ) <> (case pPropIndent (paraProps par) of - Just px -> [("indent", show $ pixelsToEmu px)] + Just px -> [("indent", tshow $ pixelsToEmu px)] Nothing -> [] ) <> (case pPropAlign (paraProps par) of @@ -867,7 +871,7 @@ paragraphToElement par = do props = [] <> (case pPropSpaceBefore $ paraProps par of Just px -> [mknode "a:spcBef" [] [ - mknode "a:spcPts" [("val", show $ 100 * px)] () + mknode "a:spcPts" [("val", tshow $ 100 * px)] () ] ] Nothing -> [] @@ -910,7 +914,7 @@ shapeToElements layout (Pic picProps fp alt) = do shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$> graphicFrameToElements layout tbls cptn shapeToElements _ (RawOOXMLShape str) = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] shapeToElements layout shp = do element <- shapeToElement layout shp return [Elem element] @@ -942,8 +946,10 @@ graphicFrameToElements layout tbls caption = do [mknode "p:ph" [("idx", "1")] ()] ] , mknode "p:xfrm" [] - [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] () - , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] () + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * y)] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * cy)] () ] ] <> elements @@ -957,7 +963,7 @@ getDefaultTableStyle = do refArchive <- asks envRefArchive distArchive <- asks envDistArchive tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml" - return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst + return $ findAttr (QName "def" Nothing Nothing) tblStyleLst graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do @@ -995,7 +1001,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells let mkgridcol w = mknode "a:gridCol" - [("w", show ((12700 * w) :: Integer))] () + [("w", tshow ((12700 * w) :: Integer))] () let hasHeader = not (all null hdrCells) mbDefTblStyle <- getDefaultTableStyle @@ -1004,7 +1010,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do , ("bandRow", if tblPrBandRow tblPr then "1" else "0") ] (case mbDefTblStyle of Nothing -> [] - Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty]) + Just sty -> [mknode "a:tableStyleId" [] sty]) return $ mknode "a:graphic" [] [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] @@ -1037,7 +1043,7 @@ findPHType ns spElem phType -- if it's a named PHType, we want to check that the attribute -- value matches. Just phElem | (PHType tp) <- phType -> - case findAttrText (QName "type" Nothing Nothing) phElem of + case findAttr (QName "type" Nothing Nothing) phElem of Just tp' -> tp == tp' Nothing -> False -- if it's an ObjType, we want to check that there is NO @@ -1204,7 +1210,7 @@ getSlideNumberFieldId notesMaster , Just txBody <- findChild (elemName ns "p" "txBody") sp , Just p <- findChild (elemName ns "a" "p") txBody , Just fld <- findChild (elemName ns "a" "fld") p - , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld = + , Just fldId <- findAttr (QName "id" Nothing Nothing) fld = return fldId | otherwise = throwError $ PandocSomeError @@ -1283,11 +1289,11 @@ speakerNotesSlideNumber pgNum fieldId = [ mknode "a:bodyPr" [] () , mknode "a:lstStyle" [] () , mknode "a:p" [] - [ mknode "a:fld" [ ("id", T.unpack fieldId) + [ mknode "a:fld" [ ("id", fieldId) , ("type", "slidenum") ] [ mknode "a:rPr" [("lang", "en-US")] () - , mknode "a:t" [] (show pgNum) + , mknode "a:t" [] (tshow pgNum) ] , mknode "a:endParaRPr" [("lang", "en-US")] () ] @@ -1339,7 +1345,7 @@ getSlideIdNum sldId = do Just n -> return n Nothing -> throwError $ PandocShouldNeverHappenError $ - "Slide Id " <> T.pack (show sldId) <> " not found." + "Slide Id " <> tshow sldId <> " not found." slideNum :: PandocMonad m => Slide -> P m Int slideNum slide = getSlideIdNum $ slideId slide @@ -1356,7 +1362,7 @@ slideToRelId :: PandocMonad m => Slide -> P m T.Text slideToRelId slide = do n <- slideNum slide offset <- asks envSlideIdOffset - return $ "rId" <> T.pack (show $ n + offset) + return $ "rId" <> tshow (n + offset) data Relationship = Relationship { relId :: Int @@ -1368,13 +1374,11 @@ elementToRel :: Element -> Maybe Relationship elementToRel element | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing = do rId <- findAttr (QName "Id" Nothing Nothing) element - numStr <- stripPrefix "rId" rId - num <- case reads numStr :: [(Int, String)] of - (n, _) : _ -> Just n - [] -> Nothing - type' <- findAttrText (QName "Type" Nothing Nothing) element + numStr <- T.stripPrefix "rId" rId + num <- fromIntegral <$> readTextAsInteger numStr + type' <- findAttr (QName "Type" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship num type' target + return $ Relationship num type' (T.unpack target) | otherwise = Nothing slideToPresRel :: PandocMonad m => Slide -> P m Relationship @@ -1463,10 +1467,9 @@ topLevelRelsEntry :: PandocMonad m => P m Entry topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels relToElement :: Relationship -> Element -relToElement rel = mknode "Relationship" [ ("Id", "rId" <> - show (relId rel)) - , ("Type", T.unpack $ relType rel) - , ("Target", relTarget rel) ] () +relToElement rel = mknode "Relationship" [ ("Id", "rId" <> tshow (relId rel)) + , ("Type", relType rel) + , ("Target", T.pack $ relTarget rel) ] () relsToElement :: [Relationship] -> Element relsToElement rels = mknode "Relationships" @@ -1501,7 +1504,8 @@ slideToSpeakerNotesEntry slide = do Just element | Just notesIdNum <- mbNotesIdNum -> Just <$> elemToEntry - ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml") + ("ppt/notesSlides/notesSlide" <> show notesIdNum <> + ".xml") element _ -> return Nothing @@ -1514,7 +1518,7 @@ slideToSpeakerNotesRelElement slide@Slide{} = do [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] [ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "../slides/slide" <> show idNum <> ".xml") + , ("Target", "../slides/slide" <> tshow idNum <> ".xml") ] () , mknode "Relationship" [ ("Id", "rId1") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") @@ -1547,15 +1551,15 @@ linkRelElement :: PandocMonad m => (Int, LinkTarget) -> P m Element linkRelElement (rIdNum, InternalTarget targetId) = do targetIdNum <- getSlideIdNum targetId return $ - mknode "Relationship" [ ("Id", "rId" <> show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "slide" <> show targetIdNum <> ".xml") + , ("Target", "slide" <> tshow targetIdNum <> ".xml") ] () linkRelElement (rIdNum, ExternalTarget (url, _)) = return $ - mknode "Relationship" [ ("Id", "rId" <> show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") - , ("Target", T.unpack url) + , ("Target", url) , ("TargetMode", "External") ] () @@ -1567,10 +1571,10 @@ mediaRelElement mInfo = let ext = fromMaybe "" (mInfoExt mInfo) in mknode "Relationship" [ ("Id", "rId" <> - show (mInfoLocalId mInfo)) + tshow (mInfoLocalId mInfo)) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image") , ("Target", "../media/image" <> - show (mInfoGlobalId mInfo) <> T.unpack ext) + tshow (mInfoGlobalId mInfo) <> ext) ] () speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element) @@ -1580,7 +1584,7 @@ speakerNotesSlideRelElement slide = do return $ case M.lookup idNum mp of Nothing -> Nothing Just n -> - let target = "../notesSlides/notesSlide" <> show n <> ".xml" + let target = "../notesSlides/notesSlide" <> tshow n <> ".xml" in Just $ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide") @@ -1619,9 +1623,9 @@ slideToSlideRelElement slide = do slideToSldIdElement :: PandocMonad m => Slide -> P m Element slideToSldIdElement slide = do n <- slideNum slide - let id' = show $ n + 255 + let id' = tshow $ n + 255 rId <- slideToRelId slide - return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] () + return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element presentationToSldIdLst (Presentation _ slides) = do @@ -1646,7 +1650,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do notesMasterElem = mknode "p:notesMasterIdLst" [] [ mknode "p:NotesMasterId" - [("r:id", "rId" <> show notesMasterRId)] + [("r:id", "rId" <> tshow notesMasterRId)] () ] @@ -1702,17 +1706,17 @@ docPropsElement docProps = do ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ - mknode "dc:title" [] (maybe "" T.unpack $ dcTitle docProps) + mknode "dc:title" [] (fromMaybe "" $ dcTitle docProps) : - mknode "dc:creator" [] (maybe "" T.unpack $ dcCreator docProps) + mknode "dc:creator" [] (fromMaybe "" $ dcCreator docProps) : - mknode "cp:keywords" [] (T.unpack keywords) - : ( [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps | isJust (dcSubject docProps)]) - <> ( [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps | isJust (dcDescription docProps)]) - <> ( [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps | isJust (cpCategory docProps)]) + mknode "cp:keywords" [] keywords + : ( [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps | isJust (dcSubject docProps)]) + <> ( [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps | isJust (dcDescription docProps)]) + <> ( [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps | isJust (cpCategory docProps)]) <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x - , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x + ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) docPropsToEntry :: PandocMonad m => DocProps -> P m Entry docPropsToEntry docProps = docPropsElement docProps >>= @@ -1723,8 +1727,8 @@ docCustomPropsElement :: PandocMonad m => DocProps -> P m Element docCustomPropsElement docProps = do let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") - ,("pid", show pid) - ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v) + ,("pid", tshow pid) + ,("name", k)] $ mknode "vt:lpwstr" [] v return $ mknode "Properties" [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") @@ -1743,7 +1747,7 @@ viewPropsElement = do distArchive <- asks envDistArchive viewPrElement <- parseXml refArchive distArchive "ppt/viewProps.xml" -- remove "lastView" if it exists: - let notLastView :: Text.XML.Light.Attr -> Bool + let notLastView :: XML.Attr -> Bool notLastView attr = qName (attrKey attr) /= "lastView" return $ @@ -1755,15 +1759,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml" defaultContentTypeToElem :: DefaultContentType -> Element defaultContentTypeToElem dct = mknode "Default" - [("Extension", T.unpack $ defContentTypesExt dct), - ("ContentType", T.unpack $ defContentTypesType dct)] + [("Extension", defContentTypesExt dct), + ("ContentType", defContentTypesType dct)] () overrideContentTypeToElem :: OverrideContentType -> Element overrideContentTypeToElem oct = mknode "Override" - [("PartName", overrideContentTypesPart oct), - ("ContentType", T.unpack $ overrideContentTypesType oct)] + [("PartName", T.pack $ overrideContentTypesPart oct), + ("ContentType", overrideContentTypesType oct)] () contentTypesToElement :: ContentTypes -> Element @@ -1821,7 +1825,8 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath] getSpeakerNotesFilePaths = do mp <- asks envSpeakerNotesIdMap let notesIdNums = M.elems mp - return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums + return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") + notesIdNums presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes presentationToContentTypes p@(Presentation _ slides) = do @@ -1885,11 +1890,11 @@ getContentType fp | otherwise = Nothing -- Kept as String for XML.Light -autoNumAttrs :: ListAttributes -> [(String, String)] +autoNumAttrs :: ListAttributes -> [(Text, Text)] autoNumAttrs (startNum, numStyle, numDelim) = numAttr <> typeAttr where - numAttr = [("startAt", show startNum) | startNum /= 1] + numAttr = [("startAt", tshow startNum) | startNum /= 1] typeAttr = [("type", typeString <> delimString)] typeString = case numStyle of Decimal -> "arabic" diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs new file mode 100644 index 000000000..38e4df218 --- /dev/null +++ b/src/Text/Pandoc/XML/Light.hs @@ -0,0 +1,586 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XML.Light + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +xml-light, which we used in pandoc's the XML-based readers, has +some limitations: in particular, it produces nodes with String +instead of Text, and the parser falls over on processing instructions +(see #7091). + +This module exports much of the API of xml-light, but using Text instead +of String. In addition, the xml-light parsers are replaced by xml-conduit's +well-tested parser. (The xml-conduit types are mapped to types +isomorphic to xml-light's, to avoid the need for massive code modifications +elsewhere.) Bridge functions to map xml-light types to this module's +types are also provided (since libraries like texmath still use xml-light). + +Another advantage of the xml-conduit parser is that it gives us +detailed information on xml parse errors. + +In the future we may want to move to using xml-conduit or another +xml library in the code base, but this change gives us +better performance and accuracy without much change in the +code that used xml-light. +-} +module Text.Pandoc.XML.Light + ( -- * Basic types, duplicating those from xml-light but with Text + -- instead of String + Line + , Content(..) + , Element(..) + , Attr(..) + , CData(..) + , CDataKind(..) + , QName(..) + , Node(..) + , unode + , unqual + , add_attr + , add_attrs + -- * Conversion functions from xml-light types + , fromXLQName + , fromXLCData + , fromXLElement + , fromXLAttr + , fromXLContent + -- * Replacement for xml-light's Text.XML.Proc + , strContent + , onlyElems + , elChildren + , onlyText + , findChildren + , filterChildren + , filterChildrenName + , findChild + , filterChild + , filterChildName + , findElement + , filterElement + , filterElementName + , findElements + , filterElements + , filterElementsName + , findAttr + , lookupAttr + , lookupAttrBy + , findAttrBy + -- * Replacement for xml-light's Text.XML.Output + , ppTopElement + , ppElement + , ppContent + , ppcElement + , ppcContent + , showTopElement + , showElement + , showContent + , useShortEmptyTags + , defaultConfigPP + , ConfigPP(..) + -- * Replacement for xml-light's Text.XML.Input + , parseXMLElement + , parseXMLContents + ) where + +import qualified Control.Exception as E +import qualified Text.XML as Conduit +import Text.XML.Unresolved (InvalidEventStream(..)) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText) +import qualified Data.Map as M +import Data.Data (Data) +import Data.Typeable (Typeable) +import Data.Maybe (mapMaybe, listToMaybe) +import Data.List(find) +import qualified Text.XML.Light as XL + +-- Drop in replacement for parseXMLDoc in xml-light. +parseXMLElement :: TL.Text -> Either T.Text Element +parseXMLElement t = + elementToElement . Conduit.documentRoot <$> + either (Left . T.pack . E.displayException) Right + (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) + +parseXMLContents :: TL.Text -> Either T.Text [Content] +parseXMLContents t = + case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of + Left e -> + case E.fromException e of + Just (ContentAfterRoot _) -> + elContent <$> parseXMLElement ("" <> t <> "") + _ -> Left . T.pack . E.displayException $ e + Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x] + +elementToElement :: Conduit.Element -> Element +elementToElement (Conduit.Element name attribMap nodes) = + Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing + where + attrs = map (\(n,v) -> Attr (nameToQname n) v) $ + M.toList attribMap + nameToQname (Conduit.Name localName mbns mbpref) = + case mbpref of + Nothing -> + case T.stripPrefix "xmlns:" localName of + Just rest -> QName rest mbns (Just "xmlns") + Nothing -> QName localName mbns mbpref + _ -> QName localName mbns mbpref + +nodeToContent :: Conduit.Node -> Maybe Content +nodeToContent (Conduit.NodeElement el) = + Just (Elem (elementToElement el)) +nodeToContent (Conduit.NodeContent t) = + Just (Text (CData CDataText t Nothing)) +nodeToContent _ = Nothing + +unqual :: Text -> QName +unqual x = QName x Nothing Nothing + +-- | Add an attribute to an element. +add_attr :: Attr -> Element -> Element +add_attr a e = add_attrs [a] e + +-- | Add some attributes to an element. +add_attrs :: [Attr] -> Element -> Element +add_attrs as e = e { elAttribs = as ++ elAttribs e } + +-- +-- type definitions lightly modified from xml-light +-- + +-- | A line is an Integer +type Line = Integer + +-- | XML content +data Content = Elem Element + | Text CData + | CRef Text + deriving (Show, Typeable, Data) + +-- | XML elements +data Element = Element { + elName :: QName, + elAttribs :: [Attr], + elContent :: [Content], + elLine :: Maybe Line + } deriving (Show, Typeable, Data) + +-- | XML attributes +data Attr = Attr { + attrKey :: QName, + attrVal :: Text + } deriving (Eq, Ord, Show, Typeable, Data) + +-- | XML CData +data CData = CData { + cdVerbatim :: CDataKind, + cdData :: Text, + cdLine :: Maybe Line + } deriving (Show, Typeable, Data) + +data CDataKind + = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. + | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in case (qURI q1, qURI q2) of + (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2) + (u1,u2) -> compare u1 u2 + x -> x + +class Node t where + node :: QName -> t -> Element + +instance Node ([Attr],[Content]) where + node n (attrs,cont) = Element { elName = n + , elAttribs = attrs + , elContent = cont + , elLine = Nothing + } + +instance Node [Attr] where node n as = node n (as,[]::[Content]) +instance Node Attr where node n a = node n [a] +instance Node () where node n () = node n ([]::[Attr]) + +instance Node [Content] where node n cs = node n ([]::[Attr],cs) +instance Node Content where node n c = node n [c] +instance Node ([Attr],Content) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Content) where node n (a,c) = node n ([a],[c]) + +instance Node ([Attr],[Element]) where + node n (as,cs) = node n (as,map Elem cs) + +instance Node ([Attr],Element) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Element) where node n (a,c) = node n ([a],c) +instance Node [Element] where node n es = node n ([]::[Attr],es) +instance Node Element where node n e = node n [e] + +instance Node ([Attr],[CData]) where + node n (as,cs) = node n (as,map Text cs) + +instance Node ([Attr],CData) where node n (as,c) = node n (as,[c]) +instance Node (Attr,CData) where node n (a,c) = node n ([a],c) +instance Node [CData] where node n es = node n ([]::[Attr],es) +instance Node CData where node n e = node n [e] + +instance Node ([Attr],Text) where + node n (as,t) = node n (as, CData { cdVerbatim = CDataText + , cdData = t + , cdLine = Nothing }) + +instance Node (Attr,Text ) where node n (a,t) = node n ([a],t) +instance Node Text where node n t = node n ([]::[Attr],t) + +-- | Create node with unqualified name +unode :: Node t => Text -> t -> Element +unode = node . unqual + +-- +-- conversion from xml-light +-- + +fromXLQName :: XL.QName -> QName +fromXLQName qn = QName { qName = T.pack $ XL.qName qn + , qURI = T.pack <$> XL.qURI qn + , qPrefix = T.pack <$> XL.qPrefix qn } + +fromXLCData :: XL.CData -> CData +fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of + XL.CDataText -> CDataText + XL.CDataVerbatim -> CDataVerbatim + XL.CDataRaw -> CDataRaw + , cdData = T.pack $ XL.cdData cd + , cdLine = XL.cdLine cd } + +fromXLElement :: XL.Element -> Element +fromXLElement el = Element { elName = fromXLQName $ XL.elName el + , elAttribs = map fromXLAttr $ XL.elAttribs el + , elContent = map fromXLContent $ XL.elContent el + , elLine = XL.elLine el } + +fromXLAttr :: XL.Attr -> Attr +fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s) + +fromXLContent :: XL.Content -> Content +fromXLContent (XL.Elem el) = Elem $ fromXLElement el +fromXLContent (XL.Text cd) = Text $ fromXLCData cd +fromXLContent (XL.CRef s) = CRef (T.pack s) + +-- +-- copied from xml-light Text.XML.Proc +-- + +-- | Get the text value of an XML element. This function +-- ignores non-text elements, and concatenates all text elements. +strContent :: Element -> Text +strContent = mconcat . map cdData . onlyText . elContent + +-- | Select only the elements from a list of XML content. +onlyElems :: [Content] -> [Element] +onlyElems xs = [ x | Elem x <- xs ] + +-- | Select only the elements from a parent. +elChildren :: Element -> [Element] +elChildren e = [ x | Elem x <- elContent e ] + +-- | Select only the text from a list of XML content. +onlyText :: [Content] -> [CData] +onlyText xs = [ x | Text x <- xs ] + +-- | Find all immediate children with the given name. +findChildren :: QName -> Element -> [Element] +findChildren q e = filterChildren ((q ==) . elName) e + +-- | Filter all immediate children wrt a given predicate. +filterChildren :: (Element -> Bool) -> Element -> [Element] +filterChildren p e = filter p (onlyElems (elContent e)) + + +-- | Filter all immediate children wrt a given predicate over their names. +filterChildrenName :: (QName -> Bool) -> Element -> [Element] +filterChildrenName p e = filter (p.elName) (onlyElems (elContent e)) + + +-- | Find an immediate child with the given name. +findChild :: QName -> Element -> Maybe Element +findChild q e = listToMaybe (findChildren q e) + +-- | Find an immediate child with the given name. +filterChild :: (Element -> Bool) -> Element -> Maybe Element +filterChild p e = listToMaybe (filterChildren p e) + +-- | Find an immediate child with name matching a predicate. +filterChildName :: (QName -> Bool) -> Element -> Maybe Element +filterChildName p e = listToMaybe (filterChildrenName p e) + +-- | Find the left-most occurrence of an element matching given name. +findElement :: QName -> Element -> Maybe Element +findElement q e = listToMaybe (findElements q e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElement :: (Element -> Bool) -> Element -> Maybe Element +filterElement p e = listToMaybe (filterElements p e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElementName :: (QName -> Bool) -> Element -> Maybe Element +filterElementName p e = listToMaybe (filterElementsName p e) + +-- | Find all non-nested occurances of an element. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +findElements :: QName -> Element -> [Element] +findElements qn e = filterElementsName (qn==) e + +-- | Find all non-nested occurrences of an element wrt. given predicate. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElements :: (Element -> Bool) -> Element -> [Element] +filterElements p e + | p e = [e] + | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e + +-- | Find all non-nested occurences of an element wrt a predicate over element names. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElementsName :: (QName -> Bool) -> Element -> [Element] +filterElementsName p e = filterElements (p.elName) e + +-- | Lookup the value of an attribute. +findAttr :: QName -> Element -> Maybe Text +findAttr x e = lookupAttr x (elAttribs e) + +-- | Lookup attribute name from list. +lookupAttr :: QName -> [Attr] -> Maybe Text +lookupAttr x = lookupAttrBy (x ==) + +-- | Lookup the first attribute whose name satisfies the given predicate. +lookupAttrBy :: (QName -> Bool) -> [Attr] -> Maybe Text +lookupAttrBy p as = attrVal `fmap` find (p . attrKey) as + +-- | Lookup the value of the first attribute whose name +-- satisfies the given predicate. +findAttrBy :: (QName -> Bool) -> Element -> Maybe Text +findAttrBy p e = lookupAttrBy p (elAttribs e) + + +-- +-- duplicates functinos from Text.XML.Output +-- + +-- | The XML 1.0 header +xmlHeader :: Text +xmlHeader = "" + + +-------------------------------------------------------------------------------- +data ConfigPP = ConfigPP + { shortEmptyTag :: QName -> Bool + , prettify :: Bool + } + +-- | Default pretty orinting configuration. +-- * Always use abbreviate empty tags. +defaultConfigPP :: ConfigPP +defaultConfigPP = ConfigPP { shortEmptyTag = const True + , prettify = False + } + +-- | The predicate specifies for which empty tags we should use XML's +-- abbreviated notation . This is useful if we are working with +-- some XML-ish standards (such as certain versions of HTML) where some +-- empty tags should always be displayed in the form. +useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP +useShortEmptyTags p c = c { shortEmptyTag = p } + + +-- | Specify if we should use extra white-space to make document more readable. +-- WARNING: This adds additional white-space to text elements, +-- and so it may change the meaning of the document. +useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP +useExtraWhiteSpace p c = c { prettify = p } + +-- | A configuration that tries to make things pretty +-- (possibly at the cost of changing the semantics a bit +-- through adding white space.) +prettyConfigPP :: ConfigPP +prettyConfigPP = useExtraWhiteSpace True defaultConfigPP + + +-------------------------------------------------------------------------------- + + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppTopElement :: Element -> Text +ppTopElement = ppcTopElement prettyConfigPP + +-- | Pretty printing elements +ppElement :: Element -> Text +ppElement = ppcElement prettyConfigPP + +-- | Pretty printing content +ppContent :: Content -> Text +ppContent = ppcContent prettyConfigPP + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppcTopElement :: ConfigPP -> Element -> Text +ppcTopElement c e = T.unlines [xmlHeader,ppcElement c e] + +-- | Pretty printing elements +ppcElement :: ConfigPP -> Element -> Text +ppcElement c = TL.toStrict . toLazyText . ppElementS c mempty + +-- | Pretty printing content +ppcContent :: ConfigPP -> Content -> Text +ppcContent c = TL.toStrict . toLazyText . ppContentS c mempty + +ppcCData :: ConfigPP -> CData -> Text +ppcCData c = TL.toStrict . toLazyText . ppCDataS c mempty + +type Indent = Builder + +-- | Pretty printing content using ShowT +ppContentS :: ConfigPP -> Indent -> Content -> Builder +ppContentS c i x = case x of + Elem e -> ppElementS c i e + Text t -> ppCDataS c i t + CRef r -> showCRefS r + +ppElementS :: ConfigPP -> Indent -> Element -> Builder +ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <> + (case elContent e of + [] | "?" `T.isPrefixOf` qName name -> fromText " ?>" + | shortEmptyTag c name -> fromText " />" + [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name + cs -> singleton '>' <> nl <> + mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <> + i <> tagEnd name + where (nl,sp) = if prettify c then ("\n"," ") else ("","") + ) + where name = elName e + +ppCDataS :: ConfigPP -> Indent -> CData -> Builder +ppCDataS c i t = i <> if cdVerbatim t /= CDataText || not (prettify c) + then showCDataS t + else foldr cons mempty (T.unpack (showCData t)) + where cons :: Char -> Builder -> Builder + cons '\n' ys = singleton '\n' <> i <> ys + cons y ys = singleton y <> ys + + + +-------------------------------------------------------------------------------- + +-- | Adds the header. +showTopElement :: Element -> Text +showTopElement c = xmlHeader <> showElement c + +showContent :: Content -> Text +showContent = ppcContent defaultConfigPP + +showElement :: Element -> Text +showElement = ppcElement defaultConfigPP + +showCData :: CData -> Text +showCData = ppcCData defaultConfigPP + +-- Note: crefs should not contain '&', ';', etc. +showCRefS :: Text -> Builder +showCRefS r = singleton '&' <> fromText r <> singleton ';' + +-- | Convert a text element to characters. +showCDataS :: CData -> Builder +showCDataS cd = + case cdVerbatim cd of + CDataText -> escStr (cdData cd) + CDataVerbatim -> fromText " escCData (cdData cd) <> + fromText "]]>" + CDataRaw -> fromText (cdData cd) + +-------------------------------------------------------------------------------- +escCData :: Text -> Builder +escCData t + | "]]>" `T.isPrefixOf` t = + fromText "]]]]>" <> fromText (T.drop 3 t) +escCData t + = case T.uncons t of + Nothing -> mempty + Just (c,t') -> singleton c <> escCData t' + +escChar :: Char -> Builder +escChar c = case c of + '<' -> fromText "<" + '>' -> fromText ">" + '&' -> fromText "&" + '"' -> fromText """ + -- we use ' instead of ' because IE apparently has difficulties + -- rendering ' in xhtml. + -- Reported by Rohan Drape . + '\'' -> fromText "'" + _ -> singleton c + + {- original xml-light version: + -- NOTE: We escape '\r' explicitly because otherwise they get lost + -- when parsed back in because of then end-of-line normalization rules. + _ | isPrint c || c == '\n' -> singleton c + | otherwise -> showText "&#" . showsT oc . singleton ';' + where oc = ord c + -} + +escStr :: Text -> Builder +escStr cs = if T.any needsEscape cs + then mconcat (map escChar (T.unpack cs)) + else fromText cs + where + needsEscape '<' = True + needsEscape '>' = True + needsEscape '&' = True + needsEscape '"' = True + needsEscape '\'' = True + needsEscape _ = False + +tagEnd :: QName -> Builder +tagEnd qn = fromText " showQName qn <> singleton '>' + +tagStart :: QName -> [Attr] -> Builder +tagStart qn as = singleton '<' <> showQName qn <> as_str + where as_str = if null as + then mempty + else mconcat (map showAttr as) + +showAttr :: Attr -> Builder +showAttr (Attr qn v) = singleton ' ' <> showQName qn <> + singleton '=' <> + singleton '"' <> escStr v <> singleton '"' + +showQName :: QName -> Builder +showQName q = + case qPrefix q of + Nothing -> fromText (qName q) + Just p -> fromText p <> singleton ':' <> fromText (qName q) diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs deleted file mode 100644 index 8ad22a66a..000000000 --- a/src/Text/Pandoc/XMLParser.hs +++ /dev/null @@ -1,66 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.XMLParser - Copyright : Copyright (C) 2021 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Bridge to allow using xml-conduit's parser with xml-light's types. --} -module Text.Pandoc.XMLParser - ( parseXMLElement - , parseXMLContents - , module Text.XML.Light.Types - ) where - -import qualified Control.Exception as E -import qualified Text.XML as Conduit -import Text.XML.Unresolved (InvalidEventStream(..)) -import qualified Text.XML.Light as Light -import Text.XML.Light.Types -import qualified Data.Text as T -import qualified Data.Text.Lazy as TL -import qualified Data.Map as M -import Data.Maybe (mapMaybe) - --- Drop in replacement for parseXMLDoc in xml-light. -parseXMLElement :: TL.Text -> Either T.Text Light.Element -parseXMLElement t = - elementToElement . Conduit.documentRoot <$> - either (Left . T.pack . E.displayException) Right - (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) - -parseXMLContents :: TL.Text -> Either T.Text [Light.Content] -parseXMLContents t = - case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of - Left e -> - case E.fromException e of - Just (ContentAfterRoot _) -> - elContent <$> parseXMLElement ("" <> t <> "") - _ -> Left . T.pack . E.displayException $ e - Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x] - -elementToElement :: Conduit.Element -> Light.Element -elementToElement (Conduit.Element name attribMap nodes) = - Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing - where - attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $ - M.toList attribMap - nameToQname (Conduit.Name localName mbns mbpref) = - case mbpref of - Nothing | "xmlns:" `T.isPrefixOf` localName -> - Light.QName (T.unpack $ T.drop 6 localName) (T.unpack <$> mbns) - (Just "xmlns") - _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns) - (T.unpack <$> mbpref) - -nodeToContent :: Conduit.Node -> Maybe Light.Content -nodeToContent (Conduit.NodeElement el) = - Just (Light.Elem (elementToElement el)) -nodeToContent (Conduit.NodeContent t) = - Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing)) -nodeToContent _ = Nothing - diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs index 376f02c55..c1e47622d 100644 --- a/test/Tests/Writers/OOXML.hs +++ b/test/Tests/Writers/OOXML.hs @@ -43,7 +43,8 @@ compareXMLBool _ _ = False displayDiff :: Content -> Content -> String displayDiff elemA elemB = - showDiff (1,1) $ getDiff (lines $ ppContent elemA) (lines $ ppContent elemB) + showDiff (1,1) + (getDiff (lines $ showContent elemA) (lines $ showContent elemB)) goldenArchive :: FilePath -> IO Archive goldenArchive fp = toArchive . BL.fromStrict <$> BS.readFile fp diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index d3b16d0f2..ed7d1165c 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 6293ef493..07ae75676 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index 4205a1516..e5f034378 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index adb3f23db..174942135 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 92c8137fe..b5c31a851 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index f53470617..c42ca1b05 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index d6af90a72..1cb4c1fd7 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index e18dbe853..7122456ea 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 820299043..616ba0f81 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index ae0f41d12..c30dcdee9 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 94cd35dfa..8a704b41e 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 879f2a25b..b1906c8c4 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 93f86478f..8adf1cf75 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 967d297f2..584117503 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index c5614e2fa..8859fe55c 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index 0f39a831f..b80f3b3ba 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 07046f223..35beed68a 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 3656618e6..2c29fd674 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 8798253d5..10a948886 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 0a24d1840..5b90e74a0 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index 52bb7a217..cc81b46d1 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 182c06c64..1394dc442 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 7b69a56a3..0d1688694 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index 3d3a35701..be1caef2d 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 5ae37b406..a1d2323c2 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index c29aa6716..2f3a831a7 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 664493246..af066107c 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index b6d15340e..9cc7a075f 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index f8e1092d2..f8b8dcfde 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index b4cda82f2..1c3baf0bf 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index ee222efa0..28686970d 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index c6f8d9c96..7051cefbd 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index ea8146690..9df631640 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx index 58070eb3f..5e9c2c630 100644 Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx index db9b7e371..15232c32d 100644 Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx index c7b1ed7d5..aab0cc6f5 100644 Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx index 6944d92bf..fe5b675f3 100644 Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx index ae0d28429..de5e68151 100644 Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx index 37c74c69a..89e6fbdf2 100644 Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx index 324e443a1..6bcbd1b9c 100644 Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx index c81b983e3..79d48560b 100644 Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx index 30ce33db6..9d46036fe 100644 Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx index d6c604968..54ec7f305 100644 Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx index 000e17ecd..a028b346f 100644 Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx index fdcd2e29b..1158c16fc 100644 Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx index e73126376..670a825de 100644 Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx index e3f968e9e..6d297ef11 100644 Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx index eadb9372e..473b9498d 100644 Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx index 8ca6bab2b..2cdf54474 100644 Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx index ae188ee68..ffc2eb9f7 100644 Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx index 60301fa50..676954cb8 100644 Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx index 17124a50d..29164af15 100644 Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx index 19ae7dd4e..1742b3296 100644 Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx index b650b7585..c6df8e18e 100644 Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx index 0ab029614..cf6e52eef 100644 Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx index 2a6e35080..e06d9079d 100644 Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx index a7bcf6a4b..449339778 100644 Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx index 21b018c25..258098082 100644 Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx index 4ec4772a4..2f0213919 100644 Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx index 5983657b6..9dbfa41a0 100644 Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx index dd54c7082..f288dde14 100644 Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx index b3e5ed5b9..0ab1302da 100644 Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx index 1078854bb..6343bffe4 100644 Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx index 5116c6c4e..5d4465f64 100644 Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx index 0c8e49bd9..d581681aa 100644 Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx index 68695939d..c922df3a8 100644 Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx index 7ed9b946d..13f564bf0 100644 Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx index 79fc82345..f0b302738 100644 Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx index 9f943c279..2d8d00242 100644 Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx index ac72d8ced..4320128b3 100644 Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx index 15c7b5469..0a3f6e56d 100644 Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx index 926c5e699..e41219844 100644 Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx index a37e72d2c..82b8aa13d 100644 Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx index 7f86533fe..270a7eeac 100644 Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx index 89e3db0ab..44985d701 100644 Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ -- cgit v1.2.3 From eed18d231cc706e27a1495d46e8c05dd18a0938f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Mar 2021 12:05:52 -0800 Subject: Use integral values for w:tblW in docx. Cloess #7141. --- src/Text/Pandoc/Writers/Docx.hs | 2 +- test/docx/golden/table_one_row.docx | Bin 9908 -> 9906 bytes test/docx/golden/table_with_list_cell.docx | Bin 10227 -> 10225 bytes test/docx/golden/tables.docx | Bin 10244 -> 10241 bytes 4 files changed, 1 insertion(+), 1 deletion(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 89c71d773..c47bfb2ea 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1031,7 +1031,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do map mkcell cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct - let rowwidth = fullrow * sum widths + let (rowwidth :: Int) = round $ fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", tshow (floor (textwidth * w) :: Integer))] () let hasHeader = not $ all null headers diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index a1d2323c2..1178f7c6e 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 2f3a831a7..84be2720f 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index af066107c..140366d8b 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ -- cgit v1.2.3 From c3f9e8c12256d19ed6c89d15470945855ee16a94 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Mar 2021 22:31:20 -0700 Subject: Docx writer: make nsid in abstractNum deterministic. Previously we assigned a random number (though in a deterministic way). But changes in the random package mean we get different results now on different architectures, even with the same random seed. We don't need random values; so now we just assign a value based on the list number id, which is guaranteed to be unique to the list marker. --- src/Text/Pandoc/Writers/Docx.hs | 10 ++++------ test/docx/golden/block_quotes.docx | Bin 10071 -> 10067 bytes test/docx/golden/codeblock.docx | Bin 9920 -> 9916 bytes test/docx/golden/comments.docx | Bin 10258 -> 10254 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10021 -> 10017 bytes test/docx/golden/custom_style_preserve.docx | Bin 10650 -> 10646 bytes test/docx/golden/custom_style_reference.docx | Bin 12403 -> 12400 bytes test/docx/golden/definition_list.docx | Bin 9920 -> 9916 bytes .../docx/golden/document-properties-short-desc.docx | Bin 9925 -> 9922 bytes test/docx/golden/document-properties.docx | Bin 10404 -> 10399 bytes test/docx/golden/headers.docx | Bin 10059 -> 10055 bytes test/docx/golden/image.docx | Bin 26736 -> 26733 bytes test/docx/golden/inline_code.docx | Bin 9859 -> 9855 bytes test/docx/golden/inline_formatting.docx | Bin 10038 -> 10035 bytes test/docx/golden/inline_images.docx | Bin 26793 -> 26789 bytes test/docx/golden/link_in_notes.docx | Bin 10081 -> 10077 bytes test/docx/golden/links.docx | Bin 10251 -> 10248 bytes test/docx/golden/lists.docx | Bin 10332 -> 10314 bytes test/docx/golden/lists_continuing.docx | Bin 10123 -> 10110 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10210 -> 10192 bytes test/docx/golden/lists_restarting.docx | Bin 10122 -> 10108 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10216 -> 10212 bytes test/docx/golden/notes.docx | Bin 10028 -> 10024 bytes test/docx/golden/raw-blocks.docx | Bin 9960 -> 9956 bytes test/docx/golden/raw-bookmarks.docx | Bin 10094 -> 10090 bytes test/docx/golden/table_one_row.docx | Bin 9906 -> 9903 bytes test/docx/golden/table_with_list_cell.docx | Bin 10225 -> 10212 bytes test/docx/golden/tables.docx | Bin 10241 -> 10238 bytes test/docx/golden/track_changes_deletion.docx | Bin 9903 -> 9899 bytes test/docx/golden/track_changes_insertion.docx | Bin 9886 -> 9882 bytes test/docx/golden/track_changes_move.docx | Bin 9920 -> 9916 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10032 -> 10028 bytes test/docx/golden/unicode.docx | Bin 9845 -> 9841 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9892 -> 9888 bytes 34 files changed, 4 insertions(+), 6 deletions(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index c47bfb2ea..20bcd0324 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,6 @@ import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import System.Random (randomRs, mkStdGen) import Text.Pandoc.BCP47 (getLang, renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P @@ -695,8 +694,7 @@ baseListId = 1000 mkNumbering :: [ListMarker] -> [Element] mkNumbering lists = elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] - where elts = zipWith mkAbstractNum (ordNub lists) $ - randomRs (0x10000000, 0xFFFFFFFF) $ mkStdGen 1848 + where elts = map mkAbstractNum (ordNub lists) maxListLevel :: Int maxListLevel = 8 @@ -713,10 +711,10 @@ mkNum marker numid = $ mknode "w:startOverride" [("w:val",tshow start)] ()) [0..maxListLevel] -mkAbstractNum :: ListMarker -> Integer -> Element -mkAbstractNum marker nsid = +mkAbstractNum :: ListMarker -> Element +mkAbstractNum marker = mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] - $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] () + $ mknode "w:nsid" [("w:val", "A" <> listMarkerToId marker)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..maxListLevel] diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index ed7d1165c..af1b33ca2 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 07ae75676..f748f1f01 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index e5f034378..ac9d56680 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index 174942135..f27727edd 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index b5c31a851..1da499d6a 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index c42ca1b05..4d2fe245d 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 1cb4c1fd7..f386fcea3 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 7122456ea..debe9a3f6 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 616ba0f81..cd17400bf 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index c30dcdee9..d3af8a3dd 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 8a704b41e..1c4e738c0 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index b1906c8c4..35f43f19f 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 8adf1cf75..8de3f70f6 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 584117503..e76558be9 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 8859fe55c..88bae8142 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index b80f3b3ba..455adcfc7 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 35beed68a..081d9ddba 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 2c29fd674..fc9213fc5 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 10a948886..b636fd3f8 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 5b90e74a0..252623215 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index cc81b46d1..a8c3f5478 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 1394dc442..43e650ebd 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 0d1688694..fe4f7845b 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index be1caef2d..45e90608f 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 1178f7c6e..6eaea2ac2 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 84be2720f..45a97ccaa 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 140366d8b..115a16a48 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 9cc7a075f..247725aaa 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index f8b8dcfde..3863afef2 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index 1c3baf0bf..5c848b63a 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 28686970d..e0c843713 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index 7051cefbd..78a773bdd 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index 9df631640..c66a45b74 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From 0921b82d98b6ec7fa80ffd522c129b3828b9c00b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 26 Apr 2021 18:07:01 +0200 Subject: Docx writer: autoset table width if no column has an explicit width. --- src/Text/Pandoc/Writers/Docx/Table.hs | 18 +++++++++++------- test/docx/golden/table_one_row.docx | Bin 9903 -> 9904 bytes test/docx/golden/table_with_list_cell.docx | Bin 10212 -> 10212 bytes test/docx/golden/tables.docx | Bin 10238 -> 10239 bytes 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index a6b137fc4..349f3a4ce 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -1,7 +1,8 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | -Module : Text.Pandoc.Writers.Docx +Module : Text.Pandoc.Writers.Docx.Table Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane @@ -87,7 +88,10 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : + mknode "w:tblW" (if all (== 0) widths + then [("w:type", "auto"), ("w:w", "0")] + else [("w:type", "pct"), ("w:w", tshow rowwidth)]) + () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ,("w:lastRow","0") ,("w:firstColumn","0") @@ -107,8 +111,8 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do )] alignmentToString :: Alignment -> Text -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" +alignmentToString = \case + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 6eaea2ac2..cab3fc31c 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 45a97ccaa..9238c7e20 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 115a16a48..6f0379def 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ -- cgit v1.2.3 From ddbf83f62c8bb6516203c99acd894c404351b5ae Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sat, 1 May 2021 18:52:24 +0200 Subject: Docx writer: support colspans and rowspans in tables See: #6315 --- src/Text/Pandoc/Writers/Docx.hs | 6 +- src/Text/Pandoc/Writers/Docx/Table.hs | 200 +++++++++++++++++++---------- src/Text/Pandoc/Writers/GridTable.hs | 4 +- test/docx/golden/table_one_row.docx | Bin 9904 -> 9925 bytes test/docx/golden/table_with_list_cell.docx | Bin 10212 -> 10230 bytes test/docx/golden/tables.docx | Bin 10239 -> 10271 bytes 6 files changed, 140 insertions(+), 70 deletions(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 7064ded09..e11961bfd 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -53,6 +53,7 @@ import Text.Pandoc.Writers.Docx.Table import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared import Text.Pandoc.Walk +import qualified Text.Pandoc.Writers.GridTable as Grid import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.TeXMath @@ -889,8 +890,9 @@ blockToOpenXML' _ HorizontalRule = do $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] -blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = - tableToOpenXML (blocksToOpenXML opts) blkCapt specs thead tbody tfoot +blockToOpenXML' opts (Table attr caption colspecs thead tbodies tfoot) = + tableToOpenXML (blocksToOpenXML opts) + (Grid.toTable attr caption colspecs thead tbodies tfoot) blockToOpenXML' opts el | BulletList lst <- el = addOpenXMLList BulletMarker lst | OrderedList (start, numstyle, numdelim) lst <- el diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index 349f3a4ce..bb931bf08 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -14,65 +14,39 @@ module Text.Pandoc.Writers.Docx.Table ) where import Control.Monad.State.Strict +import Data.Array import Data.Text (Text) import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared import Text.Printf (printf) +import Text.Pandoc.Writers.GridTable hiding (Table) import Text.Pandoc.Writers.OOXML -import Text.Pandoc.XML.Light as XML +import Text.Pandoc.XML.Light as XML hiding (Attr) import qualified Data.Text as T +import qualified Text.Pandoc.Writers.GridTable as Grid tableToOpenXML :: PandocMonad m => ([Block] -> WS m [Content]) - -> Caption - -> [ColSpec] - -> TableHead - -> [TableBody] - -> TableFoot + -> Grid.Table -> WS m [Content] -tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do - let (caption, aligns, widths, headers, rows) = - toLegacyTable blkCapt specs thead tbody tfoot +tableToOpenXML blocksToOpenXML gridTable = do setFirstPara modify $ \s -> s { stInTable = True } - let captionStr = stringify caption - caption' <- if null caption - then return [] - else withParaPropM (pStyleM "Table Caption") - $ blocksToOpenXML [Para caption] - let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () - -- Table cells require a element, even an empty one! - -- Not in the spec but in Word 2007, 2010. See #4953. And - -- apparently the last element must be a , see #6983. - let cellToOpenXML (al, cell) = do - es <- withParaProp (alignmentFor al) $ blocksToOpenXML cell - return $ - case reverse (onlyElems es) of - b:e:_ | qName (elName b) == "bookmarkEnd" - , qName (elName e) == "p" -> es - e:_ | qName (elName e) == "p" -> es - _ -> es ++ [Elem $ mknode "w:p" [] ()] - headers' <- mapM cellToOpenXML $ zip aligns headers - rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - compactStyle <- pStyleM "Compact" - let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] - let mkcell contents = mknode "w:tc" [] - $ if null contents - then emptyCell' - else contents - let mkrow cells = - mknode "w:tr" [] $ - map mkcell cells - let textwidth = 7920 -- 5.5 in in twips, 1/20 pt - let fullrow = 5000 -- 100% specified in pct - let (rowwidth :: Int) = round $ fullrow * sum widths - let mkgridcol w = mknode "w:gridCol" - [("w:w", tshow (floor (textwidth * w) :: Integer))] () - let hasHeader = not $ all null headers - modify $ \s -> s { stInTable = False } + let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) = + gridTable + let (Caption _maybeShortCaption captionBlocks) = caption + let captionStr = stringify captionBlocks + captionXml <- if null captionBlocks + then return [] + else withParaPropM (pStyleM "Table Caption") + $ blocksToOpenXML captionBlocks + head' <- cellGridToOpenXML blocksToOpenXML thead + bodies <- mapM (cellGridToOpenXML blocksToOpenXML) tbodies + foot' <- cellGridToOpenXML blocksToOpenXML tfoot + + let hasHeader = not . null . indices . partRowAttrs $ thead -- for compatibility with Word <= 2007, we include a val with a bitmask -- 0×0020 Apply first row conditional formatting -- 0×0040 Apply last row conditional formatting @@ -80,18 +54,12 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do -- 0×0100 Apply last column conditional formatting -- 0×0200 Do not apply row banding conditional formatting -- 0×0400 Do not apply column banding conditional formattin - let tblLookVal :: Int - tblLookVal = if hasHeader then 0x20 else 0 - return $ - caption' ++ - [Elem $ - mknode "w:tbl" [] - ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" (if all (== 0) widths - then [("w:type", "auto"), ("w:w", "0")] - else [("w:type", "pct"), ("w:w", tshow rowwidth)]) - () : + let tblLookVal = if hasHeader then (0x20 :: Int) else 0 + let (gridCols, tblWattr) = tableLayout (elems colspecs) + let tbl = mknode "w:tbl" [] + ( mknode "w:tblPr" [] + ( mknode "w:tblStyle" [("w:val","Table")] () : + mknode "w:tblW" tblWattr () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ,("w:lastRow","0") ,("w:firstColumn","0") @@ -100,15 +68,14 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do ,("w:noVBand","0") ,("w:val", T.pack $ printf "%04x" tblLookVal) ] () : - [ mknode "w:tblCaption" [("w:val", captionStr)] () - | not (null caption) ] ) - : mknode "w:tblGrid" [] - (if all (==0) widths - then [] - else map mkgridcol widths) - : [ mkrow headers' | hasHeader ] ++ - map mkrow rows' - )] + [ mknode "w:tblCaption" [("w:val", captionStr)] () + | not (T.null captionStr) ] + ) + : mknode "w:tblGrid" [] gridCols + : head' ++ mconcat bodies ++ foot' + ) + modify $ \s -> s { stInTable = False } + return $ captionXml ++ [Elem tbl] alignmentToString :: Alignment -> Text alignmentToString = \case @@ -116,3 +83,104 @@ alignmentToString = \case AlignRight -> "right" AlignCenter -> "center" AlignDefault -> "left" + +tableLayout :: [ColSpec] -> ([Element], [(Text, Text)]) +tableLayout specs = + let + textwidth = 7920 -- 5.5 in in twips (1 twip == 1/20 pt) + fullrow = 5000 -- 100% specified in pct (1 pct == 1/50th of a percent) + ncols = length specs + getWidth = \case + ColWidth n -> n + _ -> 0 + widths = map (getWidth . snd) specs + rowwidth = round (fullrow * sum widths) :: Int + widthToTwips w = floor (textwidth * w) :: Int + mkGridCol w = mknode "w:gridCol" [("w:w", tshow (widthToTwips w))] () + in if all (== 0) widths + then ( replicate ncols $ mkGridCol (1.0 / fromIntegral ncols) + , [ ("w:type", "auto"), ("w:w", "0")]) + else ( map mkGridCol widths + , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ]) + +cellGridToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> Part + -> WS m [Element] +cellGridToOpenXML blocksToOpenXML part@(Part _ _ rowAttrs) = + if null (indices rowAttrs) + then return mempty + else mapM (rowToOpenXML blocksToOpenXML) $ partToRows part + +data OOXMLCell + = OOXMLCell Attr Alignment RowSpan ColSpan [Block] + | OOXMLCellMerge ColSpan + +data OOXMLRow = OOXMLRow Attr [OOXMLCell] + +partToRows :: Part -> [OOXMLRow] +partToRows part = + let + toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell] + toOOXMLCell ridx cidx = \case + ContentCell attr align rowspan colspan blocks -> + [OOXMLCell attr align rowspan colspan blocks] + ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' -> + case (partCellArray part)!idx' of + (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan] + x -> error $ "Content cell expected, got, " ++ show x ++ + " at index " ++ show idx' + _ -> mempty + mkRow :: (RowIndex, Attr) -> OOXMLRow + mkRow (ridx, attr) = OOXMLRow attr + . concatMap (uncurry $ toOOXMLCell ridx) + . assocs + . rowArray ridx + $ partCellArray part + in map mkRow $ assocs (partRowAttrs part) + +rowToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> OOXMLRow + -> WS m Element +rowToOpenXML blocksToOpenXML (OOXMLRow _attr cells) = do + xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells + -- let align' = case align of + -- AlignDefault -> colAlign + -- _ -> align + return $ mknode "w:tr" [] xmlcells + +ooxmlCellToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> OOXMLCell + -> WS m Element +ooxmlCellToOpenXML blocksToOpenXML = \case + OOXMLCellMerge (ColSpan colspan) -> do + return $ mknode "w:tc" [] + [ mknode "w:tcPr" [] [ mknode "w:gridSpan" [("w:val", tshow colspan)] () + , mknode "w:vMerge" [("w:val", "continue")] () ] + , mknode "w:p" [] [mknode "w:pPr" [] ()]] + OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do + -- we handle rowspans via 'leftpad', so we can ignore those here + + compactStyle <- pStyleM "Compact" + es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents + -- Table cells require a element, even an empty one! + -- Not in the spec but in Word 2007, 2010. See #4953. And + -- apparently the last element must be a , see #6983. + return . mknode "w:tc" [] $ + Elem + (mknode "w:tcPr" [] ([ mknode "w:gridSpan" [("w:val", tshow colspan)] () + | colspan > 1] ++ + [ mknode "w:vMerge" [("w:val", "restart")] () + | rowspan > RowSpan 1 ])) : + if null contents + then [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] + else case reverse (onlyElems es) of + b:e:_ | qName (elName b) == "bookmarkEnd" -- y tho? + , qName (elName e) == "p" -> es + e:_ | qName (elName e) == "p" -> es + _ -> es ++ [Elem $ mknode "w:p" [] ()] + +alignmentFor :: Alignment -> Element +alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () diff --git a/src/Text/Pandoc/Writers/GridTable.hs b/src/Text/Pandoc/Writers/GridTable.hs index c6f4cf456..bc468febc 100644 --- a/src/Text/Pandoc/Writers/GridTable.hs +++ b/src/Text/Pandoc/Writers/GridTable.hs @@ -87,8 +87,8 @@ toTable attr caption colSpecs thead tbodies tfoot = tbGrids = map bodyToGrid tbodies tfGrid = let (TableFoot footAttr rows) = tfoot in rowsToPart footAttr rows - bodyToGrid (TableBody bodyAttr _rowHeadCols _headRows rows) = - rowsToPart bodyAttr rows + bodyToGrid (TableBody bodyAttr _rowHeadCols headRows rows) = + rowsToPart bodyAttr (headRows ++ rows) data BuilderCell = FilledCell GridCell diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index cab3fc31c..e60bb303f 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 9238c7e20..a4037cf32 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 6f0379def..bc1bc27f8 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ -- cgit v1.2.3 From 5eb7ad7d1ebbfe27a282a2d75f199bacf2052be3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 11 May 2021 22:31:38 -0600 Subject: Improve integration of settings from reference.docx. The settings we can carry over from a reference.docx are autoHyphenation, consecutiveHyphenLimit, hyphenationZone, doNotHyphenateCap, evenAndOddHeaders, and proofState. Previously this was implemented in a buggy way, so that the reference doc's values AND the new values were included. This change allows users to create a reference.docx that sets w:proofState for spelling or grammar to "dirty," so that spell/grammar checking will be triggered on the generated docx. Closes #1209. --- src/Text/Pandoc/Writers/Docx.hs | 27 +++++++++++---------- test/docx/golden/block_quotes.docx | Bin 10067 -> 10061 bytes test/docx/golden/codeblock.docx | Bin 9916 -> 9910 bytes test/docx/golden/comments.docx | Bin 10254 -> 10248 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10017 -> 10011 bytes test/docx/golden/custom_style_preserve.docx | Bin 10646 -> 10640 bytes test/docx/golden/custom_style_reference.docx | Bin 12400 -> 12397 bytes test/docx/golden/definition_list.docx | Bin 9916 -> 9910 bytes .../golden/document-properties-short-desc.docx | Bin 9922 -> 9916 bytes test/docx/golden/document-properties.docx | Bin 10399 -> 10393 bytes test/docx/golden/headers.docx | Bin 10055 -> 10049 bytes test/docx/golden/image.docx | Bin 26733 -> 26727 bytes test/docx/golden/inline_code.docx | Bin 9855 -> 9849 bytes test/docx/golden/inline_formatting.docx | Bin 10035 -> 10029 bytes test/docx/golden/inline_images.docx | Bin 26789 -> 26783 bytes test/docx/golden/link_in_notes.docx | Bin 10077 -> 10071 bytes test/docx/golden/links.docx | Bin 10248 -> 10242 bytes test/docx/golden/lists.docx | Bin 10314 -> 10308 bytes test/docx/golden/lists_continuing.docx | Bin 10110 -> 10104 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10192 -> 10186 bytes test/docx/golden/lists_restarting.docx | Bin 10108 -> 10102 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10212 -> 10206 bytes test/docx/golden/notes.docx | Bin 10024 -> 10018 bytes test/docx/golden/raw-blocks.docx | Bin 9956 -> 9950 bytes test/docx/golden/raw-bookmarks.docx | Bin 10090 -> 10084 bytes test/docx/golden/table_one_row.docx | Bin 9925 -> 9920 bytes test/docx/golden/table_with_list_cell.docx | Bin 10230 -> 10225 bytes test/docx/golden/tables.docx | Bin 10271 -> 10266 bytes test/docx/golden/track_changes_deletion.docx | Bin 9899 -> 9893 bytes test/docx/golden/track_changes_insertion.docx | Bin 9882 -> 9876 bytes test/docx/golden/track_changes_move.docx | Bin 9916 -> 9910 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10028 -> 10022 bytes test/docx/golden/unicode.docx | Bin 9841 -> 9835 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9888 -> 9882 bytes 34 files changed, 14 insertions(+), 13 deletions(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e11961bfd..75bed1595 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -469,12 +469,12 @@ writeDocx opts doc = do -- adds references to footnotes or endnotes we don't have... -- we do, however, copy some settings over from reference let settingsPath = "word/settings.xml" - settingsList = [ "w:autoHyphenation" - , "w:consecutiveHyphenLimit" - , "w:hyphenationZone" - , "w:doNotHyphenateCap" - , "w:evenAndOddHeaders" - , "w:proofState" + settingsList = [ "autoHyphenation" + , "consecutiveHyphenLimit" + , "hyphenationZone" + , "doNotHyphenateCap" + , "evenAndOddHeaders" + , "proofState" ] settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList @@ -577,16 +577,17 @@ copyChildren :: (PandocMonad m) copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path + let elsToCopy = + map cleanElem $ filterChildrenName (\e -> qName e `elem` elNames) ref + let elsToKeep = + [e | Elem e <- elContent dist, not (any (hasSameNameAs e) elsToCopy)] return $ toEntry path timestamp $ renderXml dist{ - elContent = elContent dist ++ copyContent ref + elContent = map Elem elsToKeep ++ map Elem elsToCopy } where - strName QName{qName=name, qPrefix=prefix} - | Just p <- prefix = p <> ":" <> name - | otherwise = name - shouldCopy = (`elem` elNames) . strName - cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} - copyContent = map cleanElem . filterChildrenName shouldCopy + hasSameNameAs (Element {elName = n1}) (Element {elName = n2}) = + qName n1 == qName n2 + cleanElem el@Element{elName=name} = el{elName=name{qURI=Nothing}} -- this is the lowest number used for a list numId baseListId :: Int diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index af1b33ca2..b6973fcfd 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index f748f1f01..f0d35d3ad 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index ac9d56680..fb1c15dec 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index f27727edd..43d536b65 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 1da499d6a..8865010d1 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index 4d2fe245d..10f7bf661 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index f386fcea3..2a7d81a34 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index debe9a3f6..2fcd14908 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index cd17400bf..39533f42d 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index d3af8a3dd..c8d67c45b 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 1c4e738c0..8a7aeec10 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 35f43f19f..969237cec 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 8de3f70f6..cda936a39 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index e76558be9..69991e791 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 88bae8142..40e848195 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index 455adcfc7..28237a30d 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 081d9ddba..bf075805e 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index fc9213fc5..e7d308e13 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index b636fd3f8..9763e347e 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 252623215..b717ca619 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index a8c3f5478..873e731d2 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 43e650ebd..134cb2eaf 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index fe4f7845b..c49ed79c8 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index 45e90608f..1f2cbb214 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index e60bb303f..a0160cdb4 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index a4037cf32..6427d475c 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index bc1bc27f8..470eac2ae 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 247725aaa..3542b8f9c 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index 3863afef2..b36b4485e 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index 5c848b63a..e30ab06ae 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index e0c843713..11597d578 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index 78a773bdd..c7bff82e5 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index c66a45b74..c70f6946e 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From 17d96404f5b6f5f080329e220a0784f10c364f2d Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 14 May 2021 16:19:20 +0200 Subject: Docx writer: allow multirow table headers --- src/Text/Pandoc/Writers/Docx/Table.hs | 42 +++++++++++++++++------------ test/docx/golden/table_with_list_cell.docx | Bin 10225 -> 10238 bytes test/docx/golden/tables.docx | Bin 10266 -> 10279 bytes 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index bb931bf08..ccc325fbe 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -42,11 +42,12 @@ tableToOpenXML blocksToOpenXML gridTable = do then return [] else withParaPropM (pStyleM "Table Caption") $ blocksToOpenXML captionBlocks - head' <- cellGridToOpenXML blocksToOpenXML thead - bodies <- mapM (cellGridToOpenXML blocksToOpenXML) tbodies - foot' <- cellGridToOpenXML blocksToOpenXML tfoot + head' <- cellGridToOpenXML blocksToOpenXML HeadRow thead + bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow) tbodies + foot' <- cellGridToOpenXML blocksToOpenXML FootRow tfoot let hasHeader = not . null . indices . partRowAttrs $ thead + let hasFooter = not . null . indices . partRowAttrs $ tfoot -- for compatibility with Word <= 2007, we include a val with a bitmask -- 0×0020 Apply first row conditional formatting -- 0×0040 Apply last row conditional formatting @@ -61,7 +62,7 @@ tableToOpenXML blocksToOpenXML gridTable = do ( mknode "w:tblStyle" [("w:val","Table")] () : mknode "w:tblW" tblWattr () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") - ,("w:lastRow","0") + ,("w:lastRow",if hasFooter then "1" else "0") ,("w:firstColumn","0") ,("w:lastColumn","0") ,("w:noHBand","0") @@ -77,6 +78,9 @@ tableToOpenXML blocksToOpenXML gridTable = do modify $ \s -> s { stInTable = False } return $ captionXml ++ [Elem tbl] +-- | Parts of a table +data RowType = HeadRow | BodyRow | FootRow + alignmentToString :: Alignment -> Text alignmentToString = \case AlignLeft -> "left" @@ -104,22 +108,23 @@ tableLayout specs = , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ]) cellGridToOpenXML :: PandocMonad m - => ([Block] -> WS m [Content]) - -> Part - -> WS m [Element] -cellGridToOpenXML blocksToOpenXML part@(Part _ _ rowAttrs) = + => ([Block] -> WS m [Content]) + -> RowType + -> Part + -> WS m [Element] +cellGridToOpenXML blocksToOpenXML rowType part@(Part _ _ rowAttrs) = if null (indices rowAttrs) then return mempty - else mapM (rowToOpenXML blocksToOpenXML) $ partToRows part + else mapM (rowToOpenXML blocksToOpenXML) $ partToRows rowType part data OOXMLCell = OOXMLCell Attr Alignment RowSpan ColSpan [Block] | OOXMLCellMerge ColSpan -data OOXMLRow = OOXMLRow Attr [OOXMLCell] +data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell] -partToRows :: Part -> [OOXMLRow] -partToRows part = +partToRows :: RowType -> Part -> [OOXMLRow] +partToRows rowType part = let toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell] toOOXMLCell ridx cidx = \case @@ -132,7 +137,7 @@ partToRows part = " at index " ++ show idx' _ -> mempty mkRow :: (RowIndex, Attr) -> OOXMLRow - mkRow (ridx, attr) = OOXMLRow attr + mkRow (ridx, attr) = OOXMLRow rowType attr . concatMap (uncurry $ toOOXMLCell ridx) . assocs . rowArray ridx @@ -143,12 +148,17 @@ rowToOpenXML :: PandocMonad m => ([Block] -> WS m [Content]) -> OOXMLRow -> WS m Element -rowToOpenXML blocksToOpenXML (OOXMLRow _attr cells) = do +rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells + let addTrPr = case rowType of + HeadRow -> (mknode "w:trPr" [] + [mknode "w:tblHeader" [("w:val", "true")] ()] :) + BodyRow -> id + FootRow -> id -- let align' = case align of -- AlignDefault -> colAlign -- _ -> align - return $ mknode "w:tr" [] xmlcells + return $ mknode "w:tr" [] (addTrPr xmlcells) ooxmlCellToOpenXML :: PandocMonad m => ([Block] -> WS m [Content]) @@ -161,8 +171,6 @@ ooxmlCellToOpenXML blocksToOpenXML = \case , mknode "w:vMerge" [("w:val", "continue")] () ] , mknode "w:p" [] [mknode "w:pPr" [] ()]] OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do - -- we handle rowspans via 'leftpad', so we can ignore those here - compactStyle <- pStyleM "Compact" es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents -- Table cells require a element, even an empty one! diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 6427d475c..e51910770 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 470eac2ae..8029774a9 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ -- cgit v1.2.3 From 2cf971cf56cbcdfcd6ee245df0d6e9811dc3573b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 15 May 2021 10:54:05 -0700 Subject: docx writer: Remove rsids from settings.docx. Word will add these when revisions are made. But it's pointless to start out with a set of them. --- data/docx/word/settings.xml | 12 +----------- test/docx/golden/block_quotes.docx | Bin 10061 -> 9981 bytes test/docx/golden/codeblock.docx | Bin 9910 -> 9830 bytes test/docx/golden/comments.docx | Bin 10248 -> 10168 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10011 -> 9931 bytes test/docx/golden/custom_style_preserve.docx | Bin 10640 -> 10560 bytes test/docx/golden/custom_style_reference.docx | Bin 12397 -> 12317 bytes test/docx/golden/definition_list.docx | Bin 9910 -> 9830 bytes .../docx/golden/document-properties-short-desc.docx | Bin 9916 -> 9835 bytes test/docx/golden/document-properties.docx | Bin 10393 -> 10314 bytes test/docx/golden/headers.docx | Bin 10049 -> 9969 bytes test/docx/golden/image.docx | Bin 26727 -> 26647 bytes test/docx/golden/inline_code.docx | Bin 9849 -> 9769 bytes test/docx/golden/inline_formatting.docx | Bin 10029 -> 9949 bytes test/docx/golden/inline_images.docx | Bin 26783 -> 26703 bytes test/docx/golden/link_in_notes.docx | Bin 10071 -> 9991 bytes test/docx/golden/links.docx | Bin 10242 -> 10162 bytes test/docx/golden/lists.docx | Bin 10308 -> 10228 bytes test/docx/golden/lists_continuing.docx | Bin 10104 -> 10024 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10186 -> 10106 bytes test/docx/golden/lists_restarting.docx | Bin 10102 -> 10022 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10206 -> 10126 bytes test/docx/golden/notes.docx | Bin 10018 -> 9938 bytes test/docx/golden/raw-blocks.docx | Bin 9950 -> 9870 bytes test/docx/golden/raw-bookmarks.docx | Bin 10084 -> 10004 bytes test/docx/golden/table_one_row.docx | Bin 9920 -> 9840 bytes test/docx/golden/table_with_list_cell.docx | Bin 10238 -> 10159 bytes test/docx/golden/tables.docx | Bin 10279 -> 10200 bytes test/docx/golden/track_changes_deletion.docx | Bin 9893 -> 9813 bytes test/docx/golden/track_changes_insertion.docx | Bin 9876 -> 9796 bytes test/docx/golden/track_changes_move.docx | Bin 9910 -> 9830 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10022 -> 9942 bytes test/docx/golden/unicode.docx | Bin 9835 -> 9755 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9882 -> 9802 bytes 34 files changed, 1 insertion(+), 11 deletions(-) (limited to 'test/docx/golden') diff --git a/data/docx/word/settings.xml b/data/docx/word/settings.xml index afa0199c9..fca255b90 100644 --- a/data/docx/word/settings.xml +++ b/data/docx/word/settings.xml @@ -17,16 +17,6 @@ - - - - - - - - - - @@ -44,4 +34,4 @@ - \ No newline at end of file + diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index b6973fcfd..f5e4b6428 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index f0d35d3ad..0bbe42bd7 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index fb1c15dec..6b759b522 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index 43d536b65..74e30f651 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 8865010d1..7ee99c87b 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index 10f7bf661..dfaf16e2b 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 2a7d81a34..02992c6bd 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 2fcd14908..9bfe56dca 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 39533f42d..2a37045f2 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index c8d67c45b..ca2c7a261 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 8a7aeec10..c4447d021 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 969237cec..1824c0aae 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index cda936a39..82565a5b1 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 69991e791..0416bcc15 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 40e848195..b51ec5220 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index 28237a30d..ba0d100c8 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index bf075805e..9632c598b 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index e7d308e13..3269a0528 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 9763e347e..716129170 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index b717ca619..af6edfe86 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index 873e731d2..f141425f7 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 134cb2eaf..93b4222f6 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index c49ed79c8..9d1aa9853 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index 1f2cbb214..b57289fdd 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index a0160cdb4..edb23cc72 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index e51910770..f9cbed156 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 8029774a9..b585c803d 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 3542b8f9c..313942750 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index b36b4485e..f18b3f85f 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index e30ab06ae..50bdab767 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 11597d578..230b27006 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index c7bff82e5..627276e4b 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index c70f6946e..790278108 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From 0a4c6925b6db433bdb8b9d57c94a7c36be3daea7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 15 May 2021 15:40:49 -0700 Subject: Docx writer: copy over more settings from referenc.odcx. From settings.xml in the reference-doc, we now include: `zoom`, `embedSystemFonts`, `doNotTrackMoves`, `defaultTabStop`, `drawingGridHorizontalSpacing`, `drawingGridVerticalSpacing`, `displayHorizontalDrawingGridEvery`, `displayVerticalDrawingGridEvery`, `characterSpacingControl`, `savePreviewPicture`, `mathPr`, `themeFontLang`, `decimalSymbol`, `listSeparator`, `autoHyphenation`, `compat`. Closes #7240. --- src/Text/Pandoc/Writers/Docx.hs | 17 ++++++++++++++++- test/docx/golden/block_quotes.docx | Bin 9981 -> 9981 bytes test/docx/golden/codeblock.docx | Bin 9830 -> 9830 bytes test/docx/golden/comments.docx | Bin 10168 -> 10168 bytes test/docx/golden/custom_style_no_reference.docx | Bin 9931 -> 9931 bytes test/docx/golden/custom_style_preserve.docx | Bin 10560 -> 10560 bytes test/docx/golden/custom_style_reference.docx | Bin 12317 -> 12368 bytes test/docx/golden/definition_list.docx | Bin 9830 -> 9830 bytes .../docx/golden/document-properties-short-desc.docx | Bin 9835 -> 9835 bytes test/docx/golden/document-properties.docx | Bin 10314 -> 10314 bytes test/docx/golden/headers.docx | Bin 9969 -> 9969 bytes test/docx/golden/image.docx | Bin 26647 -> 26647 bytes test/docx/golden/inline_code.docx | Bin 9769 -> 9769 bytes test/docx/golden/inline_formatting.docx | Bin 9949 -> 9949 bytes test/docx/golden/inline_images.docx | Bin 26703 -> 26703 bytes test/docx/golden/link_in_notes.docx | Bin 9991 -> 9991 bytes test/docx/golden/links.docx | Bin 10162 -> 10162 bytes test/docx/golden/lists.docx | Bin 10228 -> 10228 bytes test/docx/golden/lists_continuing.docx | Bin 10024 -> 10024 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10106 -> 10106 bytes test/docx/golden/lists_restarting.docx | Bin 10022 -> 10022 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10126 -> 10126 bytes test/docx/golden/notes.docx | Bin 9938 -> 9938 bytes test/docx/golden/raw-blocks.docx | Bin 9870 -> 9870 bytes test/docx/golden/raw-bookmarks.docx | Bin 10004 -> 10004 bytes test/docx/golden/table_one_row.docx | Bin 9840 -> 9840 bytes test/docx/golden/table_with_list_cell.docx | Bin 10159 -> 10159 bytes test/docx/golden/tables.docx | Bin 10200 -> 10200 bytes test/docx/golden/track_changes_deletion.docx | Bin 9813 -> 9813 bytes test/docx/golden/track_changes_insertion.docx | Bin 9796 -> 9796 bytes test/docx/golden/track_changes_move.docx | Bin 9830 -> 9830 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 9942 -> 9942 bytes test/docx/golden/unicode.docx | Bin 9755 -> 9755 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9802 -> 9802 bytes 34 files changed, 16 insertions(+), 1 deletion(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 75bed1595..e7a49ba02 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -469,12 +469,27 @@ writeDocx opts doc = do -- adds references to footnotes or endnotes we don't have... -- we do, however, copy some settings over from reference let settingsPath = "word/settings.xml" - settingsList = [ "autoHyphenation" + settingsList = [ "zoom" + , "embedSystemFonts" + , "doNotTrackMoves" + , "defaultTabStop" + , "drawingGridHorizontalSpacing" + , "drawingGridVerticalSpacing" + , "displayHorizontalDrawingGridEvery" + , "displayVerticalDrawingGridEvery" + , "characterSpacingControl" + , "savePreviewPicture" + , "mathPr" + , "themeFontLang" + , "decimalSymbol" + , "listSeparator" + , "autoHyphenation" , "consecutiveHyphenLimit" , "hyphenationZone" , "doNotHyphenateCap" , "evenAndOddHeaders" , "proofState" + , "compat" ] settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index f5e4b6428..d05020f82 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 0bbe42bd7..616c9b1d0 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index 6b759b522..aa200c8d6 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index 74e30f651..49cf42f38 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 7ee99c87b..e24940478 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index dfaf16e2b..e7da8f06d 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 02992c6bd..df148cfed 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 9bfe56dca..2d9e96b15 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 2a37045f2..d8f091956 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index ca2c7a261..8c1b3e870 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index c4447d021..48b72e283 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 1824c0aae..048ac8f15 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 82565a5b1..cf1301c4b 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 0416bcc15..662e70556 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index b51ec5220..d800a5fb4 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index ba0d100c8..bffdbbaf8 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 9632c598b..2b201df28 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 3269a0528..257053a78 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 716129170..0a3bf1016 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index af6edfe86..0aa69805f 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index f141425f7..88dd21abd 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 93b4222f6..f02d5951c 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 9d1aa9853..58a101b3f 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index b57289fdd..484c363a2 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index edb23cc72..f75e567ab 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index f9cbed156..a49f70643 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index b585c803d..f24e27516 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 313942750..de7c44bf4 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index f18b3f85f..958533459 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index 50bdab767..04fa05062 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 230b27006..ef2dc96f8 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index 627276e4b..a9de2b367 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index 790278108..3ebadc59d 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From 44484d0dee1bd095240b9faf26f8d1dad8e560ea Mon Sep 17 00:00:00 2001 From: Emily Bourke Date: Sun, 11 Apr 2021 21:42:53 +0100 Subject: Docx reader: Read table column widths. --- src/Text/Pandoc/Readers/Docx.hs | 5 +- src/Text/Pandoc/Readers/Docx/Parse.hs | 2 +- test/Tests/Writers/Docx.hs | 5 ++ test/docx/0_level_headers.native | 4 +- test/docx/golden/table_one_row.docx | Bin 9840 -> 9840 bytes test/docx/golden/table_with_list_cell.docx | Bin 10159 -> 10162 bytes test/docx/golden/tables-default-widths.docx | Bin 0 -> 10200 bytes test/docx/golden/tables.docx | Bin 10200 -> 10202 bytes test/docx/sdt_elements.native | 6 +- test/docx/table_one_row.native | 8 +-- test/docx/table_variable_width.native | 12 ++-- test/docx/table_with_list_cell.native | 6 +- test/docx/tables-default-widths.native | 92 ++++++++++++++++++++++++++++ test/docx/tables.native | 18 +++--- 14 files changed, 128 insertions(+), 30 deletions(-) create mode 100644 test/docx/golden/tables-default-widths.docx create mode 100644 test/docx/tables-default-widths.native (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 22dd54193..375bb7338 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -639,7 +639,7 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) = bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = return $ para mempty -bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do +bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do let cap' = simpleCaption $ plain $ text cap (hdr, rows) = case firstRowFormatting look of True | null rs -> (Nothing, [r]) @@ -669,7 +669,8 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do -- so should be possible. Alignment might be more difficult, -- since there doesn't seem to be a column entity in docx. let alignments = replicate width AlignDefault - widths = replicate width ColWidthDefault + totalWidth = sum grid + widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid return $ table cap' (zip alignments widths) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 7325ff300..978d6ff3a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -563,7 +563,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildrenByName ns "w" "gridCol" element in - mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger)) + mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger)) cols elemToTblGrid _ _ = throwError WrongElem diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 2e0f1e3fb..da25b95e0 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -111,6 +111,11 @@ tests = [ testGroup "inlines" def "docx/tables.native" "docx/golden/tables.docx" + , docxTest + "tables without explicit column widths" + def + "docx/tables-default-widths.native" + "docx/golden/tables-default-widths.docx" , docxTest "tables with lists in cells" def diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native index 7f875891e..ed589b029 100644 --- a/test/docx/0_level_headers.native +++ b/test/docx/0_level_headers.native @@ -1,6 +1,6 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 1.0)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -49,4 +49,4 @@ ,Para [Strong [Str "Table",Space,Str "Page"]] ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] ,Header 1 ("introduction",[],[]) [Str "Introduction"] -,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]] \ No newline at end of file +,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]] diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index f75e567ab..a7a8f2519 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index a49f70643..1362d4609 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables-default-widths.docx b/test/docx/golden/tables-default-widths.docx new file mode 100644 index 000000000..f24e27516 Binary files /dev/null and b/test/docx/golden/tables-default-widths.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index f24e27516..9dcbbc9d0 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native index dca82f0a0..a072c0d39 100644 --- a/test/docx/sdt_elements.native +++ b/test/docx/sdt_elements.native @@ -1,8 +1,8 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.22069570301081556) + ,(AlignDefault,ColWidth 0.22069570301081556) + ,(AlignDefault,ColWidth 0.5586085939783689)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) diff --git a/test/docx/table_one_row.native b/test/docx/table_one_row.native index e9188b145..88d5e3af5 100644 --- a/test/docx/table_one_row.native +++ b/test/docx/table_one_row.native @@ -1,8 +1,8 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.3333333333333333) + ,(AlignDefault,ColWidth 0.3333333333333333) + ,(AlignDefault,ColWidth 0.3333333333333333)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -15,4 +15,4 @@ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "Table"]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native index 229cb83b1..43ac40cca 100644 --- a/test/docx/table_variable_width.native +++ b/test/docx/table_variable_width.native @@ -1,10 +1,10 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 2.0096205237840725e-2) + ,(AlignDefault,ColWidth 1.9882415820416888e-2) + ,(AlignDefault,ColWidth 0.22202030999465527) + ,(AlignDefault,ColWidth 0.4761090326028862) + ,(AlignDefault,ColWidth 1.0689470871191876e-4)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -42,4 +42,4 @@ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) []]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/table_with_list_cell.native b/test/docx/table_with_list_cell.native index 06d8606da..51a35184b 100644 --- a/test/docx/table_with_list_cell.native +++ b/test/docx/table_with_list_cell.native @@ -1,7 +1,7 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -22,4 +22,4 @@ ,[Para [Str "A"]] ,[Para [Str "Numbered",Space,Str "list."]]]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/tables-default-widths.native b/test/docx/tables-default-widths.native new file mode 100644 index 000000000..e541e5a6e --- /dev/null +++ b/test/docx/tables-default-widths.native @@ -0,0 +1,92 @@ +[Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Name"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Game"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Fame"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Blame"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Lebron",Space,Str "James"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Basketball"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Very",Space,Str "High"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Leaving",Space,Str "Cleveland"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Ryan",Space,Str "Braun"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Baseball"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Moderate"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Steroids"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Russell",Space,Str "Wilson"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Football"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "High"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Tacky",Space,Str "uniform"]]]])] + (TableFoot ("",[],[]) + []) +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + []) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Sinple"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Table"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Without"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Header"]]]])] + (TableFoot ("",[],[]) + []) +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + []) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Simple"] + ,Para [Str "Multiparagraph"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Table"] + ,Para [Str "Full"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Of"] + ,Para [Str "Paragraphs"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "In",Space,Str "each"] + ,Para [Str "Cell."]]]])] + (TableFoot ("",[],[]) + [])] \ No newline at end of file diff --git a/test/docx/tables.native b/test/docx/tables.native index e541e5a6e..5a89496be 100644 --- a/test/docx/tables.native +++ b/test/docx/tables.native @@ -1,10 +1,10 @@ [Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -48,8 +48,8 @@ []) ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -68,8 +68,8 @@ []) ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -89,4 +89,4 @@ [Para [Str "In",Space,Str "each"] ,Para [Str "Cell."]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] -- cgit v1.2.3 From a3d745e48560a55d9a9ea9fa43ffdd5a8b84987f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 29 Jun 2021 09:44:37 -0700 Subject: Docx writer: support figure numbers. These are set up in such a way that they will work with Word's automatic table of figures. Closes #7392. --- src/Text/Pandoc/Writers/Docx.hs | 22 +++++++++++++++++++--- src/Text/Pandoc/Writers/Docx/Types.hs | 2 ++ test/docx/golden/image.docx | Bin 26647 -> 26774 bytes 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index d1065eb7d..b3e008b8a 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,8 @@ import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting import Text.Collate.Lang (renderLang) -import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) +import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang, translateTerm) +import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time import Text.Pandoc.UTF8 (fromTextLazy) @@ -854,14 +855,29 @@ blockToOpenXML' opts (Plain lst) = do -- title beginning with fig: indicates that the image is a figure blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do setFirstPara + fignum <- gets stNextFigureNum + modify $ \st -> st{ stNextFigureNum = fignum + 1 } + let figid = "fig" <> tshow fignum + figname <- translateTerm Term.Figure prop <- pStyleM $ if null alt then "Figure" else "Captioned Figure" paraProps <- local (\env -> env { envParaProperties = EnvProps (Just prop) [] <> envParaProperties env }) (getParaProps False) contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] - captionNode <- withParaPropM (pStyleM "Image Caption") - $ blockToOpenXML opts (Para alt) + captionNode <- if null alt + then return [] + else withParaPropM (pStyleM "Image Caption") + $ blockToOpenXML opts + (Para $ Span (figid,[],[]) + [Str "Figure\160", + RawInline (Format "openxml") + (" figname + <> " \\* ARABIC \">" + <> tshow fignum + <> ""), + Str ":", Space] : alt) return $ Elem (mknode "w:p" [] (map Elem paraProps ++ contents)) : captionNode diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs index 006584c30..36ac45ad2 100644 --- a/src/Text/Pandoc/Writers/Docx/Types.hs +++ b/src/Text/Pandoc/Writers/Docx/Types.hs @@ -117,6 +117,7 @@ data WriterState = WriterState{ , stDynamicParaProps :: Set.Set ParaStyleName , stDynamicTextProps :: Set.Set CharStyleName , stCurId :: Int + , stNextFigureNum :: Int } defaultWriterState :: WriterState @@ -137,6 +138,7 @@ defaultWriterState = WriterState{ , stDynamicParaProps = Set.empty , stDynamicTextProps = Set.empty , stCurId = 20 + , stNextFigureNum = 1 } setFirstPara :: PandocMonad m => WS m () diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 48b72e283..9fe65326f 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ -- cgit v1.2.3 From 0948af9cc549f0ea3b85fa760aa521b8deaad2c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 29 Jun 2021 11:15:40 -0700 Subject: Docx writer: Add table numbering for captioned tables. The numbers are added using fields, so that Word can create a list of tables that will update automatically. --- src/Text/Pandoc/Writers/Docx/Table.hs | 31 ++++++++++++++++++++++++++++--- src/Text/Pandoc/Writers/Docx/Types.hs | 2 ++ test/Tests/Writers/OOXML.hs | 4 +++- test/docx/golden/image.docx | Bin 26774 -> 26776 bytes 4 files changed, 33 insertions(+), 4 deletions(-) (limited to 'test/docx/golden') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index 49917e315..7a84c5278 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -17,7 +17,7 @@ import Control.Monad.State.Strict import Data.Array import Data.Text (Text) import Text.Pandoc.Definition -import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm) import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared import Text.Printf (printf) @@ -25,6 +25,7 @@ import Text.Pandoc.Writers.GridTable hiding (Table) import Text.Pandoc.Writers.OOXML import Text.Pandoc.XML.Light as XML hiding (Attr) import qualified Data.Text as T +import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Writers.GridTable as Grid tableToOpenXML :: PandocMonad m @@ -33,15 +34,23 @@ tableToOpenXML :: PandocMonad m -> WS m [Content] tableToOpenXML blocksToOpenXML gridTable = do setFirstPara - let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) = + let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) = gridTable let (Caption _maybeShortCaption captionBlocks) = caption + tablenum <- gets stNextTableNum + unless (null captionBlocks) $ + modify $ \st -> st{ stNextTableNum = tablenum + 1 } + let tableid = if T.null ident + then "table" <> tshow tablenum + else ident + tablename <- translateTerm Term.Table let captionStr = stringify captionBlocks let aligns = map fst $ elems colspecs captionXml <- if null captionBlocks then return [] else withParaPropM (pStyleM "Table Caption") - $ blocksToOpenXML captionBlocks + $ blocksToOpenXML + $ addLabel tableid tablename tablenum captionBlocks -- We set "in table" after processing the caption, because we don't -- want the "Table Caption" style to be overwritten with "Compact". modify $ \s -> s { stInTable = True } @@ -81,6 +90,22 @@ tableToOpenXML blocksToOpenXML gridTable = do modify $ \s -> s { stInTable = False } return $ captionXml ++ [Elem tbl] +addLabel :: Text -> Text -> Int -> [Block] -> [Block] +addLabel tableid tablename tablenum bs = + case bs of + (Para ils : rest) -> Para (label : Space : ils) : rest + (Plain ils : rest) -> Plain (label : Space : ils) : rest + _ -> Para [label] : bs + where + label = Span (tableid,[],[]) + [Str (tablename <> "\160"), + RawInline (Format "openxml") + (" " \\* ARABIC \">" + <> tshow tablenum + <> ""), + Str ":"] + -- | Parts of a table data RowType = HeadRow | BodyRow | FootRow diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs index 36ac45ad2..74b8d2753 100644 --- a/src/Text/Pandoc/Writers/Docx/Types.hs +++ b/src/Text/Pandoc/Writers/Docx/Types.hs @@ -118,6 +118,7 @@ data WriterState = WriterState{ , stDynamicTextProps :: Set.Set CharStyleName , stCurId :: Int , stNextFigureNum :: Int + , stNextTableNum :: Int } defaultWriterState :: WriterState @@ -139,6 +140,7 @@ defaultWriterState = WriterState{ , stDynamicTextProps = Set.empty , stCurId = 20 , stNextFigureNum = 1 + , stNextTableNum = 1 } setFirstPara :: PandocMonad m => WS m () diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs index c1e47622d..83f05cfec 100644 --- a/test/Tests/Writers/OOXML.hs +++ b/test/Tests/Writers/OOXML.hs @@ -55,7 +55,9 @@ testArchive :: (WriterOptions -> Pandoc -> PandocIO BL.ByteString) -> IO Archive testArchive writerFn opts fp = do txt <- T.readFile fp - bs <- runIOorExplode $ readNative def txt >>= writerFn opts + bs <- runIOorExplode $ do + setTranslations "en-US" + readNative def txt >>= writerFn opts return $ toArchive bs compareFileList :: FilePath -> Archive -> Archive -> Maybe String diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 9fe65326f..7c2d8a9ac 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ -- cgit v1.2.3