From c113ca6717d00870ec10716897d76a6fa62b1d41 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 15 Sep 2019 01:40:23 +0300 Subject: [Docx Reader] Use style names, not ids, for assigning semantic meaning Motivating issues: #5523, #5052, #5074 Style name comparisons are case-insensitive, since those are case-insensitive in Word. w:styleId will be used as style name if w:name is missing (this should only happen for malformed docx and is kept as a fallback to avoid failing altogether on malformed documents) Block quote detection code moved from Docx.Parser to Readers.Docx Code styles, i.e. "Source Code" and "Verbatim Char" now honor style inheritance Docx Reader now honours "Compact" style (used in Pandoc-generated docx). The side-effect is that "Compact" style no longer shows up in docx+styles output. Styles inherited from "Compact" will still show up. Removed obsolete list-item style from divsToKeep. That hasn't really done anything for a while now. Add newtypes to differentiate between style names, ids, and different style types (that is, paragraph and character styles) Since docx style names can have spaces in them, and pandoc-markdown classes can't, anywhere a style name is used as a class name, spaces are replaced with ASCII dashes `-`. Get rid of extraneous intermediate types, carrying styleId information. Instead, styleId is saved with other style data. Use RunStyle for inline style definitions only (lacking styleId and styleName); for Character Styles use CharStyle type (which is basically RunStyle with styleId and StyleName bolted onto it). 
--- test/docx/compact-style-removal.docx | Bin 0 -> 9951 bytes test/docx/compact-style-removal.native | 5 +++++ test/docx/lists-compact.docx | Bin 0 -> 9952 bytes test/docx/lists-compact.native | 5 +++++ 4 files changed, 10 insertions(+) create mode 100644 test/docx/compact-style-removal.docx create mode 100644 test/docx/compact-style-removal.native create mode 100644 test/docx/lists-compact.docx create mode 100644 test/docx/lists-compact.native (limited to 'test/docx') diff --git a/test/docx/compact-style-removal.docx b/test/docx/compact-style-removal.docx new file mode 100644 index 000000000..fde0064db Binary files /dev/null and b/test/docx/compact-style-removal.docx differ diff --git a/test/docx/compact-style-removal.native b/test/docx/compact-style-removal.native new file mode 100644 index 000000000..340878ba0 --- /dev/null +++ b/test/docx/compact-style-removal.native @@ -0,0 +1,5 @@ +[OrderedList (1,Decimal,Period) + [[Plain [Str "One"]] + ,[Plain [Str "Two"]] + ,[Plain [Str "Three"]] + ,[Plain [Str "Four"]]]] diff --git a/test/docx/lists-compact.docx b/test/docx/lists-compact.docx new file mode 100644 index 000000000..d7f9e4a06 Binary files /dev/null and b/test/docx/lists-compact.docx differ diff --git a/test/docx/lists-compact.native b/test/docx/lists-compact.native new file mode 100644 index 000000000..340878ba0 --- /dev/null +++ b/test/docx/lists-compact.native @@ -0,0 +1,5 @@ +[OrderedList (1,Decimal,Period) + [[Plain [Str "One"]] + ,[Plain [Str "Two"]] + ,[Plain [Str "Three"]] + ,[Plain [Str "Four"]]]] -- cgit v1.2.3 From 5c5d1a65d95fcdde0769935a4776e67c336f113a Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sat, 14 Sep 2019 19:45:30 +0300 Subject: [Docx Reader] Update tests Notice this commit updates lists.docx. The old test file contained references to "ListParagraph" style, which should never leak outside of pandoc, so I'm not sure what that was supposed to test for exactly. 
--- test/docx/0_level_headers.native | 4 ++-- test/docx/adjacent_codeblocks.docx | Bin 22437 -> 22264 bytes test/docx/custom-style-with-styles.native | 6 +++--- test/docx/lists.docx | Bin 19845 -> 9473 bytes test/docx/lists.native | 2 +- test/docx/nested_anchors_in_header.native | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'test/docx') diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native index 804ad8732..6d8269b21 100644 --- a/test/docx/0_level_headers.native +++ b/test/docx/0_level_headers.native @@ -15,10 +15,10 @@ ,Para [Str "FIGURES",Space,Str "iv"] ,Para [Str "TABLES",Space,Str "v"] ,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Str "2"] -,Header 1 ("figures",["Heading0"],[]) [Str "FIGURES"] +,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"] ,Para [Strong [Str "Figure",Space,Str "Page"]] ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] -,Header 1 ("tables",["Heading0"],[]) [Str "TABLES"] +,Header 1 ("tables",["Heading-0"],[]) [Str "TABLES"] ,Para [Strong [Str "Table",Space,Str "Page"]] ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] ,Header 1 ("introduction",[],[]) [Str "Introduction"] diff --git a/test/docx/adjacent_codeblocks.docx b/test/docx/adjacent_codeblocks.docx index d61fb45d5..0fd44a183 100644 Binary files a/test/docx/adjacent_codeblocks.docx and b/test/docx/adjacent_codeblocks.docx differ diff --git a/test/docx/custom-style-with-styles.native b/test/docx/custom-style-with-styles.native index 61f11911d..1ad7d88cc 100644 --- a/test/docx/custom-style-with-styles.native +++ b/test/docx/custom-style-with-styles.native @@ -1,7 +1,7 @@ -[Div ("",[],[("custom-style","FirstParagraph")]) +[Div ("",[],[("custom-style","First Paragraph")]) [Para [Str "This",Space,Str "is",Space,Str "some",Space,Str "text."]] -,Div ("",[],[("custom-style","BodyText")]) 
+,Div ("",[],[("custom-style","Body Text")]) [Para [Str "This",Space,Str "is",Space,Str "text",Space,Str "with",Space,Str "an",Space,Span ("",[],[("custom-style","Emphatic")]) [Str "emphasized"],Space,Str "text",Space,Str "style.",Space,Str "And",Space,Str "this",Space,Str "is",Space,Str "text",Space,Str "with",Space,Str "a",Space,Span ("",[],[("custom-style","Strengthened")]) [Str "strengthened"],Space,Str "text",Space,Str "style."]] -,Div ("",[],[("custom-style","MyBlockStyle")]) +,Div ("",[],[("custom-style","My Block Style")]) [BlockQuote [Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "styled",Space,Str "paragraph",Space,Str "that",Space,Str "inherits",Space,Str "from",Space,Str "Block",Space,Str "Text."]]]] diff --git a/test/docx/lists.docx b/test/docx/lists.docx index 8b46351d9..356dc1ea9 100644 Binary files a/test/docx/lists.docx and b/test/docx/lists.docx differ diff --git a/test/docx/lists.native b/test/docx/lists.native index af922b335..1192da709 100644 --- a/test/docx/lists.native +++ b/test/docx/lists.native @@ -15,4 +15,4 @@ ,Para [Str "Sub",Space,Str "paragraph"]]]]]] ,[Para [Str "Same",Space,Str "list"]]] ,BulletList - [[Para [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]] + [[Plain [Str "Different",Space,Str "list",Space,Str "adjacent",Space,Str "to",Space,Str "the",Space,Str "one",Space,Str "above."]]]] diff --git a/test/docx/nested_anchors_in_header.native b/test/docx/nested_anchors_in_header.native index 562f60215..314b31663 100644 --- a/test/docx/nested_anchors_in_header.native +++ b/test/docx/nested_anchors_in_header.native @@ -1,4 +1,4 @@ -[Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOCHeading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"] +[Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOC-Heading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"] ,Para [Link ("",[],[]) [Str 
"Short",Space,Str "instructions",Space,Str "1"] ("#short-instructions","")] ,Para [Link ("",[],[]) [Str "Some",Space,Str "instructions",Space,Str "1"] ("#some-instructions","")] ,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "opening",Space,Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")] -- cgit v1.2.3