From c113ca6717d00870ec10716897d76a6fa62b1d41 Mon Sep 17 00:00:00 2001 From: Nikolay Yakimov Date: Sun, 15 Sep 2019 01:40:23 +0300 Subject: [Docx Reader] Use style names, not ids, for assigning semantic meaning Motivating issues: #5523, #5052, #5074 Style name comparisons are case-insensitive, since those are case-insensitive in Word. w:styleId will be used as style name if w:name is missing (this should only happen for malformed docx and is kept as a fallback to avoid failing altogether on malformed documents) Block quote detection code moved from Docx.Parser to Readers.Docx Code styles, i.e. "Source Code" and "Verbatim Char" now honor style inheritance Docx Reader now honors "Compact" style (used in Pandoc-generated docx). The side-effect is that "Compact" style no longer shows up in docx+styles output. Styles inherited from "Compact" will still show up. Removed obsolete list-item style from divsToKeep. It hasn't really done anything for a while now. Add newtypes to differentiate between style names, ids, and different style types (that is, paragraph and character styles) Since docx style names can have spaces in them, and pandoc-markdown classes can't, wherever a style name is used as a class name, spaces are replaced with ASCII dashes `-`. Get rid of extraneous intermediate types carrying styleId information. Instead, styleId is saved with other style data. Use RunStyle for inline style definitions only (lacking styleId and styleName); for Character Styles use CharStyle type (which is basically RunStyle with styleId and StyleName bolted onto it). 
--- test/Tests/Readers/Docx.hs | 9 +++++++++ test/docx/compact-style-removal.docx | Bin 0 -> 9951 bytes test/docx/compact-style-removal.native | 5 +++++ test/docx/lists-compact.docx | Bin 0 -> 9952 bytes test/docx/lists-compact.native | 5 +++++ 5 files changed, 19 insertions(+) create mode 100644 test/docx/compact-style-removal.docx create mode 100644 test/docx/compact-style-removal.native create mode 100644 test/docx/lists-compact.docx create mode 100644 test/docx/lists-compact.native (limited to 'test') diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 9d0913e55..583a6ec18 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -255,6 +255,10 @@ tests = [ testGroup "document" "lists" "docx/lists.docx" "docx/lists.native" + , testCompare + "compact lists" + "docx/lists-compact.docx" + "docx/lists-compact.native" , testCompare "lists with level overrides" "docx/lists_level_override.docx" @@ -425,6 +429,11 @@ tests = [ testGroup "document" "custom styles (`+styles`) enabled" "docx/custom-style-reference.docx" "docx/custom-style-with-styles.native" + , testCompareWithOpts + def{readerExtensions=extensionsFromList [Ext_styles]} + "custom styles (`+styles`): Compact style is removed from output" + "docx/compact-style-removal.docx" + "docx/compact-style-removal.native" ] , testGroup "metadata" [ testCompareWithOpts def{readerStandalone=True} diff --git a/test/docx/compact-style-removal.docx b/test/docx/compact-style-removal.docx new file mode 100644 index 000000000..fde0064db Binary files /dev/null and b/test/docx/compact-style-removal.docx differ diff --git a/test/docx/compact-style-removal.native b/test/docx/compact-style-removal.native new file mode 100644 index 000000000..340878ba0 --- /dev/null +++ b/test/docx/compact-style-removal.native @@ -0,0 +1,5 @@ +[OrderedList (1,Decimal,Period) + [[Plain [Str "One"]] + ,[Plain [Str "Two"]] + ,[Plain [Str "Three"]] + ,[Plain [Str "Four"]]]] diff --git 
a/test/docx/lists-compact.docx b/test/docx/lists-compact.docx new file mode 100644 index 000000000..d7f9e4a06 Binary files /dev/null and b/test/docx/lists-compact.docx differ diff --git a/test/docx/lists-compact.native b/test/docx/lists-compact.native new file mode 100644 index 000000000..340878ba0 --- /dev/null +++ b/test/docx/lists-compact.native @@ -0,0 +1,5 @@ +[OrderedList (1,Decimal,Period) + [[Plain [Str "One"]] + ,[Plain [Str "Two"]] + ,[Plain [Str "Three"]] + ,[Plain [Str "Four"]]]] -- cgit v1.2.3