From 840108a9c11850089a27a3b5458f8561ab1b6a2e Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 27 Jul 2014 15:11:18 -0400 Subject: Docx reader: Make metavalues out of styled paragraphs. This will make paragraphs styled with `Author`, `Title`, `Subtitle`, `Date`, and `Abstract` into pandoc metavalues, rather than text. The implementation only takes those elements from the beginning of the document (ignoring empty paragraphs). Multiple paragraphs in the `Author` style will be made into a metaList, one paragraph per item. Hard linebreaks (shift-return) in the paragraph will be maintained, and can be used for institution, email, etc. --- tests/Tests/Readers/Docx.hs | 15 +++++++++++++++ tests/docx.metadata.docx | Bin 0 -> 40487 bytes tests/docx.metadata.native | 2 ++ tests/docx.metadata_after_normal.docx | Bin 0 -> 57273 bytes tests/docx.metadata_after_normal.native | 7 +++++++ tests/docx.metadata_author_linebreak.docx | Bin 0 -> 40481 bytes tests/docx.metadata_author_linebreak.native | 2 ++ 7 files changed, 26 insertions(+) create mode 100644 tests/docx.metadata.docx create mode 100644 tests/docx.metadata.native create mode 100644 tests/docx.metadata_after_normal.docx create mode 100644 tests/docx.metadata_after_normal.native create mode 100644 tests/docx.metadata_author_linebreak.docx create mode 100644 tests/docx.metadata_author_linebreak.native (limited to 'tests') diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 7b737f95a..8ad9e08ba 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -164,5 +164,20 @@ tests = [ testGroup "inlines" "docx.track_changes_deletion.docx" "docx.track_changes_deletion_all.native" ] + , testGroup "metadata" + [ testCompareWithOpts def{readerStandalone=True} + "metadata fields" + "docx.metadata.docx" + "docx.metadata.native" + , testCompareWithOpts def{readerStandalone=True} + "linebreak between authors" + "docx.metadata_author_linebreak.docx" + "docx.metadata_author_linebreak.native" + , testCompareWithOpts def{readerStandalone=True} + "stop recording metadata with normal text" + "docx.metadata_after_normal.docx" + "docx.metadata_after_normal.native" + ] + ] diff --git a/tests/docx.metadata.docx b/tests/docx.metadata.docx new file mode 100644 index 000000000..34182a87e Binary files /dev/null and b/tests/docx.metadata.docx differ diff --git a/tests/docx.metadata.native b/tests/docx.metadata.native new file mode 100644 index 000000000..ed7ba63cf --- /dev/null +++ b/tests/docx.metadata.native @@ -0,0 +1,2 @@ +Pandoc (Meta {unMeta = fromList [("abstract",MetaInlines [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]),("author",MetaList [MetaInlines [Str "Mary",Space,Str "Ann",Space,Str "Evans"],MetaInlines [Str "Aurore",Space,Str "Dupin"]]),("date",MetaInlines [Str "July",Space,Str "28,",Space,Str "2014"]),("title",MetaInlines [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"])]}) +[Para [Str "And",Space,Str "now",Space,Str "this",Space,Str "is",Space,Str "normal",Space,Str "text."]] diff --git a/tests/docx.metadata_after_normal.docx b/tests/docx.metadata_after_normal.docx new file mode 100644 index 000000000..16b8d583c Binary files /dev/null and b/tests/docx.metadata_after_normal.docx differ diff --git a/tests/docx.metadata_after_normal.native b/tests/docx.metadata_after_normal.native new file mode 100644 index 000000000..f0e31f8da --- /dev/null +++ b/tests/docx.metadata_after_normal.native @@ -0,0 +1,7 @@ +Pandoc (Meta {unMeta = fromList [("abstract",MetaInlines [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]),("author",MetaList [MetaInlines [Str "Mary",Space,Str "Ann",Space,Str "Evans"],MetaInlines [Str "Aurore",Space,Str "Dupin"]]),("date",MetaInlines [Str "July",Space,Str "28,",Space,Str "2014"]),("title",MetaInlines [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"])]}) +[Para [Str "And",Space,Str "now",Space,Str "this",Space,Str "is",Space,Str "normal",Space,Str "text."] +,Para [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"] +,Para [Str "Mary",Space,Str "Ann",Space,Str "Evans"] +,Para [Str "Aurore",Space,Str "Dupin"] +,Para [Str "July",Space,Str "28,",Space,Str "2014"] +,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]] diff --git a/tests/docx.metadata_author_linebreak.docx b/tests/docx.metadata_author_linebreak.docx new file mode 100644 index 000000000..94f0e0753 Binary files /dev/null and b/tests/docx.metadata_author_linebreak.docx differ diff --git a/tests/docx.metadata_author_linebreak.native b/tests/docx.metadata_author_linebreak.native new file mode 100644 index 000000000..ed7ba63cf --- /dev/null +++ b/tests/docx.metadata_author_linebreak.native @@ -0,0 +1,2 @@ +Pandoc (Meta {unMeta = fromList [("abstract",MetaInlines [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]),("author",MetaList [MetaInlines [Str "Mary",Space,Str "Ann",Space,Str "Evans"],MetaInlines [Str "Aurore",Space,Str "Dupin"]]),("date",MetaInlines [Str "July",Space,Str "28,",Space,Str "2014"]),("title",MetaInlines [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"])]}) +[Para [Str "And",Space,Str "now",Space,Str "this",Space,Str "is",Space,Str "normal",Space,Str "text."]] -- cgit v1.2.3 From 54708da371e767cd42598ea8f7fbd7d45c57421b Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 29 Jul 2014 09:26:18 -0400 Subject: Add and update docx tests in pandoc.cabal. --- pandoc.cabal | 12 ++++++++++++ tests/Tests/Readers/Docx.hs | 4 ---- tests/docx.metadata.docx | Bin 40487 -> 39538 bytes tests/docx.metadata_after_normal.docx | Bin 57273 -> 56276 bytes tests/docx.metadata_author_linebreak.docx | Bin 40481 -> 0 bytes tests/docx.metadata_author_linebreak.native | 2 -- 6 files changed, 12 insertions(+), 6 deletions(-) delete mode 100644 tests/docx.metadata_author_linebreak.docx delete mode 100644 tests/docx.metadata_author_linebreak.native (limited to 'tests') diff --git a/pandoc.cabal b/pandoc.cabal index 6597b27ed..9249723ff 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -172,13 +172,25 @@ Extra-Source-Files: tests/fb2.test.jpg, tests/docx.already_auto_ident.docx, tests/docx.block_quotes.docx, + tests/docx.codeblock.docx, + tests/docx.deep_normalize.docx, + tests/docx.definition_list.docx, + tests/docx.hanging_indent.docx, tests/docx.headers.docx, tests/docx.image.docx, + tests/docx.inline_code.docx, tests/docx.inline_formatting.docx, tests/docx.links.docx, tests/docx.lists.docx, + tests/docx.metadata.docx, + tests/docx.metadata_after_normal.docx, + tests/docx.normalize.docx, tests/docx.notes.docx, tests/docx.tables.docx, + tests/docx.tabs.docx, + tests/docx.track_changes_deletion.docx, + tests/docx.track_changes_insertion.docx, + tests/docx.trailing_spaces_in_formatting.docx, tests/docx.unicode.docx, tests/*.native, tests/txt2tags.t2t diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 8ad9e08ba..b1a966969 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -169,10 +169,6 @@ tests = [ testGroup "inlines" "metadata fields" "docx.metadata.docx" "docx.metadata.native" - , testCompareWithOpts def{readerStandalone=True} - "linebreak between authors" - "docx.metadata_author_linebreak.docx" - "docx.metadata_author_linebreak.native" , testCompareWithOpts def{readerStandalone=True} "stop recording metadata with normal text" "docx.metadata_after_normal.docx" diff --git a/tests/docx.metadata.docx b/tests/docx.metadata.docx index 34182a87e..ccf50b475 100644 Binary files a/tests/docx.metadata.docx and b/tests/docx.metadata.docx differ diff --git a/tests/docx.metadata_after_normal.docx b/tests/docx.metadata_after_normal.docx index 16b8d583c..b94a016cb 100644 Binary files a/tests/docx.metadata_after_normal.docx and b/tests/docx.metadata_after_normal.docx differ diff --git a/tests/docx.metadata_author_linebreak.docx b/tests/docx.metadata_author_linebreak.docx deleted file mode 100644 index 94f0e0753..000000000 Binary files a/tests/docx.metadata_author_linebreak.docx and /dev/null differ diff --git a/tests/docx.metadata_author_linebreak.native b/tests/docx.metadata_author_linebreak.native deleted file mode 100644 index ed7ba63cf..000000000 --- a/tests/docx.metadata_author_linebreak.native +++ /dev/null @@ -1,2 +0,0 @@ -Pandoc (Meta {unMeta = fromList [("abstract",MetaInlines [Str "This",Space,Str "is",Space,Str "a",Space,Str "test",Space,Str "of",Space,Str "how",Space,Str "this",Space,Str "all",Space,Str "works.",Space,Str "I\8217ve",Space,Str "skipped",Space,Str "lines",Space,Str "here,",Space,Str "which",Space,Str "pandoc",Space,Str "doesn\8217t",Space,Str "do,",Space,Str "but",Space,Str "which",Space,Str "shouldn\8217t",Space,Str "make",Space,Str "a",Space,Str "difference."]),("author",MetaList [MetaInlines [Str "Mary",Space,Str "Ann",Space,Str "Evans"],MetaInlines [Str "Aurore",Space,Str "Dupin"]]),("date",MetaInlines [Str "July",Space,Str "28,",Space,Str "2014"]),("title",MetaInlines [Str "This",Space,Str "Is",Space,Str "the",Space,Str "Title"])]}) -[Para [Str "And",Space,Str "now",Space,Str "this",Space,Str "is",Space,Str "normal",Space,Str "text."]] -- cgit v1.2.3