From 193f6bfebaa43d0d6749d10a4e7ca78a0d31361d Mon Sep 17 00:00:00 2001 From: Milan Bracke Date: Mon, 14 Jun 2021 15:00:36 +0200 Subject: Docx reader: fix handling of nested fields Fields delimited by fldChar elements can contain other fields. Before, the nested fields would be ignored, except for the end, which would be considered the end of the parent field. To fix this issue, fields needed to be considered containing ParParts instead of Runs, since a Run can't represent complex enough structures. This also impacted Hyperlinks since they can originate from a field. --- test/Tests/Readers/Docx.hs | 4 ++++ test/docx/nested_instrText.docx | Bin 0 -> 14112 bytes test/docx/nested_instrText.native | 5 +++++ 3 files changed, 9 insertions(+) create mode 100644 test/docx/nested_instrText.docx create mode 100644 test/docx/nested_instrText.native (limited to 'test') diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 2f28af317..af6023836 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -147,6 +147,10 @@ tests = [ testGroup "document" "hyperlinks in tag" "docx/instrText_hyperlink.docx" "docx/instrText_hyperlink.native" + , testCompare + "nested fields with tag" + "docx/nested_instrText.docx" + "docx/nested_instrText.native" , testCompare "inline image" "docx/image.docx" diff --git a/test/docx/nested_instrText.docx b/test/docx/nested_instrText.docx new file mode 100644 index 000000000..532584193 Binary files /dev/null and b/test/docx/nested_instrText.docx differ diff --git a/test/docx/nested_instrText.native b/test/docx/nested_instrText.native new file mode 100644 index 000000000..730b041f5 --- /dev/null +++ b/test/docx/nested_instrText.native @@ -0,0 +1,5 @@ +[Para [Str "\24076\26395\28145\20837\20102\35299\30340\35835\32773\21487\20197\21435\30475David",Space,Str "French",Space,Str "Belding\21644Kevin",Space,Str "J.",Space,Str "Mitchell\30340" + ,Link ("",[],[]) [Str "Foundations",Space,Str "of",Space,Str "Analysis,",Space,Str "1/16/18",Space,Str "8:40:00",Space,Str "AM,",Space,Str "2nd",Space,Str "Edition"] ("https://books.google.com/books?id=sp_Zcb9ot90C&lpg=PR4&hl=zh-CN&pg=PA19#v=onepage&q&f=true","") + ,Str ",\21487\20174\&19\39029\30475\36215\65292\25110D.C.",Space,Str "Goldrei\30340",Space + ,Link ("",[],[]) [Str "Classic",Space,Str "Set",Space,Str "Theory:",Space,Str "For",Space,Str "Guided",Space,Str "Independent",Space,Str "Study"] ("https://books.google.ae/books?id=dlc0DwAAQBAJ&lpg=PT29&hl=zh-CN&pg=PT26#v=onepage&q&f=true","") + ,Str "\65292\20174\31532\20108\31456\30475\36215\65292\38405\35835\26102\35201\27880\24847\26412\25991\19982\36825\20123\20070\25152\19981\21516\30340\26159\24182\27809\26377\25226\23454\25968\30475\20316\26159\26377\29702\25968\38598\30340\20998\21106\12290"]] -- cgit v1.2.3