From c343f1a90bc35d745de673de5ff771ddbe60be54 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Wed, 25 Jun 2014 08:10:19 -0400 Subject: Docx Reader: Add change types Insertion and deletion. Dates are just strings for now. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 18200bcf9..1cb5fe2e3 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -455,6 +455,8 @@ elemToCell ns element elemToCell _ _ = Nothing data ParPart = PlainRun Run + | Insertion ChangeId Author ChangeDate [Run] + | Deletion ChangeId Author ChangeDate [Run] | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink RelId [Run] @@ -604,4 +606,6 @@ type Target = String type Anchor = String type BookMarkId = String type RelId = String - +type ChangeId = String +type Author = String +type ChangeDate = String -- cgit v1.2.3 From 38e1d3e95b8240eeb35db0a1a56e308cfb4835e4 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Wed, 25 Jun 2014 10:32:48 -0400 Subject: Docx reader: Parse Insertions and Deletions. This is just for the Parse module, reading it into the Docx format. It still has to be translated into pandoc. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 1cb5fe2e3..c76ef7511 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -541,7 +541,7 @@ elemToRun _ _ = Nothing elemToRunElem :: NameSpaces -> Element -> Maybe RunElem elemToRunElem ns element - | qName (elName element) == "t" && + | (qName (elName element) == "t" || qName (elName element) == "delText") && qURI (elName element) == (lookup "w" ns) = Just $ TextRun (strContent element) | qName (elName element) == "br" && @@ -581,6 +581,22 @@ elemToParPart ns element Nothing -> do r <- elemToRun ns element return $ PlainRun r +elemToParPart ns element + | qName (elName element) == "ins" && + qURI (elName element) == (lookup "w" ns) = do + cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element + cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element + let runs = mapMaybe (elemToRun ns) (elChildren element) + return $ Insertion cId cAuthor cDate runs +elemToParPart ns element + | qName (elName element) == "del" && + qURI (elName element) == (lookup "w" ns) = do + cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element + cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element + cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element + let runs = mapMaybe (elemToRun ns) (elChildren element) + return $ Deletion cId cAuthor cDate runs elemToParPart ns element | qName (elName element) == "bookmarkStart" && qURI (elName element) == (lookup "w" ns) = do -- cgit v1.2.3 From ed44e4ca8c8f3d3c4c7ac65b98f16732c8173b88 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Wed, 25 Jun 2014 10:38:01 -0400 Subject: Docx reader: Add rudimentary track changes support. This will only read the insertions, and ignore the deletions. --- src/Text/Pandoc/Readers/Docx.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index b787ca9fb..130e2a1e2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -234,6 +234,9 @@ runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) = parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline] parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r +parPartToInlines opts docx (Insertion _ _ _ runs) = + concatMap (runToInlines opts docx) runs +parPartToInlines _ _ (Deletion _ _ _ _) = [] parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = [] parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []] parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) = -- cgit v1.2.3 From 9614ddfedc18cccbf9fbe1a23fae200c7e67d72d Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Wed, 25 Jun 2014 11:00:15 -0400 Subject: Docx reader: Remove unnecessary filter in Parse. mapMaybe does the filtering for us. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index c76ef7511..dbbd65681 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -281,10 +281,6 @@ elemToBody ns element | qName (elName element) == "body" && qURI (elName element $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element elemToBody _ _ = Nothing -isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool -isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] && - qURI q == (lookup "w" ns) - elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String) elemToNumInfo ns element | qName (elName element) == "p" && @@ -319,9 +315,8 @@ elemToBodyPart ns element | qName (elName element) == "p" && qURI (elName element) == (lookup "w" ns) = let parstyle = elemToParagraphStyle ns element - parparts = mapMaybe id - $ map (elemToParPart ns) - $ filterChildrenName (isRunOrLinkOrBookmark ns) element + parparts = mapMaybe (elemToParPart ns) + $ elChildren element in case elemToNumInfo ns element of Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts -- cgit v1.2.3 From a2b6ab847cb1c997c6ae7b8ed36f543a7ed90ecd Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal <jrosenthal@jhu.edu> Date: Wed, 25 Jun 2014 11:09:28 -0400 Subject: Docx reader: Add tests for basic track changes This is what seems like the sensible default: read in insertions, and ignore deletions. In the future, it would be good if options were available for either taking in deletions or keeping both in some scriptable format. --- tests/Tests/Readers/Docx.hs | 10 ++++++++++ tests/docx.track_changes_deletion.docx | Bin 0 -> 13350 bytes tests/docx.track_changes_deletion_only_ins.native | 1 + tests/docx.track_changes_insertion.docx | Bin 0 -> 12956 bytes tests/docx.track_changes_insertion_only_ins.native | 1 + 5 files changed, 12 insertions(+) create mode 100644 tests/docx.track_changes_deletion.docx create mode 100644 tests/docx.track_changes_deletion_only_ins.native create mode 100644 tests/docx.track_changes_insertion.docx create mode 100644 tests/docx.track_changes_insertion_only_ins.native diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 4d062bbc0..f34e123ed 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -122,5 +122,15 @@ tests = [ testGroup "inlines" "docx.codeblock.native" ] + , testGroup "track changes" + [ testCompare + "insert insertion (insertions only)" + "docx.track_changes_insertion.docx" + "docx.track_changes_insertion_only_ins.native" + , testCompare + "skip deletion (insertions only)" + "docx.track_changes_deletion.docx" + "docx.track_changes_deletion_only_ins.native" + ] ] diff --git a/tests/docx.track_changes_deletion.docx b/tests/docx.track_changes_deletion.docx new file mode 100644 index 000000000..5cfdbeed8 Binary files /dev/null and b/tests/docx.track_changes_deletion.docx differ diff --git a/tests/docx.track_changes_deletion_only_ins.native b/tests/docx.track_changes_deletion_only_ins.native new file mode 100644 index 000000000..205c67810 --- /dev/null +++ b/tests/docx.track_changes_deletion_only_ins.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "a",Space,Str "deletion."]] diff --git a/tests/docx.track_changes_insertion.docx b/tests/docx.track_changes_insertion.docx new file mode 100644 index 000000000..fbdc9003e Binary files /dev/null and b/tests/docx.track_changes_insertion.docx differ diff --git a/tests/docx.track_changes_insertion_only_ins.native b/tests/docx.track_changes_insertion_only_ins.native new file mode 100644 index 000000000..ca2e46df0 --- /dev/null +++ b/tests/docx.track_changes_insertion_only_ins.native @@ -0,0 +1 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "text",Space,Str "with",Space,Str "two",Space,Str "exciting",Space,Str "insertions."]] -- cgit v1.2.3