From c343f1a90bc35d745de673de5ff771ddbe60be54 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal
Date: Wed, 25 Jun 2014 08:10:19 -0400
Subject: Docx Reader: Add change types Insertion and deletion.

Dates are just strings for now.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 18200bcf9..1cb5fe2e3 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -455,6 +455,8 @@ elemToCell ns element
 elemToCell _ _ = Nothing
 
 data ParPart = PlainRun Run
+             | Insertion ChangeId Author ChangeDate [Run]
+             | Deletion ChangeId Author ChangeDate [Run]
              | BookMark BookMarkId Anchor
              | InternalHyperLink Anchor [Run]
              | ExternalHyperLink RelId [Run]
@@ -604,4 +606,6 @@ type Target = String
 type Anchor = String
 type BookMarkId = String
 type RelId = String
-
+type ChangeId = String
+type Author = String
+type ChangeDate = String
-- 
cgit v1.2.3

From 38e1d3e95b8240eeb35db0a1a56e308cfb4835e4 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal
Date: Wed, 25 Jun 2014 10:32:48 -0400
Subject: Docx reader: Parse Insertions and Deletions.

This is just for the Parse module, reading it into the Docx format. It
still has to be translated into pandoc.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 1cb5fe2e3..c76ef7511 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -541,7 +541,7 @@ elemToRun _ _ = Nothing
 
 elemToRunElem :: NameSpaces -> Element -> Maybe RunElem
 elemToRunElem ns element
-  | qName (elName element) == "t" &&
+  | (qName (elName element) == "t" || qName (elName element) == "delText") &&
     qURI (elName element) == (lookup "w" ns) =
       Just $ TextRun (strContent element)
   | qName (elName element) == "br" &&
@@ -581,6 +581,22 @@ elemToParPart ns element
         Nothing -> do
           r <- elemToRun ns element
           return $ PlainRun r
+elemToParPart ns element
+  | qName (elName element) == "ins" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+      cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element
+      cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element
+      let runs = mapMaybe (elemToRun ns) (elChildren element)
+      return $ Insertion cId cAuthor cDate runs
+elemToParPart ns element
+  | qName (elName element) == "del" &&
+    qURI (elName element) == (lookup "w" ns) = do
+      cId <- findAttr (QName "id" (lookup "w" ns) (Just "w")) element
+      cAuthor <- findAttr (QName "author" (lookup "w" ns) (Just "w")) element
+      cDate <- findAttr (QName "date" (lookup "w" ns) (Just "w")) element
+      let runs = mapMaybe (elemToRun ns) (elChildren element)
+      return $ Deletion cId cAuthor cDate runs
 elemToParPart ns element
   | qName (elName element) == "bookmarkStart" &&
     qURI (elName element) == (lookup "w" ns) = do
-- 
cgit v1.2.3

From ed44e4ca8c8f3d3c4c7ac65b98f16732c8173b88 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal
Date: Wed, 25 Jun 2014 10:38:01 -0400
Subject: Docx reader: Add rudimentary track changes support.

This will only read the insertions, and ignore the deletions.
---
 src/Text/Pandoc/Readers/Docx.hs | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index b787ca9fb..130e2a1e2 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -234,6 +234,9 @@ runToInlines opts docx@(Docx _ notes _ _ _) (Endnote fnId) =
 
 parPartToInlines :: ReaderOptions -> Docx -> ParPart -> [Inline]
 parPartToInlines opts docx (PlainRun r) = runToInlines opts docx r
+parPartToInlines opts docx (Insertion _ _ _ runs) =
+  concatMap (runToInlines opts docx) runs
+parPartToInlines _ _ (Deletion _ _ _ _) = []
 parPartToInlines _ _ (BookMark _ anchor) | anchor `elem` dummyAnchors = []
 parPartToInlines _ _ (BookMark _ anchor) = [Span (anchor, ["anchor"], []) []]
 parPartToInlines _ (Docx _ _ _ rels _) (Drawing relid) =
-- 
cgit v1.2.3

From 9614ddfedc18cccbf9fbe1a23fae200c7e67d72d Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal
Date: Wed, 25 Jun 2014 11:00:15 -0400
Subject: Docx reader: Remove unnecessary filter in Parse.

mapMaybe does the filtering for us.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index c76ef7511..dbbd65681 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -281,10 +281,6 @@ elemToBody ns element | qName (elName element) == "body" && qURI (elName element
   $ map (elemToBodyPart ns) $ filterChildrenName (isParOrTbl ns) element
 elemToBody _ _ = Nothing
 
-isRunOrLinkOrBookmark :: NameSpaces -> QName -> Bool
-isRunOrLinkOrBookmark ns q = qName q `elem` ["r", "hyperlink", "bookmarkStart"] &&
-                             qURI q == (lookup "w" ns)
-
 elemToNumInfo :: NameSpaces -> Element -> Maybe (String, String)
 elemToNumInfo ns element
   | qName (elName element) == "p" &&
@@ -319,9 +315,8 @@ elemToBodyPart ns element
   | qName (elName element) == "p" &&
     qURI (elName element) == (lookup "w" ns) =
       let parstyle = elemToParagraphStyle ns element
-          parparts = mapMaybe id
-                     $ map (elemToParPart ns)
-                     $ filterChildrenName (isRunOrLinkOrBookmark ns) element
+          parparts = mapMaybe (elemToParPart ns)
+                     $ elChildren element
       in
        case elemToNumInfo ns element of
          Just (numId, lvl) -> Just $ ListItem parstyle numId lvl parparts
-- 
cgit v1.2.3