From eda5540719e9771b48f15aa7f431033163e1f161 Mon Sep 17 00:00:00 2001 From: Diego Balseiro Date: Tue, 6 Oct 2020 23:03:00 -0500 Subject: DOCX reader: Allow empty dates in comments and tracked changes (#6726) For security reasons, some legal firms delete the date from comments and tracked changes. * Make date optional (Maybe) in tracked changes and comments datatypes * Add tests --- src/Text/Pandoc/Readers/Docx.hs | 15 ++++++++++----- src/Text/Pandoc/Readers/Docx/Parse.hs | 14 +++++++------- src/Text/Pandoc/Writers/Docx.hs | 18 +++++++----------- 3 files changed, 24 insertions(+), 23 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 4bfea6534..31c0660fd 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -365,7 +365,7 @@ parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author dat RejectChanges -> return mempty AllChanges -> do ils <- smushInlines <$> mapM runToInlines runs - let attr = ("", ["insertion"], [("author", author), ("date", date)]) + let attr = ("", ["insertion"], addAuthorAndDate author date) return $ spanWith attr ils parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do opts <- asks docxOptions @@ -374,7 +374,7 @@ parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date RejectChanges -> smushInlines <$> mapM runToInlines runs AllChanges -> do ils <- smushInlines <$> mapM runToInlines runs - let attr = ("", ["deletion"], [("author", author), ("date", date)]) + let attr = ("", ["deletion"], addAuthorAndDate author date) return $ spanWith attr ils parPartToInlines' (CommentStart cmtId author date bodyParts) = do opts <- asks docxOptions @@ -382,7 +382,7 @@ parPartToInlines' (CommentStart cmtId author date bodyParts) = do AllChanges -> do blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts ils <- blocksToInlinesWarn cmtId blks - let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)]) + let attr = ("", ["comment-start"], ("id", cmtId) : addAuthorAndDate author date) return $ spanWith attr ils _ -> return mempty parPartToInlines' (CommentEnd cmtId) = do @@ -593,7 +593,7 @@ bodyPartToBlocks (Paragraph pPr parparts) return mempty (Just (TrackedChange Insertion (ChangeInfo _ cAuthor cDate)) , AllChanges) -> do - let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)]) + let attr = ("", ["paragraph-insertion"], addAuthorAndDate cAuthor cDate) insertMark = spanWith attr mempty transform <- parStyleToTransform pPr' return $ transform $ @@ -605,7 +605,7 @@ bodyPartToBlocks (Paragraph pPr parparts) handleInsertion (Just (TrackedChange Deletion (ChangeInfo _ cAuthor cDate)) , AllChanges) -> do - let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)]) + let attr = ("", ["paragraph-deletion"], addAuthorAndDate cAuthor cDate) insertMark = spanWith attr mempty transform <- parStyleToTransform pPr' return $ transform $ @@ -732,3 +732,8 @@ docxToOutput :: PandocMonad m docxToOutput opts (Docx (Document _ body)) = let dEnv = def { docxOptions = opts} in evalDocxContext (bodyToOutput body) dEnv def + +addAuthorAndDate :: T.Text -> Maybe T.Text -> [(T.Text, T.Text)] +addAuthorAndDate author mdate = + ("author", author) : maybe [] (\date -> [("date", date)]) mdate + diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 698d7a88a..fdcffcc3f 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -213,7 +213,7 @@ data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer data ChangeType = Insertion | Deletion deriving Show -data ChangeInfo = ChangeInfo ChangeId Author ChangeDate +data ChangeInfo = ChangeInfo ChangeId Author (Maybe ChangeDate) deriving Show data TrackedChange = TrackedChange ChangeType ChangeInfo @@ -276,7 +276,7 @@ type Extent = Maybe (Double, Double) data ParPart = PlainRun Run | ChangedRuns TrackedChange [Run] - | CommentStart CommentId Author CommentDate [BodyPart] + | CommentStart CommentId Author (Maybe CommentDate) [BodyPart] | CommentEnd CommentId | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] @@ -852,7 +852,7 @@ elemToCommentStart ns element | isElem ns "w" "comment" element , Just cmtId <- findAttrTextByName ns "w" "id" element , Just cmtAuthor <- findAttrTextByName ns "w" "author" element - , Just cmtDate <- findAttrTextByName ns "w" "date" element = do + , cmtDate <- findAttrTextByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem @@ -958,14 +958,14 @@ getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element , Just cId <- findAttrTextByName ns "w" "id" element , Just cAuthor <- findAttrTextByName ns "w" "author" element - , Just cDate <- findAttrTextByName ns "w" "date" element = - Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) + , mcDate <- findAttrTextByName ns "w" "date" element = + Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element , Just cId <- findAttrTextByName ns "w" "id" element , Just cAuthor <- findAttrTextByName ns "w" "author" element - , Just cDate <- findAttrTextByName ns "w" "date" element = - Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) + , mcDate <- findAttrTextByName ns "w" "date" element = + Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate) getTrackedChange _ _ = Nothing elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a7720eb53..93f7dd799 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1244,33 +1244,29 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do else id) getChangeAuthorDate = do defaultAuthor <- asks envChangesAuthor - defaultDate <- asks envChangesDate let author = fromMaybe defaultAuthor (lookup "author" kvs) - date = fromMaybe defaultDate (lookup "date" kvs) - return (author, date) + let mdate = lookup "date" kvs + return $ ("w:author", T.unpack author) : + maybe [] (\date -> [("w:date", T.unpack date)]) mdate insmod <- if "insertion" `elem` classes then do - (author, date) <- getChangeAuthorDate + changeAuthorDate <- getChangeAuthorDate insId <- gets stInsId modify $ \s -> s{stInsId = insId + 1} return $ \f -> do x <- f return [ mknode "w:ins" - [("w:id", show insId), - ("w:author", T.unpack author), - ("w:date", T.unpack date)] x ] + (("w:id", show insId) : changeAuthorDate) x] else return id delmod <- if "deletion" `elem` classes then do - (author, date) <- getChangeAuthorDate + changeAuthorDate <- getChangeAuthorDate delId <- gets stDelId modify $ \s -> s{stDelId = delId + 1} return $ \f -> local (\env->env{envInDel=True}) $ do x <- f return [mknode "w:del" - [("w:id", show delId), - ("w:author", T.unpack author), - ("w:date", T.unpack date)] x] + (("w:id", show delId) : changeAuthorDate) x] else return id contents <- insmod $ delmod $ dirmod $ stylemod $ pmod $ inlinesToOpenXML opts ils -- cgit v1.2.3