diff options
-rw-r--r-- | MANUAL.txt | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 53 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 55 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 32 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 2 | ||||
-rw-r--r-- | test/Tests/Readers/Docx.hs | 12 | ||||
-rw-r--r-- | test/docx/paragraph_insertion_deletion.docx | bin | 0 -> 12066 bytes | |||
-rw-r--r-- | test/docx/paragraph_insertion_deletion_accept.native | 2 | ||||
-rw-r--r-- | test/docx/paragraph_insertion_deletion_all.native | 3 | ||||
-rw-r--r-- | test/docx/paragraph_insertion_deletion_reject.native | 2 |
10 files changed, 125 insertions, 43 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index 659e46dc3..f7932df8c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -554,8 +554,11 @@ Reader options `insertion`, `deletion`, `comment-start`, and `comment-end` classes, respectively. The author and time of change is included. `all` is useful for scripting: only accepting changes - from a certain reviewer, say, or before a certain date. This - option only affects the docx reader. + from a certain reviewer, say, or before a certain date. If a + paragraph is inserted or deleted, `track-changes=all` produces a + span with the class `paragraph-insertion`/`paragraph-deletion` + before the affected paragraph break. This option only affects the + docx reader. `--extract-media=`*DIR* diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index a2f22c1ea..f01a94550 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -1,7 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} - {- Copyright (C) 2014-2017 Jesse Rosenthal <jrosenthal@jhu.edu> @@ -127,6 +126,7 @@ data DState = DState { docxAnchorMap :: M.Map String String -- keep track of (numId, lvl) values for -- restarting , docxListState :: M.Map (String, String) Integer + , docxPrevPara :: Inlines } instance Default DState where @@ -137,6 +137,7 @@ instance Default DState where , docxDropCap = mempty , docxWarnings = [] , docxListState = M.empty + , docxPrevPara = mempty } data DEnv = DEnv { docxOptions :: ReaderOptions @@ -364,7 +365,7 @@ parPartToInlines parPart = parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines parPartToInlines' (PlainRun r) = runToInlines r -parPartToInlines' (Insertion _ author date runs) = do +parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author date)) runs) = do opts <- asks docxOptions case readerTrackChanges opts of AcceptChanges -> smushInlines <$> mapM runToInlines runs @@ -373,7 +374,7 @@ parPartToInlines' (Insertion _ author date runs) = do ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["insertion"], [("author", author), ("date", date)]) return $ spanWith attr ils -parPartToInlines' (Deletion _ author date runs) = do +parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do opts <- asks docxOptions case readerTrackChanges opts of AcceptChanges -> return mempty @@ -562,16 +563,56 @@ bodyPartToBlocks (Paragraph pPr parparts) headerWith ("", delete style (pStyle pPr), []) n ils | otherwise = do ils <- (trimSps . smushInlines) <$> mapM parPartToInlines parparts + prevParaIls <- gets docxPrevPara dropIls <- gets docxDropCap let ils' = dropIls <> ils if dropCap pPr then do modify $ \s -> s { docxDropCap = ils' } return mempty else do modify $ \s -> s { docxDropCap = mempty } + let ils'' = prevParaIls <> + (if isNull prevParaIls then mempty else space) <> + ils' opts <- asks docxOptions - if isNull ils' && not (isEnabled Ext_empty_paragraphs opts) - then return mempty - else return $ parStyleToTransform pPr $ para ils' + case () of + + _ | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) -> + return mempty + _ | Just (TrackedChange Insertion _) <- pChange pPr + , AcceptChanges <- readerTrackChanges opts -> do + modify $ \s -> s {docxPrevPara = mempty} + return $ parStyleToTransform pPr $ para ils'' + _ | Just (TrackedChange Insertion _) <- pChange pPr + , RejectChanges <- readerTrackChanges opts -> do + modify $ \s -> s {docxPrevPara = ils''} + return mempty + _ | Just (TrackedChange Insertion cInfo) <- pChange pPr + , AllChanges <- readerTrackChanges opts + , ChangeInfo _ cAuthor cDate <- cInfo -> do + let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)]) + insertMark = spanWith attr mempty + return $ + parStyleToTransform pPr $ + para $ ils'' <> insertMark + _ | Just (TrackedChange Deletion _) <- pChange pPr + , AcceptChanges <- readerTrackChanges opts -> do + modify $ \s -> s {docxPrevPara = ils''} + return mempty + _ | Just (TrackedChange Deletion _) <- pChange pPr + , RejectChanges <- readerTrackChanges opts -> do + modify $ \s -> s {docxPrevPara = mempty} + return $ parStyleToTransform pPr $ para ils'' + _ | Just (TrackedChange Deletion cInfo) <- pChange pPr + , AllChanges <- readerTrackChanges opts + , ChangeInfo _ cAuthor cDate <- cInfo -> do + let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)]) + insertMark = spanWith attr mempty + return $ + parStyleToTransform pPr $ + para $ ils'' <> insertMark + _ | otherwise -> do + modify $ \s -> s {docxPrevPara = mempty} + return $ parStyleToTransform pPr $ para ils'' bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do -- We check whether this current numId has previously been used, -- since Docx expects us to pick up where we left off. diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 071f901b6..1fcbdf386 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -51,6 +51,9 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , ParagraphStyle(..) , Row(..) , Cell(..) + , TrackedChange(..) + , ChangeType(..) + , ChangeInfo(..) , archiveToDocx , archiveToDocxWithWarnings ) where @@ -198,12 +201,22 @@ data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer , hangingParIndent :: Maybe Integer} deriving Show +data ChangeType = Insertion | Deletion + deriving Show + +data ChangeInfo = ChangeInfo ChangeId Author ChangeDate + deriving Show + +data TrackedChange = TrackedChange ChangeType ChangeInfo + deriving Show + data ParagraphStyle = ParagraphStyle { pStyle :: [String] , indentation :: Maybe ParIndentation , dropCap :: Bool , pHeading :: Maybe (String, Int) , pNumInfo :: Maybe (String, String) , pBlockQuote :: Maybe Bool + , pChange :: Maybe TrackedChange } deriving Show @@ -214,6 +227,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] , pHeading = Nothing , pNumInfo = Nothing , pBlockQuote = Nothing + , pChange = Nothing } @@ -241,8 +255,7 @@ data Cell = Cell [BodyPart] type Extent = Maybe (Double, Double) data ParPart = PlainRun Run - | Insertion ChangeId Author ChangeDate [Run] - | Deletion ChangeId Author ChangeDate [Run] + | ChangedRuns TrackedChange [Run] | CommentStart CommentId Author CommentDate [BodyPart] | CommentEnd CommentId | BookMark BookMarkId Anchor @@ -727,19 +740,9 @@ elemToParPart ns element | isElem ns "w" "r" element = elemToRun ns element >>= (\r -> return $ PlainRun r) elemToParPart ns element - | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = do - runs <- mapD (elemToRun ns) (elChildren element) - return $ Insertion cId cAuthor cDate runs -elemToParPart ns element - | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = do - runs <- mapD (elemToRun ns) (elChildren element) - return $ Deletion cId cAuthor cDate runs + | Just change <- getTrackedChange ns element = do + runs <- mapD (elemToRun ns) (elChildren element) + return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "smartTag" element = do runs <- mapD (elemToRun ns) (elChildren element) @@ -890,6 +893,21 @@ getParStyleField field stylemap styles = Just y getParStyleField _ _ _ = Nothing +getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange +getTrackedChange ns element + | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , Just cDate <- findAttrByName ns "w" "date" element = + Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) +getTrackedChange ns element + | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , Just cDate <- findAttrByName ns "w" "date" element = + Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) +getTrackedChange _ _ = Nothing + elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = @@ -913,6 +931,13 @@ elemToParagraphStyle ns element sty , pHeading = getParStyleField headingLev sty style , pNumInfo = getParStyleField numInfo sty style , pBlockQuote = getParStyleField isBlockQuote sty style + , pChange = findChildByName ns "w" "rPr" pPr >>= + filterChild (\e -> isElem ns "w" "ins" e || + isElem ns "w" "moveTo" e || + isElem ns "w" "del" e || + isElem ns "w" "moveFrom" e + ) >>= + getTrackedChange ns } elemToParagraphStyle _ _ _ = defaultParagraphStyle diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index f5e6512f8..4a09c2aad 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -148,25 +148,19 @@ litChar = escapedChar' -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) -inlinesInBalancedBrackets = do - char '[' - pos <- getPosition - (_, raw) <- withRaw $ charsInBalancedBrackets 1 - guard $ not $ null raw - parseFromString' (setPosition pos >> - trimInlinesF <$> inlines) (init raw) - -charsInBalancedBrackets :: PandocMonad m => Int -> MarkdownParser m () -charsInBalancedBrackets 0 = return () -charsInBalancedBrackets openBrackets = - (char '[' >> charsInBalancedBrackets (openBrackets + 1)) - <|> (char ']' >> charsInBalancedBrackets (openBrackets - 1)) - <|> (( (() <$ code) - <|> (() <$ escapedChar') - <|> (newline >> notFollowedBy blankline) - <|> skipMany1 (noneOf "[]`\n\\") - <|> (() <$ count 1 (oneOf "`\\")) - ) >> charsInBalancedBrackets openBrackets) +inlinesInBalancedBrackets = try $ char '[' >> go 1 + where go :: PandocMonad m => Int -> MarkdownParser m (F Inlines) + go 0 = return mempty + go openBrackets = + (mappend <$> (bracketedSpan <|> link <|> image) <*> + go openBrackets) + <|> ((if openBrackets > 1 + then (return (B.str "]") <>) + else id) <$> + (char ']' >> go (openBrackets - 1))) + <|> ((return (B.str "[") <>) <$> + (char '[' >> go (openBrackets + 1))) + <|> (mappend <$> inline <*> go openBrackets) -- -- document structure diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index ac8bef003..6343b314e 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1132,7 +1132,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do modify $ \s -> s{stDynamicTextProps = Set.insert sty (stDynamicTextProps s)} - return $ withTextProp (rCustomStyle sty) + return $ withTextPropM (rStyleM sty) _ -> return id let dirmod = case lookup "dir" kvs of Just "rtl" -> local (\env -> env { envRTL = True }) diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index d79b8a063..68c2e3476 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -335,6 +335,18 @@ tests = [ testGroup "inlines" "comments (all comments)" "docx/comments.docx" "docx/comments.native" + , testCompareWithOpts def{readerTrackChanges=AcceptChanges} + "paragraph insertion/deletion (accept)" + "docx/paragraph_insertion_deletion.docx" + "docx/paragraph_insertion_deletion_accept.native" + , testCompareWithOpts def{readerTrackChanges=RejectChanges} + "paragraph insertion/deletion (reject)" + "docx/paragraph_insertion_deletion.docx" + "docx/paragraph_insertion_deletion_reject.native" + , testCompareWithOpts def{readerTrackChanges=AllChanges} + "paragraph insertion/deletion (all)" + "docx/paragraph_insertion_deletion.docx" + "docx/paragraph_insertion_deletion_all.native" , testForWarningsWithOpts def{readerTrackChanges=AcceptChanges} "comment warnings (accept -- no warnings)" "docx/comments_warning.docx" diff --git a/test/docx/paragraph_insertion_deletion.docx b/test/docx/paragraph_insertion_deletion.docx Binary files differnew file mode 100644 index 000000000..9b62f9036 --- /dev/null +++ b/test/docx/paragraph_insertion_deletion.docx diff --git a/test/docx/paragraph_insertion_deletion_accept.native b/test/docx/paragraph_insertion_deletion_accept.native new file mode 100644 index 000000000..3b0f3b18a --- /dev/null +++ b/test/docx/paragraph_insertion_deletion_accept.native @@ -0,0 +1,2 @@ +[Para [Str "This",Space,Str "is",Space,Str "a"] +,Para [Str "split",Space,Str "Paragraph."]] diff --git a/test/docx/paragraph_insertion_deletion_all.native b/test/docx/paragraph_insertion_deletion_all.native new file mode 100644 index 000000000..4a12938ee --- /dev/null +++ b/test/docx/paragraph_insertion_deletion_all.native @@ -0,0 +1,3 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Span ("",["paragraph-insertion"],[("author","Seeley, Jason"),("date","2017-09-17T16:39:00Z")]) []] +,Para [Str "split",Span ("",["paragraph-deletion"],[("author","Seeley, Jason"),("date","2017-09-17T16:39:00Z")]) []] +,Para [Str "Paragraph."]] diff --git a/test/docx/paragraph_insertion_deletion_reject.native b/test/docx/paragraph_insertion_deletion_reject.native new file mode 100644 index 000000000..ef7b06423 --- /dev/null +++ b/test/docx/paragraph_insertion_deletion_reject.native @@ -0,0 +1,2 @@ +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "split"] +,Para [Str "Paragraph."]] |