diff options
author | Jesse Rosenthal <jrosenthal@jhu.edu> | 2016-06-22 13:49:19 -0400 |
---|---|---|
committer | Jesse Rosenthal <jrosenthal@jhu.edu> | 2016-06-23 10:50:46 -0400 |
commit | 8bb739f7ff353722981fe442ae0c137910604850 (patch) | |
tree | fa239e4f13e0174456fac3f96e84120d1b2f3cac /src | |
parent | cbc2c15f0ffb1737a0f6540fb282adab7094423b (diff) | |
download | pandoc-8bb739f7ff353722981fe442ae0c137910604850.tar.gz |
Docx reader: add simple comment functionality.
This adds simple track-changes comment parsing to the docx reader. It is
turned on with `--track-changes=all`. All comments are converted to
inlines, which can list some information. In the future a warning will
be added for comments with formatting that seems like it will be
excessively denatured.
Note that comments can extend across blocks. For that reason there are
two spans: `comment-start` and `comment-end`. `comment-start` will
contain the comment. `comment-end` will always be empty. The two will be
associated by a numeric id.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 58 |
2 files changed, 73 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9c7c3b264..610477f02 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -339,6 +339,22 @@ parPartToInlines (Deletion _ author date runs) = do ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["deletion"], [("author", author), ("date", date)]) return $ spanWith attr ils +parPartToInlines (CommentStart cmtId author date bodyParts) = do + opts <- asks docxOptions + case readerTrackChanges opts of + AllChanges -> do + blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts + let ils = fromList $ blocksToInlines $ toList blks + attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)]) + return $ spanWith attr ils + _ -> return mempty +parPartToInlines (CommentEnd cmtId) = do + opts <- asks docxOptions + case readerTrackChanges opts of + AllChanges -> do + let attr = ("", ["comment-end"], [("id", cmtId)]) + return $ spanWith attr mempty + _ -> return mempty parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = return mempty parPartToInlines (BookMark _ anchor) = diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 7265ef8dd..055a67288 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -73,6 +73,7 @@ import Text.Pandoc.Readers.Docx.Util import Data.Char (readLitChar, ord, chr, isDigit) data ReaderEnv = ReaderEnv { envNotes :: Notes + , envComments :: Comments , envNumbering :: Numbering , envRelationships :: [Relationship] , envMedia :: Media @@ -160,6 +161,9 @@ data Notes = Notes NameSpaces (Maybe (M.Map String Element)) deriving Show +data Comments = Comments NameSpaces (M.Map String Element) + deriving Show + data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer , rightParIndent :: Maybe Integer , hangingParIndent :: Maybe Integer} @@ -210,6 +214,8 @@ type Extent = Maybe (Double, Double) data ParPart = PlainRun Run | Insertion ChangeId Author ChangeDate [Run] | Deletion ChangeId Author ChangeDate [Run] + | CommentStart CommentId Author CommentDate [BodyPart] + | CommentEnd CommentId | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] @@ -259,8 +265,10 @@ type URL = String type BookMarkId = String type RelId = String type ChangeId = String +type CommentId = String type Author = String type ChangeDate = String +type CommentDate = String archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive @@ -268,12 +276,13 @@ archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) archiveToDocxWithWarnings archive = do let notes = archiveToNotes archive + comments = archiveToComments archive numbering = archiveToNumbering archive rels = archiveToRelationships archive media = archiveToMedia archive (styles, parstyles) = archiveToStyles archive rEnv = - ReaderEnv notes numbering rels media Nothing styles parstyles InDocument + ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument rState = ReaderState { stateWarnings = [] } (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState case eitherDoc of @@ -384,6 +393,20 @@ archiveToNotes zf = in Notes ns fn en +archiveToComments :: Archive -> Comments +archiveToComments zf = + let cmtsElem = findEntryByPath "word/comments.xml" zf + >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + cmts_namespaces = case cmtsElem of + Just e -> elemToNameSpaces e + Nothing -> [] + cmts = (elemToComments cmts_namespaces) <$> cmtsElem + in + case cmts of + Just c -> Comments cmts_namespaces c + Nothing -> Comments cmts_namespaces M.empty + + filePathToRelType :: FilePath -> Maybe DocumentLocation filePathToRelType "word/_rels/document.xml.rels" = Just InDocument filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote @@ -504,6 +527,18 @@ elemToNotes ns notetype element Just $ M.fromList $ pairs elemToNotes _ _ _ = Nothing +elemToComments :: NameSpaces -> Element -> M.Map String Element +elemToComments ns element + | isElem ns "w" "comments" element = + let pairs = mapMaybe + (\e -> findAttr (elemName ns "w" "id") e >>= + (\a -> Just (a, e))) + (findChildren (elemName ns "w" "comment") element) + in + M.fromList $ pairs +elemToComments _ _ = M.empty + + --------------------------------------------- --------------------------------------------- @@ -697,10 +732,31 @@ elemToParPart ns element runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element + | isElem ns "w" "commentRangeStart" element + , Just cmtId <- findAttr (elemName ns "w" "id") element = do + (Comments _ commentMap) <- asks envComments + case M.lookup cmtId commentMap of + Just cmtElem -> elemToCommentStart ns cmtElem + Nothing -> throwError WrongElem +elemToParPart ns element + | isElem ns "w" "commentRangeEnd" element + , Just cmtId <- findAttr (elemName ns "w" "id") element = + return $ CommentEnd cmtId +elemToParPart ns element | isElem ns "m" "oMath" element = (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath) elemToParPart _ _ = throwError WrongElem +elemToCommentStart :: NameSpaces -> Element -> D ParPart +elemToCommentStart ns element + | isElem ns "w" "comment" element + , Just cmtId <- findAttr (elemName ns "w" "id") element + , Just cmtAuthor <- findAttr (elemName ns "w" "author") element + , Just cmtDate <- findAttr (elemName ns "w" "date") element = do + bps <- mapD (elemToBodyPart ns) (elChildren element) + return $ CommentStart cmtId cmtAuthor cmtDate bps +elemToCommentStart _ _ = throwError WrongElem + lookupFootnote :: String -> Notes -> Maybe Element lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s) |