diff options
author | Jesse Rosenthal <jrosenthal@jhu.edu> | 2017-12-31 09:09:22 -0500 |
---|---|---|
committer | Jesse Rosenthal <jrosenthal@jhu.edu> | 2017-12-31 09:29:51 -0500 |
commit | 836153de43933dca3205e4459f55979d467f927e (patch) | |
tree | daeb7a8301f6a63d3fc0c458013c305ae1ac4e2c | |
parent | a274e15f0d236140fec7e4554117fcb55a219566 (diff) | |
download | pandoc-836153de43933dca3205e4459f55979d467f927e.tar.gz |
Docx Reader: Combine adjacent anchors.
There isn't any reason to have numberous anchors in the same place,
since we can't maintain docx's non-nesting overlapping. So we reduce
to a single anchor, and have all links pointing to one of the
overlapping anchors point to that one. This changes the behavior from
commit e90c714c7 slightly (use the first anchor instead of the last)
so we change the expected test result.
Note that because this produces a state that has to be set after every
invocation of `parPartToInlines`, we make the main function into a
primed subfunction `parPartToInlines'`, and make `parPartToInlines` a
wrapper around that.
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 67 | ||||
-rw-r--r-- | test/docx/unused_anchors.native | 4 |
2 files changed, 49 insertions, 22 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 248cb0b84..6ca1590a4 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -82,6 +82,7 @@ import qualified Data.ByteString.Lazy as B import Data.Default (Default) import Data.List (delete, intersect) import qualified Data.Map as M +import Data.Maybe (isJust) import Data.Sequence (ViewL (..), viewl) import qualified Data.Sequence as Seq import qualified Data.Set as Set @@ -119,6 +120,7 @@ readDocx _ _ = data DState = DState { docxAnchorMap :: M.Map String String , docxAnchorSet :: Set.Set String + , docxImmedPrevAnchor :: Maybe String , docxMediaBag :: MediaBag , docxDropCap :: Inlines , docxWarnings :: [String] @@ -130,6 +132,7 @@ data DState = DState { docxAnchorMap :: M.Map String String instance Default DState where def = DState { docxAnchorMap = M.empty , docxAnchorSet = mempty + , docxImmedPrevAnchor = Nothing , docxMediaBag = mempty , docxDropCap = mempty , docxWarnings = [] @@ -341,9 +344,26 @@ blocksToInlinesWarn cmtId blks = do "Docx comment " ++ cmtId ++ " will not retain formatting" return $ blocksToInlines' blkList +-- The majority of work in this function is done in the primted +-- subfunction `partPartToInlines'`. We make this wrapper so that we +-- don't have to modify `docxImmedPrevAnchor` state after every function. parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines -parPartToInlines (PlainRun r) = runToInlines r -parPartToInlines (Insertion _ author date runs) = do +parPartToInlines parPart = + case parPart of + (BookMark _ anchor) | not $ anchor `elem` dummyAnchors -> do + inHdrBool <- asks docxInHeaderBlock + ils <- parPartToInlines' parPart + immedPrevAnchor <- gets docxImmedPrevAnchor + unless (isJust immedPrevAnchor || inHdrBool) + (modify $ \s -> s{ docxImmedPrevAnchor = Just anchor}) + return ils + _ -> do + ils <- parPartToInlines' parPart + modify $ \s -> s{ docxImmedPrevAnchor = Nothing} + return ils +parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines +parPartToInlines' (PlainRun r) = runToInlines r +parPartToInlines' (Insertion _ author date runs) = do opts <- asks docxOptions case readerTrackChanges opts of AcceptChanges -> smushInlines <$> mapM runToInlines runs @@ -352,7 +372,7 @@ parPartToInlines (Insertion _ author date runs) = do ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["insertion"], [("author", author), ("date", date)]) return $ spanWith attr ils -parPartToInlines (Deletion _ author date runs) = do +parPartToInlines' (Deletion _ author date runs) = do opts <- asks docxOptions case readerTrackChanges opts of AcceptChanges -> return mempty @@ -361,7 +381,7 @@ parPartToInlines (Deletion _ author date runs) = do ils <- smushInlines <$> mapM runToInlines runs let attr = ("", ["deletion"], [("author", author), ("date", date)]) return $ spanWith attr ils -parPartToInlines (CommentStart cmtId author date bodyParts) = do +parPartToInlines' (CommentStart cmtId author date bodyParts) = do opts <- asks docxOptions case readerTrackChanges opts of AllChanges -> do @@ -370,16 +390,16 @@ parPartToInlines (CommentStart cmtId author date bodyParts) = do let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)]) return $ spanWith attr ils _ -> return mempty -parPartToInlines (CommentEnd cmtId) = do +parPartToInlines' (CommentEnd cmtId) = do opts <- asks docxOptions case readerTrackChanges opts of AllChanges -> do let attr = ("", ["comment-end"], [("id", cmtId)]) return $ spanWith attr mempty _ -> return mempty -parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors = +parPartToInlines' (BookMark _ anchor) | anchor `elem` dummyAnchors = return mempty -parPartToInlines (BookMark _ anchor) = +parPartToInlines' (BookMark _ anchor) = -- We record these, so we can make sure not to overwrite -- user-defined anchor links with header auto ids. do @@ -395,27 +415,34 @@ parPartToInlines (BookMark _ anchor) = -- of rewriting user-defined anchor links. However, since these -- are not defined in pandoc, it seems like a necessary evil to -- avoid an extra pass. - let newAnchor = - if not inHdrBool && anchor `elem` M.elems anchorMap - then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap) - else anchor - unless inHdrBool - (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) - return $ spanWith (newAnchor, ["anchor"], []) mempty -parPartToInlines (Drawing fp title alt bs ext) = do + immedPrevAnchor <- gets docxImmedPrevAnchor + case immedPrevAnchor of + Just prevAnchor -> do + unless inHdrBool + (modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap}) + return mempty + Nothing -> do + let newAnchor = + if not inHdrBool && anchor `elem` M.elems anchorMap + then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap) + else anchor + unless inHdrBool + (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) + return $ spanWith (newAnchor, ["anchor"], []) mempty +parPartToInlines' (Drawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs return $ imageWith (extentToAttr ext) fp title $ text alt -parPartToInlines Chart = +parPartToInlines' Chart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" -parPartToInlines (InternalHyperLink anchor runs) = do +parPartToInlines' (InternalHyperLink anchor runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link ('#' : anchor) "" ils -parPartToInlines (ExternalHyperLink target runs) = do +parPartToInlines' (ExternalHyperLink target runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link target "" ils -parPartToInlines (PlainOMath exps) = +parPartToInlines' (PlainOMath exps) = return $ math $ writeTeX exps -parPartToInlines (SmartTag runs) = do +parPartToInlines' (SmartTag runs) = do smushInlines <$> mapM runToInlines runs isAnchorSpan :: Inline -> Bool diff --git a/test/docx/unused_anchors.native b/test/docx/unused_anchors.native index 334269793..051dfe424 100644 --- a/test/docx/unused_anchors.native +++ b/test/docx/unused_anchors.native @@ -1,3 +1,3 @@ [Header 1 ("my-section",[],[]) [Str "My",Space,Str "Section"] -,Para [Link ("",[],[]) [Str "Here",Space,Str "is",Space,Str "a",Space,Str "link."] ("#Bar","")] -,Para [Span ("Bar",["anchor"],[]) [],Str "Here",Space,Str "is",Space,Str "the",Space,Str "target."]] +,Para [Link ("",[],[]) [Str "Here",Space,Str "is",Space,Str "a",Space,Str "link."] ("#Foo","")] +,Para [Span ("Foo",["anchor"],[]) [],Str "Here",Space,Str "is",Space,Str "the",Space,Str "target."]] |