aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJesse Rosenthal <jrosenthal@jhu.edu>2017-12-30 22:17:06 -0500
committerJesse Rosenthal <jrosenthal@jhu.edu>2017-12-30 22:17:06 -0500
commite90c714c73be58ef09b08272c676f96e2a21c767 (patch)
tree15fa34f51bd263f383a06d89083f0db5f4a2e4f3 /src/Text/Pandoc/Readers
parente0cf8e64b5c88f342fd8521509a2e4723e772828 (diff)
downloadpandoc-e90c714c73be58ef09b08272c676f96e2a21c767.tar.gz
Docx reader: Remove unused anchors.
Docx produces a lot of anchors with nothing pointing to them -- we now remove these to produce cleaner output. Note that this has to occur at the end of the process because it has to follow link/anchor rewriting. Closes #3679.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs32
1 files changed, 27 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index d73da3085..248cb0b84 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -118,6 +118,7 @@ readDocx _ _ =
throwError $ PandocSomeError "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
+ , docxAnchorSet :: Set.Set String
, docxMediaBag :: MediaBag
, docxDropCap :: Inlines
, docxWarnings :: [String]
@@ -128,6 +129,7 @@ data DState = DState { docxAnchorMap :: M.Map String String
instance Default DState where
def = DState { docxAnchorMap = M.empty
+ , docxAnchorSet = mempty
, docxMediaBag = mempty
, docxDropCap = mempty
, docxWarnings = []
@@ -561,7 +563,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
]
modify $ \st -> st{ docxListState = M.insert (numId, lvl) start listState}
blks <- bodyPartToBlocks (Paragraph pPr parparts)
- return $ divWith ("", ["list-item"], kvs) blks
+ return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr}
in
@@ -603,21 +605,41 @@ bodyPartToBlocks (OMathPara e) =
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
rewriteLink' l@(Link attr ils ('#':target, title)) = do
anchorMap <- gets docxAnchorMap
- return $ case M.lookup target anchorMap of
- Just newTarget -> Link attr ils ('#':newTarget, title)
- Nothing -> l
+ case M.lookup target anchorMap of
+ Just newTarget -> do
+ modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
+ return $ Link attr ils ('#':newTarget, title)
+ Nothing -> do
+ modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
+ return l
rewriteLink' il = return il
rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
rewriteLinks = mapM (walkM rewriteLink')
+removeOrphanAnchors'' :: PandocMonad m => Inline -> DocxContext m [Inline]
+removeOrphanAnchors'' s@(Span (ident, classes, _) ils)
+ | "anchor" `elem` classes = do
+ anchorSet <- gets docxAnchorSet
+ return $ if ident `Set.member` anchorSet
+ then [s]
+ else ils
+removeOrphanAnchors'' il = return [il]
+
+removeOrphanAnchors' :: PandocMonad m => [Inline] -> DocxContext m [Inline]
+removeOrphanAnchors' ils = liftM concat $ mapM removeOrphanAnchors'' ils
+
+removeOrphanAnchors :: PandocMonad m => [Block] -> DocxContext m [Block]
+removeOrphanAnchors = mapM (walkM removeOrphanAnchors')
+
bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
- return (meta, blks')
+ blks'' <- removeOrphanAnchors blks'
+ return (meta, blks'')
docxToOutput :: PandocMonad m
=> ReaderOptions