From ef4efa5373a419edbb99355808ddc63d35ddef20 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 19 Aug 2021 10:49:20 -0700
Subject: Improve docx reader's robustness in extracting images.

The docx reader made a couple assumptions about how docx containers
were laid out that were not always true, with the result that some
images in documents did not get found/extracted.

Closes #7511.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index dbb16a821..eb048ab14 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -507,9 +507,7 @@ archiveToRelationships archive docXmlPath =
 
 filePathIsMedia :: FilePath -> Bool
 filePathIsMedia fp =
-  let (dir, _) = splitFileName fp
-  in
-   (dir == "word/media/")
+  "media" `elem` splitPath (takeDirectory fp)
 
 lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
 lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
@@ -774,8 +772,11 @@ expandDrawingId s = do
   target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
   case target of
     Just filepath -> do
-      bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
-      case bytes of
+      media <- asks envMedia
+      let filepath' = case filepath of
+                        ('/':rest) -> rest
+                        _ -> "word/" ++ filepath
+      case lookup filepath' media of
         Just bs -> return (filepath, bs)
         Nothing -> throwError DocxError
     Nothing -> throwError DocxError
--
cgit v1.2.3