aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2021-08-19 10:49:20 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2021-08-19 10:50:34 -0700
commit: ef4efa5373a419edbb99355808ddc63d35ddef20 (patch)
tree: b66613606c39a828d5d07ba181045462fe5d3c40 /src/Text
parent: 5159d6653b0e04f03066a50794bc24195bdf81aa (diff)
download: pandoc-ef4efa5373a419edbb99355808ddc63d35ddef20.tar.gz
Improve docx reader's robustness in extracting images.
The docx reader made a couple of assumptions about how docx containers are laid out that do not always hold, with the result that some images in documents were not found or extracted. Closes #7511.
Diffstat (limited to 'src/Text')
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Parse.hs | 11
1 files changed, 6 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index dbb16a821..eb048ab14 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -507,9 +507,7 @@ archiveToRelationships archive docXmlPath =
filePathIsMedia :: FilePath -> Bool
filePathIsMedia fp =
- let (dir, _) = splitFileName fp
- in
- (dir == "word/media/")
+ "media" `elem` splitPath (takeDirectory fp)
lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
@@ -774,8 +772,11 @@ expandDrawingId s = do
target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
- bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
- case bytes of
+ media <- asks envMedia
+ let filepath' = case filepath of
+ ('/':rest) -> rest
+ _ -> "word/" ++ filepath
+ case lookup filepath' media of
Just bs -> return (filepath, bs)
Nothing -> throwError DocxError
Nothing -> throwError DocxError