From 3776e828a83048697e5c64d9fb4bedc0145197dc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 10 Jun 2021 16:47:02 -0700 Subject: Fix MediaBag regressions. With the 2.14 release `--extract-media` stopped working as before; there could be mismatches between the paths in the rendered document and the extracted media. This patch makes several changes (while keeping the same API). The `mediaPath` in 2.14 was always constructed from the SHA1 hash of the media contents. Now, we preserve the original path unless it's an absolute path or contains `..` segments (in that case we use a path based on the SHA1 hash of the contents). When constructing a path from the SHA1 hash, we always use the original extension, if there is one. Otherwise we look up an appropriate extension for the mime type. `mediaDirectory` and `mediaItems` now use the `mediaPath`, rather than the mediabag key, for the first component of the tuple. This makes more sense, I think, and fits with the documentation of these functions; eventually, though, we should rework the API so that `mediaItems` returns both the keys and the MediaItems. Rewriting of source paths in `extractMedia` has been fixed. `fillMediaBag` has been modified so that it doesn't modify image paths (that was part of the problem in #7345). We now do path normalization (e.g. `\` separators on Windows) only in writing the media; the paths are left unchanged in the image links (sensibly, since they might be URLs and not file paths). These changes should restore the original behavior from before 2.14. Closes #7345. --- src/Text/Pandoc/Class/IO.hs | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'src/Text/Pandoc/Class/IO.hs') diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 6df39d4d0..169074860 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -200,31 +200,32 @@ alertIndent (l:ls) = do extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc extractMedia dir d = do media <- getMediaBag - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - mapM_ (writeMedia dir media) fps - return $ walk (adjustImagePath dir fps) d + let items = mediaItems media + if null items + then return d + else do + mapM_ (writeMedia dir) items + return $ walk (adjustImagePath dir media) d -- | Write the contents of a media bag to a path. writeMedia :: (PandocMonad m, MonadIO m) - => FilePath -> MediaBag -> FilePath + => FilePath + -> (FilePath, MimeType, BL.ByteString) -> m () -writeMedia dir mediabag subpath = do - let mbcontents = lookupMedia subpath mediabag - case mbcontents of - Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just item -> do - -- we normalize to get proper path separators for the platform - let fullpath = dir normalise (mediaPath item) - liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath $ mediaContents item +writeMedia dir (fp, _mt, bs) = do + -- we normalize to get proper path separators for the platform + let fullpath = normalise $ dir fp + liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) + logIOError $ BL.writeFile fullpath bs -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; -- returns the element unchanged otherwise. -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | unpack src `elem` paths - = Image attr lab (pack (normalise $ dir unpack src), tit) +adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline +adjustImagePath dir mediabag (Image attr lab (src, tit)) = + case lookupMedia (T.unpack src) mediabag of + Nothing -> Image attr lab (src, tit) + Just item -> + let fullpath = dir mediaPath item + in Image attr lab (T.pack fullpath, tit) adjustImagePath _ _ x = x -- cgit v1.2.3