diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-06-10 16:47:02 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-06-10 16:47:02 -0700 |
commit | 3776e828a83048697e5c64d9fb4bedc0145197dc (patch) | |
tree | cad6f9754a013ea5c86d4559b1ceeb3187d0e301 /src/Text/Pandoc/Class | |
parent | aa79b3035c3343adf1bb41b37266049a65ab5da7 (diff) | |
download | pandoc-3776e828a83048697e5c64d9fb4bedc0145197dc.tar.gz |
Fix MediaBag regressions.
With the 2.14 release `--extract-media` stopped working as before;
there could be mismatches between the paths in the rendered document and
the extracted media.
This patch makes several changes (while keeping the same API).
The `mediaPath` in 2.14 was always constructed from the SHA1 hash of
the media contents. Now, we preserve the original path unless it's
an absolute path or contains `..` segments (in that case we use a path
based on the SHA1 hash of the contents).
When constructing a path from the SHA1 hash, we always use the
original extension, if there is one. Otherwise we look up an
appropriate extension for the mime type.
`mediaDirectory` and `mediaItems` now use the `mediaPath`, rather
than the mediabag key, for the first component of the tuple.
This makes more sense, I think, and fits with the documentation
of these functions; eventually, though, we should rework the API so that
`mediaItems` returns both the keys and the MediaItems.
Rewriting of source paths in `extractMedia` has been fixed.
`fillMediaBag` has been modified so that it doesn't modify
image paths (that was part of the problem in #7345).
We now do path normalization (e.g. `\` separators on Windows) only
in writing the media; the paths are left unchanged in the image
links (sensibly, since they might be URLs and not file paths).
These changes should restore the original behavior from before 2.14.
Closes #7345.
Diffstat (limited to 'src/Text/Pandoc/Class')
-rw-r--r-- | src/Text/Pandoc/Class/IO.hs | 41 | ||||
-rw-r--r-- | src/Text/Pandoc/Class/PandocMonad.hs | 17 |
2 files changed, 27 insertions, 31 deletions
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 6df39d4d0..169074860 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -200,31 +200,32 @@ alertIndent (l:ls) = do extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc extractMedia dir d = do media <- getMediaBag - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - mapM_ (writeMedia dir media) fps - return $ walk (adjustImagePath dir fps) d + let items = mediaItems media + if null items + then return d + else do + mapM_ (writeMedia dir) items + return $ walk (adjustImagePath dir media) d -- | Write the contents of a media bag to a path. writeMedia :: (PandocMonad m, MonadIO m) - => FilePath -> MediaBag -> FilePath + => FilePath + -> (FilePath, MimeType, BL.ByteString) -> m () -writeMedia dir mediabag subpath = do - let mbcontents = lookupMedia subpath mediabag - case mbcontents of - Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just item -> do - -- we normalize to get proper path separators for the platform - let fullpath = dir </> normalise (mediaPath item) - liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath $ mediaContents item +writeMedia dir (fp, _mt, bs) = do + -- we normalize to get proper path separators for the platform + let fullpath = normalise $ dir </> fp + liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) + logIOError $ BL.writeFile fullpath bs -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; -- returns the element unchanged otherwise. -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | unpack src `elem` paths - = Image attr lab (pack (normalise $ dir </> unpack src), tit) +adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline +adjustImagePath dir mediabag (Image attr lab (src, tit)) = + case lookupMedia (T.unpack src) mediabag of + Nothing -> Image attr lab (src, tit) + Just item -> + let fullpath = dir </> mediaPath item + in Image attr lab (T.pack fullpath, tit) adjustImagePath _ _ x = x diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 4eb80df29..439aec071 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -638,17 +638,12 @@ fillMediaBag d = walkM handleImage d handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag let fp = T.unpack src - src' <- T.pack <$> case lookupMedia fp mediabag of - Just item -> return $ mediaPath item - Nothing -> do - (bs, mt) <- fetchItem src - insertMedia fp mt (BL.fromStrict bs) - mediabag' <- getMediaBag - case lookupMedia fp mediabag' of - Just item -> return $ mediaPath item - Nothing -> throwError $ PandocSomeError $ - src <> " not successfully inserted into MediaBag" - return $ Image attr lab (src', tit)) + case lookupMedia fp mediabag of + Just _ -> return () + Nothing -> do + (bs, mt) <- fetchItem src + insertMedia fp mt (BL.fromStrict bs) + return $ Image attr lab (src, tit)) (\e -> case e of PandocResourceNotFound _ -> do |