aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Class/IO.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-06-10 16:47:02 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2021-06-10 16:47:02 -0700
commit3776e828a83048697e5c64d9fb4bedc0145197dc (patch)
treecad6f9754a013ea5c86d4559b1ceeb3187d0e301 /src/Text/Pandoc/Class/IO.hs
parentaa79b3035c3343adf1bb41b37266049a65ab5da7 (diff)
downloadpandoc-3776e828a83048697e5c64d9fb4bedc0145197dc.tar.gz
Fix MediaBag regressions.
With the 2.14 release `--extract-media` stopped working as before; there could be mismatches between the paths in the rendered document and the extracted media. This patch makes several changes (while keeping the same API). The `mediaPath` in 2.14 was always constructed from the SHA1 hash of the media contents. Now, we preserve the original path unless it's an absolute path or contains `..` segments (in that case we use a path based on the SHA1 hash of the contents). When constructing a path from the SHA1 hash, we always use the original extension, if there is one. Otherwise we look up an appropriate extension for the mime type. `mediaDirectory` and `mediaItems` now use the `mediaPath`, rather than the mediabag key, for the first component of the tuple. This makes more sense, I think, and fits with the documentation of these functions; eventually, though, we should rework the API so that `mediaItems` returns both the keys and the MediaItems. Rewriting of source paths in `extractMedia` has been fixed. `fillMediaBag` has been modified so that it doesn't modify image paths (that was part of the problem in #7345). We now do path normalization (e.g. `\` separators on Windows) only in writing the media; the paths are left unchanged in the image links (sensibly, since they might be URLs and not file paths). These changes should restore the original behavior from before 2.14. Closes #7345.
Diffstat (limited to 'src/Text/Pandoc/Class/IO.hs')
-rw-r--r--src/Text/Pandoc/Class/IO.hs41
1 files changed, 21 insertions, 20 deletions
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index 6df39d4d0..169074860 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image))
import Text.Pandoc.Error (PandocError (..))
import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage)
import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory)
+import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems)
import Text.Pandoc.Walk (walk)
import qualified Control.Exception as E
import qualified Data.ByteString as B
@@ -200,31 +200,32 @@ alertIndent (l:ls) = do
extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
extractMedia dir d = do
media <- getMediaBag
- case [fp | (fp, _, _) <- mediaDirectory media] of
- [] -> return d
- fps -> do
- mapM_ (writeMedia dir media) fps
- return $ walk (adjustImagePath dir fps) d
+ let items = mediaItems media
+ if null items
+ then return d
+ else do
+ mapM_ (writeMedia dir) items
+ return $ walk (adjustImagePath dir media) d
-- | Write the contents of a media bag to a path.
writeMedia :: (PandocMonad m, MonadIO m)
- => FilePath -> MediaBag -> FilePath
+ => FilePath
+ -> (FilePath, MimeType, BL.ByteString)
-> m ()
-writeMedia dir mediabag subpath = do
- let mbcontents = lookupMedia subpath mediabag
- case mbcontents of
- Nothing -> throwError $ PandocResourceNotFound $ pack subpath
- Just item -> do
- -- we normalize to get proper path separators for the platform
- let fullpath = dir </> normalise (mediaPath item)
- liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
- logIOError $ BL.writeFile fullpath $ mediaContents item
+writeMedia dir (fp, _mt, bs) = do
+ -- we normalize to get proper path separators for the platform
+ let fullpath = normalise $ dir </> fp
+ liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+ logIOError $ BL.writeFile fullpath bs
-- | If the given Inline element is an image with a @src@ path equal to
-- one in the list of @paths@, then prepends @dir@ to the image source;
-- returns the element unchanged otherwise.
-adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
-adjustImagePath dir paths (Image attr lab (src, tit))
- | unpack src `elem` paths
- = Image attr lab (pack (normalise $ dir </> unpack src), tit)
+adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline
+adjustImagePath dir mediabag (Image attr lab (src, tit)) =
+ case lookupMedia (T.unpack src) mediabag of
+ Nothing -> Image attr lab (src, tit)
+ Just item ->
+ let fullpath = dir </> mediaPath item
+ in Image attr lab (T.pack fullpath, tit)
adjustImagePath _ _ x = x