diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2021-06-10 16:47:02 -0700 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2021-06-10 16:47:02 -0700 | 
| commit | 3776e828a83048697e5c64d9fb4bedc0145197dc (patch) | |
| tree | cad6f9754a013ea5c86d4559b1ceeb3187d0e301 /src | |
| parent | aa79b3035c3343adf1bb41b37266049a65ab5da7 (diff) | |
| download | pandoc-3776e828a83048697e5c64d9fb4bedc0145197dc.tar.gz | |
Fix MediaBag regressions.
With the 2.14 release `--extract-media` stopped working as before;
there could be mismatches between the paths in the rendered document and
the extracted media.
This patch makes several changes (while keeping the same API).
The `mediaPath` in 2.14 was always constructed from the SHA1 hash of
the media contents.  Now, we preserve the original path unless it's
an absolute path or contains `..` segments (in that case we use a path
based on the SHA1 hash of the contents).
When constructing a path from the SHA1 hash, we always use the
original extension, if there is one. Otherwise we look up an
appropriate extension for the mime type.
`mediaDirectory` and `mediaItems` now use the `mediaPath`, rather
than the mediabag key, for the first component of the tuple.
This makes more sense, I think, and fits with the documentation
of these functions; eventually, though, we should rework the API so that
`mediaItems` returns both the keys and the MediaItems.
Rewriting of source paths in `extractMedia` has been fixed.
`fillMediaBag` has been modified so that it doesn't modify
image paths (that was part of the problem in #7345).
We now do path normalization (e.g. `\` separators on Windows) only
in writing the media; the paths are left unchanged in the image
links (sensibly, since they might be URLs and not file paths).
These changes should restore the original behavior from before 2.14.
Closes #7345.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Class/IO.hs | 41 | ||||
| -rw-r--r-- | src/Text/Pandoc/Class/PandocMonad.hs | 17 | ||||
| -rw-r--r-- | src/Text/Pandoc/MediaBag.hs | 25 | 
3 files changed, 41 insertions, 42 deletions
| diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 6df39d4d0..169074860 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image))  import Text.Pandoc.Error (PandocError (..))  import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage)  import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems)  import Text.Pandoc.Walk (walk)  import qualified Control.Exception as E  import qualified Data.ByteString as B @@ -200,31 +200,32 @@ alertIndent (l:ls) = do  extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc  extractMedia dir d = do    media <- getMediaBag -  case [fp | (fp, _, _) <- mediaDirectory media] of -    []  -> return d -    fps -> do -      mapM_ (writeMedia dir media) fps -      return $ walk (adjustImagePath dir fps) d +  let items = mediaItems media +  if null items +    then return d +    else do +      mapM_ (writeMedia dir) items +      return $ walk (adjustImagePath dir media) d  -- | Write the contents of a media bag to a path.  writeMedia :: (PandocMonad m, MonadIO m) -           => FilePath -> MediaBag -> FilePath +           => FilePath +           -> (FilePath, MimeType, BL.ByteString)             -> m () -writeMedia dir mediabag subpath = do -  let mbcontents = lookupMedia subpath mediabag -  case mbcontents of -       Nothing -> throwError $ PandocResourceNotFound $ pack subpath -       Just item -> do -         -- we normalize to get proper path separators for the platform -         let fullpath = dir </> normalise (mediaPath item) -         liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) -         logIOError $ BL.writeFile fullpath $ mediaContents item +writeMedia dir (fp, _mt, bs) = do +  -- we normalize to get proper path separators for the platform +  let fullpath = normalise $ dir </> fp +  liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) +  logIOError $ BL.writeFile fullpath bs  -- | If the given Inline element is an image with a @src@ path equal to  -- one in the list of @paths@, then prepends @dir@ to the image source;  -- returns the element unchanged otherwise. -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) -   | unpack src `elem` paths -     = Image attr lab (pack (normalise $ dir </> unpack src), tit) +adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline +adjustImagePath dir mediabag (Image attr lab (src, tit)) = +  case lookupMedia (T.unpack src) mediabag of +    Nothing -> Image attr lab (src, tit) +    Just item -> +      let fullpath = dir </> mediaPath item +      in  Image attr lab (T.pack fullpath, tit)  adjustImagePath _ _ x = x diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 4eb80df29..439aec071 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -638,17 +638,12 @@ fillMediaBag d = walkM handleImage d          handleImage (Image attr lab (src, tit)) = catchError            (do mediabag <- getMediaBag                let fp = T.unpack src -              src' <- T.pack <$> case lookupMedia fp mediabag of -                        Just item -> return $ mediaPath item -                        Nothing -> do -                          (bs, mt) <- fetchItem src -                          insertMedia fp mt (BL.fromStrict bs) -                          mediabag' <- getMediaBag -                          case lookupMedia fp mediabag' of -                             Just item -> return $ mediaPath item -                             Nothing -> throwError $ PandocSomeError $ -                               src <> " not successfully inserted into MediaBag" -              return $ Image attr lab (src', tit)) +              case lookupMedia fp mediabag of +                Just _ -> return () +                Nothing -> do +                  (bs, mt) <- fetchItem src +                  insertMedia fp mt (BL.fromStrict bs) +              return $ Image attr lab (src, tit))            (\e ->                case e of                  PandocResourceNotFound _ -> do diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index a65f315fc..06fba5632 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -71,16 +71,21 @@ insertMedia :: FilePath       -- ^ relative path and canonical name of resource              -> MediaBag              -> MediaBag  insertMedia fp mbMime contents (MediaBag mediamap) = -  MediaBag (M.insert (canonicalize fp) mediaItem mediamap) -  where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <> -                                 "." <> ext +  MediaBag (M.insert fp' mediaItem mediamap) +  where mediaItem = MediaItem{ mediaPath = newpath                               , mediaContents = contents                               , mediaMimeType = mt } +        fp' = canonicalize fp +        newpath = if isRelative fp && ".." `notElem` splitPath fp +                     then T.unpack fp' +                     else showDigest (sha1 contents) <> "." <> ext          fallback = case takeExtension fp of                          ".gz" -> getMimeTypeDef $ dropExtension fp                          _     -> getMimeTypeDef fp          mt = fromMaybe fallback mbMime -        ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt +        ext = case takeExtension fp of +               '.':e  -> e +               _      -> maybe "" T.unpack $ extensionFromMimeType mt  -- | Lookup a media item in a 'MediaBag', returning mime type and contents. @@ -92,13 +97,11 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap  -- | Get a list of the file paths stored in a 'MediaBag', with  -- their corresponding mime types and the lengths in bytes of the contents.  mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] -mediaDirectory (MediaBag mediamap) = -  M.foldrWithKey (\fp item -> -      ((T.unpack fp, mediaMimeType item, -        fromIntegral (BL.length (mediaContents item))):)) [] mediamap +mediaDirectory mediabag = +  map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs))) +    (mediaItems mediabag)  mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)]  mediaItems (MediaBag mediamap) = -  M.foldrWithKey (\fp item -> -      ((T.unpack fp, mediaMimeType item, mediaContents item):)) -     [] mediamap +  map (\item -> (mediaPath item, mediaMimeType item, mediaContents item)) +      (M.elems mediamap) | 
