aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANUAL.txt12
-rw-r--r--src/Text/Pandoc/Class/IO.hs41
-rw-r--r--src/Text/Pandoc/Class/PandocMonad.hs17
-rw-r--r--src/Text/Pandoc/MediaBag.hs25
4 files changed, 47 insertions, 48 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index b3a1f95e2..ef569433a 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -675,12 +675,12 @@ header when requesting a document from a URL:
: Extract images and other media contained in or linked from
the source document to the path *DIR*, creating it if
necessary, and adjust the images references in the document
- so they point to the extracted files. If the source format is
- a binary container (docx, epub, or odt), the media is
- extracted from the container and the original
- filenames are used. Otherwise the media is read from the
- file system or downloaded, and new filenames are constructed
- based on SHA1 hashes of the contents.
+ so they point to the extracted files. Media are downloaded,
+ read from the file system, or extracted from a binary
+ container (e.g. docx), as needed. The original file paths
+ are used if they are relative paths not containing `..`.
+ Otherwise filenames are constructed from the SHA1 hash of
+ the contents.
`--abbreviations=`*FILE*
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index 6df39d4d0..169074860 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image))
import Text.Pandoc.Error (PandocError (..))
import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage)
import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory)
+import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems)
import Text.Pandoc.Walk (walk)
import qualified Control.Exception as E
import qualified Data.ByteString as B
@@ -200,31 +200,32 @@ alertIndent (l:ls) = do
extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
extractMedia dir d = do
media <- getMediaBag
- case [fp | (fp, _, _) <- mediaDirectory media] of
- [] -> return d
- fps -> do
- mapM_ (writeMedia dir media) fps
- return $ walk (adjustImagePath dir fps) d
+ let items = mediaItems media
+ if null items
+ then return d
+ else do
+ mapM_ (writeMedia dir) items
+ return $ walk (adjustImagePath dir media) d
-- | Write the contents of a media bag to a path.
writeMedia :: (PandocMonad m, MonadIO m)
- => FilePath -> MediaBag -> FilePath
+ => FilePath
+ -> (FilePath, MimeType, BL.ByteString)
-> m ()
-writeMedia dir mediabag subpath = do
- let mbcontents = lookupMedia subpath mediabag
- case mbcontents of
- Nothing -> throwError $ PandocResourceNotFound $ pack subpath
- Just item -> do
- -- we normalize to get proper path separators for the platform
- let fullpath = dir </> normalise (mediaPath item)
- liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
- logIOError $ BL.writeFile fullpath $ mediaContents item
+writeMedia dir (fp, _mt, bs) = do
+ -- we normalize to get proper path separators for the platform
+ let fullpath = normalise $ dir </> fp
+ liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+ logIOError $ BL.writeFile fullpath bs
-- | If the given Inline element is an image with a @src@ path equal to
-- one in the list of @paths@, then prepends @dir@ to the image source;
-- returns the element unchanged otherwise.
-adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
-adjustImagePath dir paths (Image attr lab (src, tit))
- | unpack src `elem` paths
- = Image attr lab (pack (normalise $ dir </> unpack src), tit)
+adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline
+adjustImagePath dir mediabag (Image attr lab (src, tit)) =
+ case lookupMedia (T.unpack src) mediabag of
+ Nothing -> Image attr lab (src, tit)
+ Just item ->
+ let fullpath = dir </> mediaPath item
+ in Image attr lab (T.pack fullpath, tit)
adjustImagePath _ _ x = x
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index 4eb80df29..439aec071 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -638,17 +638,12 @@ fillMediaBag d = walkM handleImage d
handleImage (Image attr lab (src, tit)) = catchError
(do mediabag <- getMediaBag
let fp = T.unpack src
- src' <- T.pack <$> case lookupMedia fp mediabag of
- Just item -> return $ mediaPath item
- Nothing -> do
- (bs, mt) <- fetchItem src
- insertMedia fp mt (BL.fromStrict bs)
- mediabag' <- getMediaBag
- case lookupMedia fp mediabag' of
- Just item -> return $ mediaPath item
- Nothing -> throwError $ PandocSomeError $
- src <> " not successfully inserted into MediaBag"
- return $ Image attr lab (src', tit))
+ case lookupMedia fp mediabag of
+ Just _ -> return ()
+ Nothing -> do
+ (bs, mt) <- fetchItem src
+ insertMedia fp mt (BL.fromStrict bs)
+ return $ Image attr lab (src, tit))
(\e ->
case e of
PandocResourceNotFound _ -> do
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index a65f315fc..06fba5632 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -71,16 +71,21 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource
-> MediaBag
-> MediaBag
insertMedia fp mbMime contents (MediaBag mediamap) =
- MediaBag (M.insert (canonicalize fp) mediaItem mediamap)
- where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <>
- "." <> ext
+ MediaBag (M.insert fp' mediaItem mediamap)
+ where mediaItem = MediaItem{ mediaPath = newpath
, mediaContents = contents
, mediaMimeType = mt }
+ fp' = canonicalize fp
+ newpath = if isRelative fp && ".." `notElem` splitPath fp
+ then T.unpack fp'
+ else showDigest (sha1 contents) <> "." <> ext
fallback = case takeExtension fp of
".gz" -> getMimeTypeDef $ dropExtension fp
_ -> getMimeTypeDef fp
mt = fromMaybe fallback mbMime
- ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt
+ ext = case takeExtension fp of
+ '.':e -> e
+ _ -> maybe "" T.unpack $ extensionFromMimeType mt
-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
@@ -92,13 +97,11 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap
-- | Get a list of the file paths stored in a 'MediaBag', with
-- their corresponding mime types and the lengths in bytes of the contents.
mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)]
-mediaDirectory (MediaBag mediamap) =
- M.foldrWithKey (\fp item ->
- ((T.unpack fp, mediaMimeType item,
- fromIntegral (BL.length (mediaContents item))):)) [] mediamap
+mediaDirectory mediabag =
+ map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs)))
+ (mediaItems mediabag)
mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)]
mediaItems (MediaBag mediamap) =
- M.foldrWithKey (\fp item ->
- ((T.unpack fp, mediaMimeType item, mediaContents item):))
- [] mediamap
+ map (\item -> (mediaPath item, mediaMimeType item, mediaContents item))
+ (M.elems mediamap)