diff options
-rw-r--r-- | src/Text/Pandoc/Class/IO.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Class/PandocMonad.hs | 43 | ||||
-rw-r--r-- | src/Text/Pandoc/Lua/Module/MediaBag.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/MediaBag.hs | 35 | ||||
-rw-r--r-- | test/Tests/Readers/Docx.hs | 10 |
5 files changed, 55 insertions, 48 deletions
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index bb4e2b732..f12c0a938 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -213,14 +213,13 @@ writeMedia :: (PandocMonad m, MonadIO m) writeMedia dir mediabag subpath = do -- we join and split to convert a/b/c to a\b\c on Windows; -- in zip containers all paths use / - let fullpath = dir </> unEscapeString (normalise subpath) let mbcontents = lookupMedia subpath mediabag case mbcontents of Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just (_, bs) -> do - report $ Extracting $ pack fullpath + Just item -> do + let fullpath = dir </> mediaPath item liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath bs + logIOError $ BL.writeFile fullpath $ mediaContents item -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index dd6499a73..ae6917e06 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -37,7 +37,6 @@ module Text.Pandoc.Class.PandocMonad , setUserDataDir , getUserDataDir , fetchItem - , fetchMediaResource , getInputFiles , setInputFiles , getOutputFile @@ -57,8 +56,6 @@ module Text.Pandoc.Class.PandocMonad import Codec.Archive.Zip import Control.Monad.Except (MonadError (catchError, throwError), MonadTrans, lift, when) -import Data.Digest.Pure.SHA (sha1, showDigest) -import Data.Maybe (fromMaybe) import Data.List (foldl') import Data.Time (UTCTime) import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, @@ -67,7 +64,7 @@ import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime) import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) -import System.FilePath ((</>), (<.>), takeExtension, dropExtension, +import System.FilePath ((</>), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) import Text.Collate.Lang (Lang(..), parseLang, renderLang) @@ -75,8 +72,8 @@ import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, MediaItem(..)) import Text.Pandoc.Shared (uriPathToPath, safeRead) import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, readTranslations) @@ -376,7 +373,8 @@ fetchItem :: PandocMonad m fetchItem s = do mediabag <- getMediaBag case lookupMedia (T.unpack s) mediabag of - Just (mime, bs) -> return (BL.toStrict bs, Just mime) + Just item -> return (BL.toStrict (mediaContents item), + Just (mediaMimeType item)) Nothing -> downloadOrRead s -- | Returns the content and, if available, the MIME type of a resource. @@ -629,19 +627,6 @@ withPaths (p:ps) action fp = catchError (action (p </> fp)) (\_ -> withPaths ps action fp) --- | Fetch local or remote resource (like an image) and provide data suitable --- for adding it to the MediaBag. -fetchMediaResource :: PandocMonad m - => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) -fetchMediaResource src = do - (bs, mt) <- fetchItem src - let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) - (mt >>= extensionFromMimeType) - let bs' = BL.fromChunks [bs] - let basename = showDigest $ sha1 bs' - let fname = basename <.> T.unpack ext - return (fname, mt, bs') - -- | Traverse tree, filling media bag for any images that -- aren't already in the media bag. fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc @@ -649,12 +634,18 @@ fillMediaBag d = walkM handleImage d where handleImage :: PandocMonad m => Inline -> m Inline handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag - case lookupMedia (T.unpack src) mediabag of - Just (_, _) -> return $ Image attr lab (src, tit) - Nothing -> do - (fname, mt, bs) <- fetchMediaResource src - insertMedia fname mt bs - return $ Image attr lab (T.pack fname, tit)) + let fp = T.unpack src + src' <- T.pack <$> case lookupMedia fp mediabag of + Just item -> return $ mediaPath item + Nothing -> do + (bs, mt) <- fetchItem src + insertMedia fp mt (BL.fromStrict bs) + mediabag' <- getMediaBag + case lookupMedia fp mediabag' of + Just item -> return $ mediaPath item + Nothing -> throwError $ PandocSomeError $ + src <> " not successfully inserted into MediaBag" + return $ Image attr lab (src', tit)) (\e -> case e of PandocResourceNotFound _ -> do diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 78b699176..3eed50fca 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -73,9 +73,9 @@ lookup fp = do res <- MB.lookupMedia fp <$> getMediaBag liftPandocLua $ case res of Nothing -> 1 <$ Lua.pushnil - Just (mimeType, contents) -> do - Lua.push mimeType - Lua.push contents + Just item -> do + Lua.push $ MB.mediaMimeType item + Lua.push $ MB.mediaContents item return 2 list :: PandocLua NumResults diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 4a9b4efa1..a65f315fc 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -15,6 +15,7 @@ Definition of a MediaBag object to hold binary resources, and an interface for interacting with it. -} module Text.Pandoc.MediaBag ( + MediaItem(..), MediaBag, deleteMedia, lookupMedia, @@ -28,15 +29,23 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) import System.FilePath -import Text.Pandoc.MIME (MimeType, getMimeTypeDef) +import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType) import Data.Text (Text) import qualified Data.Text as T +import Data.Digest.Pure.SHA (sha1, showDigest) + +data MediaItem = + MediaItem + { mediaMimeType :: MimeType + , mediaPath :: FilePath + , mediaContents :: BL.ByteString + } deriving (Eq, Ord, Show, Data, Typeable) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map Text (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map Text MediaItem) deriving (Semigroup, Monoid, Data, Typeable) instance Show MediaBag where @@ -62,26 +71,34 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (canonicalize fp) (mime, contents) mediamap) - where mime = fromMaybe fallback mbMime + MediaBag (M.insert (canonicalize fp) mediaItem mediamap) + where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <> + "." <> ext + , mediaContents = contents + , mediaMimeType = mt } fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp + mt = fromMaybe fallback mbMime + ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt + -- | Lookup a media item in a 'MediaBag', returning mime type and contents. lookupMedia :: FilePath -> MediaBag - -> Maybe (MimeType, BL.ByteString) + -> Maybe MediaItem lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] mediaDirectory (MediaBag mediamap) = - M.foldrWithKey (\fp (mime,contents) -> - ((T.unpack fp, mime, fromIntegral (BL.length contents)):)) [] mediamap + M.foldrWithKey (\fp item -> + ((T.unpack fp, mediaMimeType item, + fromIntegral (BL.length (mediaContents item))):)) [] mediamap mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = - M.foldrWithKey (\fp (mime,contents) -> - ((T.unpack fp, mime, contents):)) [] mediamap + M.foldrWithKey (\fp item -> + ((T.unpack fp, mediaMimeType item, mediaContents item):)) + [] mediamap diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 2cce70cc5..939ff9939 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -24,7 +24,7 @@ import Test.Tasty.HUnit import Tests.Helpers import Text.Pandoc import qualified Text.Pandoc.Class as P -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import qualified Text.Pandoc.MediaBag as MB import Text.Pandoc.UTF8 as UTF8 -- We define a wrapper around pandoc that doesn't normalize in the @@ -91,11 +91,11 @@ getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString) getMedia archivePath mediaPath = fmap fromEntry . findEntryByPath ("word/" ++ mediaPath) . toArchive <$> B.readFile archivePath -compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool +compareMediaPathIO :: FilePath -> MB.MediaBag -> FilePath -> IO Bool compareMediaPathIO mediaPath mediaBag docxPath = do docxMedia <- getMedia docxPath mediaPath - let mbBS = case lookupMedia mediaPath mediaBag of - Just (_, bs) -> bs + let mbBS = case MB.lookupMedia mediaPath mediaBag of + Just item -> MB.mediaContents item Nothing -> error ("couldn't find " ++ mediaPath ++ " in media bag") @@ -110,7 +110,7 @@ compareMediaBagIO docxFile = do mb <- runIOorExplode $ readDocx defopts df >> P.getMediaBag bools <- mapM (\(fp, _, _) -> compareMediaPathIO fp mb docxFile) - (mediaDirectory mb) + (MB.mediaDirectory mb) return $ and bools testMediaBagIO :: String -> FilePath -> IO TestTree |