diff options
| author | John MacFarlane <fiddlosopher@gmail.com> | 2014-07-31 11:04:40 -0700 | 
|---|---|---|
| committer | John MacFarlane <fiddlosopher@gmail.com> | 2014-07-31 11:05:35 -0700 | 
| commit | 00662faefbca0b9889d3d79dbb2985350356d18a (patch) | |
| tree | b0f6c39c3c91bc247c970297b4afa5a630d6d410 | |
| parent | 6e96f8b4783d46e2b9e245bf3144f023c5296a38 (diff) | |
| download | pandoc-00662faefbca0b9889d3d79dbb2985350356d18a.tar.gz | |
Made MediaBag a newtype, and added mime type information to media.
Shared now exports functions for interacting with a MediaBag:
- `emptyMediaBag`
- `lookuMedia`
- `insertMedia`
- `mediaDirectory`
- `extractMediaBag`
| -rw-r--r-- | pandoc.hs | 26 | ||||
| -rw-r--r-- | src/Text/Pandoc/Options.hs | 5 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 4 | ||||
| -rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 26 | ||||
| -rw-r--r-- | src/Text/Pandoc/Shared.hs | 80 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 2 | ||||
| -rw-r--r-- | tests/Tests/Readers/Docx.hs | 5 | 
7 files changed, 98 insertions, 50 deletions
| @@ -37,7 +37,8 @@ import Text.Pandoc.Walk (walk)  import Text.Pandoc.Readers.LaTeX (handleIncludes)  import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,                              safeRead, headerShift, normalize, err, warn, -                            openURL ) +                            openURL, mediaDirectory, extractMediaBag, +                            emptyMediaBag )  import Text.Pandoc.XML ( toEntities )  import Text.Pandoc.SelfContained ( makeSelfContained )  import Text.Pandoc.Process (pipeProcess) @@ -50,8 +51,7 @@ import System.Console.GetOpt  import Data.Char ( toLower )  import Data.List ( intercalate, isPrefixOf, isSuffixOf, sort )  import System.Directory ( getAppUserDataDirectory, findExecutable, -                          doesFileExist, Permissions(..), getPermissions, -                          createDirectoryIfMissing ) +                          doesFileExist, Permissions(..), getPermissions )  import System.IO ( stdout, stderr )  import System.IO.Error ( isDoesNotExistError )  import qualified Control.Exception as E @@ -1206,15 +1206,6 @@ main = do                             then handleIncludes                             else return -  let writeMedia :: FilePath -> (FilePath, B.ByteString) -> IO () -      writeMedia dir (subpath, bs) = do -        -- we join and split to convert a/b/c to a\b\c on Windows; -        -- in zip containers all paths use / -        let fullpath = dir </> joinPath (splitPath subpath) -        createDirectoryIfMissing True $ takeDirectory fullpath -        warn $ "extracting " ++ fullpath -        B.writeFile fullpath bs -    let adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline        adjustImagePath dir paths (Image lab (src, tit))           | src `elem` paths = Image lab (dir ++ "/" ++ src, tit) @@ -1226,13 +1217,16 @@ main = do              inp <- readSources sources >>=                         handleIncludes' . convertTabs . intercalate "\n"              d <- r readerOpts inp -            return (d, M.empty) +            return (d, emptyMediaBag)            ByteStringReader r -> do                (d, media) <- readFiles sources >>= r readerOpts                d' <- case mbExtractMedia of -                       Just dir | not (M.null media) -> do -                         mapM_ (writeMedia dir) $ M.toList media -                         return $ walk (adjustImagePath dir (M.keys media)) d +                       Just dir -> do +                         case [fp | (fp, _, _) <- mediaDirectory media] of +                               []  -> return d +                               fps -> do +                                 extractMediaBag True dir media +                                 return $ walk (adjustImagePath dir fps) d                         _  -> return d                return (d', media) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 2de1a9e35..bf6b3d910 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -49,8 +49,7 @@ import Data.Set (Set)  import qualified Data.Set as Set  import Data.Default  import Text.Pandoc.Highlighting (Style, pygments) -import Text.Pandoc.Shared (MediaBag) -import qualified Data.Map as M +import Text.Pandoc.Shared (MediaBag, emptyMediaBag)  -- | Individually selectable syntax extensions.  data Extension = @@ -359,7 +358,7 @@ instance Default WriterOptions where                        , writerTOCDepth         = 3                        , writerReferenceODT     = Nothing                        , writerReferenceDocx    = Nothing -                      , writerMediaBag         = M.empty +                      , writerMediaBag         = emptyMediaBag                        }  -- | Returns True if the given extension is enabled. diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index f5fb6565a..2fb4da2d9 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -108,7 +108,7 @@ data DState = DState { docxAnchorMap :: M.Map String String  defaultDState :: DState  defaultDState = DState { docxAnchorMap = M.empty -                       , docxMediaBag  = M.empty +                       , docxMediaBag  = emptyMediaBag                         , docxInHeaderBlock = False                         , docxInTexSubscript = False} @@ -369,7 +369,7 @@ parPartToInlines (BookMark _ anchor) =      return [Span (newAnchor, ["anchor"], []) []]  parPartToInlines (Drawing fp bs) = do    mediaBag <- gets docxMediaBag -  modify $ \s -> s { docxMediaBag = M.insert fp bs mediaBag} +  modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }    return [Image [] (fp, "")]  parPartToInlines (InternalHyperLink anchor runs) = do    ils <- concatMapM runToInlines runs diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 777da3551..77f8b6530 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -41,11 +41,11 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))  import Data.Char (toLower, isAscii, isAlphaNum)  import Codec.Compression.GZip as Gzip  import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, MediaBag) +import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, +                           MediaBag, lookupMedia)  import Text.Pandoc.UTF8 (toString,  fromString)  import Text.Pandoc.MIME (getMimeType)  import System.Directory (doesFileExist) -import qualified Data.Map as M  isOk :: Char -> Bool  isOk c = isAscii c && isAlphaNum c @@ -110,16 +110,20 @@ getItem media userdata f =         -- this is needed for things like cmunrm.eot?#iefix,         -- which is used to get old versions of IE to work with web fonts.         let f' = takeWhile (\c -> c /= '?' && c /= '#') f -       let mime = case takeExtension f' of -                       ".gz" -> getMimeType $ dropExtension f' -                       x     -> getMimeType x +       let mbMime = case takeExtension f' of +                         ".gz" -> getMimeType $ dropExtension f' +                         x     -> getMimeType x         exists <- doesFileExist f' -       cont <- if exists -                  then B.readFile f' -                  else case M.lookup f media of -                            Just bs  -> return $ BS.concat $ L.toChunks bs -                            Nothing  -> readDataFile userdata f' -       return (cont, mime) +       if exists +          then do +            cont <- B.readFile f' +            return (cont, mbMime) +          else case lookupMedia f media of +                    Just (mime,bs) -> return (BS.concat $ L.toChunks bs, +                                              Just mime) +                    Nothing        -> do +                      cont <- readDataFile userdata f' +                      return (cont, mbMime)    where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e  getRaw :: MediaBag -> Maybe FilePath -> String -> String diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index ee48f5bc1..deab1abc0 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,5 +1,5 @@  {-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses, -    FlexibleContexts, ScopedTypeVariables #-} +    FlexibleContexts, ScopedTypeVariables, GeneralizedNewtypeDeriving #-}  {-  Copyright (C) 2006-2014 John MacFarlane <jgm@berkeley.edu> @@ -50,7 +50,11 @@ module Text.Pandoc.Shared (                       tabFilter,                       -- * Media Handling                       MediaBag, +                     emptyMediaBag, +                     lookupMedia,                       insertMedia, +                     mediaDirectory, +                     extractMediaBag,                       -- * Date/time                       normalizeDate,                       -- * Pandoc block and inline list processing @@ -102,16 +106,18 @@ import Data.Char ( toLower, isLower, isUpper, isAlpha,                     isLetter, isDigit, isSpace )  import Data.List ( find, isPrefixOf, intercalate )  import qualified Data.Map as M +import Data.Maybe ( fromMaybe )  import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,                       unEscapeString, parseURIReference )  import qualified Data.Set as Set  import System.Directory  import Text.Pandoc.MIME (getMimeType) -import System.FilePath ( (</>), takeExtension, dropExtension ) +import System.FilePath ( (</>), takeExtension, dropExtension, takeDirectory, +                         splitPath, joinPath )  import Data.Generics (Typeable, Data)  import qualified Control.Monad.State as S  import qualified Control.Exception as E -import Control.Monad (msum, unless) +import Control.Monad (msum, unless, MonadPlus(..), when)  import Text.Pandoc.Pretty (charWidth)  import System.Locale (defaultTimeLocale)  import Data.Time @@ -292,15 +298,63 @@ tabFilter tabStop =  --- Media handling  --- --- | A map of media paths to their binary representations. -type MediaBag = M.Map String BL.ByteString +-- | A container for a collection of binary resources, with names and +-- mime types. +newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString)) +        deriving (Monoid) --- | Insert a media item into a `MediaBag` -insertMedia :: FilePath -            -> BL.ByteString +instance Show MediaBag where +  show bag = "MediaBag " ++ show (mediaDirectory bag) + +emptyMediaBag :: MediaBag +emptyMediaBag = MediaBag M.empty + +-- | Insert a media item into a 'MediaBag', replacing any existing +-- value with the same name. +insertMedia :: FilePath      -- ^ relative path and canonical name of resource +            -> Maybe String  -- ^ mime type (Nothing = determine from extension) +            -> BL.ByteString -- ^ contents of resource +            -> MediaBag              -> MediaBag +insertMedia fp mbMime contents (MediaBag mediamap) = +  MediaBag (M.insert fp (mime, contents) mediamap) +  where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback) +        fallback = case takeExtension fp of +                        ".gz"   -> getMimeType $ dropExtension fp +                        _       -> getMimeType fp + +-- | Lookup a media item in a 'MediaBag', returning mime type and contents. +lookupMedia :: FilePath              -> MediaBag -insertMedia = M.insert +            -> Maybe (String, BL.ByteString) +lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap + +-- | Get a list of the file paths stored in a 'MediaBag', with +-- their corresponding mime types and the lengths in bytes of the contents. +mediaDirectory :: MediaBag -> [(String, String, Int)] +mediaDirectory (MediaBag mediamap) = +  M.foldWithKey (\fp (mime,contents) -> +      ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap + +-- | Extract contents of MediaBag to a given directory.  Print informational +-- messages if 'verbose' is true. +extractMediaBag :: Bool +                -> FilePath +                -> MediaBag +                -> IO () +extractMediaBag verbose dir (MediaBag mediamap) = do +  sequence_ $ M.foldWithKey +     (\fp (_ ,contents) -> +        ((writeMedia verbose dir (fp, contents)):)) [] mediamap + +writeMedia :: Bool -> FilePath -> (FilePath, BL.ByteString) -> IO () +writeMedia verbose dir (subpath, bs) = do +  -- we join and split to convert a/b/c to a\b\c on Windows; +  -- in zip containers all paths use / +  let fullpath = dir </> joinPath (splitPath subpath) +  createDirectoryIfMissing True $ takeDirectory fullpath +  when verbose $ warn $ "extracting " ++ fullpath +  BL.writeFile fullpath bs  --  -- Date/time @@ -803,13 +857,9 @@ fetchItem sourceURL s  fetchItem' :: MediaBag -> Maybe String -> String             -> IO (Either E.SomeException (BS.ByteString, Maybe String))  fetchItem' media sourceURL s = do -  case M.lookup s media of +  case lookupMedia s media of         Nothing -> fetchItem sourceURL s -       Just bs -> do -          let mime = case takeExtension s of -                          ".gz" -> getMimeType $ dropExtension s -                          x     -> getMimeType x -          return $ Right (BS.concat $ toChunks bs, mime) +       Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)  -- | Read from a URL and return raw data and maybe mime type.  openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 1f222b8b8..62dd70e73 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -793,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do      return $ Image lab (newsrc, tit)  transformInline opts _ (x@(Math _ _))    | WebTeX _ <- writerHTMLMathMethod opts = do -    raw <- makeSelfContained M.empty Nothing $ writeHtmlInline opts x +    raw <- makeSelfContained emptyMediaBag Nothing $ writeHtmlInline opts x      return $ RawInline (Format "html") raw  transformInline opts mediaRef  (RawInline fmt raw)    | fmt == Format "html" = do diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 494669fd5..0eae20e22 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -12,6 +12,7 @@ import qualified Data.ByteString.Base64 as B64  import Text.Pandoc.Readers.Docx  import Text.Pandoc.Writers.Native (writeNative)  import qualified Data.Map as M +import Text.Pandoc.Shared (lookupMedia)  -- We define a wrapper around pandoc that doesn't normalize in the  -- tests. Since we do our own normalization, we want to make sure @@ -60,8 +61,8 @@ testCompareMediaIO name docxFile mediaPath mediaFile = do      df <- B.readFile docxFile      mf <- B.readFile mediaFile      let (_, mb) = readDocx def df -        dBytes = case M.lookup mediaPath mb of -          Just bs -> bs +        dBytes = case lookupMedia mediaPath mb of +          Just (_,bs) -> bs            Nothing -> error "Media file not found"          d64 = B8.unpack $ B64.encode $ BS.concat $ B.toChunks dBytes          m64 = B8.unpack $ B64.encode $ BS.concat $ B.toChunks mf | 
