diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-08-30 11:49:50 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-08-30 11:49:50 -0700 |
commit | f70e3c3297bb0d8f953971a84a1a606351fdda40 (patch) | |
tree | c95d0f44861578c828aabcade0d0584b25d5eb96 | |
parent | eb29aab9598eec04a8c2d37ef24f2eaa5ca1435d (diff) | |
parent | 357172f13af7e7ddb6a01e16c664776d01f55565 (diff) | |
download | pandoc-f70e3c3297bb0d8f953971a84a1a606351fdda40.tar.gz |
Merge branch 'mime' of https://github.com/Aelve/John into Aelve-mime
Conflicts:
src/Text/Pandoc/Writers/Docx.hs
-rw-r--r-- | src/Text/Pandoc/MIME.hs | 40 | ||||
-rw-r--r-- | src/Text/Pandoc/MediaBag.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 15 | ||||
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 8 |
8 files changed, 67 insertions, 51 deletions
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 6e6284b25..3b3b3b5b3 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -27,24 +27,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Mime type lookup for ODT writer. -} -module Text.Pandoc.MIME ( getMimeType, extensionFromMimeType ) -where +module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef, + extensionFromMimeType )where import System.FilePath import Data.Char ( toLower ) +import Data.List (isPrefixOf, isSuffixOf) +import Data.Maybe (fromMaybe) import qualified Data.Map as M +type MimeType = String + -- | Determine mime type appropriate for file path. -getMimeType :: FilePath -> Maybe String -getMimeType "layout-cache" = Just "application/binary" -- in ODT -getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes - where mimeTypes = M.fromList mimeTypesList +getMimeType :: FilePath -> Maybe MimeType +getMimeType fp + -- ODT + | fp == "layout-cache" = + Just "application/binary" + | "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp = + Just "application/vnd.oasis.opendocument.formula" + -- generic + | otherwise = M.lookup (map toLower $ drop 1 $ takeExtension fp) mimeTypes + +-- | Determime mime type appropriate for file path, defaulting to +-- “application/octet-stream” if nothing else fits. +getMimeTypeDef :: FilePath -> MimeType +getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType -extensionFromMimeType :: String -> Maybe String -extensionFromMimeType mimetype = M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes +extensionFromMimeType :: MimeType -> Maybe String +extensionFromMimeType mimetype = + M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes -- note: we just look up the basic mime type, dropping the content-encoding etc. - where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList -mimeTypesList :: [(String, String)] +reverseMimeTypes :: M.Map MimeType String +reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList + +mimeTypes :: M.Map String MimeType +mimeTypes = M.fromList mimeTypesList + +mimeTypesList :: [(String, MimeType)] mimeTypesList = -- List borrowed from happstack-server. [("gz","application/x-gzip") ,("cabal","application/x-cabal") diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index b19804b5f..5921b56cf 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -41,8 +41,8 @@ import System.Directory (createDirectoryIfMissing) import qualified Data.Map as M import qualified Data.ByteString.Lazy as BL import Data.Monoid (Monoid) -import Control.Monad (when, MonadPlus(..)) -import Text.Pandoc.MIME (getMimeType) +import Control.Monad (when) +import Text.Pandoc.MIME (MimeType, getMimeTypeDef) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Maybe (fromMaybe) import System.IO (stderr) @@ -51,7 +51,7 @@ import System.IO (stderr) -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map String (MimeType, BL.ByteString)) deriving (Monoid) instance Show MediaBag where @@ -59,27 +59,27 @@ instance Show MediaBag where -- | Insert a media item into a 'MediaBag', replacing any existing -- value with the same name. -insertMedia :: FilePath -- ^ relative path and canonical name of resource - -> Maybe String -- ^ mime type (Nothing = determine from extension) - -> BL.ByteString -- ^ contents of resource +insertMedia :: FilePath -- ^ relative path and canonical name of resource + -> Maybe MimeType -- ^ mime type (Nothing = determine from extension) + -> BL.ByteString -- ^ contents of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = MediaBag (M.insert fp (mime, contents) mediamap) - where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback) + where mime = fromMaybe fallback mbMime fallback = case takeExtension fp of - ".gz" -> getMimeType $ dropExtension fp - _ -> getMimeType fp + ".gz" -> getMimeTypeDef $ dropExtension fp + _ -> getMimeTypeDef fp -- | Lookup a media item in a 'MediaBag', returning mime type and contents. lookupMedia :: FilePath -> MediaBag - -> Maybe (String, BL.ByteString) + -> Maybe (MimeType, BL.ByteString) lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. -mediaDirectory :: MediaBag -> [(String, String, Int)] +mediaDirectory :: MediaBag -> [(String, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldWithKey (\fp (mime,contents) -> ((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 052d7b832..b061d8683 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -16,6 +16,7 @@ import Text.Pandoc.Options ( ReaderOptions(..), readerTrace) import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) import Text.Pandoc.MediaBag (MediaBag, insertMedia) import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except) +import Text.Pandoc.MIME (MimeType) import qualified Text.Pandoc.Builder as B import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry , findEntryByPath, Entry) @@ -34,9 +35,7 @@ import Control.DeepSeq.Generics (deepseq, NFData) import Debug.Trace (trace) -type MIME = String - -type Items = M.Map String (FilePath, MIME) +type Items = M.Map String (FilePath, MimeType) readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag) readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes) @@ -65,13 +64,13 @@ archiveToEPUB os archive = do return $ (ast, mediaBag) where os' = os {readerParseRaw = True} - parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc + parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do when (readerTrace os) (traceM path) doc <- mimeToReader mime r path let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty return $ docSpan <> doc - mimeToReader :: MonadError String m => MIME -> FilePath -> FilePath -> m Pandoc + mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do fname <- findEntryByPathE (root </> path) archive return $ fixInternalReferences path . @@ -84,7 +83,7 @@ archiveToEPUB os archive = do -- paths should be absolute when this function is called -- renameImages should do this -fetchImages :: [(FilePath, MIME)] +fetchImages :: [(FilePath, MimeType)] -> FilePath -- ^ Root -> Archive -> Pandoc @@ -110,7 +109,7 @@ renameImages _ x = x imageToPandoc :: FilePath -> Pandoc imageToPandoc s = B.doc . B.para $ B.image s "" mempty -imageMimes :: [String] +imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] type CoverImage = FilePath @@ -131,7 +130,7 @@ parseManifest content = do mime <- findAttrE (emptyName "media-type") e return (uid, (href, mime)) -parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MIME)] +parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do spine <- findElementE (dfName "spine") e let itemRefs = findChildren (dfName "itemref") spine diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 1a4e037cf..36839ddd0 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -42,6 +42,7 @@ import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L import Text.Pandoc.Shared (renderTags', err, fetchItem') import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.MIME (MimeType) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.Options (WriterOptions(..)) @@ -98,8 +99,8 @@ cssURLs media sourceURL d orig = ";base64," `B.append` (encode raw) return $ x `B.append` "url(" `B.append` enc `B.append` rest -getRaw :: MediaBag -> Maybe String -> String -> String - -> IO (ByteString, String) +getRaw :: MediaBag -> Maybe String -> MimeType -> String + -> IO (ByteString, MimeType) getRaw media sourceURL mimetype src = do let ext = map toLower $ takeExtension src fetchResult <- fetchItem' media sourceURL src diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 1f89224ff..54d252d43 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -107,7 +107,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo, import qualified Data.Set as Set import System.Directory import System.FilePath (joinPath, splitDirectories) -import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType) import System.FilePath ( (</>), takeExtension, dropExtension) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S @@ -779,7 +779,7 @@ readDataFileUTF8 userDir fname = -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. fetchItem :: Maybe String -> String - -> IO (Either E.SomeException (BS.ByteString, Maybe String)) + -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) fetchItem sourceURL s = case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of (_, s') | isURI s' -> openURL s' @@ -801,14 +801,14 @@ fetchItem sourceURL s = -- | Like 'fetchItem', but also looks for items in a 'MediaBag'. fetchItem' :: MediaBag -> Maybe String -> String - -> IO (Either E.SomeException (BS.ByteString, Maybe String)) + -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) fetchItem' media sourceURL s = do case lookupMedia s media of Nothing -> fetchItem sourceURL s Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime) -- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) +openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType)) openURL u | Just u' <- stripPrefix "data:" u = let mime = takeWhile (/=',') u' diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 687a85f9c..09321d1cc 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -60,8 +60,9 @@ import Data.Unique (hashUnique, newUnique) import System.Random (randomRIO) import Text.Printf (printf) import qualified Control.Exception as E -import Text.Pandoc.MIME (getMimeType, extensionFromMimeType) -import Control.Applicative ((<|>), (<$>)) +import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef, + extensionFromMimeType) +import Control.Applicative ((<$>), (<|>)) import Data.Maybe (fromMaybe, mapMaybe) data ListMarker = NoMarker @@ -94,7 +95,7 @@ data WriterState = WriterState{ , stFootnotes :: [Element] , stSectionIds :: [String] , stExternalLinks :: M.Map String String - , stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString) + , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString) , stListLevel :: Int , stListNumId :: Int , stLists :: [ListMarker] @@ -207,11 +208,10 @@ writeDocx opts doc@(Pandoc meta _) = do let mkOverrideNode (part', contentType') = mknode "Override" [("PartName",part'),("ContentType",contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _, _) = - mkOverrideNode ("/word/" ++ imgpath, - fromMaybe "application/octet-stream" mbMimeType) - let mkMediaOverride imgpath = mkOverrideNode ('/':imgpath, - fromMaybe "application/octet-stream" - $ getMimeType imgpath) + mkOverrideNode ("/word/" ++ imgpath, + fromMaybe "application/octet-stream" mbMimeType) + let mkMediaOverride imgpath = + mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 2df2b9087..ffd5bf101 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -65,7 +65,7 @@ import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml) import Data.Char ( toLower, isDigit, isAlphaNum ) import Network.URI ( unEscapeString ) -import Text.Pandoc.MIME (getMimeType) +import Text.Pandoc.MIME (MimeType, getMimeType) import qualified Control.Exception as E import Text.Blaze.Html.Renderer.Utf8 (renderHtml) import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags) @@ -849,7 +849,7 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs -mediaTypeOf :: FilePath -> Maybe String +mediaTypeOf :: FilePath -> Maybe MimeType mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index feaa0167c..03f8e8ba4 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef -import Data.List ( isPrefixOf, isSuffixOf ) +import Data.List ( isPrefixOf ) import Data.Maybe ( fromMaybe ) import Text.XML.Light.Output import Text.TeXMath @@ -77,11 +77,7 @@ writeODT opts doc@(Pandoc meta _) = do $ contentEntry : picEntries -- construct META-INF/manifest.xml based on archive let toFileEntry fp = case getMimeType fp of - Nothing -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp - then selfClosingTag "manifest:file-entry" - [("manifest:media-type","application/vnd.oasis.opendocument.formula") - ,("manifest:full-path",fp)] - else empty + Nothing -> empty Just m -> selfClosingTag "manifest:file-entry" [("manifest:media-type", m) ,("manifest:full-path", fp) |