aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-08-30 11:49:50 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-08-30 11:49:50 -0700
commitf70e3c3297bb0d8f953971a84a1a606351fdda40 (patch)
treec95d0f44861578c828aabcade0d0584b25d5eb96 /src/Text/Pandoc
parenteb29aab9598eec04a8c2d37ef24f2eaa5ca1435d (diff)
parent357172f13af7e7ddb6a01e16c664776d01f55565 (diff)
downloadpandoc-f70e3c3297bb0d8f953971a84a1a606351fdda40.tar.gz
Merge branch 'mime' of https://github.com/Aelve/John into Aelve-mime
Conflicts: src/Text/Pandoc/Writers/Docx.hs
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/MIME.hs40
-rw-r--r--src/Text/Pandoc/MediaBag.hs22
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs15
-rw-r--r--src/Text/Pandoc/SelfContained.hs5
-rw-r--r--src/Text/Pandoc/Shared.hs8
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs16
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs4
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs8
8 files changed, 67 insertions, 51 deletions
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 6e6284b25..3b3b3b5b3 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -27,24 +27,44 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Mime type lookup for ODT writer.
-}
-module Text.Pandoc.MIME ( getMimeType, extensionFromMimeType )
-where
+module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef,
+ extensionFromMimeType )where
import System.FilePath
import Data.Char ( toLower )
+import Data.List (isPrefixOf, isSuffixOf)
+import Data.Maybe (fromMaybe)
import qualified Data.Map as M
+type MimeType = String
+
-- | Determine mime type appropriate for file path.
-getMimeType :: FilePath -> Maybe String
-getMimeType "layout-cache" = Just "application/binary" -- in ODT
-getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
- where mimeTypes = M.fromList mimeTypesList
+getMimeType :: FilePath -> Maybe MimeType
+getMimeType fp
+ -- ODT
+ | fp == "layout-cache" =
+ Just "application/binary"
+ | "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp =
+ Just "application/vnd.oasis.opendocument.formula"
+ -- generic
+ | otherwise = M.lookup (map toLower $ drop 1 $ takeExtension fp) mimeTypes
+
+-- | Determime mime type appropriate for file path, defaulting to
+-- “application/octet-stream” if nothing else fits.
+getMimeTypeDef :: FilePath -> MimeType
+getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType
-extensionFromMimeType :: String -> Maybe String
-extensionFromMimeType mimetype = M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes
+extensionFromMimeType :: MimeType -> Maybe String
+extensionFromMimeType mimetype =
+ M.lookup (takeWhile (/=';') mimetype) reverseMimeTypes
-- note: we just look up the basic mime type, dropping the content-encoding etc.
- where reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList
-mimeTypesList :: [(String, String)]
+reverseMimeTypes :: M.Map MimeType String
+reverseMimeTypes = M.fromList $ map (\(k,v) -> (v,k)) mimeTypesList
+
+mimeTypes :: M.Map String MimeType
+mimeTypes = M.fromList mimeTypesList
+
+mimeTypesList :: [(String, MimeType)]
mimeTypesList = -- List borrowed from happstack-server.
[("gz","application/x-gzip")
,("cabal","application/x-cabal")
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index b19804b5f..5921b56cf 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -41,8 +41,8 @@ import System.Directory (createDirectoryIfMissing)
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as BL
import Data.Monoid (Monoid)
-import Control.Monad (when, MonadPlus(..))
-import Text.Pandoc.MIME (getMimeType)
+import Control.Monad (when)
+import Text.Pandoc.MIME (MimeType, getMimeTypeDef)
import qualified Text.Pandoc.UTF8 as UTF8
import Data.Maybe (fromMaybe)
import System.IO (stderr)
@@ -51,7 +51,7 @@ import System.IO (stderr)
-- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty'
-- can be used for an empty 'MediaBag', and '<>' can be used to append
-- two 'MediaBag's.
-newtype MediaBag = MediaBag (M.Map String (String, BL.ByteString))
+newtype MediaBag = MediaBag (M.Map String (MimeType, BL.ByteString))
deriving (Monoid)
instance Show MediaBag where
@@ -59,27 +59,27 @@ instance Show MediaBag where
-- | Insert a media item into a 'MediaBag', replacing any existing
-- value with the same name.
-insertMedia :: FilePath -- ^ relative path and canonical name of resource
- -> Maybe String -- ^ mime type (Nothing = determine from extension)
- -> BL.ByteString -- ^ contents of resource
+insertMedia :: FilePath -- ^ relative path and canonical name of resource
+ -> Maybe MimeType -- ^ mime type (Nothing = determine from extension)
+ -> BL.ByteString -- ^ contents of resource
-> MediaBag
-> MediaBag
insertMedia fp mbMime contents (MediaBag mediamap) =
MediaBag (M.insert fp (mime, contents) mediamap)
- where mime = fromMaybe "application/octet-stream" (mbMime `mplus` fallback)
+ where mime = fromMaybe fallback mbMime
fallback = case takeExtension fp of
- ".gz" -> getMimeType $ dropExtension fp
- _ -> getMimeType fp
+ ".gz" -> getMimeTypeDef $ dropExtension fp
+ _ -> getMimeTypeDef fp
-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
lookupMedia :: FilePath
-> MediaBag
- -> Maybe (String, BL.ByteString)
+ -> Maybe (MimeType, BL.ByteString)
lookupMedia fp (MediaBag mediamap) = M.lookup fp mediamap
-- | Get a list of the file paths stored in a 'MediaBag', with
-- their corresponding mime types and the lengths in bytes of the contents.
-mediaDirectory :: MediaBag -> [(String, String, Int)]
+mediaDirectory :: MediaBag -> [(String, MimeType, Int)]
mediaDirectory (MediaBag mediamap) =
M.foldWithKey (\fp (mime,contents) ->
((fp, mime, fromIntegral $ BL.length contents):)) [] mediamap
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 052d7b832..b061d8683 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -16,6 +16,7 @@ import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
+import Text.Pandoc.MIME (MimeType)
import qualified Text.Pandoc.Builder as B
import Codec.Archive.Zip ( Archive (..), toArchive, fromEntry
, findEntryByPath, Entry)
@@ -34,9 +35,7 @@ import Control.DeepSeq.Generics (deepseq, NFData)
import Debug.Trace (trace)
-type MIME = String
-
-type Items = M.Map String (FilePath, MIME)
+type Items = M.Map String (FilePath, MimeType)
readEPUB :: ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)
readEPUB opts bytes = runEPUB (archiveToEPUB opts $ toArchive bytes)
@@ -65,13 +64,13 @@ archiveToEPUB os archive = do
return $ (ast, mediaBag)
where
os' = os {readerParseRaw = True}
- parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc
+ parseSpineElem :: MonadError String m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
when (readerTrace os) (traceM path)
doc <- mimeToReader mime r path
let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
return $ docSpan <> doc
- mimeToReader :: MonadError String m => MIME -> FilePath -> FilePath -> m Pandoc
+ mimeToReader :: MonadError String m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader "application/xhtml+xml" (normalise -> root) (normalise -> path) = do
fname <- findEntryByPathE (root </> path) archive
return $ fixInternalReferences path .
@@ -84,7 +83,7 @@ archiveToEPUB os archive = do
-- paths should be absolute when this function is called
-- renameImages should do this
-fetchImages :: [(FilePath, MIME)]
+fetchImages :: [(FilePath, MimeType)]
-> FilePath -- ^ Root
-> Archive
-> Pandoc
@@ -110,7 +109,7 @@ renameImages _ x = x
imageToPandoc :: FilePath -> Pandoc
imageToPandoc s = B.doc . B.para $ B.image s "" mempty
-imageMimes :: [String]
+imageMimes :: [MimeType]
imageMimes = ["image/gif", "image/jpeg", "image/png"]
type CoverImage = FilePath
@@ -131,7 +130,7 @@ parseManifest content = do
mime <- findAttrE (emptyName "media-type") e
return (uid, (href, mime))
-parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MIME)]
+parseSpine :: MonadError String m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
spine <- findElementE (dfName "spine") e
let itemRefs = findChildren (dfName "itemref") spine
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 1a4e037cf..36839ddd0 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -42,6 +42,7 @@ import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
import Text.Pandoc.Shared (renderTags', err, fetchItem')
import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.MIME (MimeType)
import Text.Pandoc.UTF8 (toString, fromString)
import Text.Pandoc.Options (WriterOptions(..))
@@ -98,8 +99,8 @@ cssURLs media sourceURL d orig =
";base64," `B.append` (encode raw)
return $ x `B.append` "url(" `B.append` enc `B.append` rest
-getRaw :: MediaBag -> Maybe String -> String -> String
- -> IO (ByteString, String)
+getRaw :: MediaBag -> Maybe String -> MimeType -> String
+ -> IO (ByteString, MimeType)
getRaw media sourceURL mimetype src = do
let ext = map toLower $ takeExtension src
fetchResult <- fetchItem' media sourceURL src
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 1f89224ff..54d252d43 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -107,7 +107,7 @@ import Network.URI ( escapeURIString, isURI, nonStrictRelativeTo,
import qualified Data.Set as Set
import System.Directory
import System.FilePath (joinPath, splitDirectories)
-import Text.Pandoc.MIME (getMimeType)
+import Text.Pandoc.MIME (MimeType, getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
@@ -779,7 +779,7 @@ readDataFileUTF8 userDir fname =
-- | Fetch an image or other item from the local filesystem or the net.
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
- -> IO (Either E.SomeException (BS.ByteString, Maybe String))
+ -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
fetchItem sourceURL s =
case (sourceURL >>= parseURIReference . ensureEscaped, ensureEscaped s) of
(_, s') | isURI s' -> openURL s'
@@ -801,14 +801,14 @@ fetchItem sourceURL s =
-- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
fetchItem' :: MediaBag -> Maybe String -> String
- -> IO (Either E.SomeException (BS.ByteString, Maybe String))
+ -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
fetchItem' media sourceURL s = do
case lookupMedia s media of
Nothing -> fetchItem sourceURL s
Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
-- | Read from a URL and return raw data and maybe mime type.
-openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String))
+openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
openURL u
| Just u' <- stripPrefix "data:" u =
let mime = takeWhile (/=',') u'
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 687a85f9c..09321d1cc 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -60,8 +60,9 @@ import Data.Unique (hashUnique, newUnique)
import System.Random (randomRIO)
import Text.Printf (printf)
import qualified Control.Exception as E
-import Text.Pandoc.MIME (getMimeType, extensionFromMimeType)
-import Control.Applicative ((<|>), (<$>))
+import Text.Pandoc.MIME (MimeType, getMimeType, getMimeTypeDef,
+ extensionFromMimeType)
+import Control.Applicative ((<$>), (<|>))
import Data.Maybe (fromMaybe, mapMaybe)
data ListMarker = NoMarker
@@ -94,7 +95,7 @@ data WriterState = WriterState{
, stFootnotes :: [Element]
, stSectionIds :: [String]
, stExternalLinks :: M.Map String String
- , stImages :: M.Map FilePath (String, String, Maybe String, Element, B.ByteString)
+ , stImages :: M.Map FilePath (String, String, Maybe MimeType, Element, B.ByteString)
, stListLevel :: Int
, stListNumId :: Int
, stLists :: [ListMarker]
@@ -207,11 +208,10 @@ writeDocx opts doc@(Pandoc meta _) = do
let mkOverrideNode (part', contentType') = mknode "Override"
[("PartName",part'),("ContentType",contentType')] ()
let mkImageOverride (_, imgpath, mbMimeType, _, _) =
- mkOverrideNode ("/word/" ++ imgpath,
- fromMaybe "application/octet-stream" mbMimeType)
- let mkMediaOverride imgpath = mkOverrideNode ('/':imgpath,
- fromMaybe "application/octet-stream"
- $ getMimeType imgpath)
+ mkOverrideNode ("/word/" ++ imgpath,
+ fromMaybe "application/octet-stream" mbMimeType)
+ let mkMediaOverride imgpath =
+ mkOverrideNode ('/':imgpath, getMimeTypeDef imgpath)
let overrides = map mkOverrideNode (
[("/word/webSettings.xml",
"application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml")
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 2df2b9087..ffd5bf101 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -65,7 +65,7 @@ import Text.Pandoc.UUID (getRandomUUID)
import Text.Pandoc.Writers.HTML (writeHtmlString, writeHtml)
import Data.Char ( toLower, isDigit, isAlphaNum )
import Network.URI ( unEscapeString )
-import Text.Pandoc.MIME (getMimeType)
+import Text.Pandoc.MIME (MimeType, getMimeType)
import qualified Control.Exception as E
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
import Text.HTML.TagSoup (Tag(TagOpen), fromAttrib, parseTags)
@@ -849,7 +849,7 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity .
Nothing -> '&':'#':unEntity xs
unEntity (x:xs) = x : unEntity xs
-mediaTypeOf :: FilePath -> Maybe String
+mediaTypeOf :: FilePath -> Maybe MimeType
mediaTypeOf x =
let mediaPrefixes = ["image", "video", "audio"] in
case getMimeType x of
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index feaa0167c..03f8e8ba4 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -30,7 +30,7 @@ Conversion of 'Pandoc' documents to ODT.
-}
module Text.Pandoc.Writers.ODT ( writeODT ) where
import Data.IORef
-import Data.List ( isPrefixOf, isSuffixOf )
+import Data.List ( isPrefixOf )
import Data.Maybe ( fromMaybe )
import Text.XML.Light.Output
import Text.TeXMath
@@ -77,11 +77,7 @@ writeODT opts doc@(Pandoc meta _) = do
$ contentEntry : picEntries
-- construct META-INF/manifest.xml based on archive
let toFileEntry fp = case getMimeType fp of
- Nothing -> if "Formula-" `isPrefixOf` fp && "/" `isSuffixOf` fp
- then selfClosingTag "manifest:file-entry"
- [("manifest:media-type","application/vnd.oasis.opendocument.formula")
- ,("manifest:full-path",fp)]
- else empty
+ Nothing -> empty
Just m -> selfClosingTag "manifest:file-entry"
[("manifest:media-type", m)
,("manifest:full-path", fp)