aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-07-30 15:26:40 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-07-30 15:26:40 -0700
commite4913d6dba1d413715fb68fd91683ab7b321bec8 (patch)
tree8e20d225d3678e12bd8f176166981b09522e20d6 /src
parent23d806644f07e1df35b946bfac63d877005df443 (diff)
downloadpandoc-e4913d6dba1d413715fb68fd91683ab7b321bec8.tar.gz
Allow --self-contained to get content from MediaBag.
Added a parameter to makeSelfContained (API change).
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/SelfContained.hs59
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs2
2 files changed, 35 insertions, 26 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 2a2f56281..777da3551 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -35,50 +35,53 @@ import Text.HTML.TagSoup
import Network.URI (isURI, escapeURIString)
import Data.ByteString.Base64
import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString as BS
import Data.ByteString (ByteString)
import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err)
+import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, MediaBag)
import Text.Pandoc.UTF8 (toString, fromString)
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
+import qualified Data.Map as M
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
-convertTag :: Maybe FilePath -> Tag String -> IO (Tag String)
-convertTag userdata t@(TagOpen tagname as)
+convertTag :: MediaBag -> Maybe FilePath -> Tag String -> IO (Tag String)
+convertTag media userdata t@(TagOpen tagname as)
| tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do
as' <- mapM processAttribute as
return $ TagOpen tagname as'
where processAttribute (x,y) =
if x == "src" || x == "href" || x == "poster"
then do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) y
+ (raw, mime) <- getRaw media userdata (fromAttrib "type" t) y
let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
return (x, enc)
else return (x,y)
-convertTag userdata t@(TagOpen "script" as) =
+convertTag media userdata t@(TagOpen "script" as) =
case fromAttrib "src" t of
[] -> return t
src -> do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag userdata t@(TagOpen "link" as) =
+convertTag media userdata t@(TagOpen "link" as) =
case fromAttrib "href" t of
[] -> return t
src -> do
- (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
-convertTag _ t = return t
+convertTag _ _ t = return t
-- NOTE: This is really crude, it doesn't respect CSS comments.
-cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString
-cssURLs userdata d orig =
+cssURLs :: MediaBag -> Maybe FilePath -> FilePath -> ByteString
+ -> IO ByteString
+cssURLs media userdata d orig =
case B.breakSubstring "url(" orig of
(x,y) | B.null y -> return orig
| otherwise -> do
@@ -91,14 +94,15 @@ cssURLs userdata d orig =
let url' = if isURI url
then url
else d </> url
- (raw, mime) <- getRaw userdata "" url'
- rest <- cssURLs userdata d v
+ (raw, mime) <- getRaw media userdata "" url'
+ rest <- cssURLs media userdata d v
let enc = "data:" `B.append` fromString mime `B.append`
";base64," `B.append` (encode raw)
return $ x `B.append` "url(" `B.append` enc `B.append` rest
-getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
-getItem userdata f =
+getItem :: MediaBag -> Maybe FilePath -> String
+ -> IO (ByteString, Maybe String)
+getItem media userdata f =
if isURI f
then openURL f >>= either handleErr return
else do
@@ -110,14 +114,19 @@ getItem userdata f =
".gz" -> getMimeType $ dropExtension f'
x -> getMimeType x
exists <- doesFileExist f'
- cont <- if exists then B.readFile f' else readDataFile userdata f'
+ cont <- if exists
+ then B.readFile f'
+ else case M.lookup f media of
+ Just bs -> return $ BS.concat $ L.toChunks bs
+ Nothing -> readDataFile userdata f'
return (cont, mime)
where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e
-getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String)
-getRaw userdata mimetype src = do
+getRaw :: MediaBag -> Maybe FilePath -> String -> String
+ -> IO (ByteString, String)
+getRaw media userdata mimetype src = do
let ext = map toLower $ takeExtension src
- (raw, respMime) <- getItem userdata src
+ (raw, respMime) <- getItem media userdata src
let raw' = if ext == ".gz"
then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
$ [raw]
@@ -128,20 +137,20 @@ getRaw userdata mimetype src = do
(x, Nothing) -> x
(_, Just x ) -> x
result <- if mime == "text/css"
- then cssURLs userdata (takeDirectory src) raw'
+ then cssURLs media userdata (takeDirectory src) raw'
else return raw'
return (result, mime)
-- | Convert HTML into self-contained HTML, incorporating images,
-- scripts, and CSS using data: URIs. Items specified using absolute
-- URLs will be downloaded; those specified using relative URLs will
--- be sought first relative to the working directory, then relative
+-- be sought first relative to the working directory, then in the
+-- media bag, then relative
-- to the user data directory (if the first parameter is 'Just'
-- a directory), and finally relative to pandoc's default data
-- directory.
-makeSelfContained :: Maybe FilePath -> String -> IO String
-makeSelfContained userdata inp = do
+makeSelfContained :: MediaBag -> Maybe FilePath -> String -> IO String
+makeSelfContained media userdata inp = do
let tags = parseTags inp
- out' <- mapM (convertTag userdata) tags
+ out' <- mapM (convertTag media userdata) tags
return $ renderTags' out'
-
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 682b61d78..1f222b8b8 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -793,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
- raw <- makeSelfContained Nothing $ writeHtmlInline opts x
+ raw <- makeSelfContained M.empty Nothing $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
transformInline opts mediaRef (RawInline fmt raw)
| fmt == Format "html" = do