From e4913d6dba1d413715fb68fd91683ab7b321bec8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 30 Jul 2014 15:26:40 -0700 Subject: Allow --self-contained to get content from MediaBag. Added a parameter to makeSelfContained (API change). --- src/Text/Pandoc/SelfContained.hs | 59 +++++++++++++++++++++++----------------- src/Text/Pandoc/Writers/EPUB.hs | 2 +- 2 files changed, 35 insertions(+), 26 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 2a2f56281..777da3551 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -35,50 +35,53 @@ import Text.HTML.TagSoup import Network.URI (isURI, escapeURIString) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B +import qualified Data.ByteString as BS import Data.ByteString (ByteString) import System.FilePath (takeExtension, dropExtension, takeDirectory, ()) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) +import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err, MediaBag) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) +import qualified Data.Map as M isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c -convertTag :: Maybe FilePath -> Tag String -> IO (Tag String) -convertTag userdata t@(TagOpen tagname as) +convertTag :: MediaBag -> Maybe FilePath -> Tag String -> IO (Tag String) +convertTag media userdata t@(TagOpen tagname as) | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do as' <- mapM processAttribute as return $ TagOpen tagname as' where processAttribute (x,y) = if x == "src" || x == "href" || x == "poster" then do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) y + (raw, mime) <- getRaw media userdata (fromAttrib "type" t) y let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) return (x, enc) else return (x,y) -convertTag userdata t@(TagOpen "script" as) = +convertTag media userdata t@(TagOpen "script" as) = case fromAttrib "src" t of [] -> return t src -> do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) -convertTag userdata t@(TagOpen "link" as) = +convertTag media userdata t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + (raw, mime) <- getRaw media userdata (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) -convertTag _ t = return t +convertTag _ _ t = return t -- NOTE: This is really crude, it doesn't respect CSS comments. -cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString -cssURLs userdata d orig = +cssURLs :: MediaBag -> Maybe FilePath -> FilePath -> ByteString + -> IO ByteString +cssURLs media userdata d orig = case B.breakSubstring "url(" orig of (x,y) | B.null y -> return orig | otherwise -> do @@ -91,14 +94,15 @@ cssURLs userdata d orig = let url' = if isURI url then url else d url - (raw, mime) <- getRaw userdata "" url' - rest <- cssURLs userdata d v + (raw, mime) <- getRaw media userdata "" url' + rest <- cssURLs media userdata d v let enc = "data:" `B.append` fromString mime `B.append` ";base64," `B.append` (encode raw) return $ x `B.append` "url(" `B.append` enc `B.append` rest -getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) -getItem userdata f = +getItem :: MediaBag -> Maybe FilePath -> String + -> IO (ByteString, Maybe String) +getItem media userdata f = if isURI f then openURL f >>= either handleErr return else do @@ -110,14 +114,19 @@ getItem userdata f = ".gz" -> getMimeType $ dropExtension f' x -> getMimeType x exists <- doesFileExist f' - cont <- if exists then B.readFile f' else readDataFile userdata f' + cont <- if exists + then B.readFile f' + else case M.lookup f media of + Just bs -> return $ BS.concat $ L.toChunks bs + Nothing -> readDataFile userdata f' return (cont, mime) where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e -getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String) -getRaw userdata mimetype src = do +getRaw :: MediaBag -> Maybe FilePath -> String -> String + -> IO (ByteString, String) +getRaw media userdata mimetype src = do let ext = map toLower $ takeExtension src - (raw, respMime) <- getItem userdata src + (raw, respMime) <- getItem media userdata src let raw' = if ext == ".gz" then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks $ [raw] @@ -128,20 +137,20 @@ getRaw userdata mimetype src = do (x, Nothing) -> x (_, Just x ) -> x result <- if mime == "text/css" - then cssURLs userdata (takeDirectory src) raw' + then cssURLs media userdata (takeDirectory src) raw' else return raw' return (result, mime) -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. Items specified using absolute -- URLs will be downloaded; those specified using relative URLs will --- be sought first relative to the working directory, then relative +-- be sought first relative to the working directory, then in the +-- media bag, then relative -- to the user data directory (if the first parameter is 'Just' -- a directory), and finally relative to pandoc's default data -- directory. -makeSelfContained :: Maybe FilePath -> String -> IO String -makeSelfContained userdata inp = do +makeSelfContained :: MediaBag -> Maybe FilePath -> String -> IO String +makeSelfContained media userdata inp = do let tags = parseTags inp - out' <- mapM (convertTag userdata) tags + out' <- mapM (convertTag media userdata) tags return $ renderTags' out' - diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 682b61d78..1f222b8b8 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -793,7 +793,7 @@ transformInline opts mediaRef (Image lab (src,tit)) = do return $ Image lab (newsrc, tit) transformInline opts _ (x@(Math _ _)) | WebTeX _ <- writerHTMLMathMethod opts = do - raw <- makeSelfContained Nothing $ writeHtmlInline opts x + raw <- makeSelfContained M.empty Nothing $ writeHtmlInline opts x return $ RawInline (Format "html") raw transformInline opts mediaRef (RawInline fmt raw) | fmt == Format "html" = do -- cgit v1.2.3