diff options
Diffstat (limited to 'src/Text/Pandoc/SelfContained.hs')
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 86 |
1 files changed, 38 insertions, 48 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 2a2f56281..1a4e037cf 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -32,53 +32,54 @@ the HTML using data URIs. -} module Text.Pandoc.SelfContained ( makeSelfContained ) where import Text.HTML.TagSoup -import Network.URI (isURI, escapeURIString) +import Network.URI (isURI, escapeURIString, URI(..), parseURI) import Data.ByteString.Base64 import qualified Data.ByteString.Char8 as B import Data.ByteString (ByteString) -import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) +import System.FilePath (takeExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) +import Text.Pandoc.Shared (renderTags', err, fetchItem') +import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.UTF8 (toString, fromString) -import Text.Pandoc.MIME (getMimeType) -import System.Directory (doesFileExist) +import Text.Pandoc.Options (WriterOptions(..)) isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c -convertTag :: Maybe FilePath -> Tag String -> IO (Tag String) -convertTag userdata t@(TagOpen tagname as) +convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String) +convertTag media sourceURL t@(TagOpen tagname as) | tagname `elem` ["img", "embed", "video", "input", "audio", "source"] = do as' <- mapM processAttribute as return $ TagOpen tagname as' where processAttribute (x,y) = if x == "src" || x == "href" || x == "poster" then do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) y + (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) return (x, enc) else return (x,y) -convertTag userdata t@(TagOpen "script" as) = +convertTag media sourceURL t@(TagOpen "script" as) = case fromAttrib "src" t of [] -> return t src -> do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) -convertTag userdata t@(TagOpen "link" as) = +convertTag media sourceURL t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do - (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) -convertTag _ t = return t +convertTag _ _ t = return t -- NOTE: This is really crude, it doesn't respect CSS comments. -cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString -cssURLs userdata d orig = +cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString + -> IO ByteString +cssURLs media sourceURL d orig = case B.breakSubstring "url(" orig of (x,y) | B.null y -> return orig | otherwise -> do @@ -91,33 +92,21 @@ cssURLs userdata d orig = let url' = if isURI url then url else d </> url - (raw, mime) <- getRaw userdata "" url' - rest <- cssURLs userdata d v + (raw, mime) <- getRaw media sourceURL "" url' + rest <- cssURLs media sourceURL d v let enc = "data:" `B.append` fromString mime `B.append` ";base64," `B.append` (encode raw) return $ x `B.append` "url(" `B.append` enc `B.append` rest -getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) -getItem userdata f = - if isURI f - then openURL f >>= either handleErr return - else do - -- strip off trailing query or fragment part, if relative URL. - -- this is needed for things like cmunrm.eot?#iefix, - -- which is used to get old versions of IE to work with web fonts. - let f' = takeWhile (\c -> c /= '?' && c /= '#') f - let mime = case takeExtension f' of - ".gz" -> getMimeType $ dropExtension f' - x -> getMimeType x - exists <- doesFileExist f' - cont <- if exists then B.readFile f' else readDataFile userdata f' - return (cont, mime) - where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e - -getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String) -getRaw userdata mimetype src = do +getRaw :: MediaBag -> Maybe String -> String -> String + -> IO (ByteString, String) +getRaw media sourceURL mimetype src = do let ext = map toLower $ takeExtension src - (raw, respMime) <- getItem userdata src + fetchResult <- fetchItem' media sourceURL src + (raw, respMime) <- case fetchResult of + Left msg -> err 67 $ "Could not fetch " ++ src ++ + "\n" ++ show msg + Right x -> return x let raw' = if ext == ".gz" then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks $ [raw] @@ -127,21 +116,22 @@ getRaw userdata mimetype src = do $ "Could not determine mime type for `" ++ src ++ "'" (x, Nothing) -> x (_, Just x ) -> x + let cssSourceURL = case parseURI src of + Just u + | uriScheme u `elem` ["http:","https:"] -> + Just $ show u{ uriPath = "", + uriQuery = "", + uriFragment = "" } + _ -> Nothing result <- if mime == "text/css" - then cssURLs userdata (takeDirectory src) raw' + then cssURLs media cssSourceURL (takeDirectory src) raw' else return raw' return (result, mime) -- | Convert HTML into self-contained HTML, incorporating images, --- scripts, and CSS using data: URIs. Items specified using absolute --- URLs will be downloaded; those specified using relative URLs will --- be sought first relative to the working directory, then relative --- to the user data directory (if the first parameter is 'Just' --- a directory), and finally relative to pandoc's default data --- directory. -makeSelfContained :: Maybe FilePath -> String -> IO String -makeSelfContained userdata inp = do +-- scripts, and CSS using data: URIs. +makeSelfContained :: WriterOptions -> String -> IO String +makeSelfContained opts inp = do let tags = parseTags inp - out' <- mapM (convertTag userdata) tags + out' <- mapM (convertTag (writerMediaBag opts) (writerSourceURL opts)) tags return $ renderTags' out' - |