diff options
Diffstat (limited to 'src/Text/Pandoc/SelfContained.hs')
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 107 |
1 files changed, 73 insertions, 34 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 5b8f7a75a..a77127286 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011-2014 John MacFarlane + Copyright : Copyright (C) 2011-2015 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -40,15 +40,30 @@ import System.FilePath (takeExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', err, fetchItem') +import Text.Pandoc.Shared (renderTags', err, fetchItem', warn, trim) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.UTF8 (toString, fromString) +import Text.Pandoc.UTF8 (toString) import Text.Pandoc.Options (WriterOptions(..)) +import Data.List (isPrefixOf) +import Control.Applicative +import Text.Parsec (runParserT, ParsecT) +import qualified Text.Parsec as P +import Control.Monad.Trans (lift) isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c +makeDataURI :: String -> ByteString -> String +makeDataURI mime raw = + if textual + then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw) + else "data:" ++ mime' ++ ";base64," ++ toString (encode raw) + where textual = "text/" `Data.List.isPrefixOf` mime + mime' = if textual && ';' `notElem` mime + then mime ++ ";charset=utf-8" + else mime -- mime type already has charset + convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String) convertTag media sourceURL t@(TagOpen tagname as) | tagname `elem` @@ -58,51 +73,75 @@ convertTag media sourceURL t@(TagOpen tagname as) where processAttribute (x,y) = if x == "src" || x == "href" || x == "poster" then do - (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y - let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) + enc <- getDataURI media sourceURL (fromAttrib "type" t) y return (x, enc) else return (x,y) convertTag media sourceURL t@(TagOpen "script" as) = case fromAttrib "src" t of [] -> return t src -> do - (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src - let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) + enc <- getDataURI media sourceURL (fromAttrib "type" t) src return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) convertTag media sourceURL t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do - (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src - let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) + enc <- getDataURI media sourceURL (fromAttrib "type" t) src return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) convertTag _ _ t = return t --- NOTE: This is really crude, it doesn't respect CSS comments. cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString -> IO ByteString -cssURLs media sourceURL d orig = - case B.breakSubstring "url(" orig of - (x,y) | B.null y -> return orig - | otherwise -> do - let (u,v) = B.breakSubstring ")" $ B.drop 4 y - let url = toString - $ case B.take 1 u of - "\"" -> B.takeWhile (/='"') $ B.drop 1 u - "'" -> B.takeWhile (/='\'') $ B.drop 1 u - _ -> u - let url' = if isURI url - then url - else d </> url - (raw, mime) <- getRaw media sourceURL "" url' - rest <- cssURLs media sourceURL d v - let enc = "data:" `B.append` fromString mime `B.append` - ";base64," `B.append` (encode raw) - return $ x `B.append` "url(" `B.append` enc `B.append` rest - -getRaw :: MediaBag -> Maybe String -> MimeType -> String - -> IO (ByteString, MimeType) -getRaw media sourceURL mimetype src = do +cssURLs media sourceURL d orig = do + res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig + case res of + Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig + Right bs -> return bs + +parseCSSUrls :: MediaBag -> Maybe String -> FilePath + -> ParsecT ByteString () IO ByteString +parseCSSUrls media sourceURL d = B.concat <$> P.many + (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther) + +-- Note: some whitespace in CSS is significant, so we can't collapse it! +pCSSWhite :: ParsecT ByteString () IO ByteString +pCSSWhite = B.singleton <$> P.space <* P.spaces + +pCSSComment :: ParsecT ByteString () IO ByteString +pCSSComment = P.try $ do + P.string "/*" + P.manyTill P.anyChar (P.try (P.string "*/")) + return B.empty + +pCSSOther :: ParsecT ByteString () IO ByteString +pCSSOther = do + (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|> + (B.singleton <$> P.char 'u') <|> + (B.singleton <$> P.char '/') + +pCSSUrl :: MediaBag -> Maybe String -> FilePath + -> ParsecT ByteString () IO ByteString +pCSSUrl media sourceURL d = P.try $ do + P.string "url(" + P.spaces + quote <- P.option Nothing (Just <$> P.oneOf "\"'") + url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote) + P.spaces + P.char ')' + let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++ + maybe "" (:[]) quote ++ ")") + case trim url of + '#':_ -> return fallback + 'd':'a':'t':'a':':':_ -> return fallback + u -> do let url' = if isURI u then u else d </> u + enc <- lift $ getDataURI media sourceURL "" url' + return (B.pack enc) + + +getDataURI :: MediaBag -> Maybe String -> MimeType -> String + -> IO String +getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri +getDataURI media sourceURL mimetype src = do let ext = map toLower $ takeExtension src fetchResult <- fetchItem' media sourceURL src (raw, respMime) <- case fetchResult of @@ -128,7 +167,7 @@ getRaw media sourceURL mimetype src = do result <- if mime == "text/css" then cssURLs media cssSourceURL (takeDirectory src) raw' else return raw' - return (result, mime) + return $ makeDataURI mime result -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. |