aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/SelfContained.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/SelfContained.hs')
-rw-r--r--src/Text/Pandoc/SelfContained.hs107
1 files changed, 73 insertions, 34 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 5b8f7a75a..a77127286 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-
-Copyright (C) 2011-2014 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2011-2015 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.SelfContained
- Copyright : Copyright (C) 2011-2014 John MacFarlane
+ Copyright : Copyright (C) 2011-2015 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -40,15 +40,30 @@ import System.FilePath (takeExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (renderTags', err, fetchItem')
+import Text.Pandoc.Shared (renderTags', err, fetchItem', warn, trim)
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.UTF8 (toString, fromString)
+import Text.Pandoc.UTF8 (toString)
import Text.Pandoc.Options (WriterOptions(..))
+import Data.List (isPrefixOf)
+import Control.Applicative
+import Text.Parsec (runParserT, ParsecT)
+import qualified Text.Parsec as P
+import Control.Monad.Trans (lift)
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
+makeDataURI :: String -> ByteString -> String
+makeDataURI mime raw =
+ if textual
+ then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw)
+ else "data:" ++ mime' ++ ";base64," ++ toString (encode raw)
+ where textual = "text/" `Data.List.isPrefixOf` mime
+ mime' = if textual && ';' `notElem` mime
+ then mime ++ ";charset=utf-8"
+ else mime -- mime type already has charset
+
convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
convertTag media sourceURL t@(TagOpen tagname as)
| tagname `elem`
@@ -58,51 +73,75 @@ convertTag media sourceURL t@(TagOpen tagname as)
where processAttribute (x,y) =
if x == "src" || x == "href" || x == "poster"
then do
- (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) y
- let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
+ enc <- getDataURI media sourceURL (fromAttrib "type" t) y
return (x, enc)
else return (x,y)
convertTag media sourceURL t@(TagOpen "script" as) =
case fromAttrib "src" t of
[] -> return t
src -> do
- (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
- let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
+ enc <- getDataURI media sourceURL (fromAttrib "type" t) src
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
convertTag media sourceURL t@(TagOpen "link" as) =
case fromAttrib "href" t of
[] -> return t
src -> do
- (raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
- let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
+ enc <- getDataURI media sourceURL (fromAttrib "type" t) src
return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
convertTag _ _ t = return t
--- NOTE: This is really crude, it doesn't respect CSS comments.
cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
-> IO ByteString
-cssURLs media sourceURL d orig =
- case B.breakSubstring "url(" orig of
- (x,y) | B.null y -> return orig
- | otherwise -> do
- let (u,v) = B.breakSubstring ")" $ B.drop 4 y
- let url = toString
- $ case B.take 1 u of
- "\"" -> B.takeWhile (/='"') $ B.drop 1 u
- "'" -> B.takeWhile (/='\'') $ B.drop 1 u
- _ -> u
- let url' = if isURI url
- then url
- else d </> url
- (raw, mime) <- getRaw media sourceURL "" url'
- rest <- cssURLs media sourceURL d v
- let enc = "data:" `B.append` fromString mime `B.append`
- ";base64," `B.append` (encode raw)
- return $ x `B.append` "url(" `B.append` enc `B.append` rest
-
-getRaw :: MediaBag -> Maybe String -> MimeType -> String
- -> IO (ByteString, MimeType)
-getRaw media sourceURL mimetype src = do
+cssURLs media sourceURL d orig = do
+ res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig
+ case res of
+ Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig
+ Right bs -> return bs
+
+parseCSSUrls :: MediaBag -> Maybe String -> FilePath
+ -> ParsecT ByteString () IO ByteString
+parseCSSUrls media sourceURL d = B.concat <$> P.many
+ (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther)
+
+-- Note: some whitespace in CSS is significant, so we can't collapse it!
+pCSSWhite :: ParsecT ByteString () IO ByteString
+pCSSWhite = B.singleton <$> P.space <* P.spaces
+
+pCSSComment :: ParsecT ByteString () IO ByteString
+pCSSComment = P.try $ do
+ P.string "/*"
+ P.manyTill P.anyChar (P.try (P.string "*/"))
+ return B.empty
+
+pCSSOther :: ParsecT ByteString () IO ByteString
+pCSSOther = do
+ (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|>
+ (B.singleton <$> P.char 'u') <|>
+ (B.singleton <$> P.char '/')
+
+pCSSUrl :: MediaBag -> Maybe String -> FilePath
+ -> ParsecT ByteString () IO ByteString
+pCSSUrl media sourceURL d = P.try $ do
+ P.string "url("
+ P.spaces
+ quote <- P.option Nothing (Just <$> P.oneOf "\"'")
+ url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote)
+ P.spaces
+ P.char ')'
+ let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++
+ maybe "" (:[]) quote ++ ")")
+ case trim url of
+ '#':_ -> return fallback
+ 'd':'a':'t':'a':':':_ -> return fallback
+ u -> do let url' = if isURI u then u else d </> u
+ enc <- lift $ getDataURI media sourceURL "" url'
+ return (B.pack enc)
+
+
+getDataURI :: MediaBag -> Maybe String -> MimeType -> String
+ -> IO String
+getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri
+getDataURI media sourceURL mimetype src = do
let ext = map toLower $ takeExtension src
fetchResult <- fetchItem' media sourceURL src
(raw, respMime) <- case fetchResult of
@@ -128,7 +167,7 @@ getRaw media sourceURL mimetype src = do
result <- if mime == "text/css"
then cssURLs media cssSourceURL (takeDirectory src) raw'
else return raw'
- return (result, mime)
+ return $ makeDataURI mime result
-- | Convert HTML into self-contained HTML, incorporating images,
-- scripts, and CSS using data: URIs.