aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/SelfContained.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/SelfContained.hs')
-rw-r--r--src/Text/Pandoc/SelfContained.hs181
1 files changed, 0 insertions, 181 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
deleted file mode 100644
index 85b298a85..000000000
--- a/src/Text/Pandoc/SelfContained.hs
+++ /dev/null
@@ -1,181 +0,0 @@
-{-# LANGUAGE OverloadedStrings #-}
-{-
-Copyright (C) 2011-2016 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.SelfContained
- Copyright : Copyright (C) 2011-2016 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for converting an HTML file into one that can be viewed
-offline, by incorporating linked images, CSS, and scripts into
-the HTML using data URIs.
--}
-module Text.Pandoc.SelfContained ( makeSelfContained ) where
-import Text.HTML.TagSoup
-import Network.URI (isURI, escapeURIString, URI(..), parseURI)
-import Data.ByteString.Base64
-import qualified Data.ByteString.Char8 as B
-import Data.ByteString (ByteString)
-import System.FilePath (takeExtension, takeDirectory, (</>))
-import Data.Char (toLower, isAscii, isAlphaNum)
-import Codec.Compression.GZip as Gzip
-import qualified Data.ByteString.Lazy as L
-import Control.Monad.Trans (MonadIO(..))
-import Text.Pandoc.Shared (renderTags', err, warn, trim)
-import Text.Pandoc.MediaBag (MediaBag)
-import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.UTF8 (toString)
-import Text.Pandoc.Options (WriterOptions(..))
-import Data.List (isPrefixOf)
-import Control.Applicative ((<|>))
-import Text.Parsec (runParserT, ParsecT)
-import qualified Text.Parsec as P
-import Control.Monad.Trans (lift)
-import Text.Pandoc.Class (fetchItem, runIO, setMediaBag)
-
-isOk :: Char -> Bool
-isOk c = isAscii c && isAlphaNum c
-
-makeDataURI :: String -> ByteString -> String
-makeDataURI mime raw =
- if textual
- then "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw)
- else "data:" ++ mime' ++ ";base64," ++ toString (encode raw)
- where textual = "text/" `Data.List.isPrefixOf` mime
- mime' = if textual && ';' `notElem` mime
- then mime ++ ";charset=utf-8"
- else mime -- mime type already has charset
-
-convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
-convertTag media sourceURL t@(TagOpen tagname as)
- | tagname `elem`
- ["img", "embed", "video", "input", "audio", "source", "track"] = do
- as' <- mapM processAttribute as
- return $ TagOpen tagname as'
- where processAttribute (x,y) =
- if x == "src" || x == "href" || x == "poster"
- then do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) y
- return (x, enc)
- else return (x,y)
-convertTag media sourceURL t@(TagOpen "script" as) =
- case fromAttrib "src" t of
- [] -> return t
- src -> do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) src
- return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag media sourceURL t@(TagOpen "link" as) =
- case fromAttrib "href" t of
- [] -> return t
- src -> do
- enc <- getDataURI media sourceURL (fromAttrib "type" t) src
- return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
-convertTag _ _ t = return t
-
-cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
- -> IO ByteString
-cssURLs media sourceURL d orig = do
- res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig
- case res of
- Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig
- Right bs -> return bs
-
-parseCSSUrls :: MediaBag -> Maybe String -> FilePath
- -> ParsecT ByteString () IO ByteString
-parseCSSUrls media sourceURL d = B.concat <$> P.many
- (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther)
-
--- Note: some whitespace in CSS is significant, so we can't collapse it!
-pCSSWhite :: ParsecT ByteString () IO ByteString
-pCSSWhite = B.singleton <$> P.space <* P.spaces
-
-pCSSComment :: ParsecT ByteString () IO ByteString
-pCSSComment = P.try $ do
- P.string "/*"
- P.manyTill P.anyChar (P.try (P.string "*/"))
- return B.empty
-
-pCSSOther :: ParsecT ByteString () IO ByteString
-pCSSOther = do
- (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|>
- (B.singleton <$> P.char 'u') <|>
- (B.singleton <$> P.char '/')
-
-pCSSUrl :: MediaBag -> Maybe String -> FilePath
- -> ParsecT ByteString () IO ByteString
-pCSSUrl media sourceURL d = P.try $ do
- P.string "url("
- P.spaces
- quote <- P.option Nothing (Just <$> P.oneOf "\"'")
- url <- P.manyTill P.anyChar (maybe (P.lookAhead (P.char ')')) P.char quote)
- P.spaces
- P.char ')'
- let fallback = B.pack ("url(" ++ maybe "" (:[]) quote ++ trim url ++
- maybe "" (:[]) quote ++ ")")
- case trim url of
- '#':_ -> return fallback
- 'd':'a':'t':'a':':':_ -> return fallback
- u -> do let url' = if isURI u then u else d </> u
- enc <- lift $ getDataURI media sourceURL "" url'
- return (B.pack $ "url(" ++ enc ++ ")")
-
-
-getDataURI :: MediaBag -> Maybe String -> MimeType -> String
- -> IO String
-getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri
-getDataURI media sourceURL mimetype src = do
- let ext = map toLower $ takeExtension src
- fetchResult <- runIO $ do setMediaBag media
- fetchItem sourceURL src
- (raw, respMime) <- case fetchResult of
- Left msg -> err 67 $ "Could not fetch " ++ src ++
- "\n" ++ show msg
- Right x -> return x
- let raw' = if ext == ".gz"
- then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
- $ [raw]
- else raw
- let mime = case (mimetype, respMime) of
- ("",Nothing) -> error
- $ "Could not determine mime type for `" ++ src ++ "'"
- (x, Nothing) -> x
- (_, Just x ) -> x
- let cssSourceURL = case parseURI src of
- Just u
- | uriScheme u `elem` ["http:","https:"] ->
- Just $ show u{ uriPath = "",
- uriQuery = "",
- uriFragment = "" }
- _ -> Nothing
- result <- if mime == "text/css"
- then cssURLs media cssSourceURL (takeDirectory src) raw'
- else return raw'
- return $ makeDataURI mime result
-
--- | Convert HTML into self-contained HTML, incorporating images,
--- scripts, and CSS using data: URIs.
-makeSelfContained :: MonadIO m => WriterOptions -> MediaBag -> String -> m String
-makeSelfContained opts mediabag inp = liftIO $ do
- let tags = parseTags inp
- out' <- mapM (convertTag mediabag (writerSourceURL opts)) tags
- return $ renderTags' out'