diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-02-23 15:00:00 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-02-23 15:06:25 +0100 |
commit | 2bbf98a6132c56fd675c2427d46ff22d4f143496 (patch) | |
tree | a02d414de4387b61476f7ba2f8f1d1446d4be793 /src | |
parent | a38f84748459071d514c90e9f18431755772e523 (diff) | |
download | pandoc-2bbf98a6132c56fd675c2427d46ff22d4f143496.tar.gz |
Put makeSelfContained in PandocMonad instead of IO.
This removes the need to pass MediaBag around and improves
exceptions. It also opens up the possibility of using
makeSelfContained purely.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/App.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Logging.hs | 16 | ||||
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 89 |
3 files changed, 58 insertions, 49 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index be8f26811..4c5e941e0 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -409,7 +409,7 @@ convertWithOpts opts = do let htmlFormat = format `elem` ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] selfcontain = if optSelfContained opts && htmlFormat - then makeSelfContained writerOptions media + then makeSelfContained writerOptions else return handleEntities = if htmlFormat && optAscii opts then toEntities diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 1f98d019e..bf3e7cb4e 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -72,6 +72,8 @@ data LogMessage = | CouldNotDetermineImageSize String String | CouldNotDetermineMimeType String | CouldNotConvertTeXMath String String + | CouldNotParseCSS String + | Fetching String deriving (Show, Eq, Data, Ord, Typeable, Generic) instance ToJSON LogMessage where @@ -155,6 +157,12 @@ instance ToJSON LogMessage where ["type" .= String "CouldNotConvertTeXMath", "contents" .= Text.pack s, "message" .= Text.pack msg] + CouldNotParseCSS msg -> + ["type" .= String "CouldNotParseCSS", + "message" .= Text.pack msg] + Fetching fp -> + ["type" .= String "CouldNotParseCSS", + "path" .= Text.pack fp] showPos :: SourcePos -> String showPos pos = sn ++ "line " ++ @@ -208,6 +216,10 @@ showLogMessage msg = CouldNotConvertTeXMath s m -> "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++ if null m then "" else (':':'\n':m) + CouldNotParseCSS m -> + "Could not parse CSS" ++ if null m then "" else (':':'\n':m) + Fetching fp -> + "Fetching " ++ fp ++ "..." messageVerbosity:: LogMessage -> Verbosity messageVerbosity msg = @@ -228,5 +240,5 @@ messageVerbosity msg = CouldNotDetermineImageSize{} -> WARNING CouldNotDetermineMimeType{} -> WARNING CouldNotConvertTeXMath{} -> WARNING - - + CouldNotParseCSS{} -> WARNING + Fetching{} -> INFO diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 5258aa5f7..4ab13d760 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -40,9 +40,7 @@ import System.FilePath (takeExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Control.Monad.Trans (MonadIO(..)) -import Text.Pandoc.Shared (renderTags', err, warn, trim) -import Text.Pandoc.MediaBag (MediaBag) +import Text.Pandoc.Shared (renderTags', trim) import Text.Pandoc.MIME (MimeType) import Text.Pandoc.UTF8 (toString) import Text.Pandoc.Options (WriterOptions(..)) @@ -50,8 +48,11 @@ import Data.List (isPrefixOf) import Control.Applicative ((<|>)) import Text.Parsec (runParserT, ParsecT) import qualified Text.Parsec as P +import Control.Monad.Except (throwError) import Control.Monad.Trans (lift) -import Text.Pandoc.Class (fetchItem, runIO, setMediaBag) +import Text.Pandoc.Class (fetchItem, PandocMonad(..), report) +import Text.Pandoc.Error +import Text.Pandoc.Logging isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c @@ -66,8 +67,8 @@ makeDataURI mime raw = then mime ++ ";charset=utf-8" else mime -- mime type already has charset -convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String) -convertTag media sourceURL t@(TagOpen tagname as) +convertTag :: PandocMonad m => Maybe String -> Tag String -> m (Tag String) +convertTag sourceURL t@(TagOpen tagname as) | tagname `elem` ["img", "embed", "video", "input", "audio", "source", "track"] = do as' <- mapM processAttribute as @@ -75,55 +76,57 @@ convertTag media sourceURL t@(TagOpen tagname as) where processAttribute (x,y) = if x == "src" || x == "data-src" || x == "href" || x == "poster" then do - enc <- getDataURI media sourceURL (fromAttrib "type" t) y + enc <- getDataURI sourceURL (fromAttrib "type" t) y return (x, enc) else return (x,y) -convertTag media sourceURL t@(TagOpen "script" as) = +convertTag sourceURL t@(TagOpen "script" as) = case fromAttrib "src" t of [] -> return t src -> do - enc <- getDataURI media sourceURL (fromAttrib "type" t) src + enc <- getDataURI sourceURL (fromAttrib "type" t) src return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) -convertTag media sourceURL t@(TagOpen "link" as) = +convertTag sourceURL t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do - enc <- getDataURI media sourceURL (fromAttrib "type" t) src + enc <- getDataURI sourceURL (fromAttrib "type" t) src return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) -convertTag _ _ t = return t +convertTag _ t = return t -cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString - -> IO ByteString -cssURLs media sourceURL d orig = do - res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig +cssURLs :: PandocMonad m + => Maybe String -> FilePath -> ByteString -> m ByteString +cssURLs sourceURL d orig = do + res <- runParserT (parseCSSUrls sourceURL d) () "css" orig case res of - Left e -> warn ("Could not parse CSS: " ++ show e) >> return orig + Left e -> do + report $ CouldNotParseCSS (show e) + return orig Right bs -> return bs -parseCSSUrls :: MediaBag -> Maybe String -> FilePath - -> ParsecT ByteString () IO ByteString -parseCSSUrls media sourceURL d = B.concat <$> P.many - (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther) +parseCSSUrls :: PandocMonad m + => Maybe String -> FilePath -> ParsecT ByteString () m ByteString +parseCSSUrls sourceURL d = B.concat <$> P.many + (pCSSWhite <|> pCSSComment <|> pCSSUrl sourceURL d <|> pCSSOther) -- Note: some whitespace in CSS is significant, so we can't collapse it! -pCSSWhite :: ParsecT ByteString () IO ByteString +pCSSWhite :: PandocMonad m => ParsecT ByteString () m ByteString pCSSWhite = B.singleton <$> P.space <* P.spaces -pCSSComment :: ParsecT ByteString () IO ByteString +pCSSComment :: PandocMonad m => ParsecT ByteString () m ByteString pCSSComment = P.try $ do P.string "/*" P.manyTill P.anyChar (P.try (P.string "*/")) return B.empty -pCSSOther :: ParsecT ByteString () IO ByteString +pCSSOther :: PandocMonad m => ParsecT ByteString () m ByteString pCSSOther = do (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|> (B.singleton <$> P.char 'u') <|> (B.singleton <$> P.char '/') -pCSSUrl :: MediaBag -> Maybe String -> FilePath - -> ParsecT ByteString () IO ByteString -pCSSUrl media sourceURL d = P.try $ do +pCSSUrl :: PandocMonad m + => Maybe String -> FilePath -> ParsecT ByteString () m ByteString +pCSSUrl sourceURL d = P.try $ do P.string "url(" P.spaces quote <- P.option Nothing (Just <$> P.oneOf "\"'") @@ -136,30 +139,24 @@ pCSSUrl media sourceURL d = P.try $ do '#':_ -> return fallback 'd':'a':'t':'a':':':_ -> return fallback u -> do let url' = if isURI u then u else d </> u - enc <- lift $ getDataURI media sourceURL "" url' + enc <- lift $ getDataURI sourceURL "" url' return (B.pack $ "url(" ++ enc ++ ")") -getDataURI :: MediaBag -> Maybe String -> MimeType -> String - -> IO String -getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri -getDataURI media sourceURL mimetype src = do +getDataURI :: PandocMonad m => Maybe String -> MimeType -> String -> m String +getDataURI _ _ src@('d':'a':'t':'a':':':_) = return src -- already data: uri +getDataURI sourceURL mimetype src = do let ext = map toLower $ takeExtension src - fetchResult <- runIO $ do setMediaBag media - fetchItem sourceURL src - (raw, respMime) <- case fetchResult of - Left msg -> err 67 $ "Could not fetch " ++ src ++ - "\n" ++ show msg - Right x -> return x + (raw, respMime) <- fetchItem sourceURL src let raw' = if ext == ".gz" then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks $ [raw] else raw - let mime = case (mimetype, respMime) of - ("",Nothing) -> error + mime <- case (mimetype, respMime) of + ("",Nothing) -> throwError $ PandocSomeError $ "Could not determine mime type for `" ++ src ++ "'" - (x, Nothing) -> x - (_, Just x ) -> x + (x, Nothing) -> return x + (_, Just x ) -> return x let cssSourceURL = case parseURI src of Just u | uriScheme u `elem` ["http:","https:"] -> @@ -168,14 +165,14 @@ getDataURI media sourceURL mimetype src = do uriFragment = "" } _ -> Nothing result <- if mime == "text/css" - then cssURLs media cssSourceURL (takeDirectory src) raw' + then cssURLs cssSourceURL (takeDirectory src) raw' else return raw' return $ makeDataURI mime result -- | Convert HTML into self-contained HTML, incorporating images, -- scripts, and CSS using data: URIs. -makeSelfContained :: MonadIO m => WriterOptions -> MediaBag -> String -> m String -makeSelfContained opts mediabag inp = liftIO $ do +makeSelfContained :: PandocMonad m => WriterOptions -> String -> m String +makeSelfContained opts inp = do let tags = parseTags inp - out' <- mapM (convertTag mediabag (writerSourceURL opts)) tags + out' <- mapM (convertTag (writerSourceURL opts)) tags return $ renderTags' out' |