From 2bbf98a6132c56fd675c2427d46ff22d4f143496 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Thu, 23 Feb 2017 15:00:00 +0100
Subject: Put makeSelfContained in PandocMonad instead of IO.

This removes the need to pass MediaBag around and improves
exceptions.  It also opens up the possibility of using
makeSelfContained purely.
---
 src/Text/Pandoc/App.hs           |  2 +-
 src/Text/Pandoc/Logging.hs       | 16 +++++++-
 src/Text/Pandoc/SelfContained.hs | 89 +++++++++++++++++++---------------------
 3 files changed, 58 insertions(+), 49 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index be8f26811..4c5e941e0 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -409,7 +409,7 @@ convertWithOpts opts = do
                 let htmlFormat = format `elem`
                       ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"]
                     selfcontain = if optSelfContained opts && htmlFormat
-                                  then makeSelfContained writerOptions media
+                                  then makeSelfContained writerOptions
                                   else return
                     handleEntities = if htmlFormat && optAscii opts
                                      then toEntities
diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs
index 1f98d019e..bf3e7cb4e 100644
--- a/src/Text/Pandoc/Logging.hs
+++ b/src/Text/Pandoc/Logging.hs
@@ -72,6 +72,8 @@ data LogMessage =
   | CouldNotDetermineImageSize String String
   | CouldNotDetermineMimeType String
   | CouldNotConvertTeXMath String String
+  | CouldNotParseCSS String
+  | Fetching String
   deriving (Show, Eq, Data, Ord, Typeable, Generic)
 
 instance ToJSON LogMessage where
@@ -155,6 +157,12 @@ instance ToJSON LogMessage where
            ["type" .= String "CouldNotConvertTeXMath",
             "contents" .= Text.pack s,
             "message" .= Text.pack msg]
+      CouldNotParseCSS msg ->
+           ["type" .= String "CouldNotParseCSS",
+            "message" .= Text.pack msg]
+      Fetching fp ->
+           ["type" .= String "CouldNotParseCSS",
+            "path" .= Text.pack fp]
 
 showPos :: SourcePos -> String
 showPos pos = sn ++ "line " ++
@@ -208,6 +216,10 @@ showLogMessage msg =
        CouldNotConvertTeXMath s m ->
          "Could not convert TeX math '" ++ s ++ "', rendering as TeX" ++
            if null m then "" else (':':'\n':m)
+       CouldNotParseCSS m ->
+         "Could not parse CSS" ++ if null m then "" else (':':'\n':m)
+       Fetching fp ->
+         "Fetching " ++ fp ++ "..."
 
 messageVerbosity:: LogMessage -> Verbosity
 messageVerbosity msg =
@@ -228,5 +240,5 @@ messageVerbosity msg =
        CouldNotDetermineImageSize{} -> WARNING
        CouldNotDetermineMimeType{} -> WARNING
        CouldNotConvertTeXMath{} -> WARNING
-
-
+       CouldNotParseCSS{} -> WARNING
+       Fetching{} -> INFO
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 5258aa5f7..4ab13d760 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -40,9 +40,7 @@ import System.FilePath (takeExtension, takeDirectory, (</>))
 import Data.Char (toLower, isAscii, isAlphaNum)
 import Codec.Compression.GZip as Gzip
 import qualified Data.ByteString.Lazy as L
-import Control.Monad.Trans (MonadIO(..))
-import Text.Pandoc.Shared (renderTags', err, warn, trim)
-import Text.Pandoc.MediaBag (MediaBag)
+import Text.Pandoc.Shared (renderTags', trim)
 import Text.Pandoc.MIME (MimeType)
 import Text.Pandoc.UTF8 (toString)
 import Text.Pandoc.Options (WriterOptions(..))
@@ -50,8 +48,11 @@ import Data.List (isPrefixOf)
 import Control.Applicative ((<|>))
 import Text.Parsec (runParserT, ParsecT)
 import qualified Text.Parsec as P
+import Control.Monad.Except (throwError)
 import Control.Monad.Trans (lift)
-import Text.Pandoc.Class (fetchItem, runIO, setMediaBag)
+import Text.Pandoc.Class (fetchItem, PandocMonad(..), report)
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
 
 isOk :: Char -> Bool
 isOk c = isAscii c && isAlphaNum c
@@ -66,8 +67,8 @@ makeDataURI mime raw =
                    then mime ++ ";charset=utf-8"
                    else mime  -- mime type already has charset
 
-convertTag :: MediaBag -> Maybe String -> Tag String -> IO (Tag String)
-convertTag media sourceURL t@(TagOpen tagname as)
+convertTag :: PandocMonad m => Maybe String -> Tag String -> m (Tag String)
+convertTag sourceURL t@(TagOpen tagname as)
   | tagname `elem`
      ["img", "embed", "video", "input", "audio", "source", "track"] = do
        as' <- mapM processAttribute as
@@ -75,55 +76,57 @@ convertTag media sourceURL t@(TagOpen tagname as)
   where processAttribute (x,y) =
            if x == "src" || x == "data-src" || x == "href" || x == "poster"
               then do
-                enc <- getDataURI media sourceURL (fromAttrib "type" t) y
+                enc <- getDataURI sourceURL (fromAttrib "type" t) y
                 return (x, enc)
               else return (x,y)
-convertTag media sourceURL t@(TagOpen "script" as) =
+convertTag sourceURL t@(TagOpen "script" as) =
   case fromAttrib "src" t of
        []     -> return t
        src    -> do
-           enc <- getDataURI media sourceURL (fromAttrib "type" t) src
+           enc <- getDataURI sourceURL (fromAttrib "type" t) src
            return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
-convertTag media sourceURL t@(TagOpen "link" as) =
+convertTag sourceURL t@(TagOpen "link" as) =
   case fromAttrib "href" t of
        []  -> return t
        src -> do
-           enc <- getDataURI media sourceURL (fromAttrib "type" t) src
+           enc <- getDataURI sourceURL (fromAttrib "type" t) src
            return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
-convertTag _ _ t = return t
+convertTag _ t = return t
 
-cssURLs :: MediaBag -> Maybe String -> FilePath -> ByteString
-        -> IO ByteString
-cssURLs media sourceURL d orig = do
-  res <- runParserT (parseCSSUrls media sourceURL d) () "css" orig
+cssURLs :: PandocMonad m
+        => Maybe String -> FilePath -> ByteString -> m ByteString
+cssURLs sourceURL d orig = do
+  res <- runParserT (parseCSSUrls sourceURL d) () "css" orig
   case res of
-       Left e    -> warn ("Could not parse CSS: " ++ show e) >> return orig
+       Left e    -> do
+         report $ CouldNotParseCSS (show e)
+         return orig
        Right bs  -> return bs
 
-parseCSSUrls :: MediaBag -> Maybe String -> FilePath
-             -> ParsecT ByteString () IO ByteString
-parseCSSUrls media sourceURL d = B.concat <$> P.many
-    (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther)
+parseCSSUrls :: PandocMonad m
+             => Maybe String -> FilePath -> ParsecT ByteString () m ByteString
+parseCSSUrls sourceURL d = B.concat <$> P.many
+    (pCSSWhite <|> pCSSComment <|> pCSSUrl sourceURL d <|> pCSSOther)
 
 -- Note: some whitespace in CSS is significant, so we can't collapse it!
-pCSSWhite :: ParsecT ByteString () IO ByteString
+pCSSWhite :: PandocMonad m => ParsecT ByteString () m ByteString
 pCSSWhite = B.singleton <$> P.space <* P.spaces
 
-pCSSComment :: ParsecT ByteString () IO ByteString
+pCSSComment :: PandocMonad m => ParsecT ByteString () m ByteString
 pCSSComment = P.try $ do
   P.string "/*"
   P.manyTill P.anyChar (P.try (P.string "*/"))
   return B.empty
 
-pCSSOther :: ParsecT ByteString () IO ByteString
+pCSSOther :: PandocMonad m => ParsecT ByteString () m ByteString
 pCSSOther = do
   (B.pack <$> P.many1 (P.noneOf "u/ \n\r\t")) <|>
     (B.singleton <$> P.char 'u') <|>
     (B.singleton <$> P.char '/')
 
-pCSSUrl :: MediaBag -> Maybe String -> FilePath
-        -> ParsecT ByteString () IO ByteString
-pCSSUrl media sourceURL d = P.try $ do
+pCSSUrl :: PandocMonad m
+        => Maybe String -> FilePath -> ParsecT ByteString () m ByteString
+pCSSUrl sourceURL d = P.try $ do
   P.string "url("
   P.spaces
   quote <- P.option Nothing (Just <$> P.oneOf "\"'")
@@ -136,30 +139,24 @@ pCSSUrl media sourceURL d = P.try $ do
       '#':_ -> return fallback
       'd':'a':'t':'a':':':_ -> return fallback
       u ->  do let url' = if isURI u then u else d </> u
-               enc <- lift $ getDataURI media sourceURL "" url'
+               enc <- lift $ getDataURI sourceURL "" url'
                return (B.pack $ "url(" ++ enc ++ ")")
 
 
-getDataURI :: MediaBag -> Maybe String -> MimeType -> String
-       -> IO String
-getDataURI _ _ _ src@('d':'a':'t':'a':':':_) = return src  -- already data: uri
-getDataURI media sourceURL mimetype src = do
+getDataURI :: PandocMonad m => Maybe String -> MimeType -> String -> m String
+getDataURI _ _ src@('d':'a':'t':'a':':':_) = return src  -- already data: uri
+getDataURI sourceURL mimetype src = do
   let ext = map toLower $ takeExtension src
-  fetchResult <- runIO $ do setMediaBag media
-                            fetchItem sourceURL src
-  (raw, respMime) <- case fetchResult of
-                          Left msg -> err 67 $ "Could not fetch " ++ src ++
-                                               "\n" ++ show msg
-                          Right x  -> return x
+  (raw, respMime) <- fetchItem sourceURL src
   let raw' = if ext == ".gz"
                 then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
                       $ [raw]
                 else raw
-  let mime = case (mimetype, respMime) of
-                  ("",Nothing) -> error
+  mime <- case (mimetype, respMime) of
+                  ("",Nothing) -> throwError $ PandocSomeError
                          $ "Could not determine mime type for `" ++ src ++ "'"
-                  (x, Nothing) -> x
-                  (_, Just x ) -> x
+                  (x, Nothing) -> return x
+                  (_, Just x ) -> return x
   let cssSourceURL = case parseURI src of
                           Just u
                             | uriScheme u `elem` ["http:","https:"] ->
@@ -168,14 +165,14 @@ getDataURI media sourceURL mimetype src = do
                                                uriFragment = "" }
                           _ -> Nothing
   result <- if mime == "text/css"
-               then cssURLs media cssSourceURL (takeDirectory src) raw'
+               then cssURLs cssSourceURL (takeDirectory src) raw'
                else return raw'
   return $ makeDataURI mime result
 
 -- | Convert HTML into self-contained HTML, incorporating images,
 -- scripts, and CSS using data: URIs.
-makeSelfContained :: MonadIO m => WriterOptions -> MediaBag -> String -> m String
-makeSelfContained opts mediabag inp = liftIO $ do
+makeSelfContained :: PandocMonad m => WriterOptions -> String -> m String
+makeSelfContained opts inp = do
   let tags = parseTags inp
-  out' <- mapM (convertTag mediabag (writerSourceURL opts)) tags
+  out' <- mapM (convertTag (writerSourceURL opts)) tags
   return $ renderTags' out'
-- 
cgit v1.2.3