From 7c980f39bf1cff941d3e78056fd69e0b371833e3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 18 Jul 2013 20:58:14 -0700 Subject: Improved fetching of external resources. * In Shared, openURL and fetchItem now return an Either, for better error handling. (API change.) * Better error message when fetching a URL fails with `--self-contained`. * EPUB writer: If resource not found, skip it, as in Docx writer. * Closes #916. --- src/Text/Pandoc/SelfContained.hs | 5 +++-- src/Text/Pandoc/Shared.hs | 16 +++++++++------- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/EPUB.hs | 13 +++++++++---- src/Text/Pandoc/Writers/ODT.hs | 5 ++--- 5 files changed, 24 insertions(+), 17 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index c4613992a..0547bc065 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -40,7 +40,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, ()) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (renderTags', openURL, readDataFile) +import Text.Pandoc.Shared (renderTags', openURL, readDataFile, err) import Text.Pandoc.UTF8 (toString, fromString) import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) @@ -98,7 +98,7 @@ cssURLs userdata d orig = getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) getItem userdata f = if isAbsoluteURI f - then openURL f + then openURL f >>= either handleErr return else do -- strip off trailing query or fragment part, if relative URL. -- this is needed for things like cmunrm.eot?#iefix, @@ -110,6 +110,7 @@ getItem userdata f = exists <- doesFileExist f' cont <- if exists then B.readFile f' else readDataFile userdata f' return (cont, mime) + where handleErr e = err 61 $ "Failed to retrieve " ++ f ++ "\n" ++ show e getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String) getRaw userdata mimetype src = do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 09086da1f..0f2e16d2e 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -95,6 +95,7 @@ import Text.Pandoc.MIME (getMimeType) import System.FilePath ( (), takeExtension, dropExtension ) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S +import qualified Control.Exception as E import Control.Monad (msum, unless) import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) @@ -586,12 +587,13 @@ readDataFileUTF8 userDir fname = -- | Fetch an image or other item from the local filesystem or the net. -- Returns raw content and maybe mime type. -fetchItem :: String -> String -> IO (BS.ByteString, Maybe String) +fetchItem :: String -> String + -> IO (Either E.SomeException (BS.ByteString, Maybe String)) fetchItem sourceDir s = case s of _ | isAbsoluteURI s -> openURL s | isAbsoluteURI sourceDir -> openURL $ sourceDir ++ "/" ++ s - | otherwise -> do + | otherwise -> E.try $ do let mime = case takeExtension s of ".gz" -> getMimeType $ dropExtension s x -> getMimeType x @@ -600,21 +602,21 @@ fetchItem sourceDir s = return (cont, mime) -- | Read from a URL and return raw data and maybe mime type. -openURL :: String -> IO (BS.ByteString, Maybe String) +openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe String)) openURL u | "data:" `isPrefixOf` u = let mime = takeWhile (/=',') $ drop 5 u contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u - in return (contents, Just mime) + in return $ Right (contents, Just mime) #ifdef HTTP_CONDUIT - | otherwise = do + | otherwise = E.try $ do req <- parseUrl u resp <- withManager $ httpLbs req return (BS.concat $ toChunks $ responseBody resp, UTF8.toString `fmap` lookup hContentType (responseHeaders resp)) #else - | otherwise = getBodyAndMimeType `fmap` browse - (do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." + | otherwise = E.try $ getBodyAndMimeType `fmap` browse + (do UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..." setOutHandler $ const (return ()) setAllowRedirects True request (getRequest' u')) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1ed8c2fa5..611cddc65 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -726,7 +726,7 @@ inlineToOpenXML opts (Image alt (src, tit)) = do Just (_,_,_,elt,_) -> return [elt] Nothing -> do let sourceDir = writerSourceDirectory opts - res <- liftIO $ E.try $ fetchItem sourceDir src + res <- liftIO $ fetchItem sourceDir src case res of Left (_ :: E.SomeException) -> do liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index f171a2560..42863ef86 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -123,10 +123,15 @@ writeEPUB opts doc@(Pandoc meta _) = do Pandoc _ blocks <- bottomUpM (transformInline opts' sourceDir picsRef) doc pics <- readIORef picsRef - let readPicEntry (oldsrc, newsrc) = do - (img,_) <- fetchItem sourceDir oldsrc - return $ toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img - picEntries <- mapM readPicEntry pics + let readPicEntry entries (oldsrc, newsrc) = do + res <- fetchItem sourceDir oldsrc + case res of + Left e -> do + warn $ "Could not find image `" ++ oldsrc ++ "', skipping..." + return entries + Right (img,_) -> return $ + (toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries + picEntries <- foldM readPicEntry [] pics -- handle fonts let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index db27286e8..589010bb9 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -42,7 +42,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import Control.Monad (liftM) -import Control.Monad.Trans (liftIO) import Text.Pandoc.XML import Text.Pandoc.Pretty import qualified Control.Exception as E @@ -114,10 +113,10 @@ writeODT opts doc@(Pandoc meta _) = do transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline transformPic sourceDir entriesRef (Image lab (src,_)) = do - res <- liftIO $ E.try $ fetchItem sourceDir src + res <- fetchItem sourceDir src case res of Left (_ :: E.SomeException) -> do - liftIO $ warn $ "Could not find image `" ++ src ++ "', skipping..." + warn $ "Could not find image `" ++ src ++ "', skipping..." return $ Emph lab Right (img, _) -> do let size = imageSize img -- cgit v1.2.3