aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Class/PandocIO.hs
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2020-03-21 14:26:28 +0100
committerAlbert Krewinkel <albert@zeitkraut.de>2020-03-22 14:59:38 +0100
commit2a042ff711ec9724a8b63b7cb4d29109379330c5 (patch)
tree3f13c755e249c9b66c442431c72a4250aeec1696 /src/Text/Pandoc/Class/PandocIO.hs
parent66375f3fc4130a68cdcf70111858146583ac2ef3 (diff)
downloadpandoc-2a042ff711ec9724a8b63b7cb4d29109379330c5.tar.gz
Text.Pandoc.Class: extract submodules PandocIO, PandocPure
Diffstat (limited to 'src/Text/Pandoc/Class/PandocIO.hs')
-rw-r--r--src/Text/Pandoc/Class/PandocIO.hs218
1 files changed, 218 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Class/PandocIO.hs b/src/Text/Pandoc/Class/PandocIO.hs
new file mode 100644
index 000000000..1cbfd680e
--- /dev/null
+++ b/src/Text/Pandoc/Class/PandocIO.hs
@@ -0,0 +1,218 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+Module : Text.Pandoc.Class.PandocIO
+Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability : alpha
+Portability : portable
+
+This module defines @'PandocIO'@, an IO-based instance of the
+@'PandocMonad'@ type class. File, data, and network access all are run
+using IO operators.
+-}
+module Text.Pandoc.Class.PandocIO
+ ( getPOSIXTime
+ , getZonedTime
+ , readFileFromDirs
+ , report
+ , setTrace
+ , setRequestHeader
+ , getLog
+ , setVerbosity
+ , getVerbosity
+ , getMediaBag
+ , setMediaBag
+ , insertMedia
+ , setUserDataDir
+ , getUserDataDir
+ , fetchItem
+ , getInputFiles
+ , setInputFiles
+ , getOutputFile
+ , setOutputFile
+ , setResourcePath
+ , getResourcePath
+ , PandocIO(..)
+ , runIO
+ , runIOorExplode
+ , extractMedia
+ ) where
+
+import Control.Monad.Except
+import Control.Monad.State.Strict
+import Data.ByteString.Base64 (decodeLenient)
+import Data.ByteString.Lazy (toChunks)
+import Data.Default
+import Data.Text (Text)
+import Data.Unique (hashUnique)
+import Network.HTTP.Client
+ (httpLbs, responseBody, responseHeaders,
+ Request(port, host, requestHeaders), parseRequest, newManager)
+import Network.HTTP.Client.Internal (addProxy)
+import Network.HTTP.Client.TLS (tlsManagerSettings)
+import Network.HTTP.Types.Header ( hContentType )
+import Network.Socket (withSocketsDo)
+import Network.URI ( unEscapeString )
+import Prelude
+import System.Directory (createDirectoryIfMissing)
+import System.Environment (getEnv)
+import System.FilePath ((</>), takeDirectory, normalise)
+import System.IO (stderr)
+import System.IO.Error
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.Walk (walk)
+import qualified Control.Exception as E
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.CaseInsensitive as CI
+import qualified Data.Text as T
+import qualified Data.Time as IO (getCurrentTime)
+import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
+import qualified Data.Unique as IO (newUnique)
+import qualified System.Directory as Directory
+import qualified System.Directory as IO (getModificationTime)
+import qualified System.Environment as IO (lookupEnv)
+import qualified System.FilePath.Glob as IO (glob)
+import qualified System.Random as IO (newStdGen)
+import qualified Text.Pandoc.UTF8 as UTF8
+#ifndef EMBED_DATA_FILES
+import qualified Paths_pandoc as Paths
+#endif
+
+-- | Evaluate a 'PandocIO' operation.
+runIO :: PandocIO a -> IO (Either PandocError a)
+runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
+
+-- | Evaluate a 'PandocIO' operation, handling any errors
+-- by exiting with an appropriate message and error status.
+runIOorExplode :: PandocIO a -> IO a
+runIOorExplode ma = runIO ma >>= handleError
+
+newtype PandocIO a = PandocIO {
+ unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
+ } deriving ( MonadIO
+ , Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+-- | Utility function to lift IO errors into 'PandocError's.
+liftIOError :: (String -> IO a) -> String -> PandocIO a
+liftIOError f u = do
+ res <- liftIO $ tryIOError $ f u
+ case res of
+ Left e -> throwError $ PandocIOError (T.pack u) e
+ Right r -> return r
+
+-- | Show potential IO errors to the user continuing execution anyway
+logIOError :: IO () -> PandocIO ()
+logIOError f = do
+ res <- liftIO $ tryIOError f
+ case res of
+ Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e
+ Right _ -> pure ()
+
+instance PandocMonad PandocIO where
+ lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack
+ getCurrentTime = liftIO IO.getCurrentTime
+ getCurrentTimeZone = liftIO IO.getCurrentTimeZone
+ newStdGen = liftIO IO.newStdGen
+ newUniqueHash = hashUnique <$> liftIO IO.newUnique
+
+ openURL u
+ | Just u'' <- T.stripPrefix "data:" u = do
+ let mime = T.takeWhile (/=',') u''
+ let contents = UTF8.fromString $
+ unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u''
+ return (decodeLenient contents, Just mime)
+ | otherwise = do
+ let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
+ customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
+ report $ Fetching u
+ res <- liftIO $ E.try $ withSocketsDo $ do
+ let parseReq = parseRequest
+ proxy <- tryIOError (getEnv "http_proxy")
+ let addProxy' x = case proxy of
+ Left _ -> return x
+ Right pr -> parseReq pr >>= \r ->
+ return (addProxy (host r) (port r) x)
+ req <- parseReq (T.unpack u) >>= addProxy'
+ let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
+ resp <- newManager tlsManagerSettings >>= httpLbs req'
+ return (B.concat $ toChunks $ responseBody resp,
+ UTF8.toText `fmap` lookup hContentType (responseHeaders resp))
+
+ case res of
+ Right r -> return r
+ Left e -> throwError $ PandocHttpError u e
+
+ readFileLazy s = liftIOError BL.readFile s
+ readFileStrict s = liftIOError B.readFile s
+
+ glob = liftIOError IO.glob
+ fileExists = liftIOError Directory.doesFileExist
+#ifdef EMBED_DATA_FILES
+ getDataFileName = return
+#else
+ getDataFileName = liftIOError Paths.getDataFileName
+#endif
+ getModificationTime = liftIOError IO.getModificationTime
+ getCommonState = PandocIO $ lift get
+ putCommonState x = PandocIO $ lift $ put x
+ logOutput msg = liftIO $ do
+ UTF8.hPutStr stderr $
+ "[" ++ show (messageVerbosity msg) ++ "] "
+ alertIndent $ T.lines $ showLogMessage msg
+
+-- | Prints the list of lines to @stderr@, indenting every but the first
+-- line by two spaces.
+alertIndent :: [Text] -> IO ()
+alertIndent [] = return ()
+alertIndent (l:ls) = do
+ UTF8.hPutStrLn stderr $ T.unpack l
+ mapM_ go ls
+ where go l' = do UTF8.hPutStr stderr " "
+ UTF8.hPutStrLn stderr $ T.unpack l'
+
+-- | Extract media from the mediabag into a directory.
+extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
+extractMedia dir d = do
+ media <- getMediaBag
+ case [fp | (fp, _, _) <- mediaDirectory media] of
+ [] -> return d
+ fps -> do
+ mapM_ (writeMedia dir media) fps
+ return $ walk (adjustImagePath dir fps) d
+
+-- | Write the contents of a media bag to a path.
+writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
+writeMedia dir mediabag subpath = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> unEscapeString (normalise subpath)
+ let mbcontents = lookupMedia subpath mediabag
+ case mbcontents of
+ Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath
+ Just (_, bs) -> do
+ report $ Extracting $ T.pack fullpath
+ liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+ logIOError $ BL.writeFile fullpath bs
+
+-- | If the given Inline element is an image with a @src@ path equal to
+-- one in the list of @paths@, then prepends @dir@ to the image source;
+-- returns the element unchanged otherwise.
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths (Image attr lab (src, tit))
+ | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit)
+adjustImagePath _ _ x = x