aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Class
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2020-03-21 14:26:28 +0100
committerAlbert Krewinkel <albert@zeitkraut.de>2020-03-22 14:59:38 +0100
commit2a042ff711ec9724a8b63b7cb4d29109379330c5 (patch)
tree3f13c755e249c9b66c442431c72a4250aeec1696 /src/Text/Pandoc/Class
parent66375f3fc4130a68cdcf70111858146583ac2ef3 (diff)
downloadpandoc-2a042ff711ec9724a8b63b7cb4d29109379330c5.tar.gz
Text.Pandoc.Class: extract submodules PandocIO, PandocPure
Diffstat (limited to 'src/Text/Pandoc/Class')
-rw-r--r--src/Text/Pandoc/Class/PandocIO.hs218
-rw-r--r--src/Text/Pandoc/Class/PandocPure.hs218
2 files changed, 436 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Class/PandocIO.hs b/src/Text/Pandoc/Class/PandocIO.hs
new file mode 100644
index 000000000..1cbfd680e
--- /dev/null
+++ b/src/Text/Pandoc/Class/PandocIO.hs
@@ -0,0 +1,218 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+Module : Text.Pandoc.Class.PandocIO
+Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability : alpha
+Portability : portable
+
+This module defines @'PandocIO'@, an IO-based instance of the
+@'PandocMonad'@ type class. File, data, and network access all are run
+using IO operators.
+-}
+module Text.Pandoc.Class.PandocIO
+ ( getPOSIXTime
+ , getZonedTime
+ , readFileFromDirs
+ , report
+ , setTrace
+ , setRequestHeader
+ , getLog
+ , setVerbosity
+ , getVerbosity
+ , getMediaBag
+ , setMediaBag
+ , insertMedia
+ , setUserDataDir
+ , getUserDataDir
+ , fetchItem
+ , getInputFiles
+ , setInputFiles
+ , getOutputFile
+ , setOutputFile
+ , setResourcePath
+ , getResourcePath
+ , PandocIO(..)
+ , runIO
+ , runIOorExplode
+ , extractMedia
+ ) where
+
+import Control.Monad.Except
+import Control.Monad.State.Strict
+import Data.ByteString.Base64 (decodeLenient)
+import Data.ByteString.Lazy (toChunks)
+import Data.Default
+import Data.Text (Text)
+import Data.Unique (hashUnique)
+import Network.HTTP.Client
+ (httpLbs, responseBody, responseHeaders,
+ Request(port, host, requestHeaders), parseRequest, newManager)
+import Network.HTTP.Client.Internal (addProxy)
+import Network.HTTP.Client.TLS (tlsManagerSettings)
+import Network.HTTP.Types.Header ( hContentType )
+import Network.Socket (withSocketsDo)
+import Network.URI ( unEscapeString )
+import Prelude
+import System.Directory (createDirectoryIfMissing)
+import System.Environment (getEnv)
+import System.FilePath ((</>), takeDirectory, normalise)
+import System.IO (stderr)
+import System.IO.Error
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.Walk (walk)
+import qualified Control.Exception as E
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.CaseInsensitive as CI
+import qualified Data.Text as T
+import qualified Data.Time as IO (getCurrentTime)
+import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
+import qualified Data.Unique as IO (newUnique)
+import qualified System.Directory as Directory
+import qualified System.Directory as IO (getModificationTime)
+import qualified System.Environment as IO (lookupEnv)
+import qualified System.FilePath.Glob as IO (glob)
+import qualified System.Random as IO (newStdGen)
+import qualified Text.Pandoc.UTF8 as UTF8
+#ifndef EMBED_DATA_FILES
+import qualified Paths_pandoc as Paths
+#endif
+
+-- | Evaluate a 'PandocIO' operation.
+runIO :: PandocIO a -> IO (Either PandocError a)
+runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
+
+-- | Evaluate a 'PandocIO' operation, handling any errors
+-- by exiting with an appropriate message and error status.
+runIOorExplode :: PandocIO a -> IO a
+runIOorExplode ma = runIO ma >>= handleError
+
+newtype PandocIO a = PandocIO {
+ unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
+ } deriving ( MonadIO
+ , Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+-- | Utility function to lift IO errors into 'PandocError's.
+liftIOError :: (String -> IO a) -> String -> PandocIO a
+liftIOError f u = do
+ res <- liftIO $ tryIOError $ f u
+ case res of
+ Left e -> throwError $ PandocIOError (T.pack u) e
+ Right r -> return r
+
+-- | Show potential IO errors to the user continuing execution anyway
+logIOError :: IO () -> PandocIO ()
+logIOError f = do
+ res <- liftIO $ tryIOError f
+ case res of
+ Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e
+ Right _ -> pure ()
+
+instance PandocMonad PandocIO where
+ lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack
+ getCurrentTime = liftIO IO.getCurrentTime
+ getCurrentTimeZone = liftIO IO.getCurrentTimeZone
+ newStdGen = liftIO IO.newStdGen
+ newUniqueHash = hashUnique <$> liftIO IO.newUnique
+
+ openURL u
+ | Just u'' <- T.stripPrefix "data:" u = do
+ let mime = T.takeWhile (/=',') u''
+ let contents = UTF8.fromString $
+ unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u''
+ return (decodeLenient contents, Just mime)
+ | otherwise = do
+ let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
+ customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
+ report $ Fetching u
+ res <- liftIO $ E.try $ withSocketsDo $ do
+ let parseReq = parseRequest
+ proxy <- tryIOError (getEnv "http_proxy")
+ let addProxy' x = case proxy of
+ Left _ -> return x
+ Right pr -> parseReq pr >>= \r ->
+ return (addProxy (host r) (port r) x)
+ req <- parseReq (T.unpack u) >>= addProxy'
+ let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
+ resp <- newManager tlsManagerSettings >>= httpLbs req'
+ return (B.concat $ toChunks $ responseBody resp,
+ UTF8.toText `fmap` lookup hContentType (responseHeaders resp))
+
+ case res of
+ Right r -> return r
+ Left e -> throwError $ PandocHttpError u e
+
+ readFileLazy s = liftIOError BL.readFile s
+ readFileStrict s = liftIOError B.readFile s
+
+ glob = liftIOError IO.glob
+ fileExists = liftIOError Directory.doesFileExist
+#ifdef EMBED_DATA_FILES
+ getDataFileName = return
+#else
+ getDataFileName = liftIOError Paths.getDataFileName
+#endif
+ getModificationTime = liftIOError IO.getModificationTime
+ getCommonState = PandocIO $ lift get
+ putCommonState x = PandocIO $ lift $ put x
+ logOutput msg = liftIO $ do
+ UTF8.hPutStr stderr $
+ "[" ++ show (messageVerbosity msg) ++ "] "
+ alertIndent $ T.lines $ showLogMessage msg
+
+-- | Prints the list of lines to @stderr@, indenting every but the first
+-- line by two spaces.
+alertIndent :: [Text] -> IO ()
+alertIndent [] = return ()
+alertIndent (l:ls) = do
+ UTF8.hPutStrLn stderr $ T.unpack l
+ mapM_ go ls
+ where go l' = do UTF8.hPutStr stderr " "
+ UTF8.hPutStrLn stderr $ T.unpack l'
+
+-- | Extract media from the mediabag into a directory.
+extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
+extractMedia dir d = do
+ media <- getMediaBag
+ case [fp | (fp, _, _) <- mediaDirectory media] of
+ [] -> return d
+ fps -> do
+ mapM_ (writeMedia dir media) fps
+ return $ walk (adjustImagePath dir fps) d
+
+-- | Write the contents of a media bag to a path.
+writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
+writeMedia dir mediabag subpath = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> unEscapeString (normalise subpath)
+ let mbcontents = lookupMedia subpath mediabag
+ case mbcontents of
+ Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath
+ Just (_, bs) -> do
+ report $ Extracting $ T.pack fullpath
+ liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+ logIOError $ BL.writeFile fullpath bs
+
+-- | If the given Inline element is an image with a @src@ path equal to
+-- one in the list of @paths@, then prepends @dir@ to the image source;
+-- returns the element unchanged otherwise.
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths (Image attr lab (src, tit))
+ | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit)
+adjustImagePath _ _ x = x
diff --git a/src/Text/Pandoc/Class/PandocPure.hs b/src/Text/Pandoc/Class/PandocPure.hs
new file mode 100644
index 000000000..010ead44f
--- /dev/null
+++ b/src/Text/Pandoc/Class/PandocPure.hs
@@ -0,0 +1,218 @@
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+Module : Text.Pandoc.Class.Pure
+Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability : alpha
+Portability : portable
+
+This module defines a pure instance 'PandocPure' of the @'PandocMonad'@
+typeclass. This instance is useful for testing, or when all IO access is
+prohibited for security reasons.
+-}
+module Text.Pandoc.Class.PandocPure
+ ( PureState(..)
+ , getPureState
+ , getsPureState
+ , putPureState
+ , modifyPureState
+ , PandocPure(..)
+ , FileTree
+ , FileInfo(..)
+ , addToFileTree
+ , insertInFileTree
+ , runPure
+ ) where
+
+import Codec.Archive.Zip
+import Control.Monad.Except
+import Control.Monad.State.Strict
+import Data.Default
+import Data.Text (Text)
+import Data.Time (UTCTime)
+import Data.Time.Clock.POSIX ( posixSecondsToUTCTime )
+import Data.Time.LocalTime (TimeZone, utc)
+import Data.Word (Word8)
+import System.Directory (doesDirectoryExist, getDirectoryContents)
+import System.FilePath ((</>))
+import System.FilePath.Glob (match, compile)
+import System.Random (StdGen, next, mkStdGen)
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Error
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Map as M
+import qualified Data.Text as T
+import qualified System.Directory as Directory (getModificationTime)
+
+-- | The 'PureState' contains ersatz representations
+-- of things that would normally be obtained through IO.
+data PureState = PureState
+ { stStdGen :: StdGen
+ , stWord8Store :: [Word8] -- ^ should be infinite, i.e. [1..]
+ , stUniqStore :: [Int] -- ^ should be infinite and contain every
+ -- element at most once, e.g. [1..]
+ , stEnv :: [(Text, Text)]
+ , stTime :: UTCTime
+ , stTimeZone :: TimeZone
+ , stReferenceDocx :: Archive
+ , stReferencePptx :: Archive
+ , stReferenceODT :: Archive
+ , stFiles :: FileTree
+ , stUserDataFiles :: FileTree
+ , stCabalDataFiles :: FileTree
+ }
+
+instance Default PureState where
+ def = PureState
+ { stStdGen = mkStdGen 1848
+ , stWord8Store = [1..]
+ , stUniqStore = [1..]
+ , stEnv = [("USER", "pandoc-user")]
+ , stTime = posixSecondsToUTCTime 0
+ , stTimeZone = utc
+ , stReferenceDocx = emptyArchive
+ , stReferencePptx = emptyArchive
+ , stReferenceODT = emptyArchive
+ , stFiles = mempty
+ , stUserDataFiles = mempty
+ , stCabalDataFiles = mempty
+ }
+
+
+-- | Retrieve the underlying state of the @'PandocPure'@ type.
+getPureState :: PandocPure PureState
+getPureState = PandocPure $ lift $ lift get
+
+-- | Retrieve a value from the underlying state of the @'PandocPure'@
+-- type.
+getsPureState :: (PureState -> a) -> PandocPure a
+getsPureState f = f <$> getPureState
+
+-- | Set a new state for the @'PandocPure'@ type.
+putPureState :: PureState -> PandocPure ()
+putPureState ps= PandocPure $ lift $ lift $ put ps
+
+-- | Modify the underlying state of the @'PandocPure'@ type.
+modifyPureState :: (PureState -> PureState) -> PandocPure ()
+modifyPureState f = PandocPure $ lift $ lift $ modify f
+
+-- | Captures all file-level information necessary for a @'PandocMonad'@
+-- conforming mock file system.
+data FileInfo = FileInfo
+ { infoFileMTime :: UTCTime
+ , infoFileContents :: B.ByteString
+ }
+
+-- | Basis of the mock file system used by @'PandocPure'@.
+newtype FileTree = FileTree { unFileTree :: M.Map FilePath FileInfo }
+ deriving (Semigroup, Monoid)
+
+-- | Retrieve @'FileInfo'@ of the given @'FilePath'@ from a
+-- @'FileTree'@.
+getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
+getFileInfo fp tree =
+ M.lookup (makeCanonical fp) (unFileTree tree)
+
+-- | Add the specified file to the FileTree. If file
+-- is a directory, add its contents recursively.
+addToFileTree :: FileTree -> FilePath -> IO FileTree
+addToFileTree tree fp = do
+ isdir <- doesDirectoryExist fp
+ if isdir
+ then do -- recursively add contents of directories
+ let isSpecial ".." = True
+ isSpecial "." = True
+ isSpecial _ = False
+ fs <- map (fp </>) . filter (not . isSpecial) <$> getDirectoryContents fp
+ foldM addToFileTree tree fs
+ else do
+ contents <- B.readFile fp
+ mtime <- Directory.getModificationTime fp
+ return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime
+ , infoFileContents = contents } tree
+
+-- | Insert an ersatz file into the 'FileTree'.
+insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree
+insertInFileTree fp info (FileTree treemap) =
+ FileTree $ M.insert (makeCanonical fp) info treemap
+
+newtype PandocPure a = PandocPure {
+ unPandocPure :: ExceptT PandocError
+ (StateT CommonState (State PureState)) a
+ } deriving ( Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+-- | Run a 'PandocPure' operation.
+runPure :: PandocPure a -> Either PandocError a
+runPure x = flip evalState def $
+ flip evalStateT def $
+ runExceptT $
+ unPandocPure x
+
+instance PandocMonad PandocPure where
+ lookupEnv s = do
+ env <- getsPureState stEnv
+ return (lookup s env)
+
+ getCurrentTime = getsPureState stTime
+
+ getCurrentTimeZone = getsPureState stTimeZone
+
+ newStdGen = do
+ g <- getsPureState stStdGen
+ let (_, nxtGen) = next g
+ modifyPureState $ \st -> st { stStdGen = nxtGen }
+ return g
+
+ newUniqueHash = do
+ uniqs <- getsPureState stUniqStore
+ case uniqs of
+ u : us -> do
+ modifyPureState $ \st -> st { stUniqStore = us }
+ return u
+ _ -> throwError $ PandocShouldNeverHappenError
+ "uniq store ran out of elements"
+ openURL u = throwError $ PandocResourceNotFound u
+ readFileLazy fp = do
+ fps <- getsPureState stFiles
+ case infoFileContents <$> getFileInfo fp fps of
+ Just bs -> return (BL.fromStrict bs)
+ Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
+ readFileStrict fp = do
+ fps <- getsPureState stFiles
+ case infoFileContents <$> getFileInfo fp fps of
+ Just bs -> return bs
+ Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
+
+ glob s = do
+ FileTree ftmap <- getsPureState stFiles
+ return $ filter (match (compile s)) $ M.keys ftmap
+
+ fileExists fp = do
+ fps <- getsPureState stFiles
+ case getFileInfo fp fps of
+ Nothing -> return False
+ Just _ -> return True
+
+ getDataFileName fp = return $ "data/" ++ fp
+
+ getModificationTime fp = do
+ fps <- getsPureState stFiles
+ case infoFileMTime <$> getFileInfo fp fps of
+ Just tm -> return tm
+ Nothing -> throwError $ PandocIOError (T.pack fp)
+ (userError "Can't get modification time")
+
+ getCommonState = PandocPure $ lift get
+ putCommonState x = PandocPure $ lift $ put x
+
+ logOutput _msg = return ()