aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Class.hs403
-rw-r--r--src/Text/Pandoc/Class/PandocIO.hs218
-rw-r--r--src/Text/Pandoc/Class/PandocPure.hs218
3 files changed, 446 insertions, 393 deletions
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index d2d58f0ed..2f28ac4dd 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -1,17 +1,6 @@
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE UndecidableInstances #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Class
- Copyright : Copyright (C) 2016-17 Jesse Rosenthal, John MacFarlane
+ Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -25,388 +14,16 @@ whether they want conversions to perform IO operations (such as
reading include files or images).
-}
-module Text.Pandoc.Class ( PandocMonad(..)
- , CommonState(..)
- , PureState(..)
- , getPureState
- , getsPureState
- , putPureState
- , modifyPureState
- , getPOSIXTime
- , getZonedTime
- , readFileFromDirs
- , report
- , setTrace
- , setRequestHeader
- , getLog
- , setVerbosity
- , getVerbosity
- , getMediaBag
- , setMediaBag
- , insertMedia
- , setUserDataDir
- , getUserDataDir
- , fetchItem
- , getInputFiles
- , setInputFiles
- , getOutputFile
- , setOutputFile
- , setResourcePath
- , getResourcePath
- , PandocIO(..)
- , PandocPure(..)
- , FileTree
- , FileInfo(..)
- , addToFileTree
- , insertInFileTree
- , runIO
- , runIOorExplode
- , runPure
- , readDefaultDataFile
- , readDataFile
- , fetchMediaResource
- , fillMediaBag
- , extractMedia
- , toLang
- , setTranslations
- , translateTerm
- , Translations
- ) where
+module Text.Pandoc.Class
+ ( module Text.Pandoc.Class.CommonState
+ , module Text.Pandoc.Class.PandocIO
+ , module Text.Pandoc.Class.PandocMonad
+ , module Text.Pandoc.Class.PandocPure
+ , Translations
+ ) where
-import Codec.Archive.Zip
-import Control.Monad.Except
-import Control.Monad.State.Strict
-import Data.ByteString.Base64 (decodeLenient)
-import Data.ByteString.Lazy (toChunks)
-import Data.Default
-import Data.Time (UTCTime)
-import Data.Time.Clock.POSIX ( posixSecondsToUTCTime )
-import Data.Time.LocalTime (TimeZone, utc)
-import Data.Unique (hashUnique)
-import Data.Word (Word8)
-import Network.HTTP.Client
- (httpLbs, responseBody, responseHeaders,
- Request(port, host, requestHeaders), parseRequest, newManager)
-import Network.HTTP.Client.Internal (addProxy)
-import Network.HTTP.Client.TLS (tlsManagerSettings)
-import Network.HTTP.Types.Header ( hContentType )
-import Network.Socket (withSocketsDo)
-import Network.URI ( unEscapeString )
-import Prelude
-import System.Directory (createDirectoryIfMissing, getDirectoryContents,
- doesDirectoryExist)
-import System.Environment (getEnv)
-import System.FilePath ((</>), takeDirectory, normalise)
-import System.FilePath.Glob (match, compile)
-import System.IO (stderr)
-import System.IO.Error
-import System.Random (StdGen, next, mkStdGen)
import Text.Pandoc.Class.CommonState (CommonState (..))
import Text.Pandoc.Class.PandocMonad
-import Text.Pandoc.Definition
-import Text.Pandoc.Error
-import Text.Pandoc.Logging
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.Class.PandocIO
+import Text.Pandoc.Class.PandocPure
import Text.Pandoc.Translations (Translations)
-import Text.Pandoc.Walk (walk)
-import qualified Control.Exception as E
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.CaseInsensitive as CI
-import qualified Data.Map as M
-import qualified Data.Text as T
-import qualified Data.Time as IO (getCurrentTime)
-import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
-import qualified Data.Unique as IO (newUnique)
-import qualified System.Directory as Directory
-import qualified System.Directory as IO (getModificationTime)
-import qualified System.Environment as IO (lookupEnv)
-import qualified System.FilePath.Glob as IO (glob)
-import qualified System.Random as IO (newStdGen)
-import qualified Text.Pandoc.UTF8 as UTF8
-#ifndef EMBED_DATA_FILES
-import qualified Paths_pandoc as Paths
-#endif
-
--- | Evaluate a 'PandocIO' operation.
-runIO :: PandocIO a -> IO (Either PandocError a)
-runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
-
--- | Evaluate a 'PandocIO' operation, handling any errors
--- by exiting with an appropriate message and error status.
-runIOorExplode :: PandocIO a -> IO a
-runIOorExplode ma = runIO ma >>= handleError
-
-newtype PandocIO a = PandocIO {
- unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
- } deriving ( MonadIO
- , Functor
- , Applicative
- , Monad
- , MonadError PandocError
- )
-
--- | Utility function to lift IO errors into 'PandocError's.
-liftIOError :: (String -> IO a) -> String -> PandocIO a
-liftIOError f u = do
- res <- liftIO $ tryIOError $ f u
- case res of
- Left e -> throwError $ PandocIOError (T.pack u) e
- Right r -> return r
-
--- | Show potential IO errors to the user continuing execution anyway
-logIOError :: IO () -> PandocIO ()
-logIOError f = do
- res <- liftIO $ tryIOError f
- case res of
- Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e
- Right _ -> pure ()
-
-instance PandocMonad PandocIO where
- lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack
- getCurrentTime = liftIO IO.getCurrentTime
- getCurrentTimeZone = liftIO IO.getCurrentTimeZone
- newStdGen = liftIO IO.newStdGen
- newUniqueHash = hashUnique <$> liftIO IO.newUnique
-
- openURL u
- | Just u'' <- T.stripPrefix "data:" u = do
- let mime = T.takeWhile (/=',') u''
- let contents = UTF8.fromString $
- unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u''
- return (decodeLenient contents, Just mime)
- | otherwise = do
- let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
- customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
- report $ Fetching u
- res <- liftIO $ E.try $ withSocketsDo $ do
- let parseReq = parseRequest
- proxy <- tryIOError (getEnv "http_proxy")
- let addProxy' x = case proxy of
- Left _ -> return x
- Right pr -> parseReq pr >>= \r ->
- return (addProxy (host r) (port r) x)
- req <- parseReq (T.unpack u) >>= addProxy'
- let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
- resp <- newManager tlsManagerSettings >>= httpLbs req'
- return (B.concat $ toChunks $ responseBody resp,
- UTF8.toText `fmap` lookup hContentType (responseHeaders resp))
-
- case res of
- Right r -> return r
- Left e -> throwError $ PandocHttpError u e
-
- readFileLazy s = liftIOError BL.readFile s
- readFileStrict s = liftIOError B.readFile s
-
- glob = liftIOError IO.glob
- fileExists = liftIOError Directory.doesFileExist
-#ifdef EMBED_DATA_FILES
- getDataFileName = return
-#else
- getDataFileName = liftIOError Paths.getDataFileName
-#endif
- getModificationTime = liftIOError IO.getModificationTime
- getCommonState = PandocIO $ lift get
- putCommonState x = PandocIO $ lift $ put x
- logOutput msg = liftIO $ do
- UTF8.hPutStr stderr $
- "[" ++ show (messageVerbosity msg) ++ "] "
- alertIndent $ T.lines $ showLogMessage msg
-
-alertIndent :: [T.Text] -> IO ()
-alertIndent [] = return ()
-alertIndent (l:ls) = do
- UTF8.hPutStrLn stderr $ T.unpack l
- mapM_ go ls
- where go l' = do UTF8.hPutStr stderr " "
- UTF8.hPutStrLn stderr $ T.unpack l'
-
--- | Extract media from the mediabag into a directory.
-extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
-extractMedia dir d = do
- media <- getMediaBag
- case [fp | (fp, _, _) <- mediaDirectory media] of
- [] -> return d
- fps -> do
- mapM_ (writeMedia dir media) fps
- return $ walk (adjustImagePath dir fps) d
-
--- Write the contents of a media bag to a path.
-writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
-writeMedia dir mediabag subpath = do
- -- we join and split to convert a/b/c to a\b\c on Windows;
- -- in zip containers all paths use /
- let fullpath = dir </> unEscapeString (normalise subpath)
- let mbcontents = lookupMedia subpath mediabag
- case mbcontents of
- Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath
- Just (_, bs) -> do
- report $ Extracting $ T.pack fullpath
- liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
- logIOError $ BL.writeFile fullpath bs
-
-adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
-adjustImagePath dir paths (Image attr lab (src, tit))
- | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit)
-adjustImagePath _ _ x = x
-
--- | The 'PureState' contains ersatz representations
--- of things that would normally be obtained through IO.
-data PureState = PureState { stStdGen :: StdGen
- , stWord8Store :: [Word8] -- should be
- -- infinite,
- -- i.e. [1..]
- , stUniqStore :: [Int] -- should be
- -- infinite and
- -- contain every
- -- element at most
- -- once, e.g. [1..]
- , stEnv :: [(T.Text, T.Text)]
- , stTime :: UTCTime
- , stTimeZone :: TimeZone
- , stReferenceDocx :: Archive
- , stReferencePptx :: Archive
- , stReferenceODT :: Archive
- , stFiles :: FileTree
- , stUserDataFiles :: FileTree
- , stCabalDataFiles :: FileTree
- }
-
-instance Default PureState where
- def = PureState { stStdGen = mkStdGen 1848
- , stWord8Store = [1..]
- , stUniqStore = [1..]
- , stEnv = [("USER", "pandoc-user")]
- , stTime = posixSecondsToUTCTime 0
- , stTimeZone = utc
- , stReferenceDocx = emptyArchive
- , stReferencePptx = emptyArchive
- , stReferenceODT = emptyArchive
- , stFiles = mempty
- , stUserDataFiles = mempty
- , stCabalDataFiles = mempty
- }
-
-
-getPureState :: PandocPure PureState
-getPureState = PandocPure $ lift $ lift get
-
-getsPureState :: (PureState -> a) -> PandocPure a
-getsPureState f = f <$> getPureState
-
-putPureState :: PureState -> PandocPure ()
-putPureState ps= PandocPure $ lift $ lift $ put ps
-
-modifyPureState :: (PureState -> PureState) -> PandocPure ()
-modifyPureState f = PandocPure $ lift $ lift $ modify f
-
-
-data FileInfo = FileInfo { infoFileMTime :: UTCTime
- , infoFileContents :: B.ByteString
- }
-
-newtype FileTree = FileTree {unFileTree :: M.Map FilePath FileInfo}
- deriving (Semigroup, Monoid)
-
-getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
-getFileInfo fp tree =
- M.lookup (makeCanonical fp) (unFileTree tree)
-
--- | Add the specified file to the FileTree. If file
--- is a directory, add its contents recursively.
-addToFileTree :: FileTree -> FilePath -> IO FileTree
-addToFileTree tree fp = do
- isdir <- doesDirectoryExist fp
- if isdir
- then do -- recursively add contents of directories
- let isSpecial ".." = True
- isSpecial "." = True
- isSpecial _ = False
- fs <- (map (fp </>) . filter (not . isSpecial)) <$> getDirectoryContents fp
- foldM addToFileTree tree fs
- else do
- contents <- B.readFile fp
- mtime <- IO.getModificationTime fp
- return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime
- , infoFileContents = contents } tree
-
--- | Insert an ersatz file into the 'FileTree'.
-insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree
-insertInFileTree fp info (FileTree treemap) =
- FileTree $ M.insert (makeCanonical fp) info treemap
-
-newtype PandocPure a = PandocPure {
- unPandocPure :: ExceptT PandocError
- (StateT CommonState (State PureState)) a
- } deriving ( Functor
- , Applicative
- , Monad
- , MonadError PandocError
- )
-
--- Run a 'PandocPure' operation.
-runPure :: PandocPure a -> Either PandocError a
-runPure x = flip evalState def $
- flip evalStateT def $
- runExceptT $
- unPandocPure x
-
-instance PandocMonad PandocPure where
- lookupEnv s = do
- env <- getsPureState stEnv
- return (lookup s env)
-
- getCurrentTime = getsPureState stTime
-
- getCurrentTimeZone = getsPureState stTimeZone
-
- newStdGen = do
- g <- getsPureState stStdGen
- let (_, nxtGen) = next g
- modifyPureState $ \st -> st { stStdGen = nxtGen }
- return g
-
- newUniqueHash = do
- uniqs <- getsPureState stUniqStore
- case uniqs of
- u : us -> do
- modifyPureState $ \st -> st { stUniqStore = us }
- return u
- _ -> throwError $ PandocShouldNeverHappenError
- "uniq store ran out of elements"
- openURL u = throwError $ PandocResourceNotFound u
- readFileLazy fp = do
- fps <- getsPureState stFiles
- case infoFileContents <$> getFileInfo fp fps of
- Just bs -> return (BL.fromStrict bs)
- Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
- readFileStrict fp = do
- fps <- getsPureState stFiles
- case infoFileContents <$> getFileInfo fp fps of
- Just bs -> return bs
- Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
-
- glob s = do
- FileTree ftmap <- getsPureState stFiles
- return $ filter (match (compile s)) $ M.keys ftmap
-
- fileExists fp = do
- fps <- getsPureState stFiles
- case getFileInfo fp fps of
- Nothing -> return False
- Just _ -> return True
-
- getDataFileName fp = return $ "data/" ++ fp
-
- getModificationTime fp = do
- fps <- getsPureState stFiles
- case infoFileMTime <$> getFileInfo fp fps of
- Just tm -> return tm
- Nothing -> throwError $ PandocIOError (T.pack fp)
- (userError "Can't get modification time")
-
- getCommonState = PandocPure $ lift get
- putCommonState x = PandocPure $ lift $ put x
-
- logOutput _msg = return ()
diff --git a/src/Text/Pandoc/Class/PandocIO.hs b/src/Text/Pandoc/Class/PandocIO.hs
new file mode 100644
index 000000000..1cbfd680e
--- /dev/null
+++ b/src/Text/Pandoc/Class/PandocIO.hs
@@ -0,0 +1,218 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+Module : Text.Pandoc.Class.PandocIO
+Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability : alpha
+Portability : portable
+
+This module defines @'PandocIO'@, an IO-based instance of the
+@'PandocMonad'@ type class. File, data, and network access all are run
+using IO operators.
+-}
+module Text.Pandoc.Class.PandocIO
+ ( getPOSIXTime
+ , getZonedTime
+ , readFileFromDirs
+ , report
+ , setTrace
+ , setRequestHeader
+ , getLog
+ , setVerbosity
+ , getVerbosity
+ , getMediaBag
+ , setMediaBag
+ , insertMedia
+ , setUserDataDir
+ , getUserDataDir
+ , fetchItem
+ , getInputFiles
+ , setInputFiles
+ , getOutputFile
+ , setOutputFile
+ , setResourcePath
+ , getResourcePath
+ , PandocIO(..)
+ , runIO
+ , runIOorExplode
+ , extractMedia
+ ) where
+
+import Control.Monad.Except
+import Control.Monad.State.Strict
+import Data.ByteString.Base64 (decodeLenient)
+import Data.ByteString.Lazy (toChunks)
+import Data.Default
+import Data.Text (Text)
+import Data.Unique (hashUnique)
+import Network.HTTP.Client
+ (httpLbs, responseBody, responseHeaders,
+ Request(port, host, requestHeaders), parseRequest, newManager)
+import Network.HTTP.Client.Internal (addProxy)
+import Network.HTTP.Client.TLS (tlsManagerSettings)
+import Network.HTTP.Types.Header ( hContentType )
+import Network.Socket (withSocketsDo)
+import Network.URI ( unEscapeString )
+import Prelude
+import System.Directory (createDirectoryIfMissing)
+import System.Environment (getEnv)
+import System.FilePath ((</>), takeDirectory, normalise)
+import System.IO (stderr)
+import System.IO.Error
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Logging
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.Walk (walk)
+import qualified Control.Exception as E
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.CaseInsensitive as CI
+import qualified Data.Text as T
+import qualified Data.Time as IO (getCurrentTime)
+import qualified Data.Time.LocalTime as IO (getCurrentTimeZone)
+import qualified Data.Unique as IO (newUnique)
+import qualified System.Directory as Directory
+import qualified System.Directory as IO (getModificationTime)
+import qualified System.Environment as IO (lookupEnv)
+import qualified System.FilePath.Glob as IO (glob)
+import qualified System.Random as IO (newStdGen)
+import qualified Text.Pandoc.UTF8 as UTF8
+#ifndef EMBED_DATA_FILES
+import qualified Paths_pandoc as Paths
+#endif
+
+-- | Evaluate a 'PandocIO' operation.
+runIO :: PandocIO a -> IO (Either PandocError a)
+runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
+
+-- | Evaluate a 'PandocIO' operation, handling any errors
+-- by exiting with an appropriate message and error status.
+runIOorExplode :: PandocIO a -> IO a
+runIOorExplode ma = runIO ma >>= handleError
+
+newtype PandocIO a = PandocIO {
+ unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
+ } deriving ( MonadIO
+ , Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+-- | Utility function to lift IO errors into 'PandocError's.
+liftIOError :: (String -> IO a) -> String -> PandocIO a
+liftIOError f u = do
+ res <- liftIO $ tryIOError $ f u
+ case res of
+ Left e -> throwError $ PandocIOError (T.pack u) e
+ Right r -> return r
+
+-- | Show potential IO errors to the user continuing execution anyway
+logIOError :: IO () -> PandocIO ()
+logIOError f = do
+ res <- liftIO $ tryIOError f
+ case res of
+ Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e
+ Right _ -> pure ()
+
+instance PandocMonad PandocIO where
+ lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack
+ getCurrentTime = liftIO IO.getCurrentTime
+ getCurrentTimeZone = liftIO IO.getCurrentTimeZone
+ newStdGen = liftIO IO.newStdGen
+ newUniqueHash = hashUnique <$> liftIO IO.newUnique
+
+ openURL u
+ | Just u'' <- T.stripPrefix "data:" u = do
+ let mime = T.takeWhile (/=',') u''
+ let contents = UTF8.fromString $
+ unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u''
+ return (decodeLenient contents, Just mime)
+ | otherwise = do
+ let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
+ customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
+ report $ Fetching u
+ res <- liftIO $ E.try $ withSocketsDo $ do
+ let parseReq = parseRequest
+ proxy <- tryIOError (getEnv "http_proxy")
+ let addProxy' x = case proxy of
+ Left _ -> return x
+ Right pr -> parseReq pr >>= \r ->
+ return (addProxy (host r) (port r) x)
+ req <- parseReq (T.unpack u) >>= addProxy'
+ let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
+ resp <- newManager tlsManagerSettings >>= httpLbs req'
+ return (B.concat $ toChunks $ responseBody resp,
+ UTF8.toText `fmap` lookup hContentType (responseHeaders resp))
+
+ case res of
+ Right r -> return r
+ Left e -> throwError $ PandocHttpError u e
+
+ readFileLazy s = liftIOError BL.readFile s
+ readFileStrict s = liftIOError B.readFile s
+
+ glob = liftIOError IO.glob
+ fileExists = liftIOError Directory.doesFileExist
+#ifdef EMBED_DATA_FILES
+ getDataFileName = return
+#else
+ getDataFileName = liftIOError Paths.getDataFileName
+#endif
+ getModificationTime = liftIOError IO.getModificationTime
+ getCommonState = PandocIO $ lift get
+ putCommonState x = PandocIO $ lift $ put x
+ logOutput msg = liftIO $ do
+ UTF8.hPutStr stderr $
+ "[" ++ show (messageVerbosity msg) ++ "] "
+ alertIndent $ T.lines $ showLogMessage msg
+
+-- | Prints the list of lines to @stderr@, indenting every but the first
+-- line by two spaces.
+alertIndent :: [Text] -> IO ()
+alertIndent [] = return ()
+alertIndent (l:ls) = do
+ UTF8.hPutStrLn stderr $ T.unpack l
+ mapM_ go ls
+ where go l' = do UTF8.hPutStr stderr " "
+ UTF8.hPutStrLn stderr $ T.unpack l'
+
+-- | Extract media from the mediabag into a directory.
+extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc
+extractMedia dir d = do
+ media <- getMediaBag
+ case [fp | (fp, _, _) <- mediaDirectory media] of
+ [] -> return d
+ fps -> do
+ mapM_ (writeMedia dir media) fps
+ return $ walk (adjustImagePath dir fps) d
+
+-- | Write the contents of a media bag to a path.
+writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO ()
+writeMedia dir mediabag subpath = do
+ -- we join and split to convert a/b/c to a\b\c on Windows;
+ -- in zip containers all paths use /
+ let fullpath = dir </> unEscapeString (normalise subpath)
+ let mbcontents = lookupMedia subpath mediabag
+ case mbcontents of
+ Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath
+ Just (_, bs) -> do
+ report $ Extracting $ T.pack fullpath
+ liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
+ logIOError $ BL.writeFile fullpath bs
+
+-- | If the given Inline element is an image with a @src@ path equal to
+-- one in the list of @paths@, then prepends @dir@ to the image source;
+-- returns the element unchanged otherwise.
+adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline
+adjustImagePath dir paths (Image attr lab (src, tit))
+ | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit)
+adjustImagePath _ _ x = x
diff --git a/src/Text/Pandoc/Class/PandocPure.hs b/src/Text/Pandoc/Class/PandocPure.hs
new file mode 100644
index 000000000..010ead44f
--- /dev/null
+++ b/src/Text/Pandoc/Class/PandocPure.hs
@@ -0,0 +1,218 @@
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+Module : Text.Pandoc.Class.Pure
+Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
+Stability : alpha
+Portability : portable
+
+This module defines a pure instance 'PandocPure' of the @'PandocMonad'@
+typeclass. This instance is useful for testing, or when all IO access is
+prohibited for security reasons.
+-}
+module Text.Pandoc.Class.PandocPure
+ ( PureState(..)
+ , getPureState
+ , getsPureState
+ , putPureState
+ , modifyPureState
+ , PandocPure(..)
+ , FileTree
+ , FileInfo(..)
+ , addToFileTree
+ , insertInFileTree
+ , runPure
+ ) where
+
+import Codec.Archive.Zip
+import Control.Monad.Except
+import Control.Monad.State.Strict
+import Data.Default
+import Data.Text (Text)
+import Data.Time (UTCTime)
+import Data.Time.Clock.POSIX ( posixSecondsToUTCTime )
+import Data.Time.LocalTime (TimeZone, utc)
+import Data.Word (Word8)
+import System.Directory (doesDirectoryExist, getDirectoryContents)
+import System.FilePath ((</>))
+import System.FilePath.Glob (match, compile)
+import System.Random (StdGen, next, mkStdGen)
+import Text.Pandoc.Class.CommonState (CommonState (..))
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Error
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Map as M
+import qualified Data.Text as T
+import qualified System.Directory as Directory (getModificationTime)
+
+-- | The 'PureState' contains ersatz representations
+-- of things that would normally be obtained through IO.
+data PureState = PureState
+ { stStdGen :: StdGen
+ , stWord8Store :: [Word8] -- ^ should be infinite, i.e. [1..]
+ , stUniqStore :: [Int] -- ^ should be infinite and contain every
+ -- element at most once, e.g. [1..]
+ , stEnv :: [(Text, Text)]
+ , stTime :: UTCTime
+ , stTimeZone :: TimeZone
+ , stReferenceDocx :: Archive
+ , stReferencePptx :: Archive
+ , stReferenceODT :: Archive
+ , stFiles :: FileTree
+ , stUserDataFiles :: FileTree
+ , stCabalDataFiles :: FileTree
+ }
+
+instance Default PureState where
+ def = PureState
+ { stStdGen = mkStdGen 1848
+ , stWord8Store = [1..]
+ , stUniqStore = [1..]
+ , stEnv = [("USER", "pandoc-user")]
+ , stTime = posixSecondsToUTCTime 0
+ , stTimeZone = utc
+ , stReferenceDocx = emptyArchive
+ , stReferencePptx = emptyArchive
+ , stReferenceODT = emptyArchive
+ , stFiles = mempty
+ , stUserDataFiles = mempty
+ , stCabalDataFiles = mempty
+ }
+
+
+-- | Retrieve the underlying state of the @'PandocPure'@ type.
+getPureState :: PandocPure PureState
+getPureState = PandocPure $ lift $ lift get
+
+-- | Retrieve a value from the underlying state of the @'PandocPure'@
+-- type.
+getsPureState :: (PureState -> a) -> PandocPure a
+getsPureState f = f <$> getPureState
+
+-- | Set a new state for the @'PandocPure'@ type.
+putPureState :: PureState -> PandocPure ()
+putPureState ps= PandocPure $ lift $ lift $ put ps
+
+-- | Modify the underlying state of the @'PandocPure'@ type.
+modifyPureState :: (PureState -> PureState) -> PandocPure ()
+modifyPureState f = PandocPure $ lift $ lift $ modify f
+
+-- | Captures all file-level information necessary for a @'PandocMonad'@
+-- conforming mock file system.
+data FileInfo = FileInfo
+ { infoFileMTime :: UTCTime
+ , infoFileContents :: B.ByteString
+ }
+
+-- | Basis of the mock file system used by @'PandocPure'@.
+newtype FileTree = FileTree { unFileTree :: M.Map FilePath FileInfo }
+ deriving (Semigroup, Monoid)
+
+-- | Retrieve @'FileInfo'@ of the given @'FilePath'@ from a
+-- @'FileTree'@.
+getFileInfo :: FilePath -> FileTree -> Maybe FileInfo
+getFileInfo fp tree =
+ M.lookup (makeCanonical fp) (unFileTree tree)
+
+-- | Add the specified file to the FileTree. If file
+-- is a directory, add its contents recursively.
+addToFileTree :: FileTree -> FilePath -> IO FileTree
+addToFileTree tree fp = do
+ isdir <- doesDirectoryExist fp
+ if isdir
+ then do -- recursively add contents of directories
+ let isSpecial ".." = True
+ isSpecial "." = True
+ isSpecial _ = False
+ fs <- map (fp </>) . filter (not . isSpecial) <$> getDirectoryContents fp
+ foldM addToFileTree tree fs
+ else do
+ contents <- B.readFile fp
+ mtime <- Directory.getModificationTime fp
+ return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime
+ , infoFileContents = contents } tree
+
+-- | Insert an ersatz file into the 'FileTree'.
+insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree
+insertInFileTree fp info (FileTree treemap) =
+ FileTree $ M.insert (makeCanonical fp) info treemap
+
+newtype PandocPure a = PandocPure {
+ unPandocPure :: ExceptT PandocError
+ (StateT CommonState (State PureState)) a
+ } deriving ( Functor
+ , Applicative
+ , Monad
+ , MonadError PandocError
+ )
+
+-- | Run a 'PandocPure' operation.
+runPure :: PandocPure a -> Either PandocError a
+runPure x = flip evalState def $
+ flip evalStateT def $
+ runExceptT $
+ unPandocPure x
+
+instance PandocMonad PandocPure where
+ lookupEnv s = do
+ env <- getsPureState stEnv
+ return (lookup s env)
+
+ getCurrentTime = getsPureState stTime
+
+ getCurrentTimeZone = getsPureState stTimeZone
+
+ newStdGen = do
+ g <- getsPureState stStdGen
+ let (_, nxtGen) = next g
+ modifyPureState $ \st -> st { stStdGen = nxtGen }
+ return g
+
+ newUniqueHash = do
+ uniqs <- getsPureState stUniqStore
+ case uniqs of
+ u : us -> do
+ modifyPureState $ \st -> st { stUniqStore = us }
+ return u
+ _ -> throwError $ PandocShouldNeverHappenError
+ "uniq store ran out of elements"
+ openURL u = throwError $ PandocResourceNotFound u
+ readFileLazy fp = do
+ fps <- getsPureState stFiles
+ case infoFileContents <$> getFileInfo fp fps of
+ Just bs -> return (BL.fromStrict bs)
+ Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
+ readFileStrict fp = do
+ fps <- getsPureState stFiles
+ case infoFileContents <$> getFileInfo fp fps of
+ Just bs -> return bs
+ Nothing -> throwError $ PandocResourceNotFound $ T.pack fp
+
+ glob s = do
+ FileTree ftmap <- getsPureState stFiles
+ return $ filter (match (compile s)) $ M.keys ftmap
+
+ fileExists fp = do
+ fps <- getsPureState stFiles
+ case getFileInfo fp fps of
+ Nothing -> return False
+ Just _ -> return True
+
+ getDataFileName fp = return $ "data/" ++ fp
+
+ getModificationTime fp = do
+ fps <- getsPureState stFiles
+ case infoFileMTime <$> getFileInfo fp fps of
+ Just tm -> return tm
+ Nothing -> throwError $ PandocIOError (T.pack fp)
+ (userError "Can't get modification time")
+
+ getCommonState = PandocPure $ lift get
+ putCommonState x = PandocPure $ lift $ put x
+
+ logOutput _msg = return ()