diff options
Diffstat (limited to 'src/Text/Pandoc/Class')
| -rw-r--r-- | src/Text/Pandoc/Class/PandocIO.hs | 218 | ||||
| -rw-r--r-- | src/Text/Pandoc/Class/PandocPure.hs | 218 | 
2 files changed, 436 insertions, 0 deletions
| diff --git a/src/Text/Pandoc/Class/PandocIO.hs b/src/Text/Pandoc/Class/PandocIO.hs new file mode 100644 index 000000000..1cbfd680e --- /dev/null +++ b/src/Text/Pandoc/Class/PandocIO.hs @@ -0,0 +1,218 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{- | +Module      : Text.Pandoc.Class.PandocIO +Copyright   : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane +License     : GNU GPL, version 2 or above + +Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu> +Stability   : alpha +Portability : portable + +This module defines @'PandocIO'@, an IO-based instance of the +@'PandocMonad'@ type class. File, data, and network access all are run +using IO operators. +-} +module Text.Pandoc.Class.PandocIO +  ( getPOSIXTime +  , getZonedTime +  , readFileFromDirs +  , report +  , setTrace +  , setRequestHeader +  , getLog +  , setVerbosity +  , getVerbosity +  , getMediaBag +  , setMediaBag +  , insertMedia +  , setUserDataDir +  , getUserDataDir +  , fetchItem +  , getInputFiles +  , setInputFiles +  , getOutputFile +  , setOutputFile +  , setResourcePath +  , getResourcePath +  , PandocIO(..) +  , runIO +  , runIOorExplode +  , extractMedia + ) where + +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.ByteString.Base64 (decodeLenient) +import Data.ByteString.Lazy (toChunks) +import Data.Default +import Data.Text (Text) +import Data.Unique (hashUnique) +import Network.HTTP.Client +       (httpLbs, responseBody, responseHeaders, +        Request(port, host, requestHeaders), parseRequest, newManager) +import Network.HTTP.Client.Internal (addProxy) +import Network.HTTP.Client.TLS (tlsManagerSettings) +import Network.HTTP.Types.Header ( hContentType ) +import Network.Socket (withSocketsDo) +import Network.URI ( unEscapeString ) +import Prelude +import System.Directory (createDirectoryIfMissing) +import System.Environment (getEnv) +import System.FilePath ((</>), takeDirectory, normalise) +import System.IO (stderr) +import System.IO.Error +import Text.Pandoc.Class.CommonState (CommonState (..)) +import Text.Pandoc.Class.PandocMonad +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.Walk (walk) +import qualified Control.Exception as E +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.CaseInsensitive as CI +import qualified Data.Text as T +import qualified Data.Time as IO (getCurrentTime) +import qualified Data.Time.LocalTime as IO (getCurrentTimeZone) +import qualified Data.Unique as IO (newUnique) +import qualified System.Directory as Directory +import qualified System.Directory as IO (getModificationTime) +import qualified System.Environment as IO (lookupEnv) +import qualified System.FilePath.Glob as IO (glob) +import qualified System.Random as IO (newStdGen) +import qualified Text.Pandoc.UTF8 as UTF8 +#ifndef EMBED_DATA_FILES +import qualified Paths_pandoc as Paths +#endif + +-- | Evaluate a 'PandocIO' operation. +runIO :: PandocIO a -> IO (Either PandocError a) +runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma + +-- | Evaluate a 'PandocIO' operation, handling any errors +-- by exiting with an appropriate message and error status. +runIOorExplode :: PandocIO a -> IO a +runIOorExplode ma = runIO ma >>= handleError + +newtype PandocIO a = PandocIO { +  unPandocIO :: ExceptT PandocError (StateT CommonState IO) a +  } deriving ( MonadIO +             , Functor +             , Applicative +             , Monad +             , MonadError PandocError +             ) + +-- | Utility function to lift IO errors into 'PandocError's. +liftIOError :: (String -> IO a) -> String -> PandocIO a +liftIOError f u = do +  res <- liftIO $ tryIOError $ f u +  case res of +         Left e  -> throwError $ PandocIOError (T.pack u) e +         Right r -> return r + +-- | Show potential IO errors to the user continuing execution anyway +logIOError :: IO () -> PandocIO () +logIOError f = do +  res <- liftIO $ tryIOError f +  case res of +    Left e -> report $ IgnoredIOError $ T.pack $ E.displayException e +    Right _ -> pure () + +instance PandocMonad PandocIO where +  lookupEnv = fmap (fmap T.pack) . liftIO . IO.lookupEnv . T.unpack +  getCurrentTime = liftIO IO.getCurrentTime +  getCurrentTimeZone = liftIO IO.getCurrentTimeZone +  newStdGen = liftIO IO.newStdGen +  newUniqueHash = hashUnique <$> liftIO IO.newUnique + +  openURL u +   | Just u'' <- T.stripPrefix "data:" u = do +       let mime     = T.takeWhile (/=',') u'' +       let contents = UTF8.fromString $ +                       unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u'' +       return (decodeLenient contents, Just mime) +   | otherwise = do +       let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v) +       customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders +       report $ Fetching u +       res <- liftIO $ E.try $ withSocketsDo $ do +         let parseReq = parseRequest +         proxy <- tryIOError (getEnv "http_proxy") +         let addProxy' x = case proxy of +                              Left _ -> return x +                              Right pr -> parseReq pr >>= \r -> +                                  return (addProxy (host r) (port r) x) +         req <- parseReq (T.unpack u) >>= addProxy' +         let req' = req{requestHeaders = customHeaders ++ requestHeaders req} +         resp <- newManager tlsManagerSettings >>= httpLbs req' +         return (B.concat $ toChunks $ responseBody resp, +                 UTF8.toText `fmap` lookup hContentType (responseHeaders resp)) + +       case res of +            Right r -> return r +            Left e  -> throwError $ PandocHttpError u e + +  readFileLazy s = liftIOError BL.readFile s +  readFileStrict s = liftIOError B.readFile s + +  glob = liftIOError IO.glob +  fileExists = liftIOError Directory.doesFileExist +#ifdef EMBED_DATA_FILES +  getDataFileName = return +#else +  getDataFileName = liftIOError Paths.getDataFileName +#endif +  getModificationTime = liftIOError IO.getModificationTime +  getCommonState = PandocIO $ lift get +  putCommonState x = PandocIO $ lift $ put x +  logOutput msg = liftIO $ do +    UTF8.hPutStr stderr $ +        "[" ++ show (messageVerbosity msg) ++ "] " +    alertIndent $ T.lines $ showLogMessage msg + +-- | Prints the list of lines to @stderr@, indenting every but the first +-- line by two spaces. +alertIndent :: [Text] -> IO () +alertIndent [] = return () +alertIndent (l:ls) = do +  UTF8.hPutStrLn stderr $ T.unpack l +  mapM_ go ls +  where go l' = do UTF8.hPutStr stderr "  " +                   UTF8.hPutStrLn stderr $ T.unpack l' + +-- | Extract media from the mediabag into a directory. +extractMedia :: FilePath -> Pandoc -> PandocIO Pandoc +extractMedia dir d = do +  media <- getMediaBag +  case [fp | (fp, _, _) <- mediaDirectory media] of +        []  -> return d +        fps -> do +          mapM_ (writeMedia dir media) fps +          return $ walk (adjustImagePath dir fps) d + +-- | Write the contents of a media bag to a path. +writeMedia :: FilePath -> MediaBag -> FilePath -> PandocIO () +writeMedia dir mediabag subpath = do +  -- we join and split to convert a/b/c to a\b\c on Windows; +  -- in zip containers all paths use / +  let fullpath = dir </> unEscapeString (normalise subpath) +  let mbcontents = lookupMedia subpath mediabag +  case mbcontents of +       Nothing -> throwError $ PandocResourceNotFound $ T.pack subpath +       Just (_, bs) -> do +         report $ Extracting $ T.pack fullpath +         liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) +         logIOError $ BL.writeFile fullpath bs + +-- | If the given Inline element is an image with a @src@ path equal to +-- one in the list of @paths@, then prepends @dir@ to the image source; +-- returns the element unchanged otherwise. +adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline +adjustImagePath dir paths (Image attr lab (src, tit)) +   | T.unpack src `elem` paths = Image attr lab (T.pack dir <> "/" <> src, tit) +adjustImagePath _ _ x = x diff --git a/src/Text/Pandoc/Class/PandocPure.hs b/src/Text/Pandoc/Class/PandocPure.hs new file mode 100644 index 000000000..010ead44f --- /dev/null +++ b/src/Text/Pandoc/Class/PandocPure.hs @@ -0,0 +1,218 @@ +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{- | +Module      : Text.Pandoc.Class.Pure +Copyright   : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane +License     : GNU GPL, version 2 or above + +Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu> +Stability   : alpha +Portability : portable + +This module defines a pure instance 'PandocPure' of the @'PandocMonad'@ +typeclass. This instance is useful for testing, or when all IO access is +prohibited for security reasons. +-} +module Text.Pandoc.Class.PandocPure +  ( PureState(..) +  , getPureState +  , getsPureState +  , putPureState +  , modifyPureState +  , PandocPure(..) +  , FileTree +  , FileInfo(..) +  , addToFileTree +  , insertInFileTree +  , runPure +  ) where + +import Codec.Archive.Zip +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.Default +import Data.Text (Text) +import Data.Time (UTCTime) +import Data.Time.Clock.POSIX ( posixSecondsToUTCTime ) +import Data.Time.LocalTime (TimeZone, utc) +import Data.Word (Word8) +import System.Directory (doesDirectoryExist, getDirectoryContents) +import System.FilePath ((</>)) +import System.FilePath.Glob (match, compile) +import System.Random (StdGen, next, mkStdGen) +import Text.Pandoc.Class.CommonState (CommonState (..)) +import Text.Pandoc.Class.PandocMonad +import Text.Pandoc.Error +import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as BL +import qualified Data.Map as M +import qualified Data.Text as T +import qualified System.Directory as Directory (getModificationTime) + +-- | The 'PureState' contains ersatz representations +-- of things that would normally be obtained through IO. +data PureState = PureState +  { stStdGen     :: StdGen +  , stWord8Store :: [Word8]    -- ^ should be infinite, i.e. [1..] +  , stUniqStore  :: [Int]      -- ^ should be infinite and contain every +                               -- element at most once, e.g. [1..] +  , stEnv :: [(Text, Text)] +  , stTime :: UTCTime +  , stTimeZone :: TimeZone +  , stReferenceDocx :: Archive +  , stReferencePptx :: Archive +  , stReferenceODT :: Archive +  , stFiles :: FileTree +  , stUserDataFiles :: FileTree +  , stCabalDataFiles :: FileTree +  } + +instance Default PureState where +  def = PureState +        { stStdGen = mkStdGen 1848 +        , stWord8Store = [1..] +        , stUniqStore = [1..] +        , stEnv = [("USER", "pandoc-user")] +        , stTime = posixSecondsToUTCTime 0 +        , stTimeZone = utc +        , stReferenceDocx = emptyArchive +        , stReferencePptx = emptyArchive +        , stReferenceODT = emptyArchive +        , stFiles = mempty +        , stUserDataFiles = mempty +        , stCabalDataFiles = mempty +        } + + +-- | Retrieve the underlying state of the @'PandocPure'@ type. +getPureState :: PandocPure PureState +getPureState = PandocPure $ lift $ lift get + +-- | Retrieve a value from the underlying state of the @'PandocPure'@ +-- type. +getsPureState :: (PureState -> a) -> PandocPure a +getsPureState f = f <$> getPureState + +-- | Set a new state for the @'PandocPure'@ type. +putPureState :: PureState -> PandocPure () +putPureState ps= PandocPure $ lift $ lift $ put ps + +-- | Modify the underlying state of the @'PandocPure'@ type. +modifyPureState :: (PureState -> PureState) -> PandocPure () +modifyPureState f = PandocPure $ lift $ lift $ modify f + +-- | Captures all file-level information necessary for a @'PandocMonad'@ +-- conforming mock file system. +data FileInfo = FileInfo +  { infoFileMTime :: UTCTime +  , infoFileContents :: B.ByteString +  } + +-- | Basis of the mock file system used by @'PandocPure'@. +newtype FileTree = FileTree { unFileTree :: M.Map FilePath FileInfo } +  deriving (Semigroup, Monoid) + +-- | Retrieve @'FileInfo'@ of the given @'FilePath'@ from a +-- @'FileTree'@. +getFileInfo :: FilePath -> FileTree -> Maybe FileInfo +getFileInfo fp tree = +  M.lookup (makeCanonical fp) (unFileTree tree) + +-- | Add the specified file to the FileTree. If file +-- is a directory, add its contents recursively. +addToFileTree :: FileTree -> FilePath -> IO FileTree +addToFileTree tree fp = do +  isdir <- doesDirectoryExist fp +  if isdir +     then do -- recursively add contents of directories +       let isSpecial ".." = True +           isSpecial "."  = True +           isSpecial _    = False +       fs <- map (fp </>) . filter (not . isSpecial) <$> getDirectoryContents fp +       foldM addToFileTree tree fs +     else do +       contents <- B.readFile fp +       mtime <- Directory.getModificationTime fp +       return $ insertInFileTree fp FileInfo{ infoFileMTime = mtime +                                            , infoFileContents = contents } tree + +-- | Insert an ersatz file into the 'FileTree'. +insertInFileTree :: FilePath -> FileInfo -> FileTree -> FileTree +insertInFileTree fp info (FileTree treemap) = +  FileTree $ M.insert (makeCanonical fp) info treemap + +newtype PandocPure a = PandocPure { +  unPandocPure :: ExceptT PandocError +                  (StateT CommonState (State PureState)) a +  } deriving ( Functor +             , Applicative +             , Monad +             , MonadError PandocError +             ) + +-- | Run a 'PandocPure' operation. +runPure :: PandocPure a -> Either PandocError a +runPure x = flip evalState def $ +            flip evalStateT def $ +            runExceptT $ +            unPandocPure x + +instance PandocMonad PandocPure where +  lookupEnv s = do +    env <- getsPureState stEnv +    return (lookup s env) + +  getCurrentTime = getsPureState stTime + +  getCurrentTimeZone = getsPureState stTimeZone + +  newStdGen = do +    g <- getsPureState stStdGen +    let (_, nxtGen) = next g +    modifyPureState $ \st -> st { stStdGen = nxtGen } +    return g + +  newUniqueHash = do +    uniqs <- getsPureState stUniqStore +    case uniqs of +      u : us -> do +        modifyPureState $ \st -> st { stUniqStore = us } +        return u +      _ -> throwError $ PandocShouldNeverHappenError +                        "uniq store ran out of elements" +  openURL u = throwError $ PandocResourceNotFound u +  readFileLazy fp = do +    fps <- getsPureState stFiles +    case infoFileContents <$> getFileInfo fp fps of +      Just bs -> return (BL.fromStrict bs) +      Nothing -> throwError $ PandocResourceNotFound $ T.pack fp +  readFileStrict fp = do +    fps <- getsPureState stFiles +    case infoFileContents <$> getFileInfo fp fps of +      Just bs -> return bs +      Nothing -> throwError $ PandocResourceNotFound $ T.pack fp + +  glob s = do +    FileTree ftmap <- getsPureState stFiles +    return $ filter (match (compile s)) $ M.keys ftmap + +  fileExists fp = do +    fps <- getsPureState stFiles +    case getFileInfo fp fps of +         Nothing -> return False +         Just _  -> return True + +  getDataFileName fp = return $ "data/" ++ fp + +  getModificationTime fp = do +    fps <- getsPureState stFiles +    case infoFileMTime <$> getFileInfo fp fps of +      Just tm -> return tm +      Nothing -> throwError $ PandocIOError (T.pack fp) +                    (userError "Can't get modification time") + +  getCommonState = PandocPure $ lift get +  putCommonState x = PandocPure $ lift $ put x + +  logOutput _msg = return () | 
