aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANUAL.txt13
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/App.hs29
-rw-r--r--src/Text/Pandoc/App/CommandLineOptions.hs5
-rw-r--r--src/Text/Pandoc/App/Opt.hs4
-rw-r--r--src/Text/Pandoc/App/OutputSettings.hs22
-rw-r--r--src/Text/Pandoc/Class.hs2
-rw-r--r--src/Text/Pandoc/Class/PandocSandboxed.hs79
-rw-r--r--src/Text/Pandoc/Class/Sandbox.hs50
9 files changed, 120 insertions, 85 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 5dc35c8ff..75e74f1cd 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -730,6 +730,16 @@ header when requesting a document from a URL:
document in standalone mode. If no *VAL* is specified, the
key will be given the value `true`.
+`--sandbox`
+
+: Run pandoc in a sandbox, limiting IO operations in readers
+ and writers to reading the files specified on the command line.
+ Note that this option does not limit IO operations by
+ filters or in the production of PDF documents. But it does
+ offer security against, for example, disclosure of files
+ through the use of `include` directives. Anyone using
+ pandoc on untrusted user input should use this option.
+
`-D` *FORMAT*, `--print-default-template=`*FORMAT*
: Print the system default template for an output *FORMAT*. (See `-t`
@@ -6543,7 +6553,8 @@ application, here are some things to keep in mind:
2. Several input formats (including HTML, Org, and RST) support `include`
directives that allow the contents of a file to be included in the
output. An untrusted attacker could use these to view the contents of
- files on the file system.
+ files on the file system. (Using the `--sandbox` option can
+ protect against this threat.)
3. If your application uses pandoc as a Haskell library (rather than
shelling out to the executable), it is possible to use it in a mode
diff --git a/pandoc.cabal b/pandoc.cabal
index b90a61942..da53cb1cd 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -622,6 +622,7 @@ library
Text.Pandoc.Class.PandocMonad,
Text.Pandoc.Class.PandocIO,
Text.Pandoc.Class.PandocPure,
+ Text.Pandoc.Class.Sandbox,
Text.Pandoc.Filter.JSON,
Text.Pandoc.Filter.Lua,
Text.Pandoc.Filter.Path,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 15236896c..f7c1f218d 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -139,7 +139,26 @@ convertWithOpts opts = do
<> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`."
_ -> return ()
- (reader, readerExts) <- getReader readerName
+
+ let makeSandboxed pureReader =
+ let files = maybe id (:) (optReferenceDoc opts) .
+ maybe id (:) (optEpubMetadata opts) .
+ maybe id (:) (optEpubCoverImage opts) .
+ maybe id (:) (optCSL opts) .
+ maybe id (:) (optCitationAbbreviations opts) $
+ optEpubFonts opts ++
+ optBibliography opts
+ in case pureReader of
+ TextReader r -> TextReader $ \o t -> sandbox files (r o t)
+ ByteStringReader r
+ -> ByteStringReader $ \o t -> sandbox files (r o t)
+
+ (reader, readerExts) <-
+ if optSandbox opts
+ then case runPure (getReader readerName) of
+ Left e -> throwError e
+ Right (r, rexts) -> return (makeSandboxed r, rexts)
+ else getReader readerName
outputSettings <- optToOutputSettings opts
let format = outputFormat outputSettings
@@ -274,8 +293,9 @@ convertWithOpts opts = do
ByteStringReader r ->
mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
>>=
- ( (if isJust (optExtractMedia opts)
- || writerNameBase == "docx" -- for fallback png creation
+ ( (if not (optSandbox opts) &&
+ (isJust (optExtractMedia opts)
+ || writerNameBase == "docx") -- for fallback pngs
then fillMediaBag
else return)
>=> return . adjustMetadata (metadataFromFile <>)
@@ -286,7 +306,8 @@ convertWithOpts opts = do
>=> maybe return extractMedia (optExtractMedia opts)
)
- when (writerNameBase == "docx") $ do -- create fallback pngs for svgs
+ when (writerNameBase == "docx" && not (optSandbox opts)) $ do
+ -- create fallback pngs for svgs
items <- mediaItems <$> getMediaBag
forM_ items $ \(fp, mt, bs) ->
case T.takeWhile (/=';') mt of
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index d2c12573c..99017000a 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -188,6 +188,11 @@ options =
(\opt -> return opt { optFileScope = True }))
"" -- "Parse input files before combining"
+ , Option "" ["sandbox"]
+ (NoArg
+ (\opt -> return opt { optSandbox = True }))
+ ""
+
, Option "s" ["standalone"]
(NoArg
(\opt -> return opt { optStandalone = True }))
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index d54d932b7..48eb15fdf 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -160,6 +160,7 @@ data Opt = Opt
, optCSL :: Maybe FilePath -- ^ CSL stylesheet
, optBibliography :: [FilePath] -- ^ Bibliography files
, optCitationAbbreviations :: Maybe FilePath -- ^ Citation abbreviations
+ , optSandbox :: Bool
} deriving (Generic, Show)
instance FromYAML (Opt -> Opt) where
@@ -595,6 +596,8 @@ doOpt (k',v) = do
parseYAML v >>= \x -> return (\o -> o{ optEol = x })
"strip-comments" ->
parseYAML v >>= \x -> return (\o -> o { optStripComments = x })
+ "sandbox" ->
+ parseYAML v >>= \x -> return (\o -> o { optSandbox = x })
_ -> failAtNode k' $ "Unknown option " ++ show k
-- | Defaults for command-line options.
@@ -673,6 +676,7 @@ defaultOpts = Opt
, optCSL = Nothing
, optBibliography = []
, optCitationAbbreviations = Nothing
+ , optSandbox = False
}
parseStringKey :: Node Pos -> Parser Text
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 3f83f4b21..7b057713b 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -90,11 +90,31 @@ optToOutputSettings opts = do
then writerName
else T.toLower $ baseWriterName writerName
+ let makeSandboxed pureWriter =
+ let files = maybe id (:) (optReferenceDoc opts) .
+ maybe id (:) (optEpubMetadata opts) .
+ maybe id (:) (optEpubCoverImage opts) .
+ maybe id (:) (optCSL opts) .
+ maybe id (:) (optCitationAbbreviations opts) $
+ optEpubFonts opts ++
+ optBibliography opts
+ in case pureWriter of
+ TextWriter w -> TextWriter $ \o d -> sandbox files (w o d)
+ ByteStringWriter w
+ -> ByteStringWriter $ \o d -> sandbox files (w o d)
+
+
(writer, writerExts) <-
if ".lua" `T.isSuffixOf` format
then return (TextWriter
(\o d -> writeCustom (T.unpack writerName) o d), mempty)
- else getWriter (T.toLower writerName)
+ else if optSandbox opts
+ then
+ case runPure (getWriter writerName) of
+ Left e -> throwError e
+ Right (w, wexts) ->
+ return (makeSandboxed w, wexts)
+ else getWriter (T.toLower writerName)
let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 2f28ac4dd..6394df251 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -19,6 +19,7 @@ module Text.Pandoc.Class
, module Text.Pandoc.Class.PandocIO
, module Text.Pandoc.Class.PandocMonad
, module Text.Pandoc.Class.PandocPure
+ , module Text.Pandoc.Class.Sandbox
, Translations
) where
@@ -27,3 +28,4 @@ import Text.Pandoc.Class.PandocMonad
import Text.Pandoc.Class.PandocIO
import Text.Pandoc.Class.PandocPure
import Text.Pandoc.Translations (Translations)
+import Text.Pandoc.Class.Sandbox
diff --git a/src/Text/Pandoc/Class/PandocSandboxed.hs b/src/Text/Pandoc/Class/PandocSandboxed.hs
deleted file mode 100644
index 61ee1f1c6..000000000
--- a/src/Text/Pandoc/Class/PandocSandboxed.hs
+++ /dev/null
@@ -1,79 +0,0 @@
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{- |
-Module : Text.Pandoc.Class.PandocIO
-Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
-License : GNU GPL, version 2 or above
-
-Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
-Stability : alpha
-Portability : portable
-
-This module defines @'PandocIO'@, an IO-based instance of the
-@'PandocMonad'@ type class. File, data, and network access all are run
-using IO operators.
--}
-module Text.Pandoc.Class.PandocIO
- ( PandocIO(..)
- , runIO
- , runIOorExplode
- , extractMedia
- ) where
-
-import Control.Monad.Except (ExceptT, MonadError, runExceptT)
-import Control.Monad.IO.Class (MonadIO)
-import Control.Monad.State (StateT, evalStateT, lift, get, put)
-import Data.Default (Default (def))
-import Text.Pandoc.Class.CommonState (CommonState (..))
-import Text.Pandoc.Class.PandocMonad
-import Text.Pandoc.Definition
-import Text.Pandoc.Error
-import qualified Text.Pandoc.Class.IO as IO
-import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow)
-
--- | Evaluate a 'PandocIO' operation.
-runIO :: PandocIO a -> IO (Either PandocError a)
-runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
-
--- | Evaluate a 'PandocIO' operation, handling any errors
--- by exiting with an appropriate message and error status.
-runIOorExplode :: PandocIO a -> IO a
-runIOorExplode ma = runIO ma >>= handleError
-
-newtype PandocIO a = PandocIO {
- unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
- } deriving ( MonadIO
- , Functor
- , Applicative
- , Monad
- , MonadCatch
- , MonadMask
- , MonadThrow
- , MonadError PandocError
- )
-
-instance PandocMonad PandocIO where
- lookupEnv = IO.lookupEnv
- getCurrentTime = IO.getCurrentTime
- getCurrentTimeZone = IO.getCurrentTimeZone
- newStdGen = IO.newStdGen
- newUniqueHash = IO.newUniqueHash
-
- openURL = IO.openURL
- readFileLazy = IO.readFileLazy
- readFileStrict = IO.readFileStrict
- readStdinStrict = IO.readStdinStrict
-
- glob = IO.glob
- fileExists = IO.fileExists
- getDataFileName = IO.getDataFileName
- getModificationTime = IO.getModificationTime
-
- getCommonState = PandocIO $ lift get
- putCommonState = PandocIO . lift . put
-
- logOutput = IO.logOutput
-
--- | Extract media from the mediabag into a directory.
-extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
-extractMedia = IO.extractMedia
diff --git a/src/Text/Pandoc/Class/Sandbox.hs b/src/Text/Pandoc/Class/Sandbox.hs
new file mode 100644
index 000000000..8bc0f1e77
--- /dev/null
+++ b/src/Text/Pandoc/Class/Sandbox.hs
@@ -0,0 +1,50 @@
+{- |
+Module : Text.Pandoc.Class.Sandbox
+Copyright : Copyright (C) 2021 John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : John MacFarlane (<jgm@berkeley.edu>)
+Stability : alpha
+Portability : portable
+
+This module provides a way to run PandocMonad actions in a sandbox
+(pure context, with no IO allowed and access only to designated files).
+-}
+
+module Text.Pandoc.Class.Sandbox
+ ( sandbox )
+where
+
+import Control.Monad (foldM)
+import Control.Monad.Except (throwError)
+import Control.Monad.IO.Class (MonadIO (liftIO))
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Class.PandocPure
+import Text.Pandoc.Class.CommonState (CommonState(..))
+import Text.Pandoc.Logging (messageVerbosity)
+
+-- | Lift a PandocPure action into any instance of PandocMonad.
+-- The main computation is done purely, but CommonState is preserved
+-- continuously, and warnings are emitted after the action completes.
+-- The parameter is a list of FilePaths which will be added to the
+-- ersatz file system and be available for reading.
+sandbox :: (PandocMonad m, MonadIO m) => [FilePath] -> PandocPure a -> m a
+sandbox files action = do
+ oldState <- getCommonState
+ tree <- liftIO $ foldM addToFileTree mempty files
+ case runPure (do putCommonState oldState
+ modifyPureState $ \ps -> ps{ stFiles = tree }
+ result <- action
+ st <- getCommonState
+ return (st, result)) of
+ Left e -> throwError e
+ Right (st, result) -> do
+ putCommonState st
+ let verbosity = stVerbosity st
+ -- emit warnings, since these are not printed in runPure
+ let newMessages = reverse $ take
+ (length (stLog st) - length (stLog oldState)) (stLog st)
+ mapM_ logOutput
+ (filter ((<= verbosity) . messageVerbosity) newMessages)
+ return result
+