aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-08-21 15:30:13 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2021-08-28 22:31:42 -0700
commitd6d7c9620abddc5e5e45450c091bc8a73bac8f66 (patch)
tree2dd3e01150a5611f5bb86cd08239de7f5eca3106
parentb76796eae8ce842f8414cca8cd8e3b55be513694 (diff)
downloadpandoc-d6d7c9620abddc5e5e45450c091bc8a73bac8f66.tar.gz
Add `--sandbox` option.
+ Add sandbox feature for readers. When this option is used, readers and writers only have access to input files (and other files specified directly on command line). This restriction is enforced in the type system. + Filters, PDF production, custom writers are unaffected. This feature only insulates the actual readers and writers, not the pipeline around them in Text.Pandoc.App. + Note that when `--sandboxed` is specified, readers won't have access to the resource path, nor will anything have access to the user data directory. + Add module Text.Pandoc.Class.Sandbox, defining `sandbox`. Exported via Text.Pandoc.Class. [API change] Closes #5045.
-rw-r--r--MANUAL.txt13
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/App.hs29
-rw-r--r--src/Text/Pandoc/App/CommandLineOptions.hs5
-rw-r--r--src/Text/Pandoc/App/Opt.hs4
-rw-r--r--src/Text/Pandoc/App/OutputSettings.hs22
-rw-r--r--src/Text/Pandoc/Class.hs2
-rw-r--r--src/Text/Pandoc/Class/PandocSandboxed.hs79
-rw-r--r--src/Text/Pandoc/Class/Sandbox.hs50
9 files changed, 120 insertions, 85 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 5dc35c8ff..75e74f1cd 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -730,6 +730,16 @@ header when requesting a document from a URL:
document in standalone mode. If no *VAL* is specified, the
key will be given the value `true`.
+`--sandbox`
+
+: Run pandoc in a sandbox, limiting IO operations in readers
+ and writers to reading the files specified on the command line.
+ Note that this option does not limit IO operations by
+ filters or in the production of PDF documents. But it does
+ offer security against, for example, disclosure of files
+ through the use of `include` directives. Anyone using
+ pandoc on untrusted user input should use this option.
+
`-D` *FORMAT*, `--print-default-template=`*FORMAT*
: Print the system default template for an output *FORMAT*. (See `-t`
@@ -6543,7 +6553,8 @@ application, here are some things to keep in mind:
2. Several input formats (including HTML, Org, and RST) support `include`
directives that allow the contents of a file to be included in the
output. An untrusted attacker could use these to view the contents of
- files on the file system.
+ files on the file system. (Using the `--sandbox` option can
+ protect against this threat.)
3. If your application uses pandoc as a Haskell library (rather than
shelling out to the executable), it is possible to use it in a mode
diff --git a/pandoc.cabal b/pandoc.cabal
index b90a61942..da53cb1cd 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -622,6 +622,7 @@ library
Text.Pandoc.Class.PandocMonad,
Text.Pandoc.Class.PandocIO,
Text.Pandoc.Class.PandocPure,
+ Text.Pandoc.Class.Sandbox,
Text.Pandoc.Filter.JSON,
Text.Pandoc.Filter.Lua,
Text.Pandoc.Filter.Path,
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 15236896c..f7c1f218d 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -139,7 +139,26 @@ convertWithOpts opts = do
<> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`."
_ -> return ()
- (reader, readerExts) <- getReader readerName
+
+ let makeSandboxed pureReader =
+ let files = maybe id (:) (optReferenceDoc opts) .
+ maybe id (:) (optEpubMetadata opts) .
+ maybe id (:) (optEpubCoverImage opts) .
+ maybe id (:) (optCSL opts) .
+ maybe id (:) (optCitationAbbreviations opts) $
+ optEpubFonts opts ++
+ optBibliography opts
+ in case pureReader of
+ TextReader r -> TextReader $ \o t -> sandbox files (r o t)
+ ByteStringReader r
+ -> ByteStringReader $ \o t -> sandbox files (r o t)
+
+ (reader, readerExts) <-
+ if optSandbox opts
+ then case runPure (getReader readerName) of
+ Left e -> throwError e
+ Right (r, rexts) -> return (makeSandboxed r, rexts)
+ else getReader readerName
outputSettings <- optToOutputSettings opts
let format = outputFormat outputSettings
@@ -274,8 +293,9 @@ convertWithOpts opts = do
ByteStringReader r ->
mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
>>=
- ( (if isJust (optExtractMedia opts)
- || writerNameBase == "docx" -- for fallback png creation
+ ( (if not (optSandbox opts) &&
+ (isJust (optExtractMedia opts)
+ || writerNameBase == "docx") -- for fallback pngs
then fillMediaBag
else return)
>=> return . adjustMetadata (metadataFromFile <>)
@@ -286,7 +306,8 @@ convertWithOpts opts = do
>=> maybe return extractMedia (optExtractMedia opts)
)
- when (writerNameBase == "docx") $ do -- create fallback pngs for svgs
+ when (writerNameBase == "docx" && not (optSandbox opts)) $ do
+ -- create fallback pngs for svgs
items <- mediaItems <$> getMediaBag
forM_ items $ \(fp, mt, bs) ->
case T.takeWhile (/=';') mt of
diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs
index d2c12573c..99017000a 100644
--- a/src/Text/Pandoc/App/CommandLineOptions.hs
+++ b/src/Text/Pandoc/App/CommandLineOptions.hs
@@ -188,6 +188,11 @@ options =
(\opt -> return opt { optFileScope = True }))
"" -- "Parse input files before combining"
+ , Option "" ["sandbox"]
+ (NoArg
+ (\opt -> return opt { optSandbox = True }))
+ ""
+
, Option "s" ["standalone"]
(NoArg
(\opt -> return opt { optStandalone = True }))
diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs
index d54d932b7..48eb15fdf 100644
--- a/src/Text/Pandoc/App/Opt.hs
+++ b/src/Text/Pandoc/App/Opt.hs
@@ -160,6 +160,7 @@ data Opt = Opt
, optCSL :: Maybe FilePath -- ^ CSL stylesheet
, optBibliography :: [FilePath] -- ^ Bibliography files
, optCitationAbbreviations :: Maybe FilePath -- ^ Citation abbreviations
+ , optSandbox :: Bool
} deriving (Generic, Show)
instance FromYAML (Opt -> Opt) where
@@ -595,6 +596,8 @@ doOpt (k',v) = do
parseYAML v >>= \x -> return (\o -> o{ optEol = x })
"strip-comments" ->
parseYAML v >>= \x -> return (\o -> o { optStripComments = x })
+ "sandbox" ->
+ parseYAML v >>= \x -> return (\o -> o { optSandbox = x })
_ -> failAtNode k' $ "Unknown option " ++ show k
-- | Defaults for command-line options.
@@ -673,6 +676,7 @@ defaultOpts = Opt
, optCSL = Nothing
, optBibliography = []
, optCitationAbbreviations = Nothing
+ , optSandbox = False
}
parseStringKey :: Node Pos -> Parser Text
diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs
index 3f83f4b21..7b057713b 100644
--- a/src/Text/Pandoc/App/OutputSettings.hs
+++ b/src/Text/Pandoc/App/OutputSettings.hs
@@ -90,11 +90,31 @@ optToOutputSettings opts = do
then writerName
else T.toLower $ baseWriterName writerName
+ let makeSandboxed pureWriter =
+ let files = maybe id (:) (optReferenceDoc opts) .
+ maybe id (:) (optEpubMetadata opts) .
+ maybe id (:) (optEpubCoverImage opts) .
+ maybe id (:) (optCSL opts) .
+ maybe id (:) (optCitationAbbreviations opts) $
+ optEpubFonts opts ++
+ optBibliography opts
+ in case pureWriter of
+ TextWriter w -> TextWriter $ \o d -> sandbox files (w o d)
+ ByteStringWriter w
+ -> ByteStringWriter $ \o d -> sandbox files (w o d)
+
+
(writer, writerExts) <-
if ".lua" `T.isSuffixOf` format
then return (TextWriter
(\o d -> writeCustom (T.unpack writerName) o d), mempty)
- else getWriter (T.toLower writerName)
+ else if optSandbox opts
+ then
+ case runPure (getWriter writerName) of
+ Left e -> throwError e
+ Right (w, wexts) ->
+ return (makeSandboxed w, wexts)
+ else getWriter (T.toLower writerName)
let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput
diff --git a/src/Text/Pandoc/Class.hs b/src/Text/Pandoc/Class.hs
index 2f28ac4dd..6394df251 100644
--- a/src/Text/Pandoc/Class.hs
+++ b/src/Text/Pandoc/Class.hs
@@ -19,6 +19,7 @@ module Text.Pandoc.Class
, module Text.Pandoc.Class.PandocIO
, module Text.Pandoc.Class.PandocMonad
, module Text.Pandoc.Class.PandocPure
+ , module Text.Pandoc.Class.Sandbox
, Translations
) where
@@ -27,3 +28,4 @@ import Text.Pandoc.Class.PandocMonad
import Text.Pandoc.Class.PandocIO
import Text.Pandoc.Class.PandocPure
import Text.Pandoc.Translations (Translations)
+import Text.Pandoc.Class.Sandbox
diff --git a/src/Text/Pandoc/Class/PandocSandboxed.hs b/src/Text/Pandoc/Class/PandocSandboxed.hs
deleted file mode 100644
index 61ee1f1c6..000000000
--- a/src/Text/Pandoc/Class/PandocSandboxed.hs
+++ /dev/null
@@ -1,79 +0,0 @@
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{- |
-Module : Text.Pandoc.Class.PandocIO
-Copyright : Copyright (C) 2016-2020 Jesse Rosenthal, John MacFarlane
-License : GNU GPL, version 2 or above
-
-Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
-Stability : alpha
-Portability : portable
-
-This module defines @'PandocIO'@, an IO-based instance of the
-@'PandocMonad'@ type class. File, data, and network access all are run
-using IO operators.
--}
-module Text.Pandoc.Class.PandocIO
- ( PandocIO(..)
- , runIO
- , runIOorExplode
- , extractMedia
- ) where
-
-import Control.Monad.Except (ExceptT, MonadError, runExceptT)
-import Control.Monad.IO.Class (MonadIO)
-import Control.Monad.State (StateT, evalStateT, lift, get, put)
-import Data.Default (Default (def))
-import Text.Pandoc.Class.CommonState (CommonState (..))
-import Text.Pandoc.Class.PandocMonad
-import Text.Pandoc.Definition
-import Text.Pandoc.Error
-import qualified Text.Pandoc.Class.IO as IO
-import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow)
-
--- | Evaluate a 'PandocIO' operation.
-runIO :: PandocIO a -> IO (Either PandocError a)
-runIO ma = flip evalStateT def $ runExceptT $ unPandocIO ma
-
--- | Evaluate a 'PandocIO' operation, handling any errors
--- by exiting with an appropriate message and error status.
-runIOorExplode :: PandocIO a -> IO a
-runIOorExplode ma = runIO ma >>= handleError
-
-newtype PandocIO a = PandocIO {
- unPandocIO :: ExceptT PandocError (StateT CommonState IO) a
- } deriving ( MonadIO
- , Functor
- , Applicative
- , Monad
- , MonadCatch
- , MonadMask
- , MonadThrow
- , MonadError PandocError
- )
-
-instance PandocMonad PandocIO where
- lookupEnv = IO.lookupEnv
- getCurrentTime = IO.getCurrentTime
- getCurrentTimeZone = IO.getCurrentTimeZone
- newStdGen = IO.newStdGen
- newUniqueHash = IO.newUniqueHash
-
- openURL = IO.openURL
- readFileLazy = IO.readFileLazy
- readFileStrict = IO.readFileStrict
- readStdinStrict = IO.readStdinStrict
-
- glob = IO.glob
- fileExists = IO.fileExists
- getDataFileName = IO.getDataFileName
- getModificationTime = IO.getModificationTime
-
- getCommonState = PandocIO $ lift get
- putCommonState = PandocIO . lift . put
-
- logOutput = IO.logOutput
-
--- | Extract media from the mediabag into a directory.
-extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc
-extractMedia = IO.extractMedia
diff --git a/src/Text/Pandoc/Class/Sandbox.hs b/src/Text/Pandoc/Class/Sandbox.hs
new file mode 100644
index 000000000..8bc0f1e77
--- /dev/null
+++ b/src/Text/Pandoc/Class/Sandbox.hs
@@ -0,0 +1,50 @@
+{- |
+Module : Text.Pandoc.Class.Sandbox
+Copyright : Copyright (C) 2021 John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : John MacFarlane (<jgm@berkeley.edu>)
+Stability : alpha
+Portability : portable
+
+This module provides a way to run PandocMonad actions in a sandbox
+(pure context, with no IO allowed and access only to designated files).
+-}
+
+module Text.Pandoc.Class.Sandbox
+ ( sandbox )
+where
+
+import Control.Monad (foldM)
+import Control.Monad.Except (throwError)
+import Control.Monad.IO.Class (MonadIO (liftIO))
+import Text.Pandoc.Class.PandocMonad
+import Text.Pandoc.Class.PandocPure
+import Text.Pandoc.Class.CommonState (CommonState(..))
+import Text.Pandoc.Logging (messageVerbosity)
+
+-- | Lift a PandocPure action into any instance of PandocMonad.
+-- The main computation is done purely, but CommonState is preserved
+-- continuously, and warnings are emitted after the action completes.
+-- The parameter is a list of FilePaths which will be added to the
+-- ersatz file system and be available for reading.
+sandbox :: (PandocMonad m, MonadIO m) => [FilePath] -> PandocPure a -> m a
+sandbox files action = do
+ oldState <- getCommonState
+ tree <- liftIO $ foldM addToFileTree mempty files
+ case runPure (do putCommonState oldState
+ modifyPureState $ \ps -> ps{ stFiles = tree }
+ result <- action
+ st <- getCommonState
+ return (st, result)) of
+ Left e -> throwError e
+ Right (st, result) -> do
+ putCommonState st
+ let verbosity = stVerbosity st
+ -- emit warnings, since these are not printed in runPure
+ let newMessages = reverse $ take
+ (length (stLog st) - length (stLog oldState)) (stLog st)
+ mapM_ logOutput
+ (filter ((<= verbosity) . messageVerbosity) newMessages)
+ return result
+