aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/App.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/App.hs')
-rw-r--r--src/Text/Pandoc/App.hs274
1 files changed, 166 insertions, 108 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 98b072ffb..9eb9c2cf3 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE TupleSections #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
@@ -25,9 +26,9 @@ module Text.Pandoc.App (
, applyFilters
) where
import qualified Control.Exception as E
-import Control.Monad ( (>=>), when )
+import Control.Monad ( (>=>), when, forM_ )
import Control.Monad.Trans ( MonadIO(..) )
-import Control.Monad.Except (throwError)
+import Control.Monad.Except (throwError, catchError)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
@@ -38,17 +39,20 @@ import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TE
+import qualified Data.Text.Encoding as TSE
import qualified Data.Text.Encoding.Error as TE
import qualified Data.Text.Encoding.Error as TSE
import Network.URI (URI (..), parseURI)
import System.Directory (doesDirectoryExist)
import System.Exit (exitSuccess)
-import System.FilePath ( takeBaseName, takeExtension )
+import System.FilePath ( takeBaseName, takeExtension)
import System.IO (nativeNewline, stdout)
import qualified System.IO as IO (Newline (..))
import Text.Pandoc
import Text.Pandoc.Builder (setMeta)
-import Text.Pandoc.MIME (getCharset)
+import Text.Pandoc.MediaBag (mediaItems)
+import Text.Pandoc.MIME (getCharset, MimeType)
+import Text.Pandoc.Image (svgToPng)
import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths)
import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts,
IpynbOutput (..))
@@ -64,6 +68,7 @@ import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
defaultUserDataDir, tshow)
import Text.Pandoc.Writers.Shared (lookupMetaString)
import Text.Pandoc.Readers.Markdown (yamlToMeta)
+import Text.Pandoc.Readers.Custom (readCustom)
import qualified Text.Pandoc.UTF8 as UTF8
#ifndef _WINDOWS
import System.Posix.IO (stdOutput)
@@ -94,40 +99,24 @@ convertWithOpts opts = do
let sources = case optInputFiles opts of
Just xs | not (optIgnoreArgs opts) -> xs
_ -> ["-"]
-
- let runIO' :: PandocIO a -> IO a
- runIO' f = do
- (res, reports) <- runIOorExplode $ do
- setTrace (optTrace opts)
- setVerbosity verbosity
- x <- f
- rs <- getLog
- return (x, rs)
- case optLogFile opts of
- Nothing -> return ()
- Just logfile -> BL.writeFile logfile (encodeLogMessages reports)
- let isWarning msg = messageVerbosity msg == WARNING
- when (optFailIfWarnings opts && any isWarning reports) $
- E.throwIO PandocFailOnWarningError
- return res
-
- let eol = case optEol opts of
- CRLF -> IO.CRLF
- LF -> IO.LF
- Native -> nativeNewline
#ifdef _WINDOWS
let istty = True
#else
istty <- liftIO $ queryTerminal stdOutput
#endif
- runIO' $ do
+ res <- runIO $ do
+
+ setTrace (optTrace opts)
+ setVerbosity verbosity
setUserDataDir datadir
setResourcePath (optResourcePath opts)
setInputFiles (fromMaybe ["-"] (optInputFiles opts))
setOutputFile (optOutputFile opts)
+ inputs <- readSources sources
+
-- assign reader and writer based on options and filenames
readerName <- case optFrom opts of
Just f -> return f
@@ -151,21 +140,28 @@ convertWithOpts opts = do
<> "` instead of `pandoc " <> inputFile <> " -o " <> outputFile <> "`."
_ -> return ()
- (reader :: Reader PandocIO, readerExts) <- getReader readerName
-
- let convertTabs = tabFilter (if optPreserveTabs opts ||
- readerNameBase == "t2t" ||
- readerNameBase == "man"
- then 0
- else optTabStop opts)
-
-
- let readSources :: [FilePath] -> PandocIO [(FilePath, Text)]
- readSources srcs =
- mapM (\fp -> do
- t <- readSource fp
- return (if fp == "-" then "" else fp, convertTabs t)) srcs
+ let makeSandboxed pureReader =
+ let files = maybe id (:) (optReferenceDoc opts) .
+ maybe id (:) (optEpubMetadata opts) .
+ maybe id (:) (optEpubCoverImage opts) .
+ maybe id (:) (optCSL opts) .
+ maybe id (:) (optCitationAbbreviations opts) $
+ optEpubFonts opts ++
+ optBibliography opts
+ in case pureReader of
+ TextReader r -> TextReader $ \o t -> sandbox files (r o t)
+ ByteStringReader r
+ -> ByteStringReader $ \o t -> sandbox files (r o t)
+
+ (reader, readerExts) <-
+ if ".lua" `T.isSuffixOf` readerName
+ then return (TextReader (readCustom (T.unpack readerName)), mempty)
+ else if optSandbox opts
+ then case runPure (getReader readerName) of
+ Left e -> throwError e
+ Right (r, rexts) -> return (makeSandboxed r, rexts)
+ else getReader readerName
outputSettings <- optToOutputSettings opts
let format = outputFormat outputSettings
@@ -224,7 +220,7 @@ convertWithOpts opts = do
case optMetadataFiles opts of
[] -> return mempty
paths -> mconcat <$>
- mapM (\path -> do raw <- readFileLazy path
+ mapM (\path -> do raw <- readFileStrict path
yamlToMeta readerOpts (Just path) raw) paths
let transforms = (case optShiftHeadingLevelBy opts of
@@ -254,20 +250,11 @@ convertWithOpts opts = do
_ -> Format format) :))
$ []
- let sourceToDoc :: [FilePath] -> PandocIO Pandoc
- sourceToDoc sources' =
- case reader of
- TextReader r
- | readerNameBase == "json" ->
- mconcat <$> mapM (readSource >=> r readerOpts) sources'
- | optFileScope opts ->
- -- Read source and convert tabs (see #6709)
- let readSource' = fmap convertTabs . readSource
- in mconcat <$> mapM (readSource' >=> r readerOpts) sources'
- | otherwise ->
- readSources sources' >>= r readerOpts
- ByteStringReader r ->
- mconcat <$> mapM (readFile' >=> r readerOpts) sources'
+ let convertTabs = tabFilter (if optPreserveTabs opts ||
+ readerNameBase == "t2t" ||
+ readerNameBase == "man"
+ then 0
+ else optTabStop opts)
when (readerNameBase == "markdown_github" ||
@@ -293,8 +280,25 @@ convertWithOpts opts = do
maybe id (setMeta "citation-abbreviations")
(optCitationAbbreviations opts) $ mempty
- doc <- sourceToDoc sources >>=
- ( (if isJust (optExtractMedia opts)
+ doc <- (case reader of
+ TextReader r
+ | readerNameBase == "json" ->
+ mconcat <$>
+ mapM (inputToText convertTabs
+ >=> r readerOpts . (:[])) inputs
+ | optFileScope opts ->
+ mconcat <$> mapM
+ (inputToText convertTabs
+ >=> r readerOpts . (:[]))
+ inputs
+ | otherwise -> mapM (inputToText convertTabs) inputs
+ >>= r readerOpts
+ ByteStringReader r ->
+ mconcat <$> mapM (r readerOpts . inputToLazyByteString) inputs)
+ >>=
+ ( (if not (optSandbox opts) &&
+ (isJust (optExtractMedia opts)
+ || writerNameBase == "docx") -- for fallback pngs
then fillMediaBag
else return)
>=> return . adjustMetadata (metadataFromFile <>)
@@ -305,14 +309,28 @@ convertWithOpts opts = do
>=> maybe return extractMedia (optExtractMedia opts)
)
- case writer of
- ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile
+ when (writerNameBase == "docx" && not (optSandbox opts)) $ do
+ -- create fallback pngs for svgs
+ items <- mediaItems <$> getMediaBag
+ forM_ items $ \(fp, mt, bs) ->
+ case T.takeWhile (/=';') mt of
+ "image/svg+xml" -> do
+ res <- svgToPng (writerDpi writerOptions) bs
+ case res of
+ Right bs' -> do
+ let fp' = fp <> ".png"
+ insertMedia fp' (Just "image/png") bs'
+ Left e -> report $ CouldNotConvertImage (T.pack fp) (tshow e)
+ _ -> return ()
+
+ output <- case writer of
+ ByteStringWriter f -> BinaryOutput <$> f writerOptions doc
TextWriter f -> case outputPdfProgram outputSettings of
Just pdfProg -> do
res <- makePDF pdfProg (optPdfEngineOpts opts) f
writerOptions doc
case res of
- Right pdf -> writeFnBinary outputFile pdf
+ Right pdf -> return $ BinaryOutput pdf
Left err' -> throwError $ PandocPDFError $
TL.toStrict (TE.decodeUtf8With TE.lenientDecode err')
@@ -321,11 +339,32 @@ convertWithOpts opts = do
| standalone = t
| T.null t || T.last t /= '\n' = t <> T.singleton '\n'
| otherwise = t
- output <- ensureNl <$> f writerOptions doc
- writerFn eol outputFile =<<
- if optSelfContained opts && htmlFormat format
- then makeSelfContained output
- else return output
+ textOutput <- ensureNl <$> f writerOptions doc
+ if optSelfContained opts && htmlFormat format
+ then TextOutput <$> makeSelfContained textOutput
+ else return $ TextOutput textOutput
+ reports <- getLog
+ return (output, reports)
+
+ case res of
+ Left e -> E.throwIO e
+ Right (output, reports) -> do
+ case optLogFile opts of
+ Nothing -> return ()
+ Just logfile -> BL.writeFile logfile (encodeLogMessages reports)
+ let isWarning msg = messageVerbosity msg == WARNING
+ when (optFailIfWarnings opts && any isWarning reports) $
+ E.throwIO PandocFailOnWarningError
+ let eol = case optEol opts of
+ CRLF -> IO.CRLF
+ LF -> IO.LF
+ Native -> nativeNewline
+ case output of
+ TextOutput t -> writerFn eol outputFile t
+ BinaryOutput bs -> writeFnBinary outputFile bs
+
+data PandocOutput = TextOutput Text | BinaryOutput BL.ByteString
+ deriving (Show)
type Transform = Pandoc -> Pandoc
@@ -344,49 +383,68 @@ adjustMetadata f (Pandoc meta bs) = Pandoc (f meta) bs
applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc
applyTransforms transforms d = return $ foldr ($) d transforms
-readSource :: FilePath -> PandocIO Text
-readSource src = case parseURI src of
- Just u | uriScheme u `elem` ["http:","https:"] ->
- readURI src
- | uriScheme u == "file:" -> liftIO $
- readTextFile (uriPathToPath $ T.pack $ uriPath u)
- _ -> liftIO $ readTextFile src
- where readTextFile :: FilePath -> IO Text
- readTextFile fp = do
- bs <- if src == "-"
- then BS.getContents
- else BS.readFile fp
- E.catch (return $! UTF8.toText bs)
- (\e -> E.throwIO $ case e of
- TSE.DecodeError _ (Just w) ->
- case BS.elemIndex w bs of
- Just offset ->
- PandocUTF8DecodingError (T.pack fp) offset w
- _ -> PandocUTF8DecodingError (T.pack fp) 0 w
- _ -> PandocAppError (tshow e))
-
-readURI :: FilePath -> PandocIO Text
-readURI src = do
- (bs, mt) <- openURL (T.pack src)
+readSources :: (PandocMonad m, MonadIO m)
+ => [FilePath] -> m [(FilePath, (BS.ByteString, Maybe MimeType))]
+readSources srcs =
+ mapM (\fp -> do t <- readSource fp
+ return (if fp == "-" then "" else fp, t)) srcs
+
+readSource :: (PandocMonad m, MonadIO m)
+ => FilePath -> m (BS.ByteString, Maybe MimeType)
+readSource "-" = (,Nothing) <$> readStdinStrict
+readSource src =
+ case parseURI src of
+ Just u | uriScheme u `elem` ["http:","https:"] -> openURL (T.pack src)
+ | uriScheme u == "file:" ->
+ (,Nothing) <$>
+ readFileStrict (uriPathToPath $ T.pack $ uriPath u)
+ _ -> (,Nothing) <$> readFileStrict src
+
+utf8ToText :: PandocMonad m => FilePath -> BS.ByteString -> m Text
+utf8ToText fp bs =
+ case TSE.decodeUtf8' . dropBOM $ bs of
+ Left (TSE.DecodeError _ (Just w)) ->
+ case BS.elemIndex w bs of
+ Just offset -> throwError $ PandocUTF8DecodingError (T.pack fp) offset w
+ Nothing -> throwError $ PandocUTF8DecodingError (T.pack fp) 0 w
+ Left e -> throwError $ PandocAppError (tshow e)
+ Right t -> return t
+ where
+ dropBOM bs' =
+ if "\xEF\xBB\xBF" `BS.isPrefixOf` bs'
+ then BS.drop 3 bs'
+ else bs'
+
+
+inputToText :: PandocMonad m
+ => (Text -> Text)
+ -> (FilePath, (BS.ByteString, Maybe MimeType))
+ -> m (FilePath, Text)
+inputToText convTabs (fp, (bs,mt)) =
+ (fp,) . convTabs . T.filter (/='\r') <$>
case mt >>= getCharset of
- Just "UTF-8" -> return $ UTF8.toText bs
+ Just "UTF-8" -> utf8ToText fp bs
Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs
Just charset -> throwError $ PandocUnsupportedCharsetError charset
- Nothing -> liftIO $ -- try first as UTF-8, then as latin1
- E.catch (return $! UTF8.toText bs)
- (\case
- TSE.DecodeError{} ->
- return $ T.pack $ B8.unpack bs
- e -> E.throwIO e)
-
-readFile' :: MonadIO m => FilePath -> m BL.ByteString
-readFile' "-" = liftIO BL.getContents
-readFile' f = liftIO $ BL.readFile f
-
-writeFnBinary :: MonadIO m => FilePath -> BL.ByteString -> m ()
-writeFnBinary "-" = liftIO . BL.putStr
-writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f)
-
-writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m ()
-writerFn eol "-" = liftIO . UTF8.putStrWith eol
-writerFn eol f = liftIO . UTF8.writeFileWith eol f
+ Nothing -> catchError
+ (utf8ToText fp bs)
+ (\case
+ PandocUTF8DecodingError{} -> do
+ report $ NotUTF8Encoded
+ (if null fp
+ then "input"
+ else fp)
+ return $ T.pack $ B8.unpack bs
+ e -> throwError e)
+
+inputToLazyByteString :: (FilePath, (BS.ByteString, Maybe MimeType))
+ -> BL.ByteString
+inputToLazyByteString (_, (bs,_)) = BL.fromStrict bs
+
+writeFnBinary :: FilePath -> BL.ByteString -> IO ()
+writeFnBinary "-" = BL.putStr
+writeFnBinary f = BL.writeFile (UTF8.encodePath f)
+
+writerFn :: IO.Newline -> FilePath -> Text -> IO ()
+writerFn eol "-" = UTF8.putStrWith eol
+writerFn eol f = UTF8.writeFileWith eol f