From c7e3c1ec1797d633c181e5701c94f4169b0e5471 Mon Sep 17 00:00:00 2001 From: Mauro Bieg Date: Tue, 12 Sep 2017 05:18:42 +0200 Subject: Support for PDF generation via `weasyprint` and `prince` (#3909) * Rename --latex-engine to --pdf-engine * In `Text.Pandoc.Options.WriterOptions`, rename `writerLaTeXEngine` to `writerPdfEngine` and `writerLaTeXArgs` to `writerPdfArgs`. * Add support for `weasyprint` and `prince`, in addition to `wkhtmltopdf`, for PDF generation via HTML (closes #3906). * `Text.Pandoc.PDF.html2pdf`: use stdin instead of intermediate HTML file --- MANUAL.txt | 29 ++++++----- data/bash_completion.tpl | 4 +- src/Text/Pandoc/App.hs | 118 +++++++++++++++++++++++++++++---------------- src/Text/Pandoc/Error.hs | 2 +- src/Text/Pandoc/Options.hs | 4 +- src/Text/Pandoc/PDF.hs | 46 +++++++++++------- 6 files changed, 127 insertions(+), 76 deletions(-) diff --git a/MANUAL.txt b/MANUAL.txt index ee50d7225..0b0fadd22 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -186,12 +186,12 @@ Creating a PDF -------------- To produce a PDF, specify an output file with a `.pdf` extension. -By default, pandoc will use LaTeX to convert it to PDF: +By default, pandoc will use LaTeX to create the PDF: pandoc test.txt -o test.pdf Production of a PDF requires that a LaTeX engine be installed (see -`--latex-engine`, below), and assumes that the following LaTeX packages +`--pdf-engine`, below), and assumes that the following LaTeX packages are available: [`amsfonts`], [`amsmath`], [`lm`], [`unicode-math`], [`ifxetex`], [`ifluatex`], [`listings`] (if the `--listings` option is used), [`fancyvrb`], [`longtable`], @@ -210,16 +210,19 @@ if added to the template or included in any header file. The optionally be used for [citation rendering]. These are included with all recent versions of [TeX Live]. -Alternatively, pandoc can use ConTeXt, `wkhtmltopdf`, or -`pdfroff` to create a PDF. To do this, specify an output file -with a `.pdf` extension, as before, but add `-t context`, `-t -html5`, or `-t ms` to the command line. +Alternatively, pandoc can use [ConTeXt], `pdfroff`, or any of the +following HTML/CSS-to-PDF-engines, to create a PDF: [`wkhtmltopdf`], +[`weasyprint`] or `prince`. +To do this, specify an output file with a `.pdf` extension, as before, +but add the `--pdf-engine` option or `-t context`, `-t html`, or `-t ms` +to the command line (`-t html` defaults to `--pdf-engine=wkhtmltopdf`). PDF output can be controlled using [variables for LaTeX] (if LaTeX is used) and [variables for ConTeXt] (if ConTeXt is used). +When using an HTML/CSS-to-PDF-engine, `--css` affects the output. If `wkhtmltopdf` is used, then the variables `margin-left`, `margin-right`, `margin-top`, `margin-bottom`, and `papersize` -will affect the output, as will `--css`. +will affect the output. [`amsfonts`]: https://ctan.org/pkg/amsfonts [`amsmath`]: https://ctan.org/pkg/amsmath @@ -251,6 +254,8 @@ will affect the output, as will `--css`. [`bibtex`]: https://ctan.org/pkg/bibtex [`biber`]: https://ctan.org/pkg/biber [TeX Live]: http://www.tug.org/texlive/ +[`wkhtmltopdf`]: https://wkhtmltopdf.org +[`weasyprint`]: http://weasyprint.org Options ======= @@ -990,15 +995,15 @@ Options affecting specific writers the EPUB-specific contents. The default is `EPUB`. To put the EPUB contents in the top level, use an empty string. -`--latex-engine=pdflatex`|`lualatex`|`xelatex` +`--pdf-engine=pdflatex`|`lualatex`|`xelatex`|`wkhtmltopdf`|`weasyprint`|`prince`|`context`|`pdfroff` -: Use the specified LaTeX engine when producing PDF output. +: Use the specified engine when producing PDF output. The default is `pdflatex`. If the engine is not in your PATH, the full path of the engine may be specified here. -`--latex-engine-opt=`*STRING* +`--pdf-engine-opt=`*STRING* -: Use the given string as a command-line argument to the `latex-engine`. +: Use the given string as a command-line argument to the `pdf-engine`. If used multiple times, the arguments are provided with spaces between them. Note that no check for duplicate options is done. @@ -1300,7 +1305,7 @@ Language variables [Unicode Bidirectional Algorithm]. When using LaTeX for bidirectional documents, only the `xelatex` engine - is fully supported (use `--latex-engine=xelatex`). + is fully supported (use `--pdf-engine=xelatex`). [BCP 47]: https://tools.ietf.org/html/bcp47 [Unicode Bidirectional Algorithm]: http://www.w3.org/International/articles/inline-bidi-markup/uba-basics diff --git a/data/bash_completion.tpl b/data/bash_completion.tpl index 317fd5095..c9145c06c 100644 --- a/data/bash_completion.tpl +++ b/data/bash_completion.tpl @@ -29,8 +29,8 @@ _pandoc() COMPREPLY=( $(compgen -W "references javascript none" -- ${cur}) ) return 0 ;; - --latex-engine) - COMPREPLY=( $(compgen -W "pdflatex lualatex xelatex" -- ${cur}) ) + --pdf-engine) + COMPREPLY=( $(compgen -W "pdflatex lualatex xelatex wkhtmltopdf weasyprint prince context pdfroff" -- ${cur}) ) return 0 ;; --print-default-data-file) diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 367a1f550..93a2a9da6 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -121,6 +121,53 @@ parseOptions options' defaults = do opts <- foldl (>>=) (return defaults) actions return (opts{ optInputFiles = args }) +latexEngines :: [String] +latexEngines = ["pdflatex", "lualatex", "xelatex"] + +htmlEngines :: [String] +htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"] + +pdfEngines :: [String] +pdfEngines = latexEngines ++ htmlEngines ++ ["context", "pdfroff"] + +pdfWriterAndProg :: Maybe String -- ^ user-specified writer name + -> Maybe String -- ^ user-specified pdf-engine + -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) +pdfWriterAndProg mWriter mEngine = do + let panErr msg = liftIO $ E.throwIO $ PandocAppError msg + case go mWriter mEngine of + (Right writ, Right prog) -> return (writ, Just prog) + (Left err, _) -> panErr err + (_, Left err) -> panErr err + where + go Nothing Nothing = (Right "latex", Right $ head latexEngines) + go (Just writer) Nothing = (Right writer, engineForWriter writer) + go Nothing (Just engine) = (writerForEngine engine, Right engine) + go (Just writer) (Just engine) = + let (Right shouldFormat) = writerForEngine engine + userFormat = case map toLower writer of + "html5" -> "html" + x -> x + in if userFormat == shouldFormat + then (Right writer, Right engine) + else (Left $ "pdf-engine " ++ engine ++ " is not compatible with output format " + ++ writer ++ ", please use `-t " ++ shouldFormat ++ "`", Left "") + + writerForEngine "context" = Right "context" + writerForEngine "pdfroff" = Right "ms" + writerForEngine en + | takeBaseName en `elem` latexEngines = Right "latex" + | takeBaseName en `elem` htmlEngines = Right "html" + writerForEngine _ = Left "pdf-engine not known" + + engineForWriter "context" = Right "context" + engineForWriter "ms" = Right "pdfroff" + engineForWriter "latex" = Right $ head latexEngines + engineForWriter format + | format `elem` ["html", "html5"] = Right $ head htmlEngines + | otherwise = Left $ "cannot produce pdf output with output format " ++ format + + convertWithOpts :: Opt -> IO () convertWithOpts opts = do let args = optInputFiles opts @@ -171,18 +218,16 @@ convertWithOpts opts = do else "markdown") sources Just x -> map toLower x - let writerName = case optWriter opts of - Nothing -> defaultWriterName outputFile - Just x -> map toLower x - let format = takeWhile (`notElem` ['+','-']) - $ takeFileName writerName -- in case path to lua script + let nonPdfWriterName Nothing = defaultWriterName outputFile + nonPdfWriterName (Just x) = map toLower x let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" + (writerName, maybePdfProg) <- if pdfOutput + then pdfWriterAndProg (optWriter opts) (optPdfEngine opts) + else return (nonPdfWriterName $ optWriter opts, Nothing) - let laTeXOutput = format `elem` ["latex", "beamer"] - let conTeXtOutput = format == "context" - let html5Output = format == "html5" || format == "html" - let msOutput = format == "ms" + let format = takeWhile (`notElem` ['+','-']) + $ takeFileName writerName -- in case path to lua script -- disabling the custom writer for now (writer, writerExts) <- @@ -417,7 +462,7 @@ convertWithOpts opts = do , writerEpubChapterLevel = optEpubChapterLevel opts , writerTOCDepth = optTOCDepth opts , writerReferenceDoc = optReferenceDoc opts - , writerLaTeXArgs = optLaTeXEngineArgs opts + , writerPdfArgs = optPdfEngineArgs opts , writerSyntaxMap = syntaxMap } @@ -475,27 +520,14 @@ convertWithOpts opts = do case writer of ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile - TextWriter f - | pdfOutput -> do - -- make sure writer is latex, beamer, context, html5 or ms - unless (laTeXOutput || conTeXtOutput || html5Output || - msOutput) $ - liftIO $ E.throwIO $ PandocAppError $ - "cannot produce pdf output with " ++ format ++ " writer" - - let pdfprog = case () of - _ | conTeXtOutput -> "context" - | html5Output -> "wkhtmltopdf" - | html5Output -> "wkhtmltopdf" - | msOutput -> "pdfroff" - | otherwise -> optLaTeXEngine opts - - res <- makePDF pdfprog f writerOptions verbosity media doc + TextWriter f -> case maybePdfProg of + Just pdfProg -> do + res <- makePDF pdfProg f writerOptions verbosity media doc case res of Right pdf -> writeFnBinary outputFile pdf Left err' -> liftIO $ E.throwIO $ PandocPDFError (UTF8.toStringLazy err') - | otherwise -> do + Nothing -> do let htmlFormat = format `elem` ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] handleEntities = if (htmlFormat || @@ -605,8 +637,8 @@ data Opt = Opt , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites , optListings :: Bool -- ^ Use listings package for code blocks - , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf - , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine + , optPdfEngine :: Maybe String -- ^ Program to use for latex/html -> pdf + , optPdfEngineArgs :: [String] -- ^ Flags to pass to the engine , optSlideLevel :: Maybe Int -- ^ Header level that creates slides , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 , optAscii :: Bool -- ^ Use ascii characters only in html @@ -681,8 +713,8 @@ defaultOpts = Opt , optDataDir = Nothing , optCiteMethod = Citeproc , optListings = False - , optLaTeXEngine = "pdflatex" - , optLaTeXEngineArgs = [] + , optPdfEngine = Nothing + , optPdfEngineArgs = [] , optSlideLevel = Nothing , optSetextHeaders = True , optAscii = False @@ -778,7 +810,6 @@ defaultWriterName x = ".org" -> "org" ".asciidoc" -> "asciidoc" ".adoc" -> "asciidoc" - ".pdf" -> "latex" ".fb2" -> "fb2" ".opml" -> "opml" ".icml" -> "icml" @@ -1314,23 +1345,24 @@ options = "NUMBER") "" -- "Header level at which to split chapters in EPUB" - , Option "" ["latex-engine"] + , Option "" ["pdf-engine"] (ReqArg (\arg opt -> do let b = takeBaseName arg - if b `elem` ["pdflatex", "lualatex", "xelatex"] - then return opt { optLaTeXEngine = arg } - else E.throwIO $ PandocOptionError "latex-engine must be pdflatex, lualatex, or xelatex.") + if b `elem` pdfEngines + then return opt { optPdfEngine = Just arg } + else E.throwIO $ PandocOptionError $ "pdf-engine must be one of " + ++ intercalate ", " pdfEngines) "PROGRAM") - "" -- "Name of latex program to use in generating PDF" + "" -- "Name of program to use in generating PDF" - , Option "" ["latex-engine-opt"] + , Option "" ["pdf-engine-opt"] (ReqArg (\arg opt -> do - let oldArgs = optLaTeXEngineArgs opt - return opt { optLaTeXEngineArgs = arg : oldArgs }) + let oldArgs = optPdfEngineArgs opt + return opt { optPdfEngineArgs = arg : oldArgs }) "STRING") - "" -- "Flags to pass to the LaTeX engine, all instances of this option are accumulated and used" + "" -- "Flags to pass to the PDF-engine, all instances of this option are accumulated and used" , Option "" ["bibliography"] (ReqArg @@ -1590,6 +1622,10 @@ handleUnrecognizedOption "--old-dashes" = ("--old-dashes has been removed. Use +old_dashes extension instead." :) handleUnrecognizedOption "--no-wrap" = ("--no-wrap has been removed. Use --wrap=none instead." :) +handleUnrecognizedOption "--latex-engine" = + ("--latex-engine has been removed. Use --pdf-engine instead." :) +handleUnrecognizedOption "--latex-engine-opt" = + ("--latex-engine-opt has been removed. Use --pdf-engine-opt instead." :) handleUnrecognizedOption "--chapters" = ("--chapters has been removed. Use --top-level-division=chapter instead." :) handleUnrecognizedOption "--reference-docx" = diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 24186720c..2bd8bef0a 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -96,7 +96,7 @@ handleError (Left e) = PandocSyntaxMapError s -> err 67 s PandocFailOnWarningError -> err 3 "Failing because there were warnings." PandocPDFProgramNotFoundError pdfprog -> err 47 $ - pdfprog ++ " not found. " ++ pdfprog ++ " is needed for pdf output." + pdfprog ++ " not found. Please select a different --pdf-engine or install " ++ pdfprog PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" ++ logmsg PandocFilterError filtername msg -> err 83 $ "Error running filter " ++ filtername ++ ":\n" ++ msg diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 89b26deb0..0bd66d54d 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -216,7 +216,7 @@ data WriterOptions = WriterOptions , writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files) , writerTOCDepth :: Int -- ^ Number of levels to include in TOC , writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified - , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine + , writerPdfArgs :: [String] -- ^ Flags to pass to pdf-engine , writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown , writerSyntaxMap :: SyntaxMap } deriving (Show, Data, Typeable, Generic) @@ -252,7 +252,7 @@ instance Default WriterOptions where , writerEpubChapterLevel = 1 , writerTOCDepth = 3 , writerReferenceDoc = Nothing - , writerLaTeXArgs = [] + , writerPdfArgs = [] , writerReferenceLocation = EndOfDocument , writerSyntaxMap = defaultSyntaxMap } diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 4e4c0b2c1..b2b7da54f 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -37,6 +37,7 @@ import qualified Control.Exception as E import Control.Monad (unless, when) import Control.Monad.Trans (MonadIO (..)) import qualified Data.Text as T +import qualified Data.Text.IO as TextIO import Data.Text (Text) import qualified Data.ByteString as BS import Data.ByteString.Lazy (ByteString) @@ -78,8 +79,8 @@ changePathSeparators :: FilePath -> FilePath changePathSeparators = intercalate "/" . splitDirectories #endif -makePDF :: String -- ^ pdf creator (pdflatex, lualatex, - -- xelatex, context, wkhtmltopdf, pdfroff) +makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex, + -- wkhtmltopdf, weasyprint, prince, context, pdfroff) -> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer -> WriterOptions -- ^ options -> Verbosity -- ^ verbosity level @@ -94,7 +95,7 @@ makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = do _ -> [] meta' <- metaToJSON opts (return . stringify) (return . stringify) meta let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd - let args = mathArgs ++ + let args = writerPdfArgs opts ++ mathArgs ++ concatMap toArgs [("page-size", getField "papersize" meta') ,("title", getField "title" meta') @@ -108,11 +109,19 @@ makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = do (getField "margin-left" meta')) ] source <- writer opts doc - liftIO $ html2pdf verbosity args source + liftIO $ html2pdf verbosity "wkhtmltopdf" args source +makePDF "weasyprint" writer opts verbosity _ doc = do + let args = writerPdfArgs opts + source <- writer opts doc + liftIO $ html2pdf verbosity "weasyprint" args source +makePDF "prince" writer opts verbosity _ doc = do + let args = writerPdfArgs opts + source <- writer opts doc + liftIO $ html2pdf verbosity "prince" args source makePDF "pdfroff" writer opts verbosity _mediabag doc = do source <- writer opts doc let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i", - "--no-toc-relocation"] + "--no-toc-relocation"] ++ writerPdfArgs opts liftIO $ ms2pdf verbosity args source makePDF program writer opts verbosity mediabag doc = do let withTemp = if takeBaseName program == "context" @@ -124,7 +133,7 @@ makePDF program writer opts verbosity mediabag doc = do source <- runIOorExplode $ do setVerbosity verbosity writer opts doc' - let args = writerLaTeXArgs opts + let args = writerPdfArgs opts case takeBaseName program of "context" -> context2pdf verbosity tmpdir source prog | prog `elem` ["pdflatex", "lualatex", "xelatex"] @@ -212,7 +221,7 @@ tex2pdf' verbosity args tmpDir program source = do case logmsg of x | "! Package inputenc Error" `BC.isPrefixOf` x && program /= "xelatex" - -> "\nTry running pandoc with --latex-engine=xelatex." + -> "\nTry running pandoc with --pdf-engine=xelatex." _ -> "" return $ Left $ logmsg <> extramsg (ExitSuccess, Nothing) -> return $ Left "" @@ -347,32 +356,33 @@ ms2pdf verbosity args source = do ExitSuccess -> Right out html2pdf :: Verbosity -- ^ Verbosity level - -> [String] -- ^ Args to wkhtmltopdf + -> String -- ^ Program (wkhtmltopdf, weasyprint or prince) + -> [String] -- ^ Args to program -> Text -- ^ HTML5 source -> IO (Either ByteString ByteString) -html2pdf verbosity args source = do - file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp +html2pdf verbosity program args source = do pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp - BS.writeFile file $ UTF8.fromText source - let programArgs = args ++ [file, pdfFile] + let pdfFileArgName = if program == "prince" + then ["-o"] + else [] + let programArgs = args ++ ["-"] ++ pdfFileArgName ++ [pdfFile] env' <- getEnvironment when (verbosity >= INFO) $ do putStrLn "[makePDF] Command line:" - putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs) + putStrLn $ program ++ " " ++ unwords (map show programArgs) putStr "\n" putStrLn "[makePDF] Environment:" mapM_ print env' putStr "\n" - putStrLn $ "[makePDF] Contents of " ++ file ++ ":" - BL.readFile file >>= BL.putStr + putStrLn $ "[makePDF] Contents of intermediate HTML:" + TextIO.putStr source putStr "\n" (exit, out) <- E.catch - (pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty) + (pipeProcess (Just env') program programArgs $ BL.fromStrict $ UTF8.fromText source) (\(e :: IOError) -> if isDoesNotExistError e then E.throwIO $ - PandocPDFProgramNotFoundError "wkhtml2pdf" + PandocPDFProgramNotFoundError program else E.throwIO e) - removeFile file when (verbosity >= INFO) $ do BL.hPutStr stdout out putStr "\n" -- cgit v1.2.3