aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMauro Bieg <mb21@users.noreply.github.com>2017-09-12 05:18:42 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2017-09-11 20:18:42 -0700
commitc7e3c1ec1797d633c181e5701c94f4169b0e5471 (patch)
tree90d951a5db9e9682ab0856e0a15c5cf7ac76786d
parentddecd727838c017f376cdd8d51e14db1b9652217 (diff)
downloadpandoc-c7e3c1ec1797d633c181e5701c94f4169b0e5471.tar.gz
Support for PDF generation via `weasyprint` and `prince` (#3909)
* Rename --latex-engine to --pdf-engine * In `Text.Pandoc.Options.WriterOptions`, rename `writerLaTeXEngine` to `writerPdfEngine` and `writerLaTeXArgs` to `writerPdfArgs`. * Add support for `weasyprint` and `prince`, in addition to `wkhtmltopdf`, for PDF generation via HTML (closes #3906). * `Text.Pandoc.PDF.html2pdf`: use stdin instead of intermediate HTML file
-rw-r--r--MANUAL.txt29
-rw-r--r--data/bash_completion.tpl4
-rw-r--r--src/Text/Pandoc/App.hs118
-rw-r--r--src/Text/Pandoc/Error.hs2
-rw-r--r--src/Text/Pandoc/Options.hs4
-rw-r--r--src/Text/Pandoc/PDF.hs46
6 files changed, 127 insertions, 76 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index ee50d7225..0b0fadd22 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -186,12 +186,12 @@ Creating a PDF
--------------
To produce a PDF, specify an output file with a `.pdf` extension.
-By default, pandoc will use LaTeX to convert it to PDF:
+By default, pandoc will use LaTeX to create the PDF:
pandoc test.txt -o test.pdf
Production of a PDF requires that a LaTeX engine be installed (see
-`--latex-engine`, below), and assumes that the following LaTeX packages
+`--pdf-engine`, below), and assumes that the following LaTeX packages
are available: [`amsfonts`], [`amsmath`], [`lm`], [`unicode-math`],
[`ifxetex`], [`ifluatex`], [`listings`] (if the
`--listings` option is used), [`fancyvrb`], [`longtable`],
@@ -210,16 +210,19 @@ if added to the template or included in any header file. The
optionally be used for [citation rendering]. These are included
with all recent versions of [TeX Live].
-Alternatively, pandoc can use ConTeXt, `wkhtmltopdf`, or
-`pdfroff` to create a PDF. To do this, specify an output file
-with a `.pdf` extension, as before, but add `-t context`, `-t
-html5`, or `-t ms` to the command line.
+Alternatively, pandoc can use [ConTeXt], `pdfroff`, or any of the
+following HTML/CSS-to-PDF-engines, to create a PDF: [`wkhtmltopdf`],
+[`weasyprint`] or `prince`.
+To do this, specify an output file with a `.pdf` extension, as before,
+but add the `--pdf-engine` option or `-t context`, `-t html`, or `-t ms`
+to the command line (`-t html` defaults to `--pdf-engine=wkhtmltopdf`).
PDF output can be controlled using [variables for LaTeX] (if
LaTeX is used) and [variables for ConTeXt] (if ConTeXt is used).
+When using an HTML/CSS-to-PDF-engine, `--css` affects the output.
If `wkhtmltopdf` is used, then the variables `margin-left`,
`margin-right`, `margin-top`, `margin-bottom`, and `papersize`
-will affect the output, as will `--css`.
+will affect the output.
[`amsfonts`]: https://ctan.org/pkg/amsfonts
[`amsmath`]: https://ctan.org/pkg/amsmath
@@ -251,6 +254,8 @@ will affect the output, as will `--css`.
[`bibtex`]: https://ctan.org/pkg/bibtex
[`biber`]: https://ctan.org/pkg/biber
[TeX Live]: http://www.tug.org/texlive/
+[`wkhtmltopdf`]: https://wkhtmltopdf.org
+[`weasyprint`]: http://weasyprint.org
Options
=======
@@ -990,15 +995,15 @@ Options affecting specific writers
the EPUB-specific contents. The default is `EPUB`. To put
the EPUB contents in the top level, use an empty string.
-`--latex-engine=pdflatex`|`lualatex`|`xelatex`
+`--pdf-engine=pdflatex`|`lualatex`|`xelatex`|`wkhtmltopdf`|`weasyprint`|`prince`|`context`|`pdfroff`
-: Use the specified LaTeX engine when producing PDF output.
+: Use the specified engine when producing PDF output.
The default is `pdflatex`. If the engine is not in your PATH,
the full path of the engine may be specified here.
-`--latex-engine-opt=`*STRING*
+`--pdf-engine-opt=`*STRING*
-: Use the given string as a command-line argument to the `latex-engine`.
+: Use the given string as a command-line argument to the `pdf-engine`.
If used multiple times, the arguments are provided with spaces between
them. Note that no check for duplicate options is done.
@@ -1300,7 +1305,7 @@ Language variables
[Unicode Bidirectional Algorithm].
When using LaTeX for bidirectional documents, only the `xelatex` engine
- is fully supported (use `--latex-engine=xelatex`).
+ is fully supported (use `--pdf-engine=xelatex`).
[BCP 47]: https://tools.ietf.org/html/bcp47
[Unicode Bidirectional Algorithm]: http://www.w3.org/International/articles/inline-bidi-markup/uba-basics
diff --git a/data/bash_completion.tpl b/data/bash_completion.tpl
index 317fd5095..c9145c06c 100644
--- a/data/bash_completion.tpl
+++ b/data/bash_completion.tpl
@@ -29,8 +29,8 @@ _pandoc()
COMPREPLY=( $(compgen -W "references javascript none" -- ${cur}) )
return 0
;;
- --latex-engine)
- COMPREPLY=( $(compgen -W "pdflatex lualatex xelatex" -- ${cur}) )
+ --pdf-engine)
+ COMPREPLY=( $(compgen -W "pdflatex lualatex xelatex wkhtmltopdf weasyprint prince context pdfroff" -- ${cur}) )
return 0
;;
--print-default-data-file)
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 367a1f550..93a2a9da6 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -121,6 +121,53 @@ parseOptions options' defaults = do
opts <- foldl (>>=) (return defaults) actions
return (opts{ optInputFiles = args })
+latexEngines :: [String]
+latexEngines = ["pdflatex", "lualatex", "xelatex"]
+
+htmlEngines :: [String]
+htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"]
+
+pdfEngines :: [String]
+pdfEngines = latexEngines ++ htmlEngines ++ ["context", "pdfroff"]
+
+pdfWriterAndProg :: Maybe String -- ^ user-specified writer name
+ -> Maybe String -- ^ user-specified pdf-engine
+ -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg)
+pdfWriterAndProg mWriter mEngine = do
+ let panErr msg = liftIO $ E.throwIO $ PandocAppError msg
+ case go mWriter mEngine of
+ (Right writ, Right prog) -> return (writ, Just prog)
+ (Left err, _) -> panErr err
+ (_, Left err) -> panErr err
+ where
+ go Nothing Nothing = (Right "latex", Right $ head latexEngines)
+ go (Just writer) Nothing = (Right writer, engineForWriter writer)
+ go Nothing (Just engine) = (writerForEngine engine, Right engine)
+ go (Just writer) (Just engine) =
+ let (Right shouldFormat) = writerForEngine engine
+ userFormat = case map toLower writer of
+ "html5" -> "html"
+ x -> x
+ in if userFormat == shouldFormat
+ then (Right writer, Right engine)
+ else (Left $ "pdf-engine " ++ engine ++ " is not compatible with output format "
+ ++ writer ++ ", please use `-t " ++ shouldFormat ++ "`", Left "")
+
+ writerForEngine "context" = Right "context"
+ writerForEngine "pdfroff" = Right "ms"
+ writerForEngine en
+ | takeBaseName en `elem` latexEngines = Right "latex"
+ | takeBaseName en `elem` htmlEngines = Right "html"
+ writerForEngine _ = Left "pdf-engine not known"
+
+ engineForWriter "context" = Right "context"
+ engineForWriter "ms" = Right "pdfroff"
+ engineForWriter "latex" = Right $ head latexEngines
+ engineForWriter format
+ | format `elem` ["html", "html5"] = Right $ head htmlEngines
+ | otherwise = Left $ "cannot produce pdf output with output format " ++ format
+
+
convertWithOpts :: Opt -> IO ()
convertWithOpts opts = do
let args = optInputFiles opts
@@ -171,18 +218,16 @@ convertWithOpts opts = do
else "markdown") sources
Just x -> map toLower x
- let writerName = case optWriter opts of
- Nothing -> defaultWriterName outputFile
- Just x -> map toLower x
- let format = takeWhile (`notElem` ['+','-'])
- $ takeFileName writerName -- in case path to lua script
+ let nonPdfWriterName Nothing = defaultWriterName outputFile
+ nonPdfWriterName (Just x) = map toLower x
let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
+ (writerName, maybePdfProg) <- if pdfOutput
+ then pdfWriterAndProg (optWriter opts) (optPdfEngine opts)
+ else return (nonPdfWriterName $ optWriter opts, Nothing)
- let laTeXOutput = format `elem` ["latex", "beamer"]
- let conTeXtOutput = format == "context"
- let html5Output = format == "html5" || format == "html"
- let msOutput = format == "ms"
+ let format = takeWhile (`notElem` ['+','-'])
+ $ takeFileName writerName -- in case path to lua script
-- disabling the custom writer for now
(writer, writerExts) <-
@@ -417,7 +462,7 @@ convertWithOpts opts = do
, writerEpubChapterLevel = optEpubChapterLevel opts
, writerTOCDepth = optTOCDepth opts
, writerReferenceDoc = optReferenceDoc opts
- , writerLaTeXArgs = optLaTeXEngineArgs opts
+ , writerPdfArgs = optPdfEngineArgs opts
, writerSyntaxMap = syntaxMap
}
@@ -475,27 +520,14 @@ convertWithOpts opts = do
case writer of
ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile
- TextWriter f
- | pdfOutput -> do
- -- make sure writer is latex, beamer, context, html5 or ms
- unless (laTeXOutput || conTeXtOutput || html5Output ||
- msOutput) $
- liftIO $ E.throwIO $ PandocAppError $
- "cannot produce pdf output with " ++ format ++ " writer"
-
- let pdfprog = case () of
- _ | conTeXtOutput -> "context"
- | html5Output -> "wkhtmltopdf"
- | html5Output -> "wkhtmltopdf"
- | msOutput -> "pdfroff"
- | otherwise -> optLaTeXEngine opts
-
- res <- makePDF pdfprog f writerOptions verbosity media doc
+ TextWriter f -> case maybePdfProg of
+ Just pdfProg -> do
+ res <- makePDF pdfProg f writerOptions verbosity media doc
case res of
Right pdf -> writeFnBinary outputFile pdf
Left err' -> liftIO $
E.throwIO $ PandocPDFError (UTF8.toStringLazy err')
- | otherwise -> do
+ Nothing -> do
let htmlFormat = format `elem`
["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"]
handleEntities = if (htmlFormat ||
@@ -605,8 +637,8 @@ data Opt = Opt
, optDataDir :: Maybe FilePath
, optCiteMethod :: CiteMethod -- ^ Method to output cites
, optListings :: Bool -- ^ Use listings package for code blocks
- , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf
- , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine
+ , optPdfEngine :: Maybe String -- ^ Program to use for latex/html -> pdf
+ , optPdfEngineArgs :: [String] -- ^ Flags to pass to the engine
, optSlideLevel :: Maybe Int -- ^ Header level that creates slides
, optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2
, optAscii :: Bool -- ^ Use ascii characters only in html
@@ -681,8 +713,8 @@ defaultOpts = Opt
, optDataDir = Nothing
, optCiteMethod = Citeproc
, optListings = False
- , optLaTeXEngine = "pdflatex"
- , optLaTeXEngineArgs = []
+ , optPdfEngine = Nothing
+ , optPdfEngineArgs = []
, optSlideLevel = Nothing
, optSetextHeaders = True
, optAscii = False
@@ -778,7 +810,6 @@ defaultWriterName x =
".org" -> "org"
".asciidoc" -> "asciidoc"
".adoc" -> "asciidoc"
- ".pdf" -> "latex"
".fb2" -> "fb2"
".opml" -> "opml"
".icml" -> "icml"
@@ -1314,23 +1345,24 @@ options =
"NUMBER")
"" -- "Header level at which to split chapters in EPUB"
- , Option "" ["latex-engine"]
+ , Option "" ["pdf-engine"]
(ReqArg
(\arg opt -> do
let b = takeBaseName arg
- if b `elem` ["pdflatex", "lualatex", "xelatex"]
- then return opt { optLaTeXEngine = arg }
- else E.throwIO $ PandocOptionError "latex-engine must be pdflatex, lualatex, or xelatex.")
+ if b `elem` pdfEngines
+ then return opt { optPdfEngine = Just arg }
+ else E.throwIO $ PandocOptionError $ "pdf-engine must be one of "
+ ++ intercalate ", " pdfEngines)
"PROGRAM")
- "" -- "Name of latex program to use in generating PDF"
+ "" -- "Name of program to use in generating PDF"
- , Option "" ["latex-engine-opt"]
+ , Option "" ["pdf-engine-opt"]
(ReqArg
(\arg opt -> do
- let oldArgs = optLaTeXEngineArgs opt
- return opt { optLaTeXEngineArgs = arg : oldArgs })
+ let oldArgs = optPdfEngineArgs opt
+ return opt { optPdfEngineArgs = arg : oldArgs })
"STRING")
- "" -- "Flags to pass to the LaTeX engine, all instances of this option are accumulated and used"
+ "" -- "Flags to pass to the PDF-engine, all instances of this option are accumulated and used"
, Option "" ["bibliography"]
(ReqArg
@@ -1590,6 +1622,10 @@ handleUnrecognizedOption "--old-dashes" =
("--old-dashes has been removed. Use +old_dashes extension instead." :)
handleUnrecognizedOption "--no-wrap" =
("--no-wrap has been removed. Use --wrap=none instead." :)
+handleUnrecognizedOption "--latex-engine" =
+ ("--latex-engine has been removed. Use --pdf-engine instead." :)
+handleUnrecognizedOption "--latex-engine-opt" =
+ ("--latex-engine-opt has been removed. Use --pdf-engine-opt instead." :)
handleUnrecognizedOption "--chapters" =
("--chapters has been removed. Use --top-level-division=chapter instead." :)
handleUnrecognizedOption "--reference-docx" =
diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs
index 24186720c..2bd8bef0a 100644
--- a/src/Text/Pandoc/Error.hs
+++ b/src/Text/Pandoc/Error.hs
@@ -96,7 +96,7 @@ handleError (Left e) =
PandocSyntaxMapError s -> err 67 s
PandocFailOnWarningError -> err 3 "Failing because there were warnings."
PandocPDFProgramNotFoundError pdfprog -> err 47 $
- pdfprog ++ " not found. " ++ pdfprog ++ " is needed for pdf output."
+ pdfprog ++ " not found. Please select a different --pdf-engine or install " ++ pdfprog
PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" ++ logmsg
PandocFilterError filtername msg -> err 83 $ "Error running filter " ++
filtername ++ ":\n" ++ msg
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 89b26deb0..0bd66d54d 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -216,7 +216,7 @@ data WriterOptions = WriterOptions
, writerEpubChapterLevel :: Int -- ^ Header level for chapters (separate files)
, writerTOCDepth :: Int -- ^ Number of levels to include in TOC
, writerReferenceDoc :: Maybe FilePath -- ^ Path to reference document if specified
- , writerLaTeXArgs :: [String] -- ^ Flags to pass to latex-engine
+ , writerPdfArgs :: [String] -- ^ Flags to pass to pdf-engine
, writerReferenceLocation :: ReferenceLocation -- ^ Location of footnotes and references for writing markdown
, writerSyntaxMap :: SyntaxMap
} deriving (Show, Data, Typeable, Generic)
@@ -252,7 +252,7 @@ instance Default WriterOptions where
, writerEpubChapterLevel = 1
, writerTOCDepth = 3
, writerReferenceDoc = Nothing
- , writerLaTeXArgs = []
+ , writerPdfArgs = []
, writerReferenceLocation = EndOfDocument
, writerSyntaxMap = defaultSyntaxMap
}
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
index 4e4c0b2c1..b2b7da54f 100644
--- a/src/Text/Pandoc/PDF.hs
+++ b/src/Text/Pandoc/PDF.hs
@@ -37,6 +37,7 @@ import qualified Control.Exception as E
import Control.Monad (unless, when)
import Control.Monad.Trans (MonadIO (..))
import qualified Data.Text as T
+import qualified Data.Text.IO as TextIO
import Data.Text (Text)
import qualified Data.ByteString as BS
import Data.ByteString.Lazy (ByteString)
@@ -78,8 +79,8 @@ changePathSeparators :: FilePath -> FilePath
changePathSeparators = intercalate "/" . splitDirectories
#endif
-makePDF :: String -- ^ pdf creator (pdflatex, lualatex,
- -- xelatex, context, wkhtmltopdf, pdfroff)
+makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex,
+ -- wkhtmltopdf, weasyprint, prince, context, pdfroff)
-> (WriterOptions -> Pandoc -> PandocIO Text) -- ^ writer
-> WriterOptions -- ^ options
-> Verbosity -- ^ verbosity level
@@ -94,7 +95,7 @@ makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = do
_ -> []
meta' <- metaToJSON opts (return . stringify) (return . stringify) meta
let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd
- let args = mathArgs ++
+ let args = writerPdfArgs opts ++ mathArgs ++
concatMap toArgs
[("page-size", getField "papersize" meta')
,("title", getField "title" meta')
@@ -108,11 +109,19 @@ makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = do
(getField "margin-left" meta'))
]
source <- writer opts doc
- liftIO $ html2pdf verbosity args source
+ liftIO $ html2pdf verbosity "wkhtmltopdf" args source
+makePDF "weasyprint" writer opts verbosity _ doc = do
+ let args = writerPdfArgs opts
+ source <- writer opts doc
+ liftIO $ html2pdf verbosity "weasyprint" args source
+makePDF "prince" writer opts verbosity _ doc = do
+ let args = writerPdfArgs opts
+ source <- writer opts doc
+ liftIO $ html2pdf verbosity "prince" args source
makePDF "pdfroff" writer opts verbosity _mediabag doc = do
source <- writer opts doc
let args = ["-ms", "-mpdfmark", "-e", "-t", "-k", "-KUTF-8", "-i",
- "--no-toc-relocation"]
+ "--no-toc-relocation"] ++ writerPdfArgs opts
liftIO $ ms2pdf verbosity args source
makePDF program writer opts verbosity mediabag doc = do
let withTemp = if takeBaseName program == "context"
@@ -124,7 +133,7 @@ makePDF program writer opts verbosity mediabag doc = do
source <- runIOorExplode $ do
setVerbosity verbosity
writer opts doc'
- let args = writerLaTeXArgs opts
+ let args = writerPdfArgs opts
case takeBaseName program of
"context" -> context2pdf verbosity tmpdir source
prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
@@ -212,7 +221,7 @@ tex2pdf' verbosity args tmpDir program source = do
case logmsg of
x | "! Package inputenc Error" `BC.isPrefixOf` x
&& program /= "xelatex"
- -> "\nTry running pandoc with --latex-engine=xelatex."
+ -> "\nTry running pandoc with --pdf-engine=xelatex."
_ -> ""
return $ Left $ logmsg <> extramsg
(ExitSuccess, Nothing) -> return $ Left ""
@@ -347,32 +356,33 @@ ms2pdf verbosity args source = do
ExitSuccess -> Right out
html2pdf :: Verbosity -- ^ Verbosity level
- -> [String] -- ^ Args to wkhtmltopdf
+ -> String -- ^ Program (wkhtmltopdf, weasyprint or prince)
+ -> [String] -- ^ Args to program
-> Text -- ^ HTML5 source
-> IO (Either ByteString ByteString)
-html2pdf verbosity args source = do
- file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp
+html2pdf verbosity program args source = do
pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp
- BS.writeFile file $ UTF8.fromText source
- let programArgs = args ++ [file, pdfFile]
+ let pdfFileArgName = if program == "prince"
+ then ["-o"]
+ else []
+ let programArgs = args ++ ["-"] ++ pdfFileArgName ++ [pdfFile]
env' <- getEnvironment
when (verbosity >= INFO) $ do
putStrLn "[makePDF] Command line:"
- putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs)
+ putStrLn $ program ++ " " ++ unwords (map show programArgs)
putStr "\n"
putStrLn "[makePDF] Environment:"
mapM_ print env'
putStr "\n"
- putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
- BL.readFile file >>= BL.putStr
+ putStrLn $ "[makePDF] Contents of intermediate HTML:"
+ TextIO.putStr source
putStr "\n"
(exit, out) <- E.catch
- (pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty)
+ (pipeProcess (Just env') program programArgs $ BL.fromStrict $ UTF8.fromText source)
(\(e :: IOError) -> if isDoesNotExistError e
then E.throwIO $
- PandocPDFProgramNotFoundError "wkhtml2pdf"
+ PandocPDFProgramNotFoundError program
else E.throwIO e)
- removeFile file
when (verbosity >= INFO) $ do
BL.hPutStr stdout out
putStr "\n"