aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/PDF.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/PDF.hs')
-rw-r--r--src/Text/Pandoc/PDF.hs381
1 files changed, 381 insertions, 0 deletions
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
new file mode 100644
index 000000000..ff206daaa
--- /dev/null
+++ b/src/Text/Pandoc/PDF.hs
@@ -0,0 +1,381 @@
+{-# LANGUAGE OverloadedStrings, CPP, ScopedTypeVariables #-}
+{-
+Copyright (C) 2012-2016 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.PDF
+ Copyright : Copyright (C) 2012-2016 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of LaTeX documents to PDF.
+-}
+module Text.Pandoc.PDF ( makePDF ) where
+
+import Data.ByteString.Lazy (ByteString)
+import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Lazy.Char8 as BC
+import qualified Data.ByteString as BS
+import Data.Monoid ((<>))
+import System.Exit (ExitCode (..))
+import System.FilePath
+import System.IO (stdout)
+import System.IO.Temp (withTempFile, withTempDirectory)
+import System.Directory
+import Data.Digest.Pure.SHA (showDigest, sha1)
+import System.Environment
+import Control.Monad (unless, when, (<=<))
+import qualified Control.Exception as E
+import Data.List (isInfixOf)
+import Data.Maybe (fromMaybe)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Definition
+import Text.Pandoc.MediaBag
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Shared (withTempDir, inDirectory, stringify)
+import Text.Pandoc.Writers.Shared (getField, metaToJSON)
+import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..))
+import Text.Pandoc.MIME (extensionFromMimeType, getMimeType)
+import Text.Pandoc.Process (pipeProcess)
+import Control.Monad.Trans (MonadIO(..))
+import qualified Data.ByteString.Lazy as BL
+import qualified Codec.Picture as JP
+#ifdef _WINDOWS
+import Data.List (intercalate)
+#endif
+import Text.Pandoc.Class (PandocIO, runIOorExplode, fetchItem, report,
+ setVerbosity, setMediaBag, runIO)
+import Text.Pandoc.Logging
+
+#ifdef _WINDOWS
+changePathSeparators :: FilePath -> FilePath
+changePathSeparators = intercalate "/" . splitDirectories
+#endif
+
+makePDF :: MonadIO m
+ => String -- ^ pdf creator (pdflatex, lualatex,
+ -- xelatex, context, wkhtmltopdf)
+ -> (WriterOptions -> Pandoc -> PandocIO String) -- ^ writer
+ -> WriterOptions -- ^ options
+ -> Verbosity -- ^ verbosity level
+ -> MediaBag -- ^ media
+ -> Pandoc -- ^ document
+ -> m (Either ByteString ByteString)
+makePDF "wkhtmltopdf" writer opts verbosity _ doc@(Pandoc meta _) = liftIO $ do
+ let mathArgs = case writerHTMLMathMethod opts of
+ -- with MathJax, wait til all math is rendered:
+ MathJax _ -> ["--run-script", "MathJax.Hub.Register.StartupHook('End Typeset', function() { window.status = 'mathjax_loaded' });",
+ "--window-status", "mathjax_loaded"]
+ _ -> []
+ meta' <- metaToJSON opts (return . stringify) (return . stringify) meta
+ let toArgs (f, mbd) = maybe [] (\d -> ['-':'-':f, d]) mbd
+ let args = mathArgs ++
+ concatMap toArgs
+ [("page-size", getField "papersize" meta')
+ ,("title", getField "title" meta')
+ ,("margin-bottom", fromMaybe (Just "1.2in")
+ (getField "margin-bottom" meta'))
+ ,("margin-top", fromMaybe (Just "1.25in")
+ (getField "margin-top" meta'))
+ ,("margin-right", fromMaybe (Just "1.25in")
+ (getField "margin-right" meta'))
+ ,("margin-left", fromMaybe (Just "1.25in")
+ (getField "margin-left" meta'))
+ ]
+ source <- runIOorExplode $ do
+ setVerbosity verbosity
+ writer opts doc
+ html2pdf verbosity args source
+makePDF program writer opts verbosity mediabag doc = do
+ let withTemp = if takeBaseName program == "context"
+ then withTempDirectory "."
+ else withTempDir
+ liftIO $ withTemp "tex2pdf." $ \tmpdir -> do
+ doc' <- handleImages verbosity opts mediabag tmpdir doc
+ source <- runIOorExplode $ do
+ setVerbosity verbosity
+ writer opts doc'
+ let args = writerLaTeXArgs opts
+ case takeBaseName program of
+ "context" -> context2pdf verbosity tmpdir source
+ prog | prog `elem` ["pdflatex", "lualatex", "xelatex"]
+ -> tex2pdf' verbosity args tmpdir program source
+ _ -> return $ Left $ UTF8.fromStringLazy $ "Unknown program " ++ program
+
+handleImages :: Verbosity
+ -> WriterOptions
+ -> MediaBag
+ -> FilePath -- ^ temp dir to store images
+ -> Pandoc -- ^ document
+ -> IO Pandoc
+handleImages verbosity opts mediabag tmpdir =
+ walkM (convertImages verbosity tmpdir) <=<
+ walkM (handleImage' verbosity opts mediabag tmpdir)
+
+handleImage' :: Verbosity
+ -> WriterOptions
+ -> MediaBag
+ -> FilePath
+ -> Inline
+ -> IO Inline
+handleImage' verbosity opts mediabag tmpdir (Image attr ils (src,tit)) = do
+ exists <- doesFileExist src
+ if exists
+ then return $ Image attr ils (src,tit)
+ else do
+ res <- runIO $ do
+ setVerbosity verbosity
+ setMediaBag mediabag
+ fetchItem (writerSourceURL opts) src
+ case res of
+ Right (contents, Just mime) -> do
+ let ext = fromMaybe (takeExtension src) $
+ extensionFromMimeType mime
+ let basename = showDigest $ sha1 $ BL.fromChunks [contents]
+ let fname = tmpdir </> basename <.> ext
+ BS.writeFile fname contents
+ return $ Image attr ils (fname,tit)
+ _ -> do
+ runIO $ do
+ setVerbosity verbosity
+ report $ CouldNotFetchResource src "skipping..."
+ -- return alt text
+ return $ Emph ils
+handleImage' _ _ _ _ x = return x
+
+convertImages :: Verbosity -> FilePath -> Inline -> IO Inline
+convertImages verbosity tmpdir (Image attr ils (src, tit)) = do
+ img <- convertImage tmpdir src
+ newPath <-
+ case img of
+ Left e -> do
+ runIO $ do
+ setVerbosity verbosity
+ report $ CouldNotConvertImage src e
+ return src
+ Right fp -> return fp
+ return (Image attr ils (newPath, tit))
+convertImages _ _ x = return x
+
+-- Convert formats which do not work well in pdf to png
+convertImage :: FilePath -> FilePath -> IO (Either String FilePath)
+convertImage tmpdir fname =
+ case mime of
+ Just "image/png" -> doNothing
+ Just "image/jpeg" -> doNothing
+ Just "application/pdf" -> doNothing
+ _ -> JP.readImage fname >>= \res ->
+ case res of
+ Left e -> return $ Left e
+ Right img ->
+ E.catch (Right fileOut <$ JP.savePngImage fileOut img) $
+ \(e :: E.SomeException) -> return (Left (show e))
+ where
+ fileOut = replaceDirectory (replaceExtension fname ".png") tmpdir
+ mime = getMimeType fname
+ doNothing = return (Right fname)
+
+tex2pdf' :: Verbosity -- ^ Verbosity level
+ -> [String] -- ^ Arguments to the latex-engine
+ -> FilePath -- ^ temp directory for output
+ -> String -- ^ tex program
+ -> String -- ^ tex source
+ -> IO (Either ByteString ByteString)
+tex2pdf' verbosity args tmpDir program source = do
+ let numruns = if "\\tableofcontents" `isInfixOf` source
+ then 3 -- to get page numbers
+ else 2 -- 1 run won't give you PDF bookmarks
+ (exit, log', mbPdf) <- runTeXProgram verbosity program args 1 numruns tmpDir source
+ case (exit, mbPdf) of
+ (ExitFailure _, _) -> do
+ let logmsg = extractMsg log'
+ let extramsg =
+ case logmsg of
+ x | "! Package inputenc Error" `BC.isPrefixOf` x
+ && program /= "xelatex"
+ -> "\nTry running pandoc with --latex-engine=xelatex."
+ _ -> ""
+ return $ Left $ logmsg <> extramsg
+ (ExitSuccess, Nothing) -> return $ Left ""
+ (ExitSuccess, Just pdf) -> return $ Right pdf
+
+-- parsing output
+
+extractMsg :: ByteString -> ByteString
+extractMsg log' = do
+ let msg' = dropWhile (not . ("!" `BC.isPrefixOf`)) $ BC.lines log'
+ let (msg'',rest) = break ("l." `BC.isPrefixOf`) msg'
+ let lineno = take 1 rest
+ if null msg'
+ then log'
+ else BC.unlines (msg'' ++ lineno)
+
+extractConTeXtMsg :: ByteString -> ByteString
+extractConTeXtMsg log' = do
+ let msg' = take 1 $
+ dropWhile (not . ("tex error" `BC.isPrefixOf`)) $ BC.lines log'
+ if null msg'
+ then log'
+ else BC.unlines msg'
+
+-- running tex programs
+
+-- Run a TeX program on an input bytestring and return (exit code,
+-- contents of stdout, contents of produced PDF if any). Rerun
+-- a fixed number of times to resolve references.
+runTeXProgram :: Verbosity -> String -> [String] -> Int -> Int -> FilePath
+ -> String -> IO (ExitCode, ByteString, Maybe ByteString)
+runTeXProgram verbosity program args runNumber numRuns tmpDir source = do
+ let file = tmpDir </> "input.tex"
+ exists <- doesFileExist file
+ unless exists $ UTF8.writeFile file source
+#ifdef _WINDOWS
+ -- note: we want / even on Windows, for TexLive
+ let tmpDir' = changePathSeparators tmpDir
+ let file' = changePathSeparators file
+#else
+ let tmpDir' = tmpDir
+ let file' = file
+#endif
+ let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
+ "-output-directory", tmpDir'] ++ args ++ [file']
+ env' <- getEnvironment
+ let sep = [searchPathSeparator]
+ let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++)
+ $ lookup "TEXINPUTS" env'
+ let env'' = ("TEXINPUTS", texinputs) :
+ [(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
+ when (verbosity >= INFO && runNumber == 1) $ do
+ putStrLn "[makePDF] temp dir:"
+ putStrLn tmpDir'
+ putStrLn "[makePDF] Command line:"
+ putStrLn $ program ++ " " ++ unwords (map show programArgs)
+ putStr "\n"
+ putStrLn "[makePDF] Environment:"
+ mapM_ print env''
+ putStr "\n"
+ putStrLn $ "[makePDF] Contents of " ++ file' ++ ":"
+ B.readFile file' >>= B.putStr
+ putStr "\n"
+ (exit, out) <- pipeProcess (Just env'') program programArgs BL.empty
+ when (verbosity >= INFO) $ do
+ putStrLn $ "[makePDF] Run #" ++ show runNumber
+ B.hPutStr stdout out
+ putStr "\n"
+ if runNumber <= numRuns
+ then runTeXProgram verbosity program args (runNumber + 1) numRuns tmpDir source
+ else do
+ let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
+ pdfExists <- doesFileExist pdfFile
+ pdf <- if pdfExists
+ -- We read PDF as a strict bytestring to make sure that the
+ -- temp directory is removed on Windows.
+ -- See https://github.com/jgm/pandoc/issues/1192.
+ then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ else return Nothing
+ return (exit, out, pdf)
+
+html2pdf :: Verbosity -- ^ Verbosity level
+ -> [String] -- ^ Args to wkhtmltopdf
+ -> String -- ^ HTML5 source
+ -> IO (Either ByteString ByteString)
+html2pdf verbosity args source = do
+ file <- withTempFile "." "html2pdf.html" $ \fp _ -> return fp
+ pdfFile <- withTempFile "." "html2pdf.pdf" $ \fp _ -> return fp
+ UTF8.writeFile file source
+ let programArgs = args ++ [file, pdfFile]
+ env' <- getEnvironment
+ when (verbosity >= INFO) $ do
+ putStrLn "[makePDF] Command line:"
+ putStrLn $ "wkhtmltopdf" ++ " " ++ unwords (map show programArgs)
+ putStr "\n"
+ putStrLn "[makePDF] Environment:"
+ mapM_ print env'
+ putStr "\n"
+ putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
+ B.readFile file >>= B.putStr
+ putStr "\n"
+ (exit, out) <- pipeProcess (Just env') "wkhtmltopdf" programArgs BL.empty
+ removeFile file
+ when (verbosity >= INFO) $ do
+ B.hPutStr stdout out
+ putStr "\n"
+ pdfExists <- doesFileExist pdfFile
+ mbPdf <- if pdfExists
+ -- We read PDF as a strict bytestring to make sure that the
+ -- temp directory is removed on Windows.
+ -- See https://github.com/jgm/pandoc/issues/1192.
+ then do
+ res <- (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ removeFile pdfFile
+ return res
+ else return Nothing
+ return $ case (exit, mbPdf) of
+ (ExitFailure _, _) -> Left out
+ (ExitSuccess, Nothing) -> Left ""
+ (ExitSuccess, Just pdf) -> Right pdf
+
+context2pdf :: Verbosity -- ^ Verbosity level
+ -> FilePath -- ^ temp directory for output
+ -> String -- ^ ConTeXt source
+ -> IO (Either ByteString ByteString)
+context2pdf verbosity tmpDir source = inDirectory tmpDir $ do
+ let file = "input.tex"
+ UTF8.writeFile file source
+#ifdef _WINDOWS
+ -- note: we want / even on Windows, for TexLive
+ let tmpDir' = changePathSeparators tmpDir
+#else
+ let tmpDir' = tmpDir
+#endif
+ let programArgs = "--batchmode" : [file]
+ env' <- getEnvironment
+ when (verbosity >= INFO) $ do
+ putStrLn "[makePDF] temp dir:"
+ putStrLn tmpDir'
+ putStrLn "[makePDF] Command line:"
+ putStrLn $ "context" ++ " " ++ unwords (map show programArgs)
+ putStr "\n"
+ putStrLn "[makePDF] Environment:"
+ mapM_ print env'
+ putStr "\n"
+ putStrLn $ "[makePDF] Contents of " ++ file ++ ":"
+ B.readFile file >>= B.putStr
+ putStr "\n"
+ (exit, out) <- pipeProcess (Just env') "context" programArgs BL.empty
+ when (verbosity >= INFO) $ do
+ B.hPutStr stdout out
+ putStr "\n"
+ let pdfFile = replaceExtension file ".pdf"
+ pdfExists <- doesFileExist pdfFile
+ mbPdf <- if pdfExists
+ -- We read PDF as a strict bytestring to make sure that the
+ -- temp directory is removed on Windows.
+ -- See https://github.com/jgm/pandoc/issues/1192.
+ then (Just . B.fromChunks . (:[])) `fmap` BS.readFile pdfFile
+ else return Nothing
+ case (exit, mbPdf) of
+ (ExitFailure _, _) -> do
+ let logmsg = extractConTeXtMsg out
+ return $ Left logmsg
+ (ExitSuccess, Nothing) -> return $ Left ""
+ (ExitSuccess, Just pdf) -> return $ Right pdf
+