{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {- Copyright (C) 2006-2017 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.App Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Does a pandoc conversion based on command-line options. -} module Text.Pandoc.App ( convertWithOpts , Opt(..) , defaultOpts , parseOptions , options ) where import Control.Applicative ((<|>)) import qualified Control.Exception as E import Control.Monad import Control.Monad.Except (throwError, catchError) import Control.Monad.Trans import Data.Monoid import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions, eitherDecode', encode, genericToEncoding) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.Char (toLower, toUpper) import Data.Foldable (foldrM) import Data.List (intercalate, isPrefixOf, isSuffixOf, sort) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import Data.Yaml (decode) import qualified Data.Yaml as Yaml import GHC.Generics import Network.URI (URI (..), parseURI) import Paths_pandoc (getDataDir) import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme) import Skylighting.Parser (addSyntaxDefinition, missingIncludes, parseSyntaxDefinition) import System.Console.GetOpt import System.Directory (Permissions (..), doesFileExist, findExecutable, getAppUserDataDirectory, getPermissions) import System.Environment (getArgs, getEnvironment, getProgName) import System.Exit (ExitCode (..), exitSuccess) import System.FilePath import System.IO (nativeNewline, stdout) import System.IO.Error (isDoesNotExistError) import qualified System.IO as IO (Newline (..)) import Text.Pandoc import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog, setResourcePath, getMediaBag, setTrace, report, setUserDataDir, readFileStrict, readDataFile, readDefaultDataFile, setTranslations) import Text.Pandoc.Highlighting (highlightingStyles) import Text.Pandoc.BCP47 (parseBCP47, Lang(..)) import Text.Pandoc.Lua (runLuaFilter, LuaException(..)) import Text.Pandoc.Writers.Math (defaultMathJaxURL, defaultKaTeXURL) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (headerShift, isURI, openURL, safeRead, tabFilter, eastAsianLineBreakFilter) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (toEntities) import Text.Printf #ifndef _WINDOWS import System.Posix.IO (stdOutput) import System.Posix.Terminal (queryTerminal) #endif data LineEnding = LF | CRLF | Native deriving (Show, Generic) instance ToJSON LineEnding where toEncoding = genericToEncoding defaultOptions instance FromJSON LineEnding parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt parseOptions options' defaults = do rawArgs <- map UTF8.decodeArg <$> getArgs prg <- getProgName let (actions, args, unrecognizedOpts, errors) = getOpt' Permute options' rawArgs let unknownOptionErrors = foldr handleUnrecognizedOption [] unrecognizedOpts unless (null errors && null unknownOptionErrors) $ E.throwIO $ PandocOptionError $ concat errors ++ unlines unknownOptionErrors ++ ("Try " ++ prg ++ " --help for more information.") -- thread option data structure through all supplied option actions opts <- foldl (>>=) (return defaults) actions return (opts{ optInputFiles = args }) latexEngines :: [String] latexEngines = ["pdflatex", "lualatex", "xelatex"] defaultLatexEngine :: String defaultLatexEngine = "pdflatex" htmlEngines :: [String] htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"] defaultHtmlEngine :: String defaultHtmlEngine = "wkhtmltopdf" pdfEngines :: [String] pdfEngines = latexEngines ++ htmlEngines ++ ["context", "pdfroff"] pdfWriterAndProg :: Maybe String -- ^ user-specified writer name -> Maybe String -- ^ user-specified pdf-engine -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) pdfWriterAndProg mWriter mEngine = do let panErr msg = liftIO $ E.throwIO $ PandocAppError msg case go mWriter mEngine of (Right writ, Right prog) -> return (writ, Just prog) (Left err, _) -> panErr err (_, Left err) -> panErr err where go Nothing Nothing = (Right "latex", Right defaultLatexEngine) go (Just writer) Nothing = (Right writer, engineForWriter writer) go Nothing (Just engine) = (writerForEngine engine, Right engine) go (Just writer) (Just engine) = let (Right shouldFormat) = writerForEngine engine userFormat = case map toLower writer of "html5" -> "html" x -> x in if userFormat == shouldFormat then (Right writer, Right engine) else (Left $ "pdf-engine " ++ engine ++ " is not compatible with output format " ++ writer ++ ", please use `-t " ++ shouldFormat ++ "`", Left "") writerForEngine "context" = Right "context" writerForEngine "pdfroff" = Right "ms" writerForEngine en | takeBaseName en `elem` latexEngines = Right "latex" | takeBaseName en `elem` htmlEngines = Right "html" writerForEngine _ = Left "pdf-engine not known" engineForWriter "context" = Right "context" engineForWriter "ms" = Right "pdfroff" engineForWriter "latex" = Right defaultLatexEngine engineForWriter "beamer" = Right defaultLatexEngine engineForWriter format | format `elem` ["html", "html5"] = Right defaultHtmlEngine | otherwise = Left $ "cannot produce pdf output with output format " ++ format convertWithOpts :: Opt -> IO () convertWithOpts opts = do let args = optInputFiles opts let outputFile = fromMaybe "-" (optOutputFile opts) let filters = optFilters opts let verbosity = optVerbosity opts when (optDumpArgs opts) $ do UTF8.hPutStrLn stdout outputFile mapM_ (UTF8.hPutStrLn stdout) args exitSuccess epubMetadata <- case optEpubMetadata opts of Nothing -> return Nothing Just fp -> Just <$> UTF8.readFile fp let mathMethod = case (optKaTeXJS opts, optKaTeXStylesheet opts) of (Nothing, _) -> optHTMLMathMethod opts (Just js, ss) -> KaTeX js (fromMaybe (defaultKaTeXURL ++ "katex.min.css") ss) -- --bibliography implies -F pandoc-citeproc for backwards compatibility: let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) && optCiteMethod opts `notElem` [Natbib, Biblatex] && "pandoc-citeproc" `notElem` map takeBaseName filters let filters' = if needsCiteproc then "pandoc-citeproc" : filters else filters let sources = case args of [] -> ["-"] xs | optIgnoreArgs opts -> ["-"] | otherwise -> xs datadir <- case optDataDir opts of Nothing -> E.catch (Just <$> getAppUserDataDirectory "pandoc") (\e -> let _ = (e :: E.SomeException) in return Nothing) Just _ -> return $ optDataDir opts -- assign reader and writer based on options and filenames let readerName = case optReader opts of Nothing -> defaultReaderName (if any isURI sources then "html" else "markdown") sources Just x -> map toLower x let nonPdfWriterName Nothing = defaultWriterName outputFile nonPdfWriterName (Just x) = map toLower x let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" (writerName, maybePdfProg) <- if pdfOutput then pdfWriterAndProg (optWriter opts) (optPdfEngine opts) else return (nonPdfWriterName $ optWriter opts, Nothing) let format = takeWhile (`notElem` ['+','-']) $ takeFileName writerName -- in case path to lua script -- disabling the custom writer for now (writer, writerExts) <- if ".lua" `isSuffixOf` format -- note: use non-lowercased version writerName then return (TextWriter (\o d -> liftIO $ writeCustom writerName o d) :: Writer PandocIO, mempty) else case getWriter writerName of Left e -> E.throwIO $ PandocAppError $ if format == "pdf" then e ++ "\nTo create a pdf using pandoc, use " ++ "-t latex|beamer|context|ms|html5" ++ "\nand specify an output file with " ++ ".pdf extension (-o filename.pdf)." else e Right (w, es) -> return (w :: Writer PandocIO, es) -- TODO: we have to get the input and the output into the state for -- the sake of the text2tags reader. (reader, readerExts) <- case getReader readerName of Right (r, es) -> return (r :: Reader PandocIO, es) Left e -> E.throwIO $ PandocAppError e' where e' = case readerName of "pdf" -> e ++ "\nPandoc can convert to PDF, but not from PDF." "doc" -> e ++ "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." _ -> e let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput let sourceURL = case sources of [] -> Nothing (x:_) -> case parseURI x of Just u | uriScheme u `elem` ["http:","https:"] -> Just $ show u{ uriQuery = "", uriFragment = "" } _ -> Nothing let addStringAsVariable varname s vars = return $ (varname, s) : vars highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts let addSyntaxMap existingmap f = do res <- parseSyntaxDefinition f case res of Left errstr -> E.throwIO $ PandocSyntaxMapError errstr Right syn -> return $ addSyntaxDefinition syn existingmap syntaxMap <- foldM addSyntaxMap defaultSyntaxMap (optSyntaxDefinitions opts) case missingIncludes (M.elems syntaxMap) of [] -> return () xs -> E.throwIO $ PandocSyntaxMapError $ "Missing syntax definitions:\n" ++ unlines (map (\(syn,dep) -> (T.unpack syn ++ " requires " ++ T.unpack dep ++ " through IncludeRules.")) xs) -- We don't want to send output to the terminal if the user -- does 'pandoc -t docx input.txt'; though we allow them to -- force this with '-o -'. On posix systems, we detect -- when stdout is being piped and allow output to stdout -- in that case, but on Windows we can't. #ifdef _WINDOWS let istty = True #else istty <- queryTerminal stdOutput #endif when (not (isTextFormat format) && istty && optOutputFile opts == Nothing) $ E.throwIO $ PandocAppError $ "Cannot write " ++ format ++ " output to terminal.\n" ++ "Specify an output file using the -o option, or " ++ "use '-o -' to force output to stdout." let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" then 0 else optTabStop opts) readSources :: [FilePath] -> PandocIO Text readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> mapM readSource srcs let runIO' :: PandocIO a -> IO a runIO' f = do (res, reports) <- runIOorExplode $ do setTrace (optTrace opts) setVerbosity verbosity x <- f rs <- getLog return (x, rs) case optLogFile opts of Nothing -> return () Just logfile -> B.writeFile logfile (encodeLogMessages reports) let isWarning msg = messageVerbosity msg == WARNING when (optFailIfWarnings opts && any isWarning reports) $ E.throwIO PandocFailOnWarningError return res let eol = case optEol opts of CRLF -> IO.CRLF LF -> IO.LF Native -> nativeNewline -- note: this reverses the list constructed in option parsing, -- which in turn was reversed from the command-line order, -- so we end up with the correct order in the variable list: let withList _ [] vars = return vars withList f (x:xs) vars = f x vars >>= withList f xs let addContentsAsVariable varname fp vars = do s <- UTF8.toString <$> readFileStrict fp return $ (varname, s) : vars runIO' $ do setUserDataDir datadir variables <- withList (addStringAsVariable "sourcefile") (reverse $ optInputFiles opts) (("outputfile", fromMaybe "-" (optOutputFile opts)) : optVariables opts) -- we reverse this list because, unlike -- the other option lists here, it is -- not reversed when parsed from CLI arguments. -- See withList, above. >>= withList (addContentsAsVariable "include-before") (optIncludeBeforeBody opts) >>= withList (addContentsAsVariable "include-after") (optIncludeAfterBody opts) >>= withList (addContentsAsVariable "header-includes") (optIncludeInHeader opts) >>= withList (addStringAsVariable "css") (optCss opts) >>= maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts) >>= maybe return (addStringAsVariable "epub-cover-image") (optEpubCoverImage opts) >>= (\vars -> case mathMethod of LaTeXMathML Nothing -> do s <- UTF8.toString <$> readDataFile "LaTeXMathML.js" return $ ("mathml-script", s) : vars _ -> return vars) >>= (\vars -> if format == "dzslides" then do dztempl <- UTF8.toString <$> readDataFile ("dzslides" "template.html") let dzline = "