{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TupleSections #-} {- Copyright (C) 2006-2018 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.App Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Does a pandoc conversion based on command-line options. -} module Text.Pandoc.App ( convertWithOpts , Opt(..) , LineEnding(..) , Filter(..) , defaultOpts , parseOptions , options , applyFilters ) where import qualified Control.Exception as E import Control.Monad import Control.Monad.Except (catchError, throwError) import Control.Monad.Trans import Data.Aeson (defaultOptions, eitherDecode', encode) import Data.Aeson.TH (deriveJSON) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.Char (toLower, toUpper) import Data.Foldable (foldrM) import Data.List (find, intercalate, isPrefixOf, isSuffixOf, sort) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TE import qualified Data.Text.Encoding.Error as TE import Data.Yaml (decode) import qualified Data.Yaml as Yaml import GHC.Generics import Network.URI (URI (..), parseURI) import Paths_pandoc (getDataDir) import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder, defConfig, Indent(..), NumberFormat(..)) import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme, pygments) import Skylighting.Parser (addSyntaxDefinition, missingIncludes, parseSyntaxDefinition) import System.Console.GetOpt import System.Directory (Permissions (..), doesFileExist, findExecutable, getAppUserDataDirectory, getPermissions) import System.Environment (getArgs, getEnvironment, getProgName) import System.Exit (ExitCode (..), exitSuccess) import System.FilePath import System.IO (nativeNewline, stdout) import qualified System.IO as IO (Newline (..)) import System.IO.Error (isDoesNotExistError) import Text.Pandoc import Text.Pandoc.BCP47 (Lang (..), parseBCP47) import Text.Pandoc.Builder (setMeta, deleteMeta) import Text.Pandoc.Highlighting (highlightingStyles) import Text.Pandoc.Lua (LuaException (..), runLuaFilter) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, ordNub, safeRead, tabFilter) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Writers.Math (defaultKaTeXURL, defaultMathJaxURL) import Text.Pandoc.XML (toEntities) import Text.Printf #ifndef _WINDOWS import System.Posix.IO (stdOutput) import System.Posix.Terminal (queryTerminal) #endif data LineEnding = LF | CRLF | Native deriving (Show, Generic) parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt parseOptions options' defaults = do rawArgs <- map UTF8.decodeArg <$> getArgs prg <- getProgName let (actions, args, unrecognizedOpts, errors) = getOpt' Permute options' rawArgs let unknownOptionErrors = foldr (handleUnrecognizedOption . takeWhile (/= '=')) [] unrecognizedOpts unless (null errors && null unknownOptionErrors) $ E.throwIO $ PandocOptionError $ concat errors ++ unlines unknownOptionErrors ++ ("Try " ++ prg ++ " --help for more information.") -- thread option data structure through all supplied option actions opts <- foldl (>>=) (return defaults) actions return (opts{ optInputFiles = args }) latexEngines :: [String] latexEngines = ["pdflatex", "lualatex", "xelatex"] htmlEngines :: [String] htmlEngines = ["wkhtmltopdf", "weasyprint", "prince"] engines :: [(String, String)] engines = map ("html",) htmlEngines ++ map ("html5",) htmlEngines ++ map ("latex",) latexEngines ++ map ("beamer",) latexEngines ++ [ ("ms", "pdfroff") , ("context", "context") ] pdfEngines :: [String] pdfEngines = ordNub $ map snd engines pdfWriterAndProg :: Maybe String -- ^ user-specified writer name -> Maybe String -- ^ user-specified pdf-engine -> IO (String, Maybe String) -- ^ IO (writerName, maybePdfEngineProg) pdfWriterAndProg mWriter mEngine = do let panErr msg = liftIO $ E.throwIO $ PandocAppError msg case go mWriter mEngine of Right (writ, prog) -> return (writ, Just prog) Left err -> panErr err where go Nothing Nothing = Right ("latex", "pdflatex") go (Just writer) Nothing = (writer,) <$> engineForWriter writer go Nothing (Just engine) = (,engine) <$> writerForEngine engine go (Just writer) (Just engine) = case find (== (baseWriterName writer, engine)) engines of Just _ -> Right (writer, engine) Nothing -> Left $ "pdf-engine " ++ engine ++ " is not compatible with output format " ++ writer writerForEngine eng = case [f | (f,e) <- engines, e == eng] of fmt : _ -> Right fmt [] -> Left $ "pdf-engine " ++ eng ++ " not known" engineForWriter w = case [e | (f,e) <- engines, f == baseWriterName w] of eng : _ -> Right eng [] -> Left $ "cannot produce pdf output from " ++ w convertWithOpts :: Opt -> IO () convertWithOpts opts = do let outputFile = fromMaybe "-" (optOutputFile opts) let filters = optFilters opts let verbosity = optVerbosity opts when (optDumpArgs opts) $ do UTF8.hPutStrLn stdout outputFile mapM_ (UTF8.hPutStrLn stdout) (optInputFiles opts) exitSuccess epubMetadata <- case optEpubMetadata opts of Nothing -> return Nothing Just fp -> Just <$> UTF8.readFile fp let isPandocCiteproc (JSONFilter f) = takeBaseName f == "pandoc-citeproc" isPandocCiteproc _ = False -- --bibliography implies -F pandoc-citeproc for backwards compatibility: let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) && optCiteMethod opts `notElem` [Natbib, Biblatex] && all (not . isPandocCiteproc) filters let filters' = if needsCiteproc then JSONFilter "pandoc-citeproc" : filters else filters let sources = case optInputFiles opts of [] -> ["-"] xs | optIgnoreArgs opts -> ["-"] | otherwise -> xs datadir <- case optDataDir opts of Nothing -> E.catch (Just <$> getAppUserDataDirectory "pandoc") (\e -> let _ = (e :: E.SomeException) in return Nothing) Just _ -> return $ optDataDir opts -- assign reader and writer based on options and filenames let readerName = fromMaybe ( defaultReaderName (if any isURI sources then "html" else "markdown") sources) (optReader opts) let nonPdfWriterName Nothing = defaultWriterName outputFile nonPdfWriterName (Just x) = x let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" (writerName, maybePdfProg) <- if pdfOutput then pdfWriterAndProg (optWriter opts) (optPdfEngine opts) else return (nonPdfWriterName $ optWriter opts, Nothing) let format = baseWriterName $ takeFileName writerName -- in case path to lua script -- disabling the custom writer for now (writer, writerExts) <- if ".lua" `isSuffixOf` format -- note: use non-lowercased version writerName then return (TextWriter (\o d -> writeCustom writerName o d) :: Writer PandocIO, mempty) else case getWriter writerName of Left e -> E.throwIO $ PandocAppError $ if format == "pdf" then e ++ "\nTo create a pdf using pandoc, use " ++ "-t latex|beamer|context|ms|html5" ++ "\nand specify an output file with " ++ ".pdf extension (-o filename.pdf)." else e Right (w, es) -> return (w :: Writer PandocIO, es) -- TODO: we have to get the input and the output into the state for -- the sake of the text2tags reader. (reader, readerExts) <- case getReader readerName of Right (r, es) -> return (r :: Reader PandocIO, es) Left e -> E.throwIO $ PandocAppError e' where e' = case readerName of "pdf" -> e ++ "\nPandoc can convert to PDF, but not from PDF." "doc" -> e ++ "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." _ -> e let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput let addStringAsVariable varname s vars = return $ (varname, s) : vars highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts let addSyntaxMap existingmap f = do res <- parseSyntaxDefinition f case res of Left errstr -> E.throwIO $ PandocSyntaxMapError errstr Right syn -> return $ addSyntaxDefinition syn existingmap syntaxMap <- foldM addSyntaxMap defaultSyntaxMap (optSyntaxDefinitions opts) unless (null (optSyntaxDefinitions opts)) $ case missingIncludes (M.elems syntaxMap) of [] -> return () xs -> E.throwIO $ PandocSyntaxMapError $ "Missing syntax definitions:\n" ++ unlines (map (\(syn,dep) -> (T.unpack syn ++ " requires " ++ T.unpack dep ++ " through IncludeRules.")) xs) -- We don't want to send output to the terminal if the user -- does 'pandoc -t docx input.txt'; though we allow them to -- force this with '-o -'. On posix systems, we detect -- when stdout is being piped and allow output to stdout -- in that case, but on Windows we can't. #ifdef _WINDOWS let istty = True #else istty <- queryTerminal stdOutput #endif when (not (isTextFormat format) && istty && isNothing ( optOutputFile opts)) $ E.throwIO $ PandocAppError $ "Cannot write " ++ format ++ " output to terminal.\n" ++ "Specify an output file using the -o option, or " ++ "use '-o -' to force output to stdout." let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" then 0 else optTabStop opts) readSources :: [FilePath] -> PandocIO Text readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> mapM readSource srcs let runIO' :: PandocIO a -> IO a runIO' f = do (res, reports) <- runIOorExplode $ do setTrace (optTrace opts) setVerbosity verbosity x <- f rs <- getLog return (x, rs) case optLogFile opts of Nothing -> return () Just logfile -> B.writeFile logfile (encodeLogMessages reports) let isWarning msg = messageVerbosity msg == WARNING when (optFailIfWarnings opts && any isWarning reports) $ E.throwIO PandocFailOnWarningError return res let eol = case optEol opts of CRLF -> IO.CRLF LF -> IO.LF Native -> nativeNewline -- note: this reverses the list constructed in option parsing, -- which in turn was reversed from the command-line order, -- so we end up with the correct order in the variable list: let withList _ [] vars = return vars withList f (x:xs) vars = f x vars >>= withList f xs let addContentsAsVariable varname fp vars = do s <- UTF8.toString <$> readFileStrict fp return $ (varname, s) : vars runIO' $ do setUserDataDir datadir setInputFiles (optInputFiles opts) setOutputFile (optOutputFile opts) variables <- withList (addStringAsVariable "sourcefile") (reverse $ optInputFiles opts) (("outputfile", fromMaybe "-" (optOutputFile opts)) : optVariables opts) -- we reverse this list because, unlike -- the other option lists here, it is -- not reversed when parsed from CLI arguments. -- See withList, above. >>= withList (addContentsAsVariable "include-before") (optIncludeBeforeBody opts) >>= withList (addContentsAsVariable "include-after") (optIncludeAfterBody opts) >>= withList (addContentsAsVariable "header-includes") (optIncludeInHeader opts) >>= withList (addStringAsVariable "css") (optCss opts) >>= maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts) >>= maybe return (addStringAsVariable "epub-cover-image") (optEpubCoverImage opts) >>= (\vars -> case optHTMLMathMethod opts of LaTeXMathML Nothing -> do s <- UTF8.toString <$> readDataFile "LaTeXMathML.js" return $ ("mathml-script", s) : vars _ -> return vars) >>= (\vars -> if format == "dzslides" then do dztempl <- UTF8.toString <$> readDataFile ("dzslides" "template.html") let dzline = "