From 0bbea0cc7650af61870b310ebbf6fb8a9fec09a7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 5 Feb 2017 21:58:45 +0100 Subject: Split pandoc.hs into a module, Text.Pandoc.App, and a small program. The App module provides a function that does a pandoc conversion, based on option settings. The program (pandoc.hs) now does nothing more than parse options and pass them to this function, which can easily be used by other applications (e.g. a GUI wrapper). The Opt structure has been further simplified. API changes: * New exposed module Text.Pandoc.App * Text.Pandoc.Highlighting has been exposed. * highlightingStyles has been moved to Text.Pandoc.Highlighting. --- src/Text/Pandoc/App.hs | 720 ++++++++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Highlighting.hs | 14 +- 2 files changed, 733 insertions(+), 1 deletion(-) create mode 100644 src/Text/Pandoc/App.hs (limited to 'src') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs new file mode 100644 index 000000000..e51a45395 --- /dev/null +++ b/src/Text/Pandoc/App.hs @@ -0,0 +1,720 @@ +{-# LANGUAGE CPP, TupleSections, ScopedTypeVariables, PatternGuards #-} +{- +Copyright (C) 2006-2016 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.App + Copyright : Copyright (C) 2006-2016 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Does a pandoc conversion based on command-line options. +-} +module Text.Pandoc.App ( + convertWithOpts + , Opt(..) + , defaultOpts + ) where +import Text.Pandoc +import Text.Pandoc.Builder (setMeta) +import Text.Pandoc.PDF (makePDF) +import Text.Pandoc.Walk (walk) +import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, + headerShift, err, openURL ) +import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag ) +import Text.Pandoc.XML ( toEntities ) +import Text.Pandoc.Highlighting (highlightingStyles) +import Text.Pandoc.SelfContained ( makeSelfContained ) +import Text.Pandoc.Process (pipeProcess) +import Skylighting ( Style ) +import System.Environment ( getEnvironment ) +import System.Exit ( ExitCode (..), exitSuccess ) +import System.FilePath +import Data.Char ( toLower ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import System.Directory ( getAppUserDataDirectory, findExecutable, + doesFileExist, Permissions(..), getPermissions ) +import System.IO ( stdout, stderr ) +import System.IO.Error ( isDoesNotExistError ) +import qualified Control.Exception as E +import Control.Exception.Extensible ( throwIO ) +import qualified Text.Pandoc.UTF8 as UTF8 +import Control.Monad (when, unless, (>=>)) +import Data.Maybe (fromMaybe, isNothing, isJust) +import Data.Foldable (foldrM) +import Network.URI (parseURI, isURI, URI(..)) +import qualified Data.ByteString.Lazy as B +import Data.Aeson (eitherDecode', encode) +import Data.Yaml (decode) +import qualified Data.Yaml as Yaml +import qualified Data.Text as T +#ifndef _WINDOWS +import System.Posix.Terminal (queryTerminal) +import System.Posix.IO (stdOutput) +#endif +import Control.Monad.Trans +import Text.Pandoc.Class (withMediaBag, PandocIO, getLog, setVerbosity) + +convertWithOpts :: Opt -> [FilePath] -> IO () +convertWithOpts opts args = do + let outputFile = optOutputFile opts + let filters = optFilters opts + let verbosity = optVerbosity opts + + when (optDumpArgs opts) $ + do UTF8.hPutStrLn stdout outputFile + mapM_ (UTF8.hPutStrLn stdout) args + exitSuccess + + epubStylesheet <- case optEpubStylesheet opts of + Nothing -> return Nothing + Just fp -> Just <$> UTF8.readFile fp + + epubMetadata <- case optEpubMetadata opts of + Nothing -> return Nothing + Just fp -> Just <$> UTF8.readFile fp + + let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" + let mathMethod = + case (optKaTeXJS opts, optKaTeXStylesheet opts) of + (Nothing, _) -> optHTMLMathMethod opts + (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) + + + -- --bibliography implies -F pandoc-citeproc for backwards compatibility: + let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) && + optCiteMethod opts `notElem` [Natbib, Biblatex] && + "pandoc-citeproc" `notElem` map takeBaseName filters + let filters' = if needsCiteproc then "pandoc-citeproc" : filters + else filters + + let sources = case args of + [] -> ["-"] + xs | optIgnoreArgs opts -> ["-"] + | otherwise -> xs + + datadir <- case optDataDir opts of + Nothing -> E.catch + (Just <$> getAppUserDataDirectory "pandoc") + (\e -> let _ = (e :: E.SomeException) + in return Nothing) + Just _ -> return $ optDataDir opts + + -- assign reader and writer based on options and filenames + let readerName = case optReader opts of + Nothing -> defaultReaderName + (if any isURI sources + then "html" + else "markdown") sources + Just x -> map toLower x + + let writerName = case optWriter opts of + Nothing -> defaultWriterName outputFile + Just x -> map toLower x + let format = takeWhile (`notElem` ['+','-']) + $ takeFileName writerName -- in case path to lua script + + let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" + + let laTeXOutput = format `elem` ["latex", "beamer"] + let conTeXtOutput = format == "context" + let html5Output = format == "html5" || format == "html" + + -- disabling the custom writer for now + writer <- if ".lua" `isSuffixOf` format + -- note: use non-lowercased version writerName + then error "custom writers disabled for now" + else case getWriter writerName of + Left e -> err 9 $ + if format == "pdf" + then e ++ + "\nTo create a pdf with pandoc, use " ++ + "the latex or beamer writer and specify\n" ++ + "an output file with .pdf extension " ++ + "(pandoc -t latex -o filename.pdf)." + else e + Right w -> return (w :: Writer PandocIO) + + -- TODO: we have to get the input and the output into the state for + -- the sake of the text2tags reader. + reader <- case getReader readerName of + Right r -> return (r :: Reader PandocIO) + Left e -> err 7 e' + where e' = case readerName of + "pdf" -> e ++ + "\nPandoc can convert to PDF, but not from PDF." + "doc" -> e ++ + "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." + _ -> e + + let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput + + templ <- case optTemplate opts of + _ | not standalone -> return Nothing + Nothing -> do + deftemp <- getDefaultTemplate datadir format + case deftemp of + Left e -> throwIO e + Right t -> return (Just t) + Just tp -> do + -- strip off extensions + let tp' = case takeExtension tp of + "" -> tp <.> format + _ -> tp + Just <$> E.catch (UTF8.readFile tp') + (\e -> if isDoesNotExistError e + then E.catch + (readDataFileUTF8 datadir + ("templates" tp')) + (\e' -> let _ = (e' :: E.SomeException) + in throwIO e') + else throwIO e) + + let addStringAsVariable varname s vars = return $ (varname, s) : vars + + let addContentsAsVariable varname fp vars = do + s <- UTF8.readFile fp + return $ (varname, s) : vars + + -- note: this reverses the list constructed in option parsing, + -- which in turn was reversed from the command-line order, + -- so we end up with the correct order in the variable list: + let withList _ [] vars = return vars + withList f (x:xs) vars = f x vars >>= withList f xs + + variables <- return (optVariables opts) + >>= + withList (addContentsAsVariable "include-before") + (optIncludeBeforeBody opts) + >>= + withList (addContentsAsVariable "include-after") + (optIncludeAfterBody opts) + >>= + withList (addContentsAsVariable "header-includes") + (optIncludeInHeader opts) + >>= + withList (addStringAsVariable "css") (optCss opts) + >>= + maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts) + >>= + maybe return (addStringAsVariable "epub-cover-image") + (optEpubCoverImage opts) + >>= + (\vars -> case mathMethod of + LaTeXMathML Nothing -> do + s <- readDataFileUTF8 datadir "LaTeXMathML.js" + return $ ("mathml-script", s) : vars + _ -> return vars) + >>= + (\vars -> if format == "dzslides" + then do + dztempl <- readDataFileUTF8 datadir + ("dzslides" "template.html") + let dzline = "