{-# LANGUAGE CPP, TupleSections, ScopedTypeVariables #-} {- Copyright (C) 2006-2016 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Main Copyright : Copyright (C) 2006-2016 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Parses command-line options and calls the appropriate readers and writers. -} module Main where import Text.Pandoc import Text.Pandoc.Builder (setMeta) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Walk (walk) import Text.Pandoc.Readers.LaTeX (handleIncludes) import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile, safeRead, headerShift, normalize, err, warn, openURL ) import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag ) import Text.Pandoc.XML ( toEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) import Text.Pandoc.Process (pipeProcess) import Text.Highlighting.Kate ( languages, Style, tango, pygments, espresso, zenburn, kate, haddock, monochrome ) import System.Environment ( getArgs, getProgName ) import System.Exit ( ExitCode (..), exitSuccess ) import System.FilePath import System.Console.GetOpt import Data.Char ( toLower, toUpper ) import Data.List ( delete, intercalate, isPrefixOf, isSuffixOf, sort ) import System.Directory ( getAppUserDataDirectory, findExecutable, doesFileExist, Permissions(..), getPermissions ) import System.Process ( shell, CreateProcess(..), createProcess_, waitForProcess, StdStream(CreatePipe) ) import System.IO ( stdout, stderr, hClose ) import System.IO.Error ( isDoesNotExistError ) import qualified Control.Exception as E import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad (when, unless, (>=>)) import Data.Maybe (fromMaybe, isNothing, isJust) import Data.Foldable (foldrM) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString as BS import Data.Aeson (eitherDecode', encode) import qualified Data.Map as M import Data.Yaml (decode) import qualified Data.Yaml as Yaml import qualified Data.Text as T import Control.Applicative ((<|>)) import Text.Pandoc.Readers.Txt2Tags (getT2TMeta) import Paths_pandoc (getDataDir) import Text.Printf (printf) import Text.Pandoc.Error type Transform = Pandoc -> Pandoc copyrightMessage :: String copyrightMessage = intercalate "\n" [ "", "Copyright (C) 2006-2016 John MacFarlane", "Web: http://pandoc.org", "This is free software; see the source for copying conditions.", "There is no warranty, not even for merchantability or fitness", "for a particular purpose." ] compileInfo :: String compileInfo = "\nCompiled with texmath " ++ VERSION_texmath ++ ", highlighting-kate " ++ VERSION_highlighting_kate ++ ".\nSyntax highlighting is supported for the following languages:\n " ++ wrapWords 4 78 [map toLower l | l <- languages, l /= "Alert" && l /= "Alert_indent"] -- | Converts a list of strings into a single string with the items printed as -- comma separated words in lines with a maximum line length. wrapWords :: Int -> Int -> [String] -> String wrapWords indent c = wrap' (c - indent) (c - indent) where wrap' _ _ [] = "" wrap' cols remaining (x:xs) | remaining == cols = x ++ wrap' cols (remaining - length x) xs | (length x + 1) > remaining = ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs | otherwise = ", " ++ x ++ wrap' cols (remaining - length x - 2) xs isTextFormat :: String -> Bool isTextFormat s = s `notElem` ["odt","docx","epub","epub3"] externalFilter :: FilePath -> [String] -> Pandoc -> IO Pandoc externalFilter f args' d = do mbexe <- if '/' `elem` f -- don't check PATH if filter name has a path then return Nothing -- we catch isDoesNotExistError because this will -- be triggered if PATH not set: else E.catch (findExecutable f) (\e -> if isDoesNotExistError e then return Nothing else throwIO e) (f', args'') <- case mbexe of Just x -> return (x, args') Nothing -> do exists <- doesFileExist f if exists then do isExecutable <- executable `fmap` getPermissions f return $ case map toLower $ takeExtension f of _ | isExecutable -> (f, args') ".py" -> ("python", f:args') ".hs" -> ("runhaskell", f:args') ".pl" -> ("perl", f:args') ".rb" -> ("ruby", f:args') ".php" -> ("php", f:args') _ -> (f, args') else err 85 $ "Filter " ++ f ++ " not found" when (f' /= f) $ do mbExe <- findExecutable f' when (isNothing mbExe) $ err 83 $ "Error running filter " ++ f ++ "\n" ++ show f' ++ " not found in path." (exitcode, outbs, errbs) <- E.handle filterException $ pipeProcess Nothing f' args'' $ encode d unless (B.null errbs) $ B.hPutStr stderr errbs case exitcode of ExitSuccess -> return $ either error id $ eitherDecode' outbs ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++ "Filter returned error status " ++ show ec where filterException :: E.SomeException -> IO a filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ show e -- | Data structure for command line options. data Opt = Opt { optTabStop :: Int -- ^ Number of spaces per tab , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces , optStandalone :: Bool -- ^ Include header, footer , optReader :: String -- ^ Reader format , optWriter :: String -- ^ Writer format , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX , optTableOfContents :: Bool -- ^ Include table of contents , optTransforms :: [Transform] -- ^ Doc transforms to apply , optTemplate :: Maybe FilePath -- ^ Custom template , optVariables :: [(String,String)] -- ^ Template variables to set , optMetadata :: M.Map String MetaValue -- ^ Metadata fields to set , optOutputFile :: String -- ^ Name of output file , optNumberSections :: Bool -- ^ Number sections in LaTeX , optNumberOffset :: [Int] -- ^ Starting number for sections , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 , optSelfContained :: Bool -- ^ Make HTML accessible offline , optSmart :: Bool -- ^ Use smart typography , optOldDashes :: Bool -- ^ Parse dashes like pandoc <=1.8.2.1 , optHtml5 :: Bool -- ^ Produce HTML5 in HTML , optHtmlQTags :: Bool -- ^ Use tags in HTML , optHighlight :: Bool -- ^ Highlight source code , optHighlightStyle :: Style -- ^ Style to use for highlighted code , optChapters :: Bool -- ^ Use chapter for top-level sects , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt , optReferenceDocx :: Maybe FilePath -- ^ Path of reference.docx , optEpubStylesheet :: Maybe String -- ^ EPUB stylesheet , optEpubMetadata :: String -- ^ EPUB metadata , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters , optTOCDepth :: Int -- ^ Number of levels to include in TOC , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optVerbose :: Bool -- ^ Verbose diagnostic output , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optDpi :: Int -- ^ Dpi , optWrapText :: WrapOption -- ^ Options for wrapping text , optColumns :: Int -- ^ Line length in characters , optFilters :: [FilePath] -- ^ Filters to apply , optEmailObfuscation :: ObfuscationMethod , optIdentifierPrefix :: String , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks , optDataDir :: Maybe FilePath , optCiteMethod :: CiteMethod -- ^ Method to output cites , optListings :: Bool -- ^ Use listings package for code blocks , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine , optSlideLevel :: Maybe Int -- ^ Header level that creates slides , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 , optAscii :: Bool -- ^ Use ascii characters only in html , optTeXLigatures :: Bool -- ^ Use TeX ligatures for quotes/dashes , optDefaultImageExtension :: String -- ^ Default image extension , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media , optTrace :: Bool -- ^ Print debug information , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. , optFileScope :: Bool -- ^ Parse input files before combining , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX } -- | Defaults for command-line options. defaultOpts :: Opt defaultOpts = Opt { optTabStop = 4 , optPreserveTabs = False , optStandalone = False , optReader = "" -- null for default reader , optWriter = "" -- null for default writer , optParseRaw = False , optTableOfContents = False , optTransforms = [] , optTemplate = Nothing , optVariables = [] , optMetadata = M.empty , optOutputFile = "-" -- "-" means stdout , optNumberSections = False , optNumberOffset = [0,0,0,0,0,0] , optSectionDivs = False , optIncremental = False , optSelfContained = False , optSmart = False , optOldDashes = False , optHtml5 = False , optHtmlQTags = False , optHighlight = True , optHighlightStyle = pygments , optChapters = False , optHTMLMathMethod = PlainMath , optReferenceODT = Nothing , optReferenceDocx = Nothing , optEpubStylesheet = Nothing , optEpubMetadata = "" , optEpubFonts = [] , optEpubChapterLevel = 1 , optTOCDepth = 3 , optDumpArgs = False , optIgnoreArgs = False , optVerbose = False , optReferenceLinks = False , optDpi = 96 , optWrapText = WrapAuto , optColumns = 72 , optFilters = [] , optEmailObfuscation = JavascriptObfuscation , optIdentifierPrefix = "" , optIndentedCodeClasses = [] , optDataDir = Nothing , optCiteMethod = Citeproc , optListings = False , optLaTeXEngine = "pdflatex" , optLaTeXEngineArgs = [] , optSlideLevel = Nothing , optSetextHeaders = True , optAscii = False , optTeXLigatures = True , optDefaultImageExtension = "" , optExtractMedia = Nothing , optTrace = False , optTrackChanges = AcceptChanges , optFileScope = False , optKaTeXStylesheet = Nothing , optKaTeXJS = Nothing } -- | A list of functions, each transforming the options data structure -- in response to a command-line option. options :: [OptDescr (Opt -> IO Opt)] options = [ Option "fr" ["from","read"] (ReqArg (\arg opt -> return opt { optReader = arg }) "FORMAT") "" , Option "tw" ["to","write"] (ReqArg (\arg opt -> return opt { optWriter = arg }) "FORMAT") "" , Option "o" ["output"] (ReqArg (\arg opt -> return opt { optOutputFile = arg }) "FILENAME") "" -- "Name of output file" , Option "" ["data-dir"] (ReqArg (\arg opt -> return opt { optDataDir = Just arg }) "DIRECTORY") -- "Directory containing pandoc data files." "" , Option "R" ["parse-raw"] (NoArg (\opt -> return opt { optParseRaw = True })) "" -- "Parse untranslatable HTML codes and LaTeX environments as raw" , Option "S" ["smart"] (NoArg (\opt -> return opt { optSmart = True })) "" -- "Use smart quotes, dashes, and ellipses" , Option "" ["old-dashes"] (NoArg (\opt -> return opt { optSmart = True , optOldDashes = True })) "" -- "Use smart quotes, dashes, and ellipses" , Option "" ["base-header-level"] (ReqArg (\arg opt -> case safeRead arg of Just t | t > 0 -> do let oldTransforms = optTransforms opt let shift = t - 1 return opt{ optTransforms = headerShift shift : oldTransforms } _ -> err 19 "base-header-level must be a number > 0") "NUMBER") "" -- "Headers base level" , Option "" ["indented-code-classes"] (ReqArg (\arg opt -> return opt { optIndentedCodeClasses = words $ map (\c -> if c == ',' then ' ' else c) arg }) "STRING") "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" , Option "F" ["filter"] (ReqArg (\arg opt -> return opt { optFilters = arg : optFilters opt }) "PROGRAM") "" -- "External JSON filter" , Option "" ["normalize"] (NoArg (\opt -> return opt { optTransforms = normalize : optTransforms opt } )) "" -- "Normalize the Pandoc AST" , Option "p" ["preserve-tabs"] (NoArg (\opt -> return opt { optPreserveTabs = True })) "" -- "Preserve tabs instead of converting to spaces" , Option "" ["tab-stop"] (ReqArg (\arg opt -> case safeRead arg of Just t | t > 0 -> return opt { optTabStop = t } _ -> err 31 "tab-stop must be a number greater than 0") "NUMBER") "" -- "Tab stop (default 4)" , Option "" ["track-changes"] (ReqArg (\arg opt -> do action <- case arg of "accept" -> return AcceptChanges "reject" -> return RejectChanges "all" -> return AllChanges _ -> err 6 ("Unknown option for track-changes: " ++ arg) return opt { optTrackChanges = action }) "accept|reject|all") "" -- "Accepting or reject MS Word track-changes."" , Option "" ["file-scope"] (NoArg (\opt -> return opt { optFileScope = True })) "" -- "Parse input files before combining" , Option "" ["extract-media"] (ReqArg (\arg opt -> return opt { optExtractMedia = Just arg }) "PATH") "" -- "Directory to which to extract embedded media" , Option "s" ["standalone"] (NoArg (\opt -> return opt { optStandalone = True })) "" -- "Include needed header and footer on output" , Option "" ["template"] (ReqArg (\arg opt -> return opt{ optTemplate = Just arg, optStandalone = True }) "FILENAME") "" -- "Use custom template" , Option "M" ["metadata"] (ReqArg (\arg opt -> do let (key,val) = case break (`elem` ":=") arg of (k,_:v) -> (k, readMetaValue v) (k,_) -> (k, MetaBool True) return opt{ optMetadata = addMetadata key val $ optMetadata opt }) "KEY[:VALUE]") "" , Option "V" ["variable"] (ReqArg (\arg opt -> do let (key,val) = case break (`elem` ":=") arg of (k,_:v) -> (k,v) (k,_) -> (k,"true") return opt{ optVariables = (key,val) : optVariables opt }) "KEY[:VALUE]") "" , Option "D" ["print-default-template"] (ReqArg (\arg _ -> do templ <- getDefaultTemplate Nothing arg case templ of Right t -> UTF8.hPutStr stdout t Left e -> error $ show e exitSuccess) "FORMAT") "" -- "Print default template for FORMAT" , Option "" ["print-default-data-file"] (ReqArg (\arg _ -> do readDataFile Nothing arg >>= BS.hPutStr stdout exitSuccess) "FILE") "" -- "Print default data file" , Option "" ["dpi"] (ReqArg (\arg opt -> case safeRead arg of Just t | t > 0 -> return opt { optDpi = t } _ -> err 31 "dpi must be a number greater than 0") "NUMBER") "" -- "Dpi (default 96)" , Option "" ["no-wrap"] (NoArg (\opt -> do warn $ "--no-wrap is deprecated. " ++ "Use --wrap=none or --wrap=preserve instead." return opt { optWrapText = WrapNone })) "" , Option "" ["wrap"] (ReqArg (\arg opt -> case safeRead ("Wrap" ++ uppercaseFirstLetter arg) of Just o -> return opt { optWrapText = o } Nothing -> err 77 "--wrap must be auto, none, or preserve") "[auto|none|preserve]") "" -- "Option for wrapping text in output" , Option "" ["columns"] (ReqArg (\arg opt -> case safeRead arg of Just t | t > 0 -> return opt { optColumns = t } _ -> err 33 "columns must be a number greater than 0") "NUMBER") "" -- "Length of line in characters" , Option "" ["toc", "table-of-contents"] (NoArg (\opt -> return opt { optTableOfContents = True })) "" -- "Include table of contents" , Option "" ["toc-depth"] (ReqArg (\arg opt -> case safeRead arg of Just t | t >= 1 && t <= 6 -> return opt { optTOCDepth = t } _ -> err 57 "TOC level must be a number between 1 and 6") "NUMBER") "" -- "Number of levels to include in TOC" , Option "" ["no-highlight"] (NoArg (\opt -> return opt { optHighlight = False })) "" -- "Don't highlight source code" , Option "" ["highlight-style"] (ReqArg (\arg opt -> do newStyle <- case map toLower arg of "pygments" -> return pygments "tango" -> return tango "espresso" -> return espresso "zenburn" -> return zenburn "kate" -> return kate "monochrome" -> return monochrome "haddock" -> return haddock _ -> err 39 $ "Unknown style :" ++ arg return opt{ optHighlightStyle = newStyle }) "STYLE") "" -- "Style for highlighted code" , Option "H" ["include-in-header"] (ReqArg (\arg opt -> do text <- UTF8.readFile arg -- add new ones to end, so they're included in order specified let newvars = optVariables opt ++ [("header-includes",text)] return opt { optVariables = newvars, optStandalone = True }) "FILENAME") "" -- "File to include at end of header (implies -s)" , Option "B" ["include-before-body"] (ReqArg (\arg opt -> do text <- UTF8.readFile arg -- add new ones to end, so they're included in order specified let newvars = optVariables opt ++ [("include-before",text)] return opt { optVariables = newvars, optStandalone = True }) "FILENAME") "" -- "File to include before document body" , Option "A" ["include-after-body"] (ReqArg (\arg opt -> do text <- UTF8.readFile arg -- add new ones to end, so they're included in order specified let newvars = optVariables opt ++ [("include-after",text)] return opt { optVariables = newvars, optStandalone = True }) "FILENAME") "" -- "File to include after document body" , Option "" ["self-contained"] (NoArg (\opt -> return opt { optSelfContained = True, optStandalone = True })) "" -- "Make slide shows include all the needed js and css" , Option "" ["html-q-tags"] (NoArg (\opt -> return opt { optHtmlQTags = True })) "" -- "Use tags for quotes in HTML" , Option "" ["ascii"] (NoArg (\opt -> return opt { optAscii = True })) "" -- "Use ascii characters only in HTML output" , Option "" ["reference-links"] (NoArg (\opt -> return opt { optReferenceLinks = True } )) "" -- "Use reference links in parsing HTML" , Option "" ["atx-headers"] (NoArg (\opt -> return opt { optSetextHeaders = False } )) "" -- "Use atx-style headers for markdown" , Option "" ["chapters"] (NoArg (\opt -> return opt { optChapters = True })) "" -- "Use chapter for top-level sections in LaTeX, DocBook" , Option "N" ["number-sections"] (NoArg (\opt -> return opt { optNumberSections = True })) "" -- "Number sections in LaTeX" , Option "" ["number-offset"] (ReqArg (\arg opt -> case safeRead ('[':arg ++ "]") of Just ns -> return opt { optNumberOffset = ns, optNumberSections = True } _ -> err 57 "could not parse number-offset") "NUMBERS") "" -- "Starting number for sections, subsections, etc." , Option "" ["no-tex-ligatures"] (NoArg (\opt -> return opt { optTeXLigatures = False })) "" -- "Don't use tex ligatures for quotes, dashes" , Option "" ["listings"] (NoArg (\opt -> return opt { optListings = True })) "" -- "Use listings package for LaTeX code blocks" , Option "i" ["incremental"] (NoArg (\opt -> return opt { optIncremental = True })) "" -- "Make list items display incrementally in Slidy/Slideous/S5" , Option "" ["slide-level"] (ReqArg (\arg opt -> case safeRead arg of Just t | t >= 1 && t <= 6 -> return opt { optSlideLevel = Just t } _ -> err 39 "slide level must be a number between 1 and 6") "NUMBER") "" -- "Force header level for slides" , Option "" ["section-divs"] (NoArg (\opt -> return opt { optSectionDivs = True })) "" -- "Put sections in div tags in HTML" , Option "" ["default-image-extension"] (ReqArg (\arg opt -> return opt { optDefaultImageExtension = arg }) "extension") "" -- "Default extension for extensionless images" , Option "" ["email-obfuscation"] (ReqArg (\arg opt -> do method <- case arg of "references" -> return ReferenceObfuscation "javascript" -> return JavascriptObfuscation "none" -> return NoObfuscation _ -> err 6 ("Unknown obfuscation method: " ++ arg) return opt { optEmailObfuscation = method }) "none|javascript|references") "" -- "Method for obfuscating email in HTML" , Option "" ["id-prefix"] (ReqArg (\arg opt -> return opt { optIdentifierPrefix = arg }) "STRING") "" -- "Prefix to add to automatically generated HTML identifiers" , Option "T" ["title-prefix"] (ReqArg (\arg opt -> do let newvars = ("title-prefix", arg) : optVariables opt return opt { optVariables = newvars, optStandalone = True }) "STRING") "" -- "String to prefix to HTML window title" , Option "c" ["css"] (ReqArg (\arg opt -> do -- add new link to end, so it is included in proper order let newvars = optVariables opt ++ [("css",arg)] return opt { optVariables = newvars, optStandalone = True }) "URL") "" -- "Link to CSS style sheet" , Option "" ["reference-odt"] (ReqArg (\arg opt -> return opt { optReferenceODT = Just arg }) "FILENAME") "" -- "Path of custom reference.odt" , Option "" ["reference-docx"] (ReqArg (\arg opt -> return opt { optReferenceDocx = Just arg }) "FILENAME") "" -- "Path of custom reference.docx" , Option "" ["epub-stylesheet"] (ReqArg (\arg opt -> do text <- UTF8.readFile arg return opt { optEpubStylesheet = Just text }) "FILENAME") "" -- "Path of epub.css" , Option "" ["epub-cover-image"] (ReqArg (\arg opt -> return opt { optVariables = ("epub-cover-image", arg) : optVariables opt }) "FILENAME") "" -- "Path of epub cover image" , Option "" ["epub-metadata"] (ReqArg (\arg opt -> do text <- UTF8.readFile arg return opt { optEpubMetadata = text }) "FILENAME") "" -- "Path of epub metadata file" , Option "" ["epub-embed-font"] (ReqArg (\arg opt -> return opt{ optEpubFonts = arg : optEpubFonts opt }) "FILE") "" -- "Directory of fonts to embed" , Option "" ["epub-chapter-level"] (ReqArg (\arg opt -> case safeRead arg of Just t | t >= 1 && t <= 6 -> return opt { optEpubChapterLevel = t } _ -> err 59 "chapter level must be a number between 1 and 6") "NUMBER") "" -- "Header level at which to split chapters in EPUB" , Option "" ["latex-engine"] (ReqArg (\arg opt -> do let b = takeBaseName arg if b `elem` ["pdflatex", "lualatex", "xelatex"] then return opt { optLaTeXEngine = arg } else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.") "PROGRAM") "" -- "Name of latex program to use in generating PDF" , Option "" ["latex-engine-opt"] (ReqArg (\arg opt -> do let oldArgs = optLaTeXEngineArgs opt return opt { optLaTeXEngineArgs = arg : oldArgs }) "STRING") "" -- "Flags to pass to the LaTeX engine, all instances of this option are accumulated and used" , Option "" ["bibliography"] (ReqArg (\arg opt -> return opt{ optMetadata = addMetadata "bibliography" (readMetaValue arg) $ optMetadata opt }) "FILE") "" , Option "" ["csl"] (ReqArg (\arg opt -> return opt{ optMetadata = addMetadata "csl" (readMetaValue arg) $ optMetadata opt }) "FILE") "" , Option "" ["citation-abbreviations"] (ReqArg (\arg opt -> return opt{ optMetadata = addMetadata "citation-abbreviations" (readMetaValue arg) $ optMetadata opt }) "FILE") "" , Option "" ["natbib"] (NoArg (\opt -> return opt { optCiteMethod = Natbib })) "" -- "Use natbib cite commands in LaTeX output" , Option "" ["biblatex"] (NoArg (\opt -> return opt { optCiteMethod = Biblatex })) "" -- "Use biblatex cite commands in LaTeX output" , Option "m" ["latexmathml", "asciimathml"] (OptArg (\arg opt -> return opt { optHTMLMathMethod = LaTeXMathML arg }) "URL") "" -- "Use LaTeXMathML script in html output" , Option "" ["mathml"] (OptArg (\arg opt -> return opt { optHTMLMathMethod = MathML arg }) "URL") "" -- "Use mathml for HTML math" , Option "" ["mimetex"] (OptArg (\arg opt -> do let url' = case arg of Just u -> u ++ "?" Nothing -> "/cgi-bin/mimetex.cgi?" return opt { optHTMLMathMethod = WebTeX url' }) "URL") "" -- "Use mimetex for HTML math" , Option "" ["webtex"] (OptArg (\arg opt -> do let url' = fromMaybe "http://chart.apis.google.com/chart?cht=tx&chl=" arg return opt { optHTMLMathMethod = WebTeX url' }) "URL") "" -- "Use web service for HTML math" , Option "" ["jsmath"] (OptArg (\arg opt -> return opt { optHTMLMathMethod = JsMath arg}) "URL") "" -- "Use jsMath for HTML math" , Option "" ["mathjax"] (OptArg (\arg opt -> do let url' = fromMaybe "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML-full" arg return opt { optHTMLMathMethod = MathJax url'}) "URL") "" -- "Use MathJax for HTML math" , Option "" ["katex"] (OptArg (\arg opt -> return opt { optKaTeXJS = arg <|> Just "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.5.1/katex.min.js"}) "URL") "" -- Use KaTeX for HTML Math , Option "" ["katex-stylesheet"] (ReqArg (\arg opt -> return opt { optKaTeXStylesheet = Just arg }) "URL") "" -- Set the KaTeX Stylesheet location , Option "" ["gladtex"] (NoArg (\opt -> return opt { optHTMLMathMethod = GladTeX })) "" -- "Use gladtex for HTML math" , Option "" ["trace"] (NoArg (\opt -> return opt { optTrace = True })) "" -- "Turn on diagnostic tracing in readers." , Option "" ["dump-args"] (NoArg (\opt -> return opt { optDumpArgs = True })) "" -- "Print output filename and arguments to stdout." , Option "" ["ignore-args"] (NoArg (\opt -> return opt { optIgnoreArgs = True })) "" -- "Ignore command-line arguments." , Option "" ["verbose"] (NoArg (\opt -> return opt { optVerbose = True })) "" -- "Verbose diagnostic output." , Option "" ["bash-completion"] (NoArg (\_ -> do ddir <- getDataDir tpl <- readDataFileUTF8 Nothing "bash_completion.tpl" let optnames (Option shorts longs _ _) = map (\c -> ['-',c]) shorts ++ map ("--" ++) longs let allopts = unwords (concatMap optnames options) UTF8.hPutStrLn stdout $ printf tpl allopts (unwords (map fst readers)) (unwords ("pdf": map fst writers)) ddir exitSuccess )) "" -- "Print bash completion script" , Option "v" ["version"] (NoArg (\_ -> do prg <- getProgName defaultDatadir <- getAppUserDataDirectory "pandoc" UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++ "\nDefault user data directory: " ++ defaultDatadir ++ copyrightMessage) exitSuccess )) "" -- "Print version" , Option "h" ["help"] (NoArg (\_ -> do prg <- getProgName UTF8.hPutStr stdout (usageMessage prg options) exitSuccess )) "" -- "Show help" ] addMetadata :: String -> MetaValue -> M.Map String MetaValue -> M.Map String MetaValue addMetadata k v m = case M.lookup k m of Nothing -> M.insert k v m Just (MetaList xs) -> M.insert k (MetaList (xs ++ [v])) m Just x -> M.insert k (MetaList [v, x]) m readMetaValue :: String -> MetaValue readMetaValue s = case decode (UTF8.fromString s) of Just (Yaml.String t) -> MetaString $ T.unpack t Just (Yaml.Bool b) -> MetaBool b _ -> MetaString s -- Returns usage message usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ wrapWords 16 78 readers'names ++ '\n' : replicate 16 ' ' ++ "[ *only Pandoc's JSON version of native AST]" ++ "\nOutput formats: " ++ wrapWords 16 78 writers'names ++ '\n' : replicate 16 ' ' ++ "[**for pdf output, use latex or beamer and -o FILENAME.pdf]\nOptions:") where writers'names = sort $ "json*" : "pdf**" : delete "json" (map fst writers) readers'names = sort $ "json*" : delete "json" (map fst readers) -- Determine default reader based on source file extensions defaultReaderName :: String -> [FilePath] -> String defaultReaderName fallback [] = fallback defaultReaderName fallback (x:xs) = case takeExtension (map toLower x) of ".xhtml" -> "html" ".html" -> "html" ".htm" -> "html" ".tex" -> "latex" ".latex" -> "latex" ".ltx" -> "latex" ".rst" -> "rst" ".org" -> "org" ".lhs" -> "markdown+lhs" ".db" -> "docbook" ".opml" -> "opml" ".wiki" -> "mediawiki" ".dokuwiki" -> "dokuwiki" ".textile" -> "textile" ".native" -> "native" ".json" -> "json" ".docx" -> "docx" ".t2t" -> "t2t" ".epub" -> "epub" ".odt" -> "odt" ".pdf" -> "pdf" -- so we get an "unknown reader" error ".doc" -> "doc" -- so we get an "unknown reader" error _ -> defaultReaderName fallback xs -- Returns True if extension of first source is .lhs lhsExtension :: [FilePath] -> Bool lhsExtension (x:_) = takeExtension x == ".lhs" lhsExtension _ = False -- Determine default writer based on output file extension defaultWriterName :: FilePath -> String defaultWriterName "-" = "html" -- no output file defaultWriterName x = case takeExtension (map toLower x) of "" -> "markdown" -- empty extension ".tex" -> "latex" ".latex" -> "latex" ".ltx" -> "latex" ".context" -> "context" ".ctx" -> "context" ".rtf" -> "rtf" ".rst" -> "rst" ".s5" -> "s5" ".native" -> "native" ".json" -> "json" ".txt" -> "markdown" ".text" -> "markdown" ".md" -> "markdown" ".markdown" -> "markdown" ".textile" -> "textile" ".lhs" -> "markdown+lhs" ".texi" -> "texinfo" ".texinfo" -> "texinfo" ".db" -> "docbook" ".odt" -> "odt" ".docx" -> "docx" ".epub" -> "epub" ".org" -> "org" ".asciidoc" -> "asciidoc" ".adoc" -> "asciidoc" ".pdf" -> "latex" ".fb2" -> "fb2" ".opml" -> "opml" ".icml" -> "icml" ".tei.xml" -> "tei" ".tei" -> "tei" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" -- Transformations of a Pandoc document post-parsing: extractMedia :: MediaBag -> FilePath -> Pandoc -> IO Pandoc extractMedia media dir d = case [fp | (fp, _, _) <- mediaDirectory media] of [] -> return d fps -> do extractMediaBag True dir media return $ walk (adjustImagePath dir fps) d adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline adjustImagePath dir paths (Image attr lab (src, tit)) | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) adjustImagePath _ _ x = x adjustMetadata :: M.Map String MetaValue -> Pandoc -> IO Pandoc adjustMetadata metadata d = return $ M.foldWithKey setMeta d metadata applyTransforms :: [Transform] -> Pandoc -> IO Pandoc applyTransforms transforms d = return $ foldr ($) d transforms applyFilters :: [FilePath] -> [String] -> Pandoc -> IO Pandoc applyFilters filters args d = foldrM ($) d $ map (flip externalFilter args) filters uppercaseFirstLetter :: String -> String uppercaseFirstLetter (c:cs) = toUpper c : cs uppercaseFirstLetter [] = [] main :: IO () main = do rawArgs <- map UTF8.decodeArg <$> getArgs prg <- getProgName let (actions, args, errors) = getOpt Permute options rawArgs unless (null errors) $ err 2 $ concat $ errors ++ ["Try " ++ prg ++ " --help for more information."] -- thread option data structure through all supplied option actions opts <- foldl (>>=) (return defaultOpts) actions convertWithOpts opts args convertWithOpts :: Opt -> [FilePath] -> IO () convertWithOpts opts args = do let Opt { optTabStop = tabStop , optPreserveTabs = preserveTabs , optStandalone = standalone , optReader = readerName , optWriter = writerName , optParseRaw = parseRaw , optVariables = variables , optMetadata = metadata , optTableOfContents = toc , optTransforms = transforms , optTemplate = templatePath , optOutputFile = outputFile , optNumberSections = numberSections , optNumberOffset = numberFrom , optSectionDivs = sectionDivs , optIncremental = incremental , optSelfContained = selfContained , optSmart = smart , optOldDashes = oldDashes , optHtml5 = html5 , optHtmlQTags = htmlQTags , optHighlight = highlight , optHighlightStyle = highlightStyle , optChapters = chapters , optHTMLMathMethod = mathMethod' , optReferenceODT = referenceODT , optReferenceDocx = referenceDocx , optEpubStylesheet = epubStylesheet , optEpubMetadata = epubMetadata , optEpubFonts = epubFonts , optEpubChapterLevel = epubChapterLevel , optTOCDepth = epubTOCDepth , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs , optVerbose = verbose , optReferenceLinks = referenceLinks , optDpi = dpi , optWrapText = wrap , optColumns = columns , optFilters = filters , optEmailObfuscation = obfuscationMethod , optIdentifierPrefix = idPrefix , optIndentedCodeClasses = codeBlockClasses , optDataDir = mbDataDir , optCiteMethod = citeMethod , optListings = listings , optLaTeXEngine = latexEngine , optLaTeXEngineArgs = latexEngineArgs , optSlideLevel = slideLevel , optSetextHeaders = setextHeaders , optAscii = ascii , optTeXLigatures = texLigatures , optDefaultImageExtension = defaultImageExtension , optExtractMedia = mbExtractMedia , optTrace = trace , optTrackChanges = trackChanges , optFileScope = fileScope , optKaTeXStylesheet = katexStylesheet , optKaTeXJS = katexJS } = opts when dumpArgs $ do UTF8.hPutStrLn stdout outputFile mapM_ (UTF8.hPutStrLn stdout) args exitSuccess let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.5.1/katex.min.css" let mathMethod = case (katexJS, katexStylesheet) of (Nothing, _) -> mathMethod' (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) -- --bibliography implies -F pandoc-citeproc for backwards compatibility: let needsCiteproc = isJust (M.lookup "bibliography" (optMetadata opts)) && optCiteMethod opts `notElem` [Natbib, Biblatex] && "pandoc-citeproc" `notElem` map takeBaseName filters let filters' = if needsCiteproc then "pandoc-citeproc" : filters else filters let sources = if ignoreArgs then [] else args datadir <- case mbDataDir of Nothing -> E.catch (Just <$> getAppUserDataDirectory "pandoc") (\e -> let _ = (e :: E.SomeException) in return Nothing) Just _ -> return mbDataDir -- assign reader and writer based on options and filenames let readerName' = case map toLower readerName of [] -> defaultReaderName (if any isURI sources then "html" else "markdown") sources "html4" -> "html" x -> x let writerName' = case map toLower writerName of [] -> defaultWriterName outputFile "epub2" -> "epub" "html4" -> "html" x -> x let format = takeWhile (`notElem` ['+','-']) $ takeFileName writerName' -- in case path to lua script let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" let laTeXOutput = format `elem` ["latex", "beamer"] let conTeXtOutput = format == "context" let html5Output = format == "html5" let laTeXInput = "latex" `isPrefixOf` readerName' || "beamer" `isPrefixOf` readerName' writer <- if ".lua" `isSuffixOf` format -- note: use non-lowercased version writerName then return $ IOStringWriter $ writeCustom writerName else case getWriter writerName' of Left e -> err 9 $ if format == "pdf" then e ++ "\nTo create a pdf with pandoc, use " ++ "the latex or beamer writer and specify\n" ++ "an output file with .pdf extension " ++ "(pandoc -t latex -o filename.pdf)." else e Right w -> return w reader <- if "t2t" == readerName' then (mkStringReader . readTxt2Tags) <$> getT2TMeta sources outputFile else case getReader readerName' of Right r -> return r Left e -> err 7 e' where e' = case readerName' of "pdf" -> e ++ "\nPandoc can convert to PDF, but not from PDF." "doc" -> e ++ "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." _ -> e let standalone' = standalone || not (isTextFormat format) || pdfOutput templ <- case templatePath of _ | not standalone' -> return "" Nothing -> do deftemp <- getDefaultTemplate datadir format case deftemp of Left e -> throwIO e Right t -> return t Just tp -> do -- strip off extensions let tp' = case takeExtension tp of "" -> tp <.> format _ -> tp E.catch (UTF8.readFile tp') (\e -> if isDoesNotExistError e then E.catch (readDataFileUTF8 datadir ("templates" tp')) (\e' -> let _ = (e' :: E.SomeException) in throwIO e') else throwIO e) variables' <- case mathMethod of LaTeXMathML Nothing -> do s <- readDataFileUTF8 datadir "LaTeXMathML.js" return $ ("mathml-script", s) : variables MathML Nothing -> do s <- readDataFileUTF8 datadir "MathMLinHTML.js" return $ ("mathml-script", s) : variables _ -> return variables variables'' <- if format == "dzslides" then do dztempl <- readDataFileUTF8 datadir ("dzslides" "template.html") let dzline = "