From 32c5a8e2dcc1262a8181d0f9b6ffe208e7499481 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 29 Dec 2012 16:42:17 -0800 Subject: Cabal file changes. * Remove executable and library flags. * Expose `Text.Pandoc.XML` and `Text.Pandoc.Biblio`. * Depend on pandoc library in executable, so we don't recompile everything. * Move pandoc.hs from src/ to . --- pandoc.hs | 1052 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1052 insertions(+) create mode 100644 pandoc.hs (limited to 'pandoc.hs') diff --git a/pandoc.hs b/pandoc.hs new file mode 100644 index 000000000..7268f57f8 --- /dev/null +++ b/pandoc.hs @@ -0,0 +1,1052 @@ +{-# LANGUAGE CPP #-} +{- +Copyright (C) 2006-2012 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Main + Copyright : Copyright (C) 2006-2012 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Parses command-line options and calls the appropriate readers and +writers. +-} +module Main where +import Text.Pandoc +import Text.Pandoc.PDF (tex2pdf) +import Text.Pandoc.Readers.LaTeX (handleIncludes) +import Text.Pandoc.Shared ( tabFilter, readDataFile, safeRead, + headerShift, normalize, err, warn ) +import Text.Pandoc.XML ( toEntities, fromEntities ) +import Text.Pandoc.SelfContained ( makeSelfContained ) +import Text.Pandoc.Highlighting ( languages, Style, tango, pygments, + espresso, zenburn, kate, haddock, monochrome ) +import System.Environment ( getArgs, getProgName ) +import System.Exit ( exitWith, ExitCode (..) ) +import System.FilePath +import System.Console.GetOpt +import Data.Char ( toLower ) +import Data.List ( intercalate, isPrefixOf ) +import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) +import System.IO ( stdout ) +import System.IO.Error ( isDoesNotExistError ) +import qualified Control.Exception as E +import Control.Exception.Extensible ( throwIO ) +import qualified Text.Pandoc.UTF8 as UTF8 +import qualified Text.CSL as CSL +import Control.Monad (when, unless, liftM) +import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) +import Network.URI (parseURI, isURI, URI(..)) +import qualified Data.ByteString.Lazy as B +import Text.CSL.Reference (Reference(..)) + +copyrightMessage :: String +copyrightMessage = "\nCopyright (C) 2006-2012 John MacFarlane\n" ++ + "Web: http://johnmacfarlane.net/pandoc\n" ++ + "This is free software; see the source for copying conditions. There is no\n" ++ + "warranty, not even for merchantability or fitness for a particular purpose." + +compileInfo :: String +compileInfo = + "\nCompiled with citeproc-hs " ++ VERSION_citeproc_hs ++ ", texmath " ++ + VERSION_texmath ++ ", highlighting-kate " ++ VERSION_highlighting_kate ++ + ".\nSyntax highlighting is supported for the following languages:\n " ++ + wrapWords 4 78 languages + +-- | Converts a list of strings into a single string with the items printed as +-- comma separated words in lines with a maximum line length. +wrapWords :: Int -> Int -> [String] -> String +wrapWords indent c = wrap' (c - indent) (c - indent) + where wrap' _ _ [] = "" + wrap' cols remaining (x:xs) = if remaining == cols + then x ++ wrap' cols (remaining - length x) xs + else if (length x + 1) > remaining + then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs + else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs + +isTextFormat :: String -> Bool +isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub","epub3"] + +-- | Data structure for command line options. +data Opt = Opt + { optTabStop :: Int -- ^ Number of spaces per tab + , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces + , optStandalone :: Bool -- ^ Include header, footer + , optReader :: String -- ^ Reader format + , optWriter :: String -- ^ Writer format + , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX + , optTableOfContents :: Bool -- ^ Include table of contents + , optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply + , optTemplate :: Maybe FilePath -- ^ Custom template + , optVariables :: [(String,String)] -- ^ Template variables to set + , optOutputFile :: String -- ^ Name of output file + , optNumberSections :: Bool -- ^ Number sections in LaTeX + , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML + , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 + , optSelfContained :: Bool -- ^ Make HTML accessible offline + , optSmart :: Bool -- ^ Use smart typography + , optOldDashes :: Bool -- ^ Parse dashes like pandoc <=1.8.2.1 + , optHtml5 :: Bool -- ^ Produce HTML5 in HTML + , optHighlight :: Bool -- ^ Highlight source code + , optHighlightStyle :: Style -- ^ Style to use for highlighted code + , optChapters :: Bool -- ^ Use chapter for top-level sects + , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math + , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt + , optReferenceDocx :: Maybe FilePath -- ^ Path of reference.docx + , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet + , optEPUBMetadata :: String -- ^ EPUB metadata + , optEPUBFonts :: [FilePath] -- ^ EPUB fonts to embed + , optDumpArgs :: Bool -- ^ Output command-line arguments + , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments + , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , optWrapText :: Bool -- ^ Wrap text + , optColumns :: Int -- ^ Line length in characters + , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply + , optEmailObfuscation :: ObfuscationMethod + , optIdentifierPrefix :: String + , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks + , optDataDir :: Maybe FilePath + , optCiteMethod :: CiteMethod -- ^ Method to output cites + , optBibliography :: [String] + , optCslFile :: Maybe FilePath + , optAbbrevsFile :: Maybe FilePath + , optListings :: Bool -- ^ Use listings package for code blocks + , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf + , optSlideLevel :: Maybe Int -- ^ Header level that creates slides + , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 + , optAscii :: Bool -- ^ Use ascii characters only in html + , optTeXLigatures :: Bool -- ^ Use TeX ligatures for quotes/dashes + } + +-- | Defaults for command-line options. +defaultOpts :: Opt +defaultOpts = Opt + { optTabStop = 4 + , optPreserveTabs = False + , optStandalone = False + , optReader = "" -- null for default reader + , optWriter = "" -- null for default writer + , optParseRaw = False + , optTableOfContents = False + , optTransforms = [] + , optTemplate = Nothing + , optVariables = [] + , optOutputFile = "-" -- "-" means stdout + , optNumberSections = False + , optSectionDivs = False + , optIncremental = False + , optSelfContained = False + , optSmart = False + , optOldDashes = False + , optHtml5 = False + , optHighlight = True + , optHighlightStyle = pygments + , optChapters = False + , optHTMLMathMethod = PlainMath + , optReferenceODT = Nothing + , optReferenceDocx = Nothing + , optEPUBStylesheet = Nothing + , optEPUBMetadata = "" + , optEPUBFonts = [] + , optDumpArgs = False + , optIgnoreArgs = False + , optReferenceLinks = False + , optWrapText = True + , optColumns = 72 + , optPlugins = [] + , optEmailObfuscation = JavascriptObfuscation + , optIdentifierPrefix = "" + , optIndentedCodeClasses = [] + , optDataDir = Nothing + , optCiteMethod = Citeproc + , optBibliography = [] + , optCslFile = Nothing + , optAbbrevsFile = Nothing + , optListings = False + , optLaTeXEngine = "pdflatex" + , optSlideLevel = Nothing + , optSetextHeaders = True + , optAscii = False + , optTeXLigatures = True + } + +-- | A list of functions, each transforming the options data structure +-- in response to a command-line option. +options :: [OptDescr (Opt -> IO Opt)] +options = + [ Option "fr" ["from","read"] + (ReqArg + (\arg opt -> return opt { optReader = map toLower arg }) + "FORMAT") + "" + + , Option "tw" ["to","write"] + (ReqArg + (\arg opt -> return opt { optWriter = map toLower arg }) + "FORMAT") + "" + + , Option "o" ["output"] + (ReqArg + (\arg opt -> return opt { optOutputFile = arg }) + "FILENAME") + "" -- "Name of output file" + + , Option "" ["data-dir"] + (ReqArg + (\arg opt -> return opt { optDataDir = Just arg }) + "DIRECTORY") -- "Directory containing pandoc data files." + "" + + , Option "" ["strict"] + (NoArg + (\opt -> do + err 59 $ "The --strict option has been removed.\n" ++ + "Use `markdown_strict' input or output format instead." + return opt )) + "" -- "Disable markdown syntax extensions" + + , Option "R" ["parse-raw"] + (NoArg + (\opt -> return opt { optParseRaw = True })) + "" -- "Parse untranslatable HTML codes and LaTeX environments as raw" + + , Option "S" ["smart"] + (NoArg + (\opt -> return opt { optSmart = True })) + "" -- "Use smart quotes, dashes, and ellipses" + + , Option "" ["old-dashes"] + (NoArg + (\opt -> return opt { optSmart = True + , optOldDashes = True })) + "" -- "Use smart quotes, dashes, and ellipses" + + , Option "" ["base-header-level"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t > 0 -> do + let oldTransforms = optTransforms opt + let shift = t - 1 + return opt{ optTransforms = + headerShift shift : oldTransforms } + _ -> err 19 + "base-header-level must be a number > 0") + "NUMBER") + "" -- "Headers base level" + + , Option "" ["indented-code-classes"] + (ReqArg + (\arg opt -> return opt { optIndentedCodeClasses = words $ + map (\c -> if c == ',' then ' ' else c) arg }) + "STRING") + "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" + + , Option "" ["normalize"] + (NoArg + (\opt -> return opt { optTransforms = + normalize : optTransforms opt } )) + "" -- "Normalize the Pandoc AST" + + , Option "p" ["preserve-tabs"] + (NoArg + (\opt -> return opt { optPreserveTabs = True })) + "" -- "Preserve tabs instead of converting to spaces" + + , Option "" ["tab-stop"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t > 0 -> return opt { optTabStop = t } + _ -> err 31 + "tab-stop must be a number greater than 0") + "NUMBER") + "" -- "Tab stop (default 4)" + + , Option "s" ["standalone"] + (NoArg + (\opt -> return opt { optStandalone = True })) + "" -- "Include needed header and footer on output" + + , Option "" ["template"] + (ReqArg + (\arg opt -> do + return opt{ optTemplate = Just arg, + optStandalone = True }) + "FILENAME") + "" -- "Use custom template" + + , Option "V" ["variable"] + (ReqArg + (\arg opt -> do + let (key,val) = case break (`elem` ":=") arg of + (k,_:v) -> (k,v) + (k,_) -> (k,"true") + return opt{ optVariables = (key,val) : optVariables opt }) + "KEY[:VALUE]") + "" -- "Use custom template" + + , Option "D" ["print-default-template"] + (ReqArg + (\arg _ -> do + templ <- getDefaultTemplate Nothing arg + case templ of + Right t -> UTF8.hPutStr stdout t + Left e -> error $ show e + exitWith ExitSuccess) + "FORMAT") + "" -- "Print default template for FORMAT" + + , Option "" ["no-wrap"] + (NoArg + (\opt -> return opt { optWrapText = False })) + "" -- "Do not wrap text in output" + + , Option "" ["columns"] + (ReqArg + (\arg opt -> + case safeRead arg of + Just t | t > 0 -> return opt { optColumns = t } + _ -> err 33 $ + "columns must be a number greater than 0") + "NUMBER") + "" -- "Length of line in characters" + + , Option "" ["toc", "table-of-contents"] + (NoArg + (\opt -> return opt { optTableOfContents = True })) + "" -- "Include table of contents" + + , Option "" ["no-highlight"] + (NoArg + (\opt -> return opt { optHighlight = False })) + "" -- "Don't highlight source code" + + , Option "" ["highlight-style"] + (ReqArg + (\arg opt -> do + newStyle <- case map toLower arg of + "pygments" -> return pygments + "tango" -> return tango + "espresso" -> return espresso + "zenburn" -> return zenburn + "kate" -> return kate + "monochrome" -> return monochrome + "haddock" -> return haddock + _ -> err 39 $ + "Unknown style :" ++ arg + return opt{ optHighlightStyle = newStyle }) + "STYLE") + "" -- "Style for highlighted code" + + , Option "H" ["include-in-header"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + -- add new ones to end, so they're included in order specified + let newvars = optVariables opt ++ [("header-includes",text)] + return opt { optVariables = newvars, + optStandalone = True }) + "FILENAME") + "" -- "File to include at end of header (implies -s)" + + , Option "B" ["include-before-body"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + -- add new ones to end, so they're included in order specified + let newvars = optVariables opt ++ [("include-before",text)] + return opt { optVariables = newvars, + optStandalone = True }) + "FILENAME") + "" -- "File to include before document body" + + , Option "A" ["include-after-body"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + -- add new ones to end, so they're included in order specified + let newvars = optVariables opt ++ [("include-after",text)] + return opt { optVariables = newvars, + optStandalone = True }) + "FILENAME") + "" -- "File to include after document body" + + , Option "" ["self-contained"] + (NoArg + (\opt -> return opt { optSelfContained = True, + optVariables = ("slidy-url","slidy") : + optVariables opt, + optStandalone = True })) + "" -- "Make slide shows include all the needed js and css" + + , Option "" ["offline"] + (NoArg + (\opt -> do warn $ "--offline is deprecated. Use --self-contained instead." + return opt { optSelfContained = True, + optStandalone = True })) + "" -- "Make slide shows include all the needed js and css" + -- deprecated synonym for --self-contained + + , Option "5" ["html5"] + (NoArg + (\opt -> do + warn $ "--html5 is deprecated. " + ++ "Use the html5 output format instead." + return opt { optHtml5 = True })) + "" -- "Produce HTML5 in HTML output" + + , Option "" ["ascii"] + (NoArg + (\opt -> return opt { optAscii = True })) + "" -- "Use ascii characters only in HTML output" + + , Option "" ["reference-links"] + (NoArg + (\opt -> return opt { optReferenceLinks = True } )) + "" -- "Use reference links in parsing HTML" + + , Option "" ["atx-headers"] + (NoArg + (\opt -> return opt { optSetextHeaders = False } )) + "" -- "Use atx-style headers for markdown" + + , Option "" ["chapters"] + (NoArg + (\opt -> return opt { optChapters = True })) + "" -- "Use chapter for top-level sections in LaTeX, DocBook" + + , Option "N" ["number-sections"] + (NoArg + (\opt -> return opt { optNumberSections = True })) + "" -- "Number sections in LaTeX" + + , Option "" ["no-tex-ligatures"] + (NoArg + (\opt -> return opt { optTeXLigatures = False })) + "" -- "Don't use tex ligatures for quotes, dashes" + + , Option "" ["listings"] + (NoArg + (\opt -> return opt { optListings = True })) + "" -- "Use listings package for LaTeX code blocks" + + , Option "i" ["incremental"] + (NoArg + (\opt -> return opt { optIncremental = True })) + "" -- "Make list items display incrementally in Slidy/Slideous/S5" + + , Option "" ["slide-level"] + (ReqArg + (\arg opt -> do + case safeRead arg of + Just t | t >= 1 && t <= 6 -> + return opt { optSlideLevel = Just t } + _ -> err 39 $ + "slide level must be a number between 1 and 6") + "NUMBER") + "" -- "Force header level for slides" + + , Option "" ["section-divs"] + (NoArg + (\opt -> return opt { optSectionDivs = True })) + "" -- "Put sections in div tags in HTML" + + , Option "" ["email-obfuscation"] + (ReqArg + (\arg opt -> do + method <- case arg of + "references" -> return ReferenceObfuscation + "javascript" -> return JavascriptObfuscation + "none" -> return NoObfuscation + _ -> err 6 + ("Unknown obfuscation method: " ++ arg) + return opt { optEmailObfuscation = method }) + "none|javascript|references") + "" -- "Method for obfuscating email in HTML" + + , Option "" ["id-prefix"] + (ReqArg + (\arg opt -> return opt { optIdentifierPrefix = arg }) + "STRING") + "" -- "Prefix to add to automatically generated HTML identifiers" + + , Option "T" ["title-prefix"] + (ReqArg + (\arg opt -> do + let newvars = ("title-prefix", arg) : optVariables opt + return opt { optVariables = newvars, + optStandalone = True }) + "STRING") + "" -- "String to prefix to HTML window title" + + , Option "c" ["css"] + (ReqArg + (\arg opt -> do + -- add new link to end, so it is included in proper order + let newvars = optVariables opt ++ [("css",arg)] + return opt { optVariables = newvars, + optStandalone = True }) + "URL") + "" -- "Link to CSS style sheet" + + , Option "" ["reference-odt"] + (ReqArg + (\arg opt -> do + return opt { optReferenceODT = Just arg }) + "FILENAME") + "" -- "Path of custom reference.odt" + + , Option "" ["reference-docx"] + (ReqArg + (\arg opt -> do + return opt { optReferenceDocx = Just arg }) + "FILENAME") + "" -- "Path of custom reference.docx" + + , Option "" ["epub-stylesheet"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + return opt { optEPUBStylesheet = Just text }) + "FILENAME") + "" -- "Path of epub.css" + + , Option "" ["epub-cover-image"] + (ReqArg + (\arg opt -> + return opt { optVariables = + ("epub-cover-image", arg) : optVariables opt }) + "FILENAME") + "" -- "Path of epub cover image" + + , Option "" ["epub-metadata"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + return opt { optEPUBMetadata = text }) + "FILENAME") + "" -- "Path of epub metadata file" + + , Option "" ["epub-embed-font"] + (ReqArg + (\arg opt -> do + return opt{ optEPUBFonts = arg : optEPUBFonts opt }) + "FILE") + "" -- "Directory of fonts to embed" + + , Option "" ["latex-engine"] + (ReqArg + (\arg opt -> do + let b = takeBaseName arg + if (b == "pdflatex" || b == "lualatex" || b == "xelatex") + then return opt { optLaTeXEngine = arg } + else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.") + "PROGRAM") + "" -- "Name of latex program to use in generating PDF" + + , Option "" ["bibliography"] + (ReqArg + (\arg opt -> return opt { optBibliography = (optBibliography opt) ++ [arg] }) + "FILENAME") + "" + + , Option "" ["csl"] + (ReqArg + (\arg opt -> return opt { optCslFile = Just arg }) + "FILENAME") + "" + + , Option "" ["citation-abbreviations"] + (ReqArg + (\arg opt -> return opt { optAbbrevsFile = Just arg }) + "FILENAME") + "" + + , Option "" ["natbib"] + (NoArg + (\opt -> return opt { optCiteMethod = Natbib })) + "" -- "Use natbib cite commands in LaTeX output" + + , Option "" ["biblatex"] + (NoArg + (\opt -> return opt { optCiteMethod = Biblatex })) + "" -- "Use biblatex cite commands in LaTeX output" + + , Option "m" ["latexmathml", "asciimathml"] + (OptArg + (\arg opt -> + return opt { optHTMLMathMethod = LaTeXMathML arg }) + "URL") + "" -- "Use LaTeXMathML script in html output" + + , Option "" ["mathml"] + (OptArg + (\arg opt -> + return opt { optHTMLMathMethod = MathML arg }) + "URL") + "" -- "Use mathml for HTML math" + + , Option "" ["mimetex"] + (OptArg + (\arg opt -> do + let url' = case arg of + Just u -> u ++ "?" + Nothing -> "/cgi-bin/mimetex.cgi?" + return opt { optHTMLMathMethod = WebTeX url' }) + "URL") + "" -- "Use mimetex for HTML math" + + , Option "" ["webtex"] + (OptArg + (\arg opt -> do + let url' = case arg of + Just u -> u + Nothing -> "http://chart.apis.google.com/chart?cht=tx&chl=" + return opt { optHTMLMathMethod = WebTeX url' }) + "URL") + "" -- "Use web service for HTML math" + + , Option "" ["jsmath"] + (OptArg + (\arg opt -> return opt { optHTMLMathMethod = JsMath arg}) + "URL") + "" -- "Use jsMath for HTML math" + + , Option "" ["mathjax"] + (OptArg + (\arg opt -> do + let url' = case arg of + Just u -> u + Nothing -> "http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" + return opt { optHTMLMathMethod = MathJax url'}) + "URL") + "" -- "Use MathJax for HTML math" + + , Option "" ["gladtex"] + (NoArg + (\opt -> return opt { optHTMLMathMethod = GladTeX })) + "" -- "Use gladtex for HTML math" + + , Option "" ["dump-args"] + (NoArg + (\opt -> return opt { optDumpArgs = True })) + "" -- "Print output filename and arguments to stdout." + + , Option "" ["ignore-args"] + (NoArg + (\opt -> return opt { optIgnoreArgs = True })) + "" -- "Ignore command-line arguments." + + , Option "v" ["version"] + (NoArg + (\_ -> do + prg <- getProgName + UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++ + copyrightMessage) + exitWith ExitSuccess )) + "" -- "Print version" + + , Option "h" ["help"] + (NoArg + (\_ -> do + prg <- getProgName + UTF8.hPutStr stdout (usageMessage prg options) + exitWith ExitSuccess )) + "" -- "Show help" + + ] + +readExtension :: String -> IO Extension +readExtension s = case safeRead ('E':'x':'t':'_':map toLower s) of + Just ext -> return ext + Nothing -> err 59 $ "Unknown extension: " ++ s + +-- Returns usage message +usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String +usageMessage programName = usageInfo + (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ + (wrapWords 16 78 $ readers'names) ++ "\nOutput formats: " ++ + (wrapWords 16 78 $ writers'names) ++ "\nOptions:") + where + writers'names = map fst writers + readers'names = map fst readers + +-- Determine default reader based on source file extensions +defaultReaderName :: String -> [FilePath] -> String +defaultReaderName fallback [] = fallback +defaultReaderName fallback (x:xs) = + case takeExtension (map toLower x) of + ".xhtml" -> "html" + ".html" -> "html" + ".htm" -> "html" + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".rst" -> "rst" + ".lhs" -> "markdown+lhs" + ".db" -> "docbook" + ".wiki" -> "mediawiki" + ".textile" -> "textile" + ".native" -> "native" + ".json" -> "json" + _ -> defaultReaderName fallback xs + +-- Returns True if extension of first source is .lhs +lhsExtension :: [FilePath] -> Bool +lhsExtension (x:_) = takeExtension x == ".lhs" +lhsExtension _ = False + +-- Determine default writer based on output file extension +defaultWriterName :: FilePath -> String +defaultWriterName "-" = "html" -- no output file +defaultWriterName x = + case takeExtension (map toLower x) of + "" -> "markdown" -- empty extension + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".context" -> "context" + ".ctx" -> "context" + ".rtf" -> "rtf" + ".rst" -> "rst" + ".s5" -> "s5" + ".native" -> "native" + ".json" -> "json" + ".txt" -> "markdown" + ".text" -> "markdown" + ".md" -> "markdown" + ".markdown" -> "markdown" + ".textile" -> "textile" + ".lhs" -> "markdown+lhs" + ".texi" -> "texinfo" + ".texinfo" -> "texinfo" + ".db" -> "docbook" + ".odt" -> "odt" + ".docx" -> "docx" + ".epub" -> "epub" + ".org" -> "org" + ".asciidoc" -> "asciidoc" + ".pdf" -> "latex" + ".fb2" -> "fb2" + ['.',y] | y `elem` ['1'..'9'] -> "man" + _ -> "html" + +main :: IO () +main = do + + rawArgs <- liftM (map UTF8.decodeArg) getArgs + prg <- getProgName + let compatMode = (prg == "hsmarkdown") + + let (actions, args, errors) = if compatMode + then ([], rawArgs, []) + else getOpt Permute options rawArgs + + unless (null errors) $ + err 2 $ concat $ errors ++ + ["Try " ++ prg ++ " --help for more information."] + + let defaultOpts' = if compatMode + then defaultOpts { optReader = "markdown_strict" + , optWriter = "html" + , optEmailObfuscation = + ReferenceObfuscation } + else defaultOpts + + -- thread option data structure through all supplied option actions + opts <- foldl (>>=) (return defaultOpts') actions + + let Opt { optTabStop = tabStop + , optPreserveTabs = preserveTabs + , optStandalone = standalone + , optReader = readerName + , optWriter = writerName + , optParseRaw = parseRaw + , optVariables = variables + , optTableOfContents = toc + , optTransforms = transforms + , optTemplate = templatePath + , optOutputFile = outputFile + , optNumberSections = numberSections + , optSectionDivs = sectionDivs + , optIncremental = incremental + , optSelfContained = selfContained + , optSmart = smart + , optOldDashes = oldDashes + , optHtml5 = html5 + , optHighlight = highlight + , optHighlightStyle = highlightStyle + , optChapters = chapters + , optHTMLMathMethod = mathMethod + , optReferenceODT = referenceODT + , optReferenceDocx = referenceDocx + , optEPUBStylesheet = epubStylesheet + , optEPUBMetadata = epubMetadata + , optEPUBFonts = epubFonts + , optDumpArgs = dumpArgs + , optIgnoreArgs = ignoreArgs + , optReferenceLinks = referenceLinks + , optWrapText = wrap + , optColumns = columns + , optEmailObfuscation = obfuscationMethod + , optIdentifierPrefix = idPrefix + , optIndentedCodeClasses = codeBlockClasses + , optDataDir = mbDataDir + , optBibliography = reffiles + , optCslFile = mbCsl + , optAbbrevsFile = cslabbrevs + , optCiteMethod = citeMethod + , optListings = listings + , optLaTeXEngine = latexEngine + , optSlideLevel = slideLevel + , optSetextHeaders = setextHeaders + , optAscii = ascii + , optTeXLigatures = texLigatures + } = opts + + when dumpArgs $ + do UTF8.hPutStrLn stdout outputFile + mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args + exitWith ExitSuccess + + let sources = if ignoreArgs then [] else args + + datadir <- case mbDataDir of + Nothing -> E.catch + (liftM Just $ getAppUserDataDirectory "pandoc") + (\e -> let _ = (e :: E.SomeException) + in return Nothing) + Just _ -> return mbDataDir + + -- assign reader and writer based on options and filenames + let readerName' = if null readerName + then let fallback = if any isURI sources + then "html" + else "markdown" + in defaultReaderName fallback sources + else readerName + + let writerName' = if null writerName + then defaultWriterName outputFile + else writerName + + let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" + + let laTeXOutput = "latex" `isPrefixOf` writerName' || + "beamer" `isPrefixOf` writerName' + + when pdfOutput $ do + -- make sure writer is latex or beamer + unless laTeXOutput $ + err 47 $ "cannot produce pdf output with " ++ writerName' ++ " writer" + -- check for latex program + mbLatex <- findExecutable latexEngine + case mbLatex of + Nothing -> err 41 $ + latexEngine ++ " not found. " ++ + latexEngine ++ " is needed for pdf output." + Just _ -> return () + + reader <- case getReader readerName' of + Right r -> return r + Left e -> err 7 e + + let standalone' = standalone || not (isTextFormat writerName') || pdfOutput + + templ <- case templatePath of + _ | not standalone' -> return "" + Nothing -> do + deftemp <- getDefaultTemplate datadir writerName' + case deftemp of + Left e -> throwIO e + Right t -> return t + Just tp -> do + -- strip off extensions + let format = takeWhile (`notElem` "+-") writerName' + let tp' = case takeExtension tp of + "" -> tp <.> format + _ -> tp + E.catch (UTF8.readFile tp') + (\e -> if isDoesNotExistError e + then E.catch + (readDataFile datadir $ + "templates" tp') + (\e' -> let _ = (e' :: E.SomeException) + in throwIO e') + else throwIO e) + + variables' <- case mathMethod of + LaTeXMathML Nothing -> do + s <- readDataFile datadir $ "data" "LaTeXMathML.js" + return $ ("mathml-script", s) : variables + MathML Nothing -> do + s <- readDataFile datadir $ "data""MathMLinHTML.js" + return $ ("mathml-script", s) : variables + _ -> return variables + + variables'' <- if "dzslides" `isPrefixOf` writerName' + then do + dztempl <- readDataFile datadir $ "dzslides" "template.html" + let dzcore = unlines $ dropWhile (not . isPrefixOf "