From fa00df2b8e31cc6299ef975d8f75dd0f3915a004 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 10 Dec 2016 12:36:09 +0100 Subject: pandoc.hs: moved main loop to beginning of file. --- pandoc.hs | 2501 +++++++++++++++++++++++++++++++------------------------------ 1 file changed, 1252 insertions(+), 1249 deletions(-) diff --git a/pandoc.hs b/pandoc.hs index 7897d68cf..dd58e79ab 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -78,1420 +78,1423 @@ import System.Posix.IO (stdOutput) import Control.Monad.Trans import Text.Pandoc.Class (withMediaBag, PandocIO, getWarnings) -type Transform = Pandoc -> Pandoc +main :: IO () +main = do -copyrightMessage :: String -copyrightMessage = intercalate "\n" [ - "", - "Copyright (C) 2006-2016 John MacFarlane", - "Web: http://pandoc.org", - "This is free software; see the source for copying conditions.", - "There is no warranty, not even for merchantability or fitness", - "for a particular purpose." ] + rawArgs <- map UTF8.decodeArg <$> getArgs + prg <- getProgName -compileInfo :: String -compileInfo = - "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++ - VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting + let (actions, args, errors) = getOpt Permute options rawArgs --- | Converts a list of strings into a single string with the items printed as --- comma separated words in lines with a maximum line length. -wrapWords :: Int -> Int -> [String] -> String -wrapWords indent c = wrap' (c - indent) (c - indent) - where - wrap' _ _ [] = "" - wrap' cols remaining (x:xs) - | remaining == cols = - x ++ wrap' cols (remaining - length x) xs - | (length x + 1) > remaining = - ",\n" ++ replicate indent ' ' ++ x ++ - wrap' cols (cols - length x) xs - | otherwise = - ", " ++ x ++ - wrap' cols (remaining - length x - 2) xs + unless (null errors) $ + err 2 $ concat $ errors ++ + ["Try " ++ prg ++ " --help for more information."] -isTextFormat :: String -> Bool -isTextFormat s = s `notElem` ["odt","docx","epub","epub3"] + -- thread option data structure through all supplied option actions + opts <- foldl (>>=) (return defaultOpts) actions + convertWithOpts opts args -externalFilter :: MonadIO m => FilePath -> [String] -> Pandoc -> m Pandoc -externalFilter f args' d = liftIO $ do - exists <- doesFileExist f - isExecutable <- if exists - then executable <$> getPermissions f - else return True - let (f', args'') = if exists - then case map toLower (takeExtension f) of - _ | isExecutable -> ("." f, args') - ".py" -> ("python", f:args') - ".hs" -> ("runhaskell", f:args') - ".pl" -> ("perl", f:args') - ".rb" -> ("ruby", f:args') - ".php" -> ("php", f:args') - ".js" -> ("node", f:args') - _ -> (f, args') - else (f, args') - unless (exists && isExecutable) $ do - mbExe <- findExecutable f' - when (isNothing mbExe) $ - err 83 $ "Error running filter " ++ f ++ ":\n" ++ - "Could not find executable '" ++ f' ++ "'." - env <- getEnvironment - let env' = Just $ ("PANDOC_VERSION", pandocVersion) : env - (exitcode, outbs) <- E.handle filterException $ - pipeProcess env' f' args'' $ encode d - case exitcode of - ExitSuccess -> return $ either error id $ eitherDecode' outbs - ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++ - "Filter returned error status " ++ show ec - where filterException :: E.SomeException -> IO a - filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ - show e +convertWithOpts :: Opt -> [FilePath] -> IO () +convertWithOpts opts args = do + let Opt { optTabStop = tabStop + , optPreserveTabs = preserveTabs + , optStandalone = standalone + , optReader = readerName + , optWriter = writerName + , optParseRaw = parseRaw + , optVariables = variables + , optMetadata = metadata + , optTableOfContents = toc + , optTransforms = transforms + , optTemplate = templatePath + , optOutputFile = outputFile + , optNumberSections = numberSections + , optNumberOffset = numberFrom + , optSectionDivs = sectionDivs + , optIncremental = incremental + , optSelfContained = selfContained + , optSmart = smart + , optOldDashes = oldDashes + , optHtml5 = html5 + , optHtmlQTags = htmlQTags + , optHighlight = highlight + , optHighlightStyle = highlightStyle + , optTopLevelDivision = topLevelDivision + , optHTMLMathMethod = mathMethod' + , optReferenceDoc = referenceDoc + , optEpubStylesheet = epubStylesheet + , optEpubMetadata = epubMetadata + , optEpubFonts = epubFonts + , optEpubChapterLevel = epubChapterLevel + , optTOCDepth = epubTOCDepth + , optDumpArgs = dumpArgs + , optIgnoreArgs = ignoreArgs + , optVerbose = verbose + , optQuiet = quiet + , optFailIfWarnings = failIfWarnings + , optReferenceLinks = referenceLinks + , optReferenceLocation = referenceLocation + , optDpi = dpi + , optWrapText = wrap + , optColumns = columns + , optFilters = filters + , optEmailObfuscation = obfuscationMethod + , optIdentifierPrefix = idPrefix + , optIndentedCodeClasses = codeBlockClasses + , optDataDir = mbDataDir + , optCiteMethod = citeMethod + , optListings = listings + , optLaTeXEngine = latexEngine + , optLaTeXEngineArgs = latexEngineArgs + , optSlideLevel = slideLevel + , optSetextHeaders = setextHeaders + , optAscii = ascii + , optTeXLigatures = texLigatures + , optDefaultImageExtension = defaultImageExtension + , optExtractMedia = mbExtractMedia + , optTrace = trace + , optTrackChanges = trackChanges + , optFileScope = fileScope + , optKaTeXStylesheet = katexStylesheet + , optKaTeXJS = katexJS + } = opts -highlightingStyles :: [(String, Style)] -highlightingStyles = - [("pygments", pygments), - ("tango", tango), - ("espresso", espresso), - ("zenburn", zenburn), - ("kate", kate), - ("monochrome", monochrome), - ("breezedark", breezeDark), - ("haddock", haddock)] + when dumpArgs $ + do UTF8.hPutStrLn stdout outputFile + mapM_ (UTF8.hPutStrLn stdout) args + exitSuccess --- | Data structure for command line options. -data Opt = Opt - { optTabStop :: Int -- ^ Number of spaces per tab - , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces - , optStandalone :: Bool -- ^ Include header, footer - , optReader :: String -- ^ Reader format - , optWriter :: String -- ^ Writer format - , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX - , optTableOfContents :: Bool -- ^ Include table of contents - , optTransforms :: [Transform] -- ^ Doc transforms to apply - , optTemplate :: Maybe FilePath -- ^ Custom template - , optVariables :: [(String,String)] -- ^ Template variables to set - , optMetadata :: M.Map String MetaValue -- ^ Metadata fields to set - , optOutputFile :: String -- ^ Name of output file - , optNumberSections :: Bool -- ^ Number sections in LaTeX - , optNumberOffset :: [Int] -- ^ Starting number for sections - , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 - , optSelfContained :: Bool -- ^ Make HTML accessible offline - , optSmart :: Bool -- ^ Use smart typography - , optOldDashes :: Bool -- ^ Parse dashes like pandoc <=1.8.2.1 - , optHtml5 :: Bool -- ^ Produce HTML5 in HTML - , optHtmlQTags :: Bool -- ^ Use tags in HTML - , optHighlight :: Bool -- ^ Highlight source code - , optHighlightStyle :: Style -- ^ Style to use for highlighted code - , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions - , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math - , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc - , optEpubStylesheet :: Maybe String -- ^ EPUB stylesheet - , optEpubMetadata :: String -- ^ EPUB metadata - , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed - , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters - , optTOCDepth :: Int -- ^ Number of levels to include in TOC - , optDumpArgs :: Bool -- ^ Output command-line arguments - , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments - , optVerbose :: Bool -- ^ Verbose diagnostic output - , optQuiet :: Bool -- ^ Suppress warnings - , optFailIfWarnings :: Bool -- ^ Fail on warnings - , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output - , optDpi :: Int -- ^ Dpi - , optWrapText :: WrapOption -- ^ Options for wrapping text - , optColumns :: Int -- ^ Line length in characters - , optFilters :: [FilePath] -- ^ Filters to apply - , optEmailObfuscation :: ObfuscationMethod - , optIdentifierPrefix :: String - , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks - , optDataDir :: Maybe FilePath - , optCiteMethod :: CiteMethod -- ^ Method to output cites - , optListings :: Bool -- ^ Use listings package for code blocks - , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf - , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine - , optSlideLevel :: Maybe Int -- ^ Header level that creates slides - , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 - , optAscii :: Bool -- ^ Use ascii characters only in html - , optTeXLigatures :: Bool -- ^ Use TeX ligatures for quotes/dashes - , optDefaultImageExtension :: String -- ^ Default image extension - , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media - , optTrace :: Bool -- ^ Print debug information - , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. - , optFileScope :: Bool -- ^ Parse input files before combining - , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX - , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX - } + let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" + let mathMethod = + case (katexJS, katexStylesheet) of + (Nothing, _) -> mathMethod' + (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) --- | Defaults for command-line options. -defaultOpts :: Opt -defaultOpts = Opt - { optTabStop = 4 - , optPreserveTabs = False - , optStandalone = False - , optReader = "" -- null for default reader - , optWriter = "" -- null for default writer - , optParseRaw = False - , optTableOfContents = False - , optTransforms = [] - , optTemplate = Nothing - , optVariables = [] - , optMetadata = M.empty - , optOutputFile = "-" -- "-" means stdout - , optNumberSections = False - , optNumberOffset = [0,0,0,0,0,0] - , optSectionDivs = False - , optIncremental = False - , optSelfContained = False - , optSmart = False - , optOldDashes = False - , optHtml5 = False - , optHtmlQTags = False - , optHighlight = True - , optHighlightStyle = pygments - , optTopLevelDivision = TopLevelDefault - , optHTMLMathMethod = PlainMath - , optReferenceDoc = Nothing - , optEpubStylesheet = Nothing - , optEpubMetadata = "" - , optEpubFonts = [] - , optEpubChapterLevel = 1 - , optTOCDepth = 3 - , optDumpArgs = False - , optIgnoreArgs = False - , optVerbose = False - , optQuiet = False - , optFailIfWarnings = False - , optReferenceLinks = False - , optReferenceLocation = EndOfDocument - , optDpi = 96 - , optWrapText = WrapAuto - , optColumns = 72 - , optFilters = [] - , optEmailObfuscation = NoObfuscation - , optIdentifierPrefix = "" - , optIndentedCodeClasses = [] - , optDataDir = Nothing - , optCiteMethod = Citeproc - , optListings = False - , optLaTeXEngine = "pdflatex" - , optLaTeXEngineArgs = [] - , optSlideLevel = Nothing - , optSetextHeaders = True - , optAscii = False - , optTeXLigatures = True - , optDefaultImageExtension = "" - , optExtractMedia = Nothing - , optTrace = False - , optTrackChanges = AcceptChanges - , optFileScope = False - , optKaTeXStylesheet = Nothing - , optKaTeXJS = Nothing - } --- | A list of functions, each transforming the options data structure --- in response to a command-line option. -options :: [OptDescr (Opt -> IO Opt)] -options = - [ Option "fr" ["from","read"] - (ReqArg - (\arg opt -> return opt { optReader = arg }) - "FORMAT") - "" + -- --bibliography implies -F pandoc-citeproc for backwards compatibility: + let needsCiteproc = isJust (M.lookup "bibliography" (optMetadata opts)) && + optCiteMethod opts `notElem` [Natbib, Biblatex] && + "pandoc-citeproc" `notElem` map takeBaseName filters + let filters' = if needsCiteproc then "pandoc-citeproc" : filters + else filters - , Option "tw" ["to","write"] - (ReqArg - (\arg opt -> return opt { optWriter = arg }) - "FORMAT") - "" + let sources = case args of + [] -> ["-"] + xs | ignoreArgs -> ["-"] + | otherwise -> xs - , Option "o" ["output"] - (ReqArg - (\arg opt -> return opt { optOutputFile = arg }) - "FILENAME") - "" -- "Name of output file" + datadir <- case mbDataDir of + Nothing -> E.catch + (Just <$> getAppUserDataDirectory "pandoc") + (\e -> let _ = (e :: E.SomeException) + in return Nothing) + Just _ -> return mbDataDir - , Option "" ["data-dir"] - (ReqArg - (\arg opt -> return opt { optDataDir = Just arg }) - "DIRECTORY") -- "Directory containing pandoc data files." - "" + -- assign reader and writer based on options and filenames + let readerName' = case map toLower readerName of + [] -> defaultReaderName + (if any isURI sources + then "html" + else "markdown") sources + "html4" -> "html" + x -> x - , Option "R" ["parse-raw"] - (NoArg - (\opt -> return opt { optParseRaw = True })) - "" -- "Parse untranslatable HTML codes and LaTeX environments as raw" + let writerName' = case map toLower writerName of + [] -> defaultWriterName outputFile + "epub2" -> "epub" + "html4" -> "html" + x -> x + let format = takeWhile (`notElem` ['+','-']) + $ takeFileName writerName' -- in case path to lua script - , Option "S" ["smart"] - (NoArg - (\opt -> return opt { optSmart = True })) - "" -- "Use smart quotes, dashes, and ellipses" + let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" - , Option "" ["old-dashes"] - (NoArg - (\opt -> return opt { optSmart = True - , optOldDashes = True })) - "" -- "Use smart quotes, dashes, and ellipses" + let laTeXOutput = format `elem` ["latex", "beamer"] + let conTeXtOutput = format == "context" + let html5Output = format == "html5" - , Option "" ["base-header-level"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t > 0 -> do - let oldTransforms = optTransforms opt - let shift = t - 1 - return opt{ optTransforms = - headerShift shift : oldTransforms } - _ -> err 19 - "base-header-level must be a number > 0") - "NUMBER") - "" -- "Headers base level" + let laTeXInput = "latex" `isPrefixOf` readerName' || + "beamer" `isPrefixOf` readerName' - , Option "" ["indented-code-classes"] - (ReqArg - (\arg opt -> return opt { optIndentedCodeClasses = words $ - map (\c -> if c == ',' then ' ' else c) arg }) - "STRING") - "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" - , Option "F" ["filter"] - (ReqArg - (\arg opt -> return opt { optFilters = arg : optFilters opt }) - "PROGRAM") - "" -- "External JSON filter" + -- disabling the custom writer for now + writer <- if ".lua" `isSuffixOf` format + -- note: use non-lowercased version writerName + then error "custom writers disabled for now" + else case getWriter writerName' of + Left e -> err 9 $ + if format == "pdf" + then e ++ + "\nTo create a pdf with pandoc, use " ++ + "the latex or beamer writer and specify\n" ++ + "an output file with .pdf extension " ++ + "(pandoc -t latex -o filename.pdf)." + else e + Right w -> return (w :: Writer PandocIO) - , Option "" ["normalize"] - (NoArg - (\opt -> return opt { optTransforms = - normalize : optTransforms opt } )) - "" -- "Normalize the Pandoc AST" + -- TODO: we have to get the input and the output into the state for + -- the sake of the text2tags reader. + reader <- case getReader readerName' of + Right r -> return (r :: Reader PandocIO) + Left e -> err 7 e' + where e' = case readerName' of + "pdf" -> e ++ + "\nPandoc can convert to PDF, but not from PDF." + "doc" -> e ++ + "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." + _ -> e - , Option "p" ["preserve-tabs"] - (NoArg - (\opt -> return opt { optPreserveTabs = True })) - "" -- "Preserve tabs instead of converting to spaces" + let standalone' = standalone || not (isTextFormat format) || pdfOutput - , Option "" ["tab-stop"] - (ReqArg - (\arg opt -> - case safeRead arg of - Just t | t > 0 -> return opt { optTabStop = t } - _ -> err 31 - "tab-stop must be a number greater than 0") - "NUMBER") - "" -- "Tab stop (default 4)" + templ <- case templatePath of + _ | not standalone' -> return Nothing + Nothing -> do + deftemp <- getDefaultTemplate datadir format + case deftemp of + Left e -> throwIO e + Right t -> return (Just t) + Just tp -> do + -- strip off extensions + let tp' = case takeExtension tp of + "" -> tp <.> format + _ -> tp + Just <$> E.catch (UTF8.readFile tp') + (\e -> if isDoesNotExistError e + then E.catch + (readDataFileUTF8 datadir + ("templates" tp')) + (\e' -> let _ = (e' :: E.SomeException) + in throwIO e') + else throwIO e) + + variables' <- case mathMethod of + LaTeXMathML Nothing -> do + s <- readDataFileUTF8 datadir "LaTeXMathML.js" + return $ ("mathml-script", s) : variables + MathML Nothing -> do + s <- readDataFileUTF8 datadir "MathMLinHTML.js" + return $ ("mathml-script", s) : variables + _ -> return variables - , Option "" ["track-changes"] - (ReqArg - (\arg opt -> do - action <- case arg of - "accept" -> return AcceptChanges - "reject" -> return RejectChanges - "all" -> return AllChanges - _ -> err 6 - ("Unknown option for track-changes: " ++ arg) - return opt { optTrackChanges = action }) - "accept|reject|all") - "" -- "Accepting or reject MS Word track-changes."" + variables'' <- if format == "dzslides" + then do + dztempl <- readDataFileUTF8 datadir + ("dzslides" "template.html") + let dzline = "