diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-02-05 21:58:45 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-02-05 21:58:45 +0100 |
commit | 0bbea0cc7650af61870b310ebbf6fb8a9fec09a7 (patch) | |
tree | 2b043d80c75bba4e80968c6e79bc2970158fdaf2 /src/Text/Pandoc | |
parent | f2103e7e7bc77dab00678b6b2cd7752bbbe63f55 (diff) | |
download | pandoc-0bbea0cc7650af61870b310ebbf6fb8a9fec09a7.tar.gz |
Split pandoc.hs into a module, Text.Pandoc.App, and a small program.
The App module provides a function that does a pandoc conversion,
based on option settings. The program (pandoc.hs) now does nothing
more than parse options and pass them to this function, which can
easily be used by other applications (e.g. a GUI wrapper).
The Opt structure has been further simplified.
API changes:
* New exposed module Text.Pandoc.App
* Text.Pandoc.Highlighting has been exposed.
* highlightingStyles has been moved to Text.Pandoc.Highlighting.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/App.hs | 720 | ||||
-rw-r--r-- | src/Text/Pandoc/Highlighting.hs | 14 |
2 files changed, 733 insertions, 1 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs new file mode 100644 index 000000000..e51a45395 --- /dev/null +++ b/src/Text/Pandoc/App.hs @@ -0,0 +1,720 @@ +{-# LANGUAGE CPP, TupleSections, ScopedTypeVariables, PatternGuards #-} +{- +Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.App + Copyright : Copyright (C) 2006-2016 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Does a pandoc conversion based on command-line options. +-} +module Text.Pandoc.App ( + convertWithOpts + , Opt(..) + , defaultOpts + ) where +import Text.Pandoc +import Text.Pandoc.Builder (setMeta) +import Text.Pandoc.PDF (makePDF) +import Text.Pandoc.Walk (walk) +import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, + headerShift, err, openURL ) +import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag ) +import Text.Pandoc.XML ( toEntities ) +import Text.Pandoc.Highlighting (highlightingStyles) +import Text.Pandoc.SelfContained ( makeSelfContained ) +import Text.Pandoc.Process (pipeProcess) +import Skylighting ( Style ) +import System.Environment ( getEnvironment ) +import System.Exit ( ExitCode (..), exitSuccess ) +import System.FilePath +import Data.Char ( toLower ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import System.Directory ( getAppUserDataDirectory, findExecutable, + doesFileExist, Permissions(..), getPermissions ) +import System.IO ( stdout, stderr ) +import System.IO.Error ( isDoesNotExistError ) +import qualified Control.Exception as E +import Control.Exception.Extensible ( throwIO ) +import qualified Text.Pandoc.UTF8 as UTF8 +import Control.Monad (when, unless, (>=>)) +import Data.Maybe (fromMaybe, isNothing, isJust) +import Data.Foldable (foldrM) +import Network.URI (parseURI, isURI, URI(..)) +import qualified Data.ByteString.Lazy as B +import Data.Aeson (eitherDecode', encode) +import Data.Yaml (decode) +import qualified Data.Yaml as Yaml +import qualified Data.Text as T +#ifndef _WINDOWS +import System.Posix.Terminal (queryTerminal) +import System.Posix.IO (stdOutput) +#endif +import Control.Monad.Trans +import Text.Pandoc.Class (withMediaBag, PandocIO, getLog, setVerbosity) + +convertWithOpts :: Opt -> [FilePath] -> IO () +convertWithOpts opts args = do + let outputFile = optOutputFile opts + let filters = optFilters opts + let verbosity = optVerbosity opts + + when (optDumpArgs opts) $ + do UTF8.hPutStrLn stdout outputFile + mapM_ (UTF8.hPutStrLn stdout) args + exitSuccess + + epubStylesheet <- case optEpubStylesheet opts of + Nothing -> return Nothing + Just fp -> Just <$> UTF8.readFile fp + + epubMetadata <- case optEpubMetadata opts of + Nothing -> return Nothing + Just fp -> Just <$> UTF8.readFile fp + + let csscdn = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.6.0/katex.min.css" + let mathMethod = + case (optKaTeXJS opts, optKaTeXStylesheet opts) of + (Nothing, _) -> optHTMLMathMethod opts + (Just js, ss) -> KaTeX js (fromMaybe csscdn ss) + + + -- --bibliography implies -F pandoc-citeproc for backwards compatibility: + let needsCiteproc = isJust (lookup "bibliography" (optMetadata opts)) && + optCiteMethod opts `notElem` [Natbib, Biblatex] && + "pandoc-citeproc" `notElem` map takeBaseName filters + let filters' = if needsCiteproc then "pandoc-citeproc" : filters + else filters + + let sources = case args of + [] -> ["-"] + xs | optIgnoreArgs opts -> ["-"] + | otherwise -> xs + + datadir <- case optDataDir opts of + Nothing -> E.catch + (Just <$> getAppUserDataDirectory "pandoc") + (\e -> let _ = (e :: E.SomeException) + in return Nothing) + Just _ -> return $ optDataDir opts + + -- assign reader and writer based on options and filenames + let readerName = case optReader opts of + Nothing -> defaultReaderName + (if any isURI sources + then "html" + else "markdown") sources + Just x -> map toLower x + + let writerName = case optWriter opts of + Nothing -> defaultWriterName outputFile + Just x -> map toLower x + let format = takeWhile (`notElem` ['+','-']) + $ takeFileName writerName -- in case path to lua script + + let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" + + let laTeXOutput = format `elem` ["latex", "beamer"] + let conTeXtOutput = format == "context" + let html5Output = format == "html5" || format == "html" + + -- disabling the custom writer for now + writer <- if ".lua" `isSuffixOf` format + -- note: use non-lowercased version writerName + then error "custom writers disabled for now" + else case getWriter writerName of + Left e -> err 9 $ + if format == "pdf" + then e ++ + "\nTo create a pdf with pandoc, use " ++ + "the latex or beamer writer and specify\n" ++ + "an output file with .pdf extension " ++ + "(pandoc -t latex -o filename.pdf)." + else e + Right w -> return (w :: Writer PandocIO) + + -- TODO: we have to get the input and the output into the state for + -- the sake of the text2tags reader. + reader <- case getReader readerName of + Right r -> return (r :: Reader PandocIO) + Left e -> err 7 e' + where e' = case readerName of + "pdf" -> e ++ + "\nPandoc can convert to PDF, but not from PDF." + "doc" -> e ++ + "\nPandoc can convert from DOCX, but not from DOC.\nTry using Word to save your DOC file as DOCX, and convert that with pandoc." + _ -> e + + let standalone = optStandalone opts || not (isTextFormat format) || pdfOutput + + templ <- case optTemplate opts of + _ | not standalone -> return Nothing + Nothing -> do + deftemp <- getDefaultTemplate datadir format + case deftemp of + Left e -> throwIO e + Right t -> return (Just t) + Just tp -> do + -- strip off extensions + let tp' = case takeExtension tp of + "" -> tp <.> format + _ -> tp + Just <$> E.catch (UTF8.readFile tp') + (\e -> if isDoesNotExistError e + then E.catch + (readDataFileUTF8 datadir + ("templates" </> tp')) + (\e' -> let _ = (e' :: E.SomeException) + in throwIO e') + else throwIO e) + + let addStringAsVariable varname s vars = return $ (varname, s) : vars + + let addContentsAsVariable varname fp vars = do + s <- UTF8.readFile fp + return $ (varname, s) : vars + + -- note: this reverses the list constructed in option parsing, + -- which in turn was reversed from the command-line order, + -- so we end up with the correct order in the variable list: + let withList _ [] vars = return vars + withList f (x:xs) vars = f x vars >>= withList f xs + + variables <- return (optVariables opts) + >>= + withList (addContentsAsVariable "include-before") + (optIncludeBeforeBody opts) + >>= + withList (addContentsAsVariable "include-after") + (optIncludeAfterBody opts) + >>= + withList (addContentsAsVariable "header-includes") + (optIncludeInHeader opts) + >>= + withList (addStringAsVariable "css") (optCss opts) + >>= + maybe return (addStringAsVariable "title-prefix") (optTitlePrefix opts) + >>= + maybe return (addStringAsVariable "epub-cover-image") + (optEpubCoverImage opts) + >>= + (\vars -> case mathMethod of + LaTeXMathML Nothing -> do + s <- readDataFileUTF8 datadir "LaTeXMathML.js" + return $ ("mathml-script", s) : vars + _ -> return vars) + >>= + (\vars -> if format == "dzslides" + then do + dztempl <- readDataFileUTF8 datadir + ("dzslides" </> "template.html") + let dzline = "<!-- {{{{ dzslides core" + let dzcore = unlines + $ dropWhile (not . (dzline `isPrefixOf`)) + $ lines dztempl + return $ ("dzslides-core", dzcore) : vars + else return vars) + + let sourceURL = case sources of + [] -> Nothing + (x:_) -> case parseURI x of + Just u + | uriScheme u `elem` ["http:","https:"] -> + Just $ show u{ uriQuery = "", + uriFragment = "" } + _ -> Nothing + + let readerOpts = def{ readerStandalone = standalone + , readerParseRaw = optParseRaw opts + , readerColumns = optColumns opts + , readerTabStop = optTabStop opts + , readerIndentedCodeClasses = optIndentedCodeClasses opts + , readerApplyMacros = not laTeXOutput + , readerDefaultImageExtension = + optDefaultImageExtension opts + , readerTrackChanges = optTrackChanges opts + } + + highlightStyle <- lookupHighlightStyle $ optHighlightStyle opts + + let writerOptions = def { writerTemplate = templ, + writerVariables = variables, + writerTabStop = optTabStop opts, + writerTableOfContents = optTableOfContents opts, + writerHTMLMathMethod = mathMethod, + writerIncremental = optIncremental opts, + writerCiteMethod = optCiteMethod opts, + writerNumberSections = optNumberSections opts, + writerNumberOffset = optNumberOffset opts, + writerSectionDivs = optSectionDivs opts, + writerReferenceLinks = optReferenceLinks opts, + writerReferenceLocation = optReferenceLocation opts, + writerDpi = optDpi opts, + writerWrapText = optWrapText opts, + writerColumns = optColumns opts, + writerEmailObfuscation = optEmailObfuscation opts, + writerIdentifierPrefix = optIdentifierPrefix opts, + writerSourceURL = sourceURL, + writerUserDataDir = datadir, + writerHtmlQTags = optHtmlQTags opts, + writerTopLevelDivision = optTopLevelDivision opts, + writerListings = optListings opts, + writerSlideLevel = optSlideLevel opts, + writerHighlightStyle = highlightStyle, + writerSetextHeaders = optSetextHeaders opts, + writerEpubMetadata = epubMetadata, + writerEpubStylesheet = epubStylesheet, + writerEpubFonts = optEpubFonts opts, + writerEpubChapterLevel = optEpubChapterLevel opts, + writerTOCDepth = optTOCDepth opts, + writerReferenceDoc = optReferenceDoc opts, + writerLaTeXArgs = optLaTeXEngineArgs opts + } + + +#ifdef _WINDOWS + let istty = True +#else + istty <- queryTerminal stdOutput +#endif + when (istty && not (isTextFormat format) && outputFile == "-") $ + err 5 $ "Cannot write " ++ format ++ " output to stdout.\n" ++ + "Specify an output file using the -o option." + + + let transforms = case optBaseHeaderLevel opts of + x | x > 1 -> [headerShift (x - 1)] + | otherwise -> [] + + let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" + then 0 + else optTabStop opts) + + readSources :: MonadIO m => [FilePath] -> m String + readSources srcs = convertTabs . intercalate "\n" <$> + mapM readSource srcs + + let runIO' :: PandocIO a -> IO a + runIO' f = do + (res, reports) <- runIOorExplode $ do + setVerbosity verbosity + x <- f + rs <- getLog + return (x, rs) + let isWarning (WARNING, _) = True + isWarning _ = False + when (optFailIfWarnings opts && any isWarning reports) $ + err 3 "Failing because there were warnings." + return res + + let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag) + sourceToDoc sources' = + case reader of + StringReader r + | optFileScope opts || readerName == "json" -> do + pairs <- mapM + (readSource >=> withMediaBag . r readerOpts) sources + return (mconcat (map fst pairs), mconcat (map snd pairs)) + | otherwise -> + readSources sources' >>= withMediaBag . r readerOpts + ByteStringReader r -> do + pairs <- mapM (readFile' >=> + withMediaBag . r readerOpts) sources + return (mconcat (map fst pairs), mconcat (map snd pairs)) + + runIO' $ do + (doc, media) <- sourceToDoc sources + doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=> + return . flip (foldr addMetadata) (optMetadata opts) >=> + applyTransforms transforms >=> + applyFilters datadir filters' [format]) doc + + case writer of + -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile + ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile + StringWriter f + | pdfOutput -> do + -- make sure writer is latex or beamer or context or html5 + unless (laTeXOutput || conTeXtOutput || html5Output) $ + err 47 $ "cannot produce pdf output with " ++ format ++ + " writer" + + let pdfprog = case () of + _ | conTeXtOutput -> "context" + _ | html5Output -> "wkhtmltopdf" + _ -> optLaTeXEngine opts + -- check for pdf creating program + mbPdfProg <- liftIO $ findExecutable pdfprog + when (isNothing mbPdfProg) $ + err 41 $ pdfprog ++ " not found. " ++ + pdfprog ++ " is needed for pdf output." + + res <- makePDF pdfprog f writerOptions verbosity media doc' + case res of + Right pdf -> writeFnBinary outputFile pdf + Left err' -> liftIO $ do + B.hPutStr stderr err' + B.hPut stderr $ B.pack [10] + err 43 "Error producing PDF" + | otherwise -> do + let htmlFormat = format `elem` + ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] + selfcontain = if optSelfContained opts && htmlFormat + then makeSelfContained writerOptions media + else return + handleEntities = if htmlFormat && optAscii opts + then toEntities + else id + output <- f writerOptions doc' + selfcontain (output ++ ['\n' | not standalone]) >>= + writerFn outputFile . handleEntities + +type Transform = Pandoc -> Pandoc + +isTextFormat :: String -> Bool +isTextFormat s = s `notElem` ["odt","docx","epub","epub3"] + +externalFilter :: MonadIO m => FilePath -> [String] -> Pandoc -> m Pandoc +externalFilter f args' d = liftIO $ do + exists <- doesFileExist f + isExecutable <- if exists + then executable <$> getPermissions f + else return True + let (f', args'') = if exists + then case map toLower (takeExtension f) of + _ | isExecutable -> ("." </> f, args') + ".py" -> ("python", f:args') + ".hs" -> ("runhaskell", f:args') + ".pl" -> ("perl", f:args') + ".rb" -> ("ruby", f:args') + ".php" -> ("php", f:args') + ".js" -> ("node", f:args') + _ -> (f, args') + else (f, args') + unless (exists && isExecutable) $ do + mbExe <- findExecutable f' + when (isNothing mbExe) $ + err 83 $ "Error running filter " ++ f ++ ":\n" ++ + "Could not find executable '" ++ f' ++ "'." + env <- getEnvironment + let env' = Just $ ("PANDOC_VERSION", pandocVersion) : env + (exitcode, outbs) <- E.handle filterException $ + pipeProcess env' f' args'' $ encode d + case exitcode of + ExitSuccess -> return $ either error id $ eitherDecode' outbs + ExitFailure ec -> err 83 $ "Error running filter " ++ f ++ "\n" ++ + "Filter returned error status " ++ show ec + where filterException :: E.SomeException -> IO a + filterException e = err 83 $ "Error running filter " ++ f ++ "\n" ++ + show e + +-- | Data structure for command line options. +data Opt = Opt + { optTabStop :: Int -- ^ Number of spaces per tab + , optPreserveTabs :: Bool -- ^ Preserve tabs instead of converting to spaces + , optStandalone :: Bool -- ^ Include header, footer + , optReader :: Maybe String -- ^ Reader format + , optWriter :: Maybe String -- ^ Writer format + , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX + , optTableOfContents :: Bool -- ^ Include table of contents + , optBaseHeaderLevel :: Int -- ^ Base header level + , optTemplate :: Maybe FilePath -- ^ Custom template + , optVariables :: [(String,String)] -- ^ Template variables to set + , optMetadata :: [(String, String)] -- ^ Metadata fields to set + , optOutputFile :: FilePath -- ^ Name of output file + , optNumberSections :: Bool -- ^ Number sections in LaTeX + , optNumberOffset :: [Int] -- ^ Starting number for sections + , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML + , optIncremental :: Bool -- ^ Use incremental lists in Slidy/Slideous/S5 + , optSelfContained :: Bool -- ^ Make HTML accessible offline + , optHtmlQTags :: Bool -- ^ Use <q> tags in HTML + , optHighlightStyle :: Maybe String -- ^ Style to use for highlighted code + , optTopLevelDivision :: TopLevelDivision -- ^ Type of the top-level divisions + , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math + , optReferenceDoc :: Maybe FilePath -- ^ Path of reference doc + , optEpubStylesheet :: Maybe FilePath -- ^ EPUB stylesheet + , optEpubMetadata :: Maybe FilePath -- ^ EPUB metadata + , optEpubFonts :: [FilePath] -- ^ EPUB fonts to embed + , optEpubChapterLevel :: Int -- ^ Header level at which to split chapters + , optEpubCoverImage :: Maybe FilePath -- ^ Cover image for epub + , optTOCDepth :: Int -- ^ Number of levels to include in TOC + , optDumpArgs :: Bool -- ^ Output command-line arguments + , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments + , optVerbosity :: Verbosity -- ^ Verbosity of diagnostic output + , optFailIfWarnings :: Bool -- ^ Fail on warnings + , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , optReferenceLocation :: ReferenceLocation -- ^ location for footnotes and link references in markdown output + , optDpi :: Int -- ^ Dpi + , optWrapText :: WrapOption -- ^ Options for wrapping text + , optColumns :: Int -- ^ Line length in characters + , optFilters :: [FilePath] -- ^ Filters to apply + , optEmailObfuscation :: ObfuscationMethod + , optIdentifierPrefix :: String + , optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks + , optDataDir :: Maybe FilePath + , optCiteMethod :: CiteMethod -- ^ Method to output cites + , optListings :: Bool -- ^ Use listings package for code blocks + , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf + , optLaTeXEngineArgs :: [String] -- ^ Flags to pass to the latex-engine + , optSlideLevel :: Maybe Int -- ^ Header level that creates slides + , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 + , optAscii :: Bool -- ^ Use ascii characters only in html + , optDefaultImageExtension :: String -- ^ Default image extension + , optExtractMedia :: Maybe FilePath -- ^ Path to extract embedded media + , optTrackChanges :: TrackChanges -- ^ Accept or reject MS Word track-changes. + , optFileScope :: Bool -- ^ Parse input files before combining + , optKaTeXStylesheet :: Maybe String -- ^ Path to stylesheet for KaTeX + , optKaTeXJS :: Maybe String -- ^ Path to js file for KaTeX + , optTitlePrefix :: Maybe String -- ^ Prefix for title + , optCss :: [FilePath] -- ^ CSS files to link to + , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before + , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body + , optIncludeInHeader :: [FilePath] -- ^ Files to include in header + } + +-- | Defaults for command-line options. +defaultOpts :: Opt +defaultOpts = Opt + { optTabStop = 4 + , optPreserveTabs = False + , optStandalone = False + , optReader = Nothing + , optWriter = Nothing + , optParseRaw = False + , optTableOfContents = False + , optBaseHeaderLevel = 1 + , optTemplate = Nothing + , optVariables = [] + , optMetadata = [] + , optOutputFile = "-" -- "-" means stdout + , optNumberSections = False + , optNumberOffset = [0,0,0,0,0,0] + , optSectionDivs = False + , optIncremental = False + , optSelfContained = False + , optHtmlQTags = False + , optHighlightStyle = Just "pygments" + , optTopLevelDivision = TopLevelDefault + , optHTMLMathMethod = PlainMath + , optReferenceDoc = Nothing + , optEpubStylesheet = Nothing + , optEpubMetadata = Nothing + , optEpubFonts = [] + , optEpubChapterLevel = 1 + , optEpubCoverImage = Nothing + , optTOCDepth = 3 + , optDumpArgs = False + , optIgnoreArgs = False + , optVerbosity = WARNING + , optFailIfWarnings = False + , optReferenceLinks = False + , optReferenceLocation = EndOfDocument + , optDpi = 96 + , optWrapText = WrapAuto + , optColumns = 72 + , optFilters = [] + , optEmailObfuscation = NoObfuscation + , optIdentifierPrefix = "" + , optIndentedCodeClasses = [] + , optDataDir = Nothing + , optCiteMethod = Citeproc + , optListings = False + , optLaTeXEngine = "pdflatex" + , optLaTeXEngineArgs = [] + , optSlideLevel = Nothing + , optSetextHeaders = True + , optAscii = False + , optDefaultImageExtension = "" + , optExtractMedia = Nothing + , optTrackChanges = AcceptChanges + , optFileScope = False + , optKaTeXStylesheet = Nothing + , optKaTeXJS = Nothing + , optTitlePrefix = Nothing + , optCss = [] + , optIncludeBeforeBody = [] + , optIncludeAfterBody = [] + , optIncludeInHeader = [] + } + +addMetadata :: (String, String) -> Pandoc -> Pandoc +addMetadata (k, v) (Pandoc meta bs) = Pandoc meta' bs + where meta' = case lookupMeta k meta of + Nothing -> setMeta k v' meta + Just (MetaList xs) -> + setMeta k (MetaList (xs ++ [v'])) meta + Just x -> setMeta k (MetaList [x, v']) meta + v' = readMetaValue v + +readMetaValue :: String -> MetaValue +readMetaValue s = case decode (UTF8.fromString s) of + Just (Yaml.String t) -> MetaString $ T.unpack t + Just (Yaml.Bool b) -> MetaBool b + _ -> MetaString s + +-- Determine default reader based on source file extensions +defaultReaderName :: String -> [FilePath] -> String +defaultReaderName fallback [] = fallback +defaultReaderName fallback (x:xs) = + case takeExtension (map toLower x) of + ".xhtml" -> "html" + ".html" -> "html" + ".htm" -> "html" + ".md" -> "markdown" + ".markdown" -> "markdown" + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".rst" -> "rst" + ".org" -> "org" + ".lhs" -> "markdown+lhs" + ".db" -> "docbook" + ".opml" -> "opml" + ".wiki" -> "mediawiki" + ".dokuwiki" -> "dokuwiki" + ".textile" -> "textile" + ".native" -> "native" + ".json" -> "json" + ".docx" -> "docx" + ".t2t" -> "t2t" + ".epub" -> "epub" + ".odt" -> "odt" + ".pdf" -> "pdf" -- so we get an "unknown reader" error + ".doc" -> "doc" -- so we get an "unknown reader" error + _ -> defaultReaderName fallback xs + +-- Determine default writer based on output file extension +defaultWriterName :: FilePath -> String +defaultWriterName "-" = "html" -- no output file +defaultWriterName x = + case takeExtension (map toLower x) of + "" -> "markdown" -- empty extension + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".context" -> "context" + ".ctx" -> "context" + ".rtf" -> "rtf" + ".rst" -> "rst" + ".s5" -> "s5" + ".native" -> "native" + ".json" -> "json" + ".txt" -> "markdown" + ".text" -> "markdown" + ".md" -> "markdown" + ".markdown" -> "markdown" + ".textile" -> "textile" + ".lhs" -> "markdown+lhs" + ".texi" -> "texinfo" + ".texinfo" -> "texinfo" + ".db" -> "docbook" + ".odt" -> "odt" + ".docx" -> "docx" + ".epub" -> "epub" + ".org" -> "org" + ".asciidoc" -> "asciidoc" + ".adoc" -> "asciidoc" + ".pdf" -> "latex" + ".fb2" -> "fb2" + ".opml" -> "opml" + ".icml" -> "icml" + ".tei.xml" -> "tei" + ".tei" -> "tei" + ['.',y] | y `elem` ['1'..'9'] -> "man" + _ -> "html" + +-- Transformations of a Pandoc document post-parsing: + +extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc +extractMedia media dir d = + case [fp | (fp, _, _) <- mediaDirectory media] of + [] -> return d + fps -> do + extractMediaBag True dir media + return $ walk (adjustImagePath dir fps) d + +adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline +adjustImagePath dir paths (Image attr lab (src, tit)) + | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) +adjustImagePath _ _ x = x + +applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc +applyTransforms transforms d = return $ foldr ($) d transforms + + -- First we check to see if a filter is found. If not, and if it's + -- not an absolute path, we check to see whether it's in `userdir/filters`. + -- If not, we leave it unchanged. +expandFilterPath :: MonadIO m => Maybe FilePath -> FilePath -> m FilePath +expandFilterPath mbDatadir fp = liftIO $ do + fpExists <- doesFileExist fp + if fpExists + then return fp + else case mbDatadir of + Just datadir | isRelative fp -> do + let filterPath = (datadir </> "filters" </> fp) + filterPathExists <- doesFileExist filterPath + if filterPathExists + then return filterPath + else return fp + _ -> return fp + +applyFilters :: MonadIO m + => Maybe FilePath -> [FilePath] -> [String] -> Pandoc -> m Pandoc +applyFilters mbDatadir filters args d = do + expandedFilters <- mapM (expandFilterPath mbDatadir) filters + foldrM ($) d $ map (flip externalFilter args) expandedFilters + +readSource :: MonadIO m => FilePath -> m String +readSource "-" = liftIO UTF8.getContents +readSource src = case parseURI src of + Just u | uriScheme u `elem` ["http:","https:"] -> + readURI src + | uriScheme u == "file:" -> + liftIO $ UTF8.readFile (uriPath u) + _ -> liftIO $ UTF8.readFile src + +readURI :: MonadIO m => FilePath -> m String +readURI src = do + res <- liftIO $ openURL src + case res of + Left e -> liftIO $ throwIO e + Right (bs,_) -> return $ UTF8.toString bs + +readFile' :: MonadIO m => FilePath -> m B.ByteString +readFile' "-" = liftIO $ B.getContents +readFile' f = liftIO $ B.readFile f + +writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m () +writeFnBinary "-" = liftIO . B.putStr +writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f) + +writerFn :: MonadIO m => FilePath -> String -> m () +writerFn "-" = liftIO . UTF8.putStr +writerFn f = liftIO . UTF8.writeFile f + +lookupHighlightStyle :: Maybe String -> IO (Maybe Style) +lookupHighlightStyle Nothing = return Nothing +lookupHighlightStyle (Just s) = + case lookup (map toLower s) highlightingStyles of + Just sty -> return (Just sty) + Nothing -> err 68 $ "Unknown highlight-style " ++ s diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 896682389..df060915c 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -28,7 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Exports functions for syntax highlighting. -} -module Text.Pandoc.Highlighting ( languages +module Text.Pandoc.Highlighting ( highlightingStyles + , languages , languagesByExtension , highlight , formatLaTeXInline @@ -57,6 +58,17 @@ import qualified Data.Map as M import Control.Monad import qualified Data.Text as T +highlightingStyles :: [(String, Style)] +highlightingStyles = + [("pygments", pygments), + ("tango", tango), + ("espresso", espresso), + ("zenburn", zenburn), + ("kate", kate), + ("monochrome", monochrome), + ("breezedark", breezeDark), + ("haddock", haddock)] + languages :: [String] languages = [T.unpack (T.toLower (sName s)) | s <- M.elems defaultSyntaxMap] |