diff options
author | schrieveslaach <schrieveslaach@online.de> | 2017-06-12 15:52:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-12 15:52:29 +0200 |
commit | 635f299b441e238ccd34e3ad61c5e36f0ca30067 (patch) | |
tree | 11cfc34402975bad208f9a48d075fe2ace959e70 /src/Text/Pandoc/App.hs | |
parent | 181c56d4003aa83abed23b95a452c4890aa3797c (diff) | |
parent | 23f3c2d7b4796d1af742a74999ce67924bf2abb3 (diff) | |
download | pandoc-635f299b441e238ccd34e3ad61c5e36f0ca30067.tar.gz |
Merge branch 'master' into textcolor-support
Diffstat (limited to 'src/Text/Pandoc/App.hs')
-rw-r--r-- | src/Text/Pandoc/App.hs | 207 |
1 files changed, 126 insertions, 81 deletions
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index c38ebdd84..19066e8b7 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1,8 +1,9 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.App - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2017 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> @@ -40,45 +41,50 @@ module Text.Pandoc.App ( import Control.Applicative ((<|>)) import qualified Control.Exception as E import Control.Monad +import Control.Monad.Except (throwError) import Control.Monad.Trans -import Data.Aeson (eitherDecode', encode) +import Data.Monoid +import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions, eitherDecode', + encode, genericToEncoding) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import Data.Char (toLower, toUpper) -import qualified Data.Set as Set import Data.Foldable (foldrM) import Data.List (intercalate, isPrefixOf, isSuffixOf, sort) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) +import qualified Data.Set as Set +import Data.Text (Text) import qualified Data.Text as T import Data.Yaml (decode) import qualified Data.Yaml as Yaml -import Network.URI (URI (..), isURI, parseURI) +import GHC.Generics +import Network.URI (URI (..), parseURI) import Paths_pandoc (getDataDir) import Skylighting (Style, Syntax (..), defaultSyntaxMap, parseTheme) -import Skylighting.Parser (missingIncludes, parseSyntaxDefinition, - addSyntaxDefinition) +import Skylighting.Parser (addSyntaxDefinition, missingIncludes, + parseSyntaxDefinition) import System.Console.GetOpt import System.Directory (Permissions (..), doesFileExist, findExecutable, getAppUserDataDirectory, getPermissions) import System.Environment (getArgs, getEnvironment, getProgName) import System.Exit (ExitCode (..), exitSuccess) import System.FilePath -import System.IO (stdout) +import System.IO (nativeNewline, stdout) +import qualified System.IO as IO (Newline (..)) import System.IO.Error (isDoesNotExistError) import Text.Pandoc import Text.Pandoc.Builder (setMeta) -import Text.Pandoc.Class (PandocIO, getLog, withMediaBag) +import Text.Pandoc.Class (PandocIO, extractMedia, fillMediaBag, getLog, + setResourcePath, withMediaBag) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Lua ( runLuaFilter ) -import Text.Pandoc.MediaBag (MediaBag, extractMediaBag, mediaDirectory) +import Text.Pandoc.Lua (runLuaFilter) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.Process (pipeProcess) -import Text.Pandoc.SelfContained (makeSelfContained, makeDataURI) -import Text.Pandoc.Shared (headerShift, openURL, readDataFile, +import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) +import Text.Pandoc.Shared (headerShift, isURI, openURL, readDataFile, readDataFileUTF8, safeRead, tabFilter) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (toEntities) import Text.Printf #ifndef _WINDOWS @@ -86,6 +92,12 @@ import System.Posix.IO (stdOutput) import System.Posix.Terminal (queryTerminal) #endif +data LineEnding = LF | CRLF | Native deriving (Show, Generic) + +instance ToJSON LineEnding where + toEncoding = genericToEncoding defaultOptions +instance FromJSON LineEnding + parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt parseOptions options' defaults = do rawArgs <- map UTF8.decodeArg <$> getArgs @@ -171,7 +183,7 @@ convertWithOpts opts = do -- disabling the custom writer for now writer <- if ".lua" `isSuffixOf` format -- note: use non-lowercased version writerName - then return (StringWriter + then return (TextWriter (\o d -> liftIO $ writeCustom writerName o d) :: Writer PandocIO) else case getWriter writerName of @@ -233,10 +245,9 @@ convertWithOpts opts = do withList f (x:xs) vars = f x vars >>= withList f xs variables <- - return (("outputfile", optOutputFile opts) : optVariables opts) - >>= + withList (addStringAsVariable "sourcefile") - (reverse $ optInputFiles opts) + (reverse $ optInputFiles opts) (("outputfile", optOutputFile opts) : optVariables opts) -- we reverse this list because, unlike -- the other option lists here, it is -- not reversed when parsed from CLI arguments. @@ -372,8 +383,8 @@ convertWithOpts opts = do then 0 else optTabStop opts) - readSources :: (Functor m, MonadIO m) => [FilePath] -> m String - readSources srcs = convertTabs . intercalate "\n" <$> + readSources :: [FilePath] -> PandocIO Text + readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> mapM readSource srcs let runIO' :: PandocIO a -> IO a @@ -391,42 +402,47 @@ convertWithOpts opts = do E.throwIO PandocFailOnWarningError return res - let sourceToDoc :: [FilePath] -> PandocIO (Pandoc, MediaBag) + let sourceToDoc :: [FilePath] -> PandocIO Pandoc sourceToDoc sources' = case reader of - StringReader r - | optFileScope opts || readerName == "json" -> do - pairs <- mapM - (readSource >=> withMediaBag . r readerOpts) sources - return (mconcat (map fst pairs), mconcat (map snd pairs)) + TextReader r + | optFileScope opts || readerName == "json" -> + mconcat <$> mapM (readSource >=> r readerOpts) sources | otherwise -> - readSources sources' >>= withMediaBag . r readerOpts - ByteStringReader r -> do - pairs <- mapM (readFile' >=> - withMediaBag . r readerOpts) sources - return (mconcat (map fst pairs), mconcat (map snd pairs)) + readSources sources' >>= r readerOpts + ByteStringReader r -> + mconcat <$> mapM (readFile' >=> r readerOpts) sources metadata <- if format == "jats" && - lookup "csl" (optMetadata opts) == Nothing && - lookup "citation-style" (optMetadata opts) == Nothing + isNothing (lookup "csl" (optMetadata opts)) && + isNothing (lookup "citation-style" (optMetadata opts)) then do jatsCSL <- readDataFile datadir "jats.csl" let jatsEncoded = makeDataURI ("application/xml", jatsCSL) return $ ("csl", jatsEncoded) : optMetadata opts else return $ optMetadata opts + let eol = case optEol opts of + CRLF -> IO.CRLF + LF -> IO.LF + Native -> nativeNewline + runIO' $ do - (doc, media) <- sourceToDoc sources - doc' <- (maybe return (extractMedia media) (optExtractMedia opts) >=> - return . flip (foldr addMetadata) metadata >=> - applyTransforms transforms >=> - applyLuaFilters datadir (optLuaFilters opts) [format] >=> - applyFilters datadir filters' [format]) doc + setResourcePath (optResourcePath opts) + (doc, media) <- withMediaBag $ sourceToDoc sources >>= + ( (if isJust (optExtractMedia opts) + then fillMediaBag (writerSourceURL writerOptions) + else return) + >=> maybe return extractMedia (optExtractMedia opts) + >=> return . flip (foldr addMetadata) metadata + >=> applyTransforms transforms + >=> applyLuaFilters datadir (optLuaFilters opts) [format] + >=> applyFilters datadir filters' [format] + ) case writer of - -- StringWriter f -> f writerOptions doc' >>= writerFn outputFile - ByteStringWriter f -> f writerOptions doc' >>= writeFnBinary outputFile - StringWriter f + ByteStringWriter f -> f writerOptions doc >>= writeFnBinary outputFile + TextWriter f | pdfOutput -> do -- make sure writer is latex, beamer, context, html5 or ms unless (laTeXOutput || conTeXtOutput || html5Output || @@ -445,7 +461,7 @@ convertWithOpts opts = do when (isNothing mbPdfProg) $ liftIO $ E.throwIO $ PandocPDFProgramNotFoundError pdfprog - res <- makePDF pdfprog f writerOptions verbosity media doc' + res <- makePDF pdfprog f writerOptions verbosity media doc case res of Right pdf -> writeFnBinary outputFile pdf Left err' -> liftIO $ @@ -453,18 +469,23 @@ convertWithOpts opts = do | otherwise -> do let htmlFormat = format `elem` ["html","html4","html5","s5","slidy","slideous","dzslides","revealjs"] - selfcontain = if optSelfContained opts && htmlFormat - then makeSelfContained writerOptions - else return handleEntities = if (htmlFormat || format == "docbook4" || format == "docbook5" || format == "docbook") && optAscii opts then toEntities else id - output <- f writerOptions doc' - selfcontain (output ++ ['\n' | not standalone]) >>= - writerFn outputFile . handleEntities + addNl = if standalone + then id + else (<> T.singleton '\n') + output <- (addNl . handleEntities) <$> f writerOptions doc + writerFn eol outputFile =<< + if optSelfContained opts && htmlFormat + -- TODO not maximally efficient; change type + -- of makeSelfContained so it works w/ Text + then T.pack <$> makeSelfContained writerOptions + (T.unpack output) + else return output type Transform = Pandoc -> Pandoc @@ -568,7 +589,13 @@ data Opt = Opt , optIncludeBeforeBody :: [FilePath] -- ^ Files to include before , optIncludeAfterBody :: [FilePath] -- ^ Files to include after body , optIncludeInHeader :: [FilePath] -- ^ Files to include in header - } + , optResourcePath :: [FilePath] -- ^ Path to search for images etc + , optEol :: LineEnding -- ^ Style of line-endings to use + } deriving (Generic, Show) + +instance ToJSON Opt where + toEncoding = genericToEncoding defaultOptions +instance FromJSON Opt -- | Defaults for command-line options. defaultOpts :: Opt @@ -636,6 +663,8 @@ defaultOpts = Opt , optIncludeBeforeBody = [] , optIncludeAfterBody = [] , optIncludeInHeader = [] + , optResourcePath = ["."] + , optEol = Native } addMetadata :: (String, String) -> Pandoc -> Pandoc @@ -728,19 +757,6 @@ defaultWriterName x = -- Transformations of a Pandoc document post-parsing: -extractMedia :: MonadIO m => MediaBag -> FilePath -> Pandoc -> m Pandoc -extractMedia media dir d = - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - extractMediaBag True dir media - return $ walk (adjustImagePath dir fps) d - -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | src `elem` paths = Image attr lab (dir ++ "/" ++ src, tit) -adjustImagePath _ _ x = x - applyTransforms :: Monad m => [Transform] -> Pandoc -> m Pandoc applyTransforms transforms d = return $ foldr ($) d transforms @@ -773,17 +789,23 @@ applyFilters mbDatadir filters args d = do expandedFilters <- mapM (expandFilterPath mbDatadir) filters foldrM ($) d $ map (flip externalFilter args) expandedFilters -readSource :: MonadIO m => FilePath -> m String -readSource "-" = liftIO UTF8.getContents +readSource :: FilePath -> PandocIO Text +readSource "-" = liftIO (UTF8.toText <$> BS.getContents) readSource src = case parseURI src of Just u | uriScheme u `elem` ["http:","https:"] -> readURI src | uriScheme u == "file:" -> - liftIO $ UTF8.readFile (uriPath u) - _ -> liftIO $ UTF8.readFile src - -readURI :: MonadIO m => FilePath -> m String -readURI src = liftIO $ (UTF8.toString . fst) <$> openURL src + liftIO $ UTF8.toText <$> + BS.readFile (uriPath u) + _ -> liftIO $ UTF8.toText <$> + BS.readFile src + +readURI :: FilePath -> PandocIO Text +readURI src = do + res <- liftIO $ openURL src + case res of + Left e -> throwError $ PandocHttpError src e + Right (contents, _) -> return $ UTF8.toText contents readFile' :: MonadIO m => FilePath -> m B.ByteString readFile' "-" = liftIO B.getContents @@ -793,9 +815,10 @@ writeFnBinary :: MonadIO m => FilePath -> B.ByteString -> m () writeFnBinary "-" = liftIO . B.putStr writeFnBinary f = liftIO . B.writeFile (UTF8.encodePath f) -writerFn :: MonadIO m => FilePath -> String -> m () -writerFn "-" = liftIO . UTF8.putStr -writerFn f = liftIO . UTF8.writeFile f +writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m () +-- TODO this implementation isn't maximally efficient: +writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack +writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack lookupHighlightStyle :: Maybe String -> IO (Maybe Style) lookupHighlightStyle Nothing = return Nothing @@ -968,6 +991,19 @@ options = "NUMBER") "" -- "Dpi (default 96)" + , Option "" ["eol"] + (ReqArg + (\arg opt -> + case toLower <$> arg of + "crlf" -> return opt { optEol = CRLF } + "lf" -> return opt { optEol = LF } + "native" -> return opt { optEol = Native } + -- mac-syntax (cr) is not supported in ghc-base. + _ -> E.throwIO $ PandocOptionError + "--eol must be crlf, lf, or native") + "crlf|lf|native") + "" -- "EOL (default OS-dependent)" + , Option "" ["wrap"] (ReqArg (\arg opt -> @@ -1046,6 +1082,14 @@ options = "FILE") "" -- "File to include after document body" + , Option "" ["resource-path"] + (ReqArg + (\arg opt -> return opt { optResourcePath = + splitSearchPath arg }) + "SEARCHPATH") + "" -- "Paths to search for images and other resources" + + , Option "" ["self-contained"] (NoArg (\opt -> return opt { optSelfContained = True, @@ -1388,8 +1432,8 @@ options = map ("--" ++) longs let allopts = unwords (concatMap optnames options) UTF8.hPutStrLn stdout $ printf tpl allopts - (unwords readers'names) - (unwords writers'names) + (unwords readersNames) + (unwords writersNames) (unwords $ map fst highlightingStyles) ddir exitSuccess )) @@ -1398,14 +1442,14 @@ options = , Option "" ["list-input-formats"] (NoArg (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) readers'names + mapM_ (UTF8.hPutStrLn stdout) readersNames exitSuccess )) "" , Option "" ["list-output-formats"] (NoArg (\_ -> do - mapM_ (UTF8.hPutStrLn stdout) writers'names + mapM_ (UTF8.hPutStrLn stdout) writersNames exitSuccess )) "" @@ -1509,14 +1553,15 @@ uppercaseFirstLetter :: String -> String uppercaseFirstLetter (c:cs) = toUpper c : cs uppercaseFirstLetter [] = [] -readers'names :: [String] -readers'names = sort (map fst (readers :: [(String, Reader PandocIO)])) +readersNames :: [String] +readersNames = sort (map fst (readers :: [(String, Reader PandocIO)])) -writers'names :: [String] -writers'names = sort (map fst (writers :: [(String, Writer PandocIO)])) +writersNames :: [String] +writersNames = sort (map fst (writers :: [(String, Writer PandocIO)])) splitField :: String -> (String, String) splitField s = case break (`elem` ":=") s of (k,_:v) -> (k,v) (k,[]) -> (k,"true") + |