aboutsummaryrefslogtreecommitdiff
path: root/src/pandoc.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/pandoc.hs')
-rw-r--r--src/pandoc.hs250
1 files changed, 122 insertions, 128 deletions
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 2f85906d5..63a0df51a 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -33,7 +33,7 @@ module Main where
import Text.Pandoc
import Text.Pandoc.PDF (tex2pdf)
import Text.Pandoc.Readers.LaTeX (handleIncludes)
-import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile,
+import Text.Pandoc.Shared ( tabFilter, readDataFile, safeRead,
headerShift, findDataFile, normalize, err, warn )
import Text.Pandoc.XML ( toEntities, fromEntities )
import Text.Pandoc.SelfContained ( makeSelfContained )
@@ -44,10 +44,11 @@ import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath
import System.Console.GetOpt
import Data.Char ( toLower )
-import Data.List ( intercalate, isSuffixOf, isPrefixOf )
+import Data.List ( intercalate, isPrefixOf )
import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
import System.IO ( stdout )
import System.IO.Error ( isDoesNotExistError )
+import qualified Control.Exception as E
import Control.Exception.Extensible ( throwIO )
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Text.CSL as CSL
@@ -56,7 +57,7 @@ import Control.Monad (when, unless, liftM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
import Network.URI (parseURI, isURI, URI(..))
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Lazy.UTF8 (toString )
+import Data.ByteString.Lazy.UTF8 (toString)
import Text.CSL.Reference (Reference(..))
#if MIN_VERSION_base(4,4,0)
#else
@@ -97,8 +98,8 @@ wrapWords indent c = wrap' (c - indent) (c - indent)
then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs
else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs
-nonTextFormats :: [String]
-nonTextFormats = ["odt","docx","epub"]
+isTextFormat :: String -> Bool
+isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub"]
-- | Data structure for command line options.
data Opt = Opt
@@ -131,7 +132,6 @@ data Opt = Opt
, optEPUBFonts :: [FilePath] -- ^ EPUB fonts to embed
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
- , optStrict :: Bool -- ^ Use strict markdown syntax
, optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, optWrapText :: Bool -- ^ Wrap text
, optColumns :: Int -- ^ Line length in characters
@@ -184,7 +184,6 @@ defaultOpts = Opt
, optEPUBFonts = []
, optDumpArgs = False
, optIgnoreArgs = False
- , optStrict = False
, optReferenceLinks = False
, optWrapText = True
, optColumns = 72
@@ -235,7 +234,10 @@ options =
, Option "" ["strict"]
(NoArg
- (\opt -> return opt { optStrict = True } ))
+ (\opt -> do
+ err 59 $ "The --strict option has been removed.\n" ++
+ "Use `markdown_strict' input or output format instead."
+ return opt ))
"" -- "Disable markdown syntax extensions"
, Option "R" ["parse-raw"]
@@ -257,13 +259,13 @@ options =
, Option "" ["base-header-level"]
(ReqArg
(\arg opt ->
- case reads arg of
- [(t,"")] | t > 0 -> do
+ case safeRead arg of
+ Just t | t > 0 -> do
let oldTransforms = optTransforms opt
let shift = t - 1
return opt{ optTransforms =
headerShift shift : oldTransforms }
- _ -> err 19
+ _ -> err 19
"base-header-level must be a number > 0")
"NUMBER")
"" -- "Headers base level"
@@ -289,9 +291,9 @@ options =
, Option "" ["tab-stop"]
(ReqArg
(\arg opt ->
- case reads arg of
- [(t,"")] | t > 0 -> return opt { optTabStop = t }
- _ -> err 31
+ case safeRead arg of
+ Just t | t > 0 -> return opt { optTabStop = t }
+ _ -> err 31
"tab-stop must be a number greater than 0")
"NUMBER")
"" -- "Tab stop (default 4)"
@@ -338,9 +340,9 @@ options =
, Option "" ["columns"]
(ReqArg
(\arg opt ->
- case reads arg of
- [(t,"")] | t > 0 -> return opt { optColumns = t }
- _ -> err 33 $
+ case safeRead arg of
+ Just t | t > 0 -> return opt { optColumns = t }
+ _ -> err 33 $
"columns must be a number greater than 0")
"NUMBER")
"" -- "Length of line in characters"
@@ -472,10 +474,10 @@ options =
, Option "" ["slide-level"]
(ReqArg
(\arg opt -> do
- case reads arg of
- [(t,"")] | t >= 1 && t <= 6 ->
+ case safeRead arg of
+ Just t | t >= 1 && t <= 6 ->
return opt { optSlideLevel = Just t }
- _ -> err 39 $
+ _ -> err 39 $
"slide level must be a number between 1 and 6")
"NUMBER")
"" -- "Force header level for slides"
@@ -690,12 +692,20 @@ options =
]
+readExtension :: String -> IO Extension
+readExtension s = case safeRead ('E':'x':'t':'_':map toLower s) of
+ Just ext -> return ext
+ Nothing -> err 59 $ "Unknown extension: " ++ s
+
-- Returns usage message
usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
usageMessage programName = usageInfo
(programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
- (wrapWords 16 78 $ map fst readers) ++ "\nOutput formats: " ++
- (wrapWords 16 78 $ map fst writers ++ nonTextFormats) ++ "\nOptions:")
+ (wrapWords 16 78 $ readers'names) ++ "\nOutput formats: " ++
+ (wrapWords 16 78 $ writers'names) ++ "\nOptions:")
+ where
+ writers'names = map fst writers
+ readers'names = map fst readers
-- Determine default reader based on source file extensions
defaultReaderName :: String -> [FilePath] -> String
@@ -752,6 +762,7 @@ defaultWriterName x =
".org" -> "org"
".asciidoc" -> "asciidoc"
".pdf" -> "latex"
+ ".fb2" -> "fb2"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
@@ -771,9 +782,10 @@ main = do
["Try " ++ prg ++ " --help for more information."]
let defaultOpts' = if compatMode
- then defaultOpts { optReader = "markdown"
+ then defaultOpts { optReader = "markdown_strict"
, optWriter = "html"
- , optStrict = True }
+ , optEmailObfuscation =
+ ReferenceObfuscation }
else defaultOpts
-- thread option data structure through all supplied option actions
@@ -808,7 +820,6 @@ main = do
, optEPUBFonts = epubFonts
, optDumpArgs = dumpArgs
, optIgnoreArgs = ignoreArgs
- , optStrict = strict
, optReferenceLinks = referenceLinks
, optWrapText = wrap
, optColumns = columns
@@ -836,9 +847,10 @@ main = do
let sources = if ignoreArgs then [] else args
datadir <- case mbDataDir of
- Nothing -> catch
+ Nothing -> E.catch
(liftM Just $ getAppUserDataDirectory "pandoc")
- (const $ return Nothing)
+ (\e -> let _ = (e :: E.SomeException)
+ in return Nothing)
Just _ -> return mbDataDir
-- assign reader and writer based on options and filenames
@@ -855,8 +867,8 @@ main = do
let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
- let laTeXOutput = writerName' == "latex" || writerName' == "beamer" ||
- writerName' == "latex+lhs" || writerName' == "beamer+lhs"
+ let laTeXOutput = "latex" `isPrefixOf` writerName' ||
+ "beamer" `isPrefixOf` writerName'
when pdfOutput $ do
-- make sure writer is latex or beamer
@@ -870,11 +882,11 @@ main = do
latexEngine ++ " is needed for pdf output."
Just _ -> return ()
- reader <- case (lookup readerName' readers) of
- Just r -> return r
- Nothing -> err 7 ("Unknown reader: " ++ readerName')
+ reader <- case getReader readerName' of
+ Right r -> return r
+ Left e -> err 7 e
- let standalone' = standalone || writerName' `elem` nonTextFormats || pdfOutput
+ let standalone' = standalone || not (isTextFormat writerName') || pdfOutput
templ <- case templatePath of
_ | not standalone' -> return ""
@@ -884,26 +896,20 @@ main = do
Left e -> throwIO e
Right t -> return t
Just tp -> do
- -- strip off "+lhs" if present
- let format = takeWhile (/='+') writerName'
+ -- strip off extensions
+ let format = takeWhile (`notElem` "+-") writerName'
let tp' = case takeExtension tp of
"" -> tp <.> format
_ -> tp
- catch (UTF8.readFile tp')
+ E.catch (UTF8.readFile tp')
(\e -> if isDoesNotExistError e
- then catch
+ then E.catch
(readDataFile datadir $
"templates" </> tp')
- (\_ -> throwIO e)
+ (\e' -> let _ = (e' :: E.SomeException)
+ in throwIO e')
else throwIO e)
- let slideVariant = case writerName' of
- "s5" -> S5Slides
- "slidy" -> SlidySlides
- "slideous" -> SlideousSlides
- "dzslides" -> DZSlides
- _ -> NoSlides
-
variables' <- case mathMethod of
LaTeXMathML Nothing -> do
s <- readDataFile datadir $ "data" </> "LaTeXMathML.js"
@@ -913,20 +919,22 @@ main = do
return $ ("mathml-script", s) : variables
_ -> return variables
- variables'' <- case slideVariant of
- DZSlides -> do
+ variables'' <- if "dzslides" `isPrefixOf` writerName'
+ then do
dztempl <- readDataFile datadir $ "dzslides" </> "template.html"
let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core")
$ lines dztempl
return $ ("dzslides-core", dzcore) : variables'
- _ -> return variables'
+ else return variables'
-- unescape reference ids, which may contain XML entities, so
-- that we can do lookups with regular string equality
let unescapeRefId ref = ref{ refId = fromEntities (refId ref) }
- refs <- mapM (\f -> catch (CSL.readBiblioFile f) $ \e ->
- err 23 $ "Error reading bibliography `" ++ f ++ "'" ++ "\n" ++ show e)
+ refs <- mapM (\f -> E.catch (CSL.readBiblioFile f)
+ (\e -> let _ = (e :: E.SomeException)
+ in err 23 $ "Error reading bibliography `" ++ f ++
+ "'" ++ "\n" ++ show e))
reffiles >>=
return . map unescapeRefId . concat
@@ -934,62 +942,54 @@ main = do
then "."
else takeDirectory (head sources)
- let startParserState =
- defaultParserState { stateParseRaw = parseRaw,
- stateTabStop = tabStop,
- stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' ||
- lhsExtension sources,
- stateStandalone = standalone',
- stateCitations = map CSL.refId refs,
- stateSmart = smart || (texLigatures &&
- (laTeXOutput || writerName' == "context")),
- stateOldDashes = oldDashes,
- stateColumns = columns,
- stateStrict = strict,
- stateIndentedCodeClasses = codeBlockClasses,
- stateApplyMacros = not laTeXOutput
- }
-
- let writerOptions = defaultWriterOptions
- { writerStandalone = standalone',
- writerTemplate = templ,
- writerVariables = variables'',
- writerEPUBMetadata = epubMetadata,
- writerTabStop = tabStop,
- writerTableOfContents = toc &&
- writerName' /= "s5",
- writerHTMLMathMethod = mathMethod,
- writerSlideVariant = slideVariant,
- writerIncremental = incremental,
- writerCiteMethod = citeMethod,
- writerBiblioFiles = reffiles,
- writerIgnoreNotes = False,
- writerNumberSections = numberSections,
- writerSectionDivs = sectionDivs,
- writerStrictMarkdown = strict,
- writerReferenceLinks = referenceLinks,
- writerWrapText = wrap,
- writerColumns = columns,
- writerLiterateHaskell = False,
- writerEmailObfuscation = if strict
- then ReferenceObfuscation
- else obfuscationMethod,
- writerIdentifierPrefix = idPrefix,
- writerSourceDirectory = sourceDir,
- writerUserDataDir = datadir,
- writerHtml5 = html5 ||
- slideVariant == DZSlides,
- writerChapters = chapters,
- writerListings = listings,
- writerBeamer = False,
- writerSlideLevel = slideLevel,
- writerHighlight = highlight,
- writerHighlightStyle = highlightStyle,
- writerSetextHeaders = setextHeaders,
- writerTeXLigatures = texLigatures
- }
-
- when (writerName' `elem` nonTextFormats&& outputFile == "-") $
+ let readerOpts = def{ readerSmart = smart || (texLigatures &&
+ (laTeXOutput || "context" `isPrefixOf` writerName'))
+ , readerStandalone = standalone'
+ , readerParseRaw = parseRaw
+ , readerColumns = columns
+ , readerTabStop = tabStop
+ , readerOldDashes = oldDashes
+ , readerCitations = map CSL.refId refs
+ , readerIndentedCodeClasses = codeBlockClasses
+ , readerApplyMacros = not laTeXOutput
+ }
+
+ let writerOptions = def { writerStandalone = standalone',
+ writerTemplate = templ,
+ writerVariables = variables'',
+ writerEPUBMetadata = epubMetadata,
+ writerTabStop = tabStop,
+ writerTableOfContents = toc,
+ writerHTMLMathMethod = mathMethod,
+ writerIncremental = incremental,
+ writerCiteMethod = citeMethod,
+ writerBiblioFiles = reffiles,
+ writerIgnoreNotes = False,
+ writerNumberSections = numberSections,
+ writerSectionDivs = sectionDivs,
+ writerReferenceLinks = referenceLinks,
+ writerWrapText = wrap,
+ writerColumns = columns,
+ writerEmailObfuscation = obfuscationMethod,
+ writerIdentifierPrefix = idPrefix,
+ writerSourceDirectory = sourceDir,
+ writerUserDataDir = datadir,
+ writerHtml5 = html5,
+ writerChapters = chapters,
+ writerListings = listings,
+ writerBeamer = False,
+ writerSlideLevel = slideLevel,
+ writerHighlight = highlight,
+ writerHighlightStyle = highlightStyle,
+ writerSetextHeaders = setextHeaders,
+ writerTeXLigatures = texLigatures,
+ writerEpubStylesheet = epubStylesheet,
+ writerEpubFonts = epubFonts,
+ writerReferenceODT = referenceODT,
+ writerReferenceDocx = referenceDocx
+ }
+
+ when (not (isTextFormat writerName') && outputFile == "-") $
err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++
"Specify an output file using the -o option."
@@ -1009,12 +1009,12 @@ main = do
then handleIncludes
else return
- doc <- (reader startParserState) `fmap` (readSources sources >>=
+ doc <- (reader readerOpts) `fmap` (readSources sources >>=
handleIncludes' . convertTabs . intercalate "\n")
let doc0 = foldr ($) doc transforms
- doc1 <- if writerName' == "rtf"
+ doc1 <- if "rtf" `isPrefixOf` writerName'
then bottomUpM rtfEmbedImage doc0
else return doc0
@@ -1042,31 +1042,25 @@ main = do
writerFn "-" = UTF8.putStr
writerFn f = UTF8.writeFile f
- case lookup writerName' writers of
- Nothing
- | writerName' == "epub" ->
- writeEPUB epubStylesheet epubFonts writerOptions doc2
- >>= writeBinary
- | writerName' == "odt" ->
- writeODT referenceODT writerOptions doc2 >>= writeBinary
- | writerName' == "docx" ->
- writeDocx referenceDocx writerOptions doc2 >>= writeBinary
- | otherwise -> err 9 ("Unknown writer: " ++ writerName')
- Just w
- | pdfOutput -> do
- res <- tex2pdf latexEngine $ w writerOptions doc2
+ case getWriter writerName' of
+ Left e -> err 9 e
+ Right (IOStringWriter f) -> f writerOptions doc2 >>= writerFn outputFile
+ Right (IOByteStringWriter f) -> f writerOptions doc2 >>= writeBinary
+ Right (PureStringWriter f)
+ | pdfOutput -> do
+ res <- tex2pdf latexEngine $ f writerOptions doc2
case res of
Right pdf -> writeBinary pdf
Left err' -> err 43 $ toString err'
- Just w
- | htmlFormat && ascii ->
- writerFn outputFile =<< selfcontain (toEntities result)
- | otherwise ->
- writerFn outputFile =<< selfcontain result
- where result = w writerOptions doc2 ++ ['\n' | not standalone']
- htmlFormat = writerName' `elem`
+ | otherwise -> selfcontain (f writerOptions doc2 ++
+ ['\n' | not standalone'])
+ >>= writerFn outputFile . handleEntities
+ where htmlFormat = writerName' `elem`
["html","html+lhs","html5","html5+lhs",
"s5","slidy","slideous","dzslides"]
selfcontain = if selfContained && htmlFormat
then makeSelfContained datadir
else return
+ handleEntities = if htmlFormat && ascii
+ then toEntities
+ else id