diff options
Diffstat (limited to 'src/pandoc.hs')
-rw-r--r-- | src/pandoc.hs | 250 |
1 files changed, 122 insertions, 128 deletions
diff --git a/src/pandoc.hs b/src/pandoc.hs index 2f85906d5..63a0df51a 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -33,7 +33,7 @@ module Main where import Text.Pandoc import Text.Pandoc.PDF (tex2pdf) import Text.Pandoc.Readers.LaTeX (handleIncludes) -import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, +import Text.Pandoc.Shared ( tabFilter, readDataFile, safeRead, headerShift, findDataFile, normalize, err, warn ) import Text.Pandoc.XML ( toEntities, fromEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) @@ -44,10 +44,11 @@ import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt import Data.Char ( toLower ) -import Data.List ( intercalate, isSuffixOf, isPrefixOf ) +import Data.List ( intercalate, isPrefixOf ) import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) import System.IO ( stdout ) import System.IO.Error ( isDoesNotExistError ) +import qualified Control.Exception as E import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.CSL as CSL @@ -56,7 +57,7 @@ import Control.Monad (when, unless, liftM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 (toString ) +import Data.ByteString.Lazy.UTF8 (toString) import Text.CSL.Reference (Reference(..)) #if MIN_VERSION_base(4,4,0) #else @@ -97,8 +98,8 @@ wrapWords indent c = wrap' (c - indent) (c - indent) then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs -nonTextFormats :: [String] -nonTextFormats = ["odt","docx","epub"] +isTextFormat :: String -> Bool +isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub"] -- | Data structure for command line options. data Opt = Opt @@ -131,7 +132,6 @@ data Opt = Opt , optEPUBFonts :: [FilePath] -- ^ EPUB fonts to embed , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments - , optStrict :: Bool -- ^ Use strict markdown syntax , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optWrapText :: Bool -- ^ Wrap text , optColumns :: Int -- ^ Line length in characters @@ -184,7 +184,6 @@ defaultOpts = Opt , optEPUBFonts = [] , optDumpArgs = False , optIgnoreArgs = False - , optStrict = False , optReferenceLinks = False , optWrapText = True , optColumns = 72 @@ -235,7 +234,10 @@ options = , Option "" ["strict"] (NoArg - (\opt -> return opt { optStrict = True } )) + (\opt -> do + err 59 $ "The --strict option has been removed.\n" ++ + "Use `markdown_strict' input or output format instead." + return opt )) "" -- "Disable markdown syntax extensions" , Option "R" ["parse-raw"] @@ -257,13 +259,13 @@ options = , Option "" ["base-header-level"] (ReqArg (\arg opt -> - case reads arg of - [(t,"")] | t > 0 -> do + case safeRead arg of + Just t | t > 0 -> do let oldTransforms = optTransforms opt let shift = t - 1 return opt{ optTransforms = headerShift shift : oldTransforms } - _ -> err 19 + _ -> err 19 "base-header-level must be a number > 0") "NUMBER") "" -- "Headers base level" @@ -289,9 +291,9 @@ options = , Option "" ["tab-stop"] (ReqArg (\arg opt -> - case reads arg of - [(t,"")] | t > 0 -> return opt { optTabStop = t } - _ -> err 31 + case safeRead arg of + Just t | t > 0 -> return opt { optTabStop = t } + _ -> err 31 "tab-stop must be a number greater than 0") "NUMBER") "" -- "Tab stop (default 4)" @@ -338,9 +340,9 @@ options = , Option "" ["columns"] (ReqArg (\arg opt -> - case reads arg of - [(t,"")] | t > 0 -> return opt { optColumns = t } - _ -> err 33 $ + case safeRead arg of + Just t | t > 0 -> return opt { optColumns = t } + _ -> err 33 $ "columns must be a number greater than 0") "NUMBER") "" -- "Length of line in characters" @@ -472,10 +474,10 @@ options = , Option "" ["slide-level"] (ReqArg (\arg opt -> do - case reads arg of - [(t,"")] | t >= 1 && t <= 6 -> + case safeRead arg of + Just t | t >= 1 && t <= 6 -> return opt { optSlideLevel = Just t } - _ -> err 39 $ + _ -> err 39 $ "slide level must be a number between 1 and 6") "NUMBER") "" -- "Force header level for slides" @@ -690,12 +692,20 @@ options = ] +readExtension :: String -> IO Extension +readExtension s = case safeRead ('E':'x':'t':'_':map toLower s) of + Just ext -> return ext + Nothing -> err 59 $ "Unknown extension: " ++ s + -- Returns usage message usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ - (wrapWords 16 78 $ map fst readers) ++ "\nOutput formats: " ++ - (wrapWords 16 78 $ map fst writers ++ nonTextFormats) ++ "\nOptions:") + (wrapWords 16 78 $ readers'names) ++ "\nOutput formats: " ++ + (wrapWords 16 78 $ writers'names) ++ "\nOptions:") + where + writers'names = map fst writers + readers'names = map fst readers -- Determine default reader based on source file extensions defaultReaderName :: String -> [FilePath] -> String @@ -752,6 +762,7 @@ defaultWriterName x = ".org" -> "org" ".asciidoc" -> "asciidoc" ".pdf" -> "latex" + ".fb2" -> "fb2" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -771,9 +782,10 @@ main = do ["Try " ++ prg ++ " --help for more information."] let defaultOpts' = if compatMode - then defaultOpts { optReader = "markdown" + then defaultOpts { optReader = "markdown_strict" , optWriter = "html" - , optStrict = True } + , optEmailObfuscation = + ReferenceObfuscation } else defaultOpts -- thread option data structure through all supplied option actions @@ -808,7 +820,6 @@ main = do , optEPUBFonts = epubFonts , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs - , optStrict = strict , optReferenceLinks = referenceLinks , optWrapText = wrap , optColumns = columns @@ -836,9 +847,10 @@ main = do let sources = if ignoreArgs then [] else args datadir <- case mbDataDir of - Nothing -> catch + Nothing -> E.catch (liftM Just $ getAppUserDataDirectory "pandoc") - (const $ return Nothing) + (\e -> let _ = (e :: E.SomeException) + in return Nothing) Just _ -> return mbDataDir -- assign reader and writer based on options and filenames @@ -855,8 +867,8 @@ main = do let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" - let laTeXOutput = writerName' == "latex" || writerName' == "beamer" || - writerName' == "latex+lhs" || writerName' == "beamer+lhs" + let laTeXOutput = "latex" `isPrefixOf` writerName' || + "beamer" `isPrefixOf` writerName' when pdfOutput $ do -- make sure writer is latex or beamer @@ -870,11 +882,11 @@ main = do latexEngine ++ " is needed for pdf output." Just _ -> return () - reader <- case (lookup readerName' readers) of - Just r -> return r - Nothing -> err 7 ("Unknown reader: " ++ readerName') + reader <- case getReader readerName' of + Right r -> return r + Left e -> err 7 e - let standalone' = standalone || writerName' `elem` nonTextFormats || pdfOutput + let standalone' = standalone || not (isTextFormat writerName') || pdfOutput templ <- case templatePath of _ | not standalone' -> return "" @@ -884,26 +896,20 @@ main = do Left e -> throwIO e Right t -> return t Just tp -> do - -- strip off "+lhs" if present - let format = takeWhile (/='+') writerName' + -- strip off extensions + let format = takeWhile (`notElem` "+-") writerName' let tp' = case takeExtension tp of "" -> tp <.> format _ -> tp - catch (UTF8.readFile tp') + E.catch (UTF8.readFile tp') (\e -> if isDoesNotExistError e - then catch + then E.catch (readDataFile datadir $ "templates" </> tp') - (\_ -> throwIO e) + (\e' -> let _ = (e' :: E.SomeException) + in throwIO e') else throwIO e) - let slideVariant = case writerName' of - "s5" -> S5Slides - "slidy" -> SlidySlides - "slideous" -> SlideousSlides - "dzslides" -> DZSlides - _ -> NoSlides - variables' <- case mathMethod of LaTeXMathML Nothing -> do s <- readDataFile datadir $ "data" </> "LaTeXMathML.js" @@ -913,20 +919,22 @@ main = do return $ ("mathml-script", s) : variables _ -> return variables - variables'' <- case slideVariant of - DZSlides -> do + variables'' <- if "dzslides" `isPrefixOf` writerName' + then do dztempl <- readDataFile datadir $ "dzslides" </> "template.html" let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core") $ lines dztempl return $ ("dzslides-core", dzcore) : variables' - _ -> return variables' + else return variables' -- unescape reference ids, which may contain XML entities, so -- that we can do lookups with regular string equality let unescapeRefId ref = ref{ refId = fromEntities (refId ref) } - refs <- mapM (\f -> catch (CSL.readBiblioFile f) $ \e -> - err 23 $ "Error reading bibliography `" ++ f ++ "'" ++ "\n" ++ show e) + refs <- mapM (\f -> E.catch (CSL.readBiblioFile f) + (\e -> let _ = (e :: E.SomeException) + in err 23 $ "Error reading bibliography `" ++ f ++ + "'" ++ "\n" ++ show e)) reffiles >>= return . map unescapeRefId . concat @@ -934,62 +942,54 @@ main = do then "." else takeDirectory (head sources) - let startParserState = - defaultParserState { stateParseRaw = parseRaw, - stateTabStop = tabStop, - stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' || - lhsExtension sources, - stateStandalone = standalone', - stateCitations = map CSL.refId refs, - stateSmart = smart || (texLigatures && - (laTeXOutput || writerName' == "context")), - stateOldDashes = oldDashes, - stateColumns = columns, - stateStrict = strict, - stateIndentedCodeClasses = codeBlockClasses, - stateApplyMacros = not laTeXOutput - } - - let writerOptions = defaultWriterOptions - { writerStandalone = standalone', - writerTemplate = templ, - writerVariables = variables'', - writerEPUBMetadata = epubMetadata, - writerTabStop = tabStop, - writerTableOfContents = toc && - writerName' /= "s5", - writerHTMLMathMethod = mathMethod, - writerSlideVariant = slideVariant, - writerIncremental = incremental, - writerCiteMethod = citeMethod, - writerBiblioFiles = reffiles, - writerIgnoreNotes = False, - writerNumberSections = numberSections, - writerSectionDivs = sectionDivs, - writerStrictMarkdown = strict, - writerReferenceLinks = referenceLinks, - writerWrapText = wrap, - writerColumns = columns, - writerLiterateHaskell = False, - writerEmailObfuscation = if strict - then ReferenceObfuscation - else obfuscationMethod, - writerIdentifierPrefix = idPrefix, - writerSourceDirectory = sourceDir, - writerUserDataDir = datadir, - writerHtml5 = html5 || - slideVariant == DZSlides, - writerChapters = chapters, - writerListings = listings, - writerBeamer = False, - writerSlideLevel = slideLevel, - writerHighlight = highlight, - writerHighlightStyle = highlightStyle, - writerSetextHeaders = setextHeaders, - writerTeXLigatures = texLigatures - } - - when (writerName' `elem` nonTextFormats&& outputFile == "-") $ + let readerOpts = def{ readerSmart = smart || (texLigatures && + (laTeXOutput || "context" `isPrefixOf` writerName')) + , readerStandalone = standalone' + , readerParseRaw = parseRaw + , readerColumns = columns + , readerTabStop = tabStop + , readerOldDashes = oldDashes + , readerCitations = map CSL.refId refs + , readerIndentedCodeClasses = codeBlockClasses + , readerApplyMacros = not laTeXOutput + } + + let writerOptions = def { writerStandalone = standalone', + writerTemplate = templ, + writerVariables = variables'', + writerEPUBMetadata = epubMetadata, + writerTabStop = tabStop, + writerTableOfContents = toc, + writerHTMLMathMethod = mathMethod, + writerIncremental = incremental, + writerCiteMethod = citeMethod, + writerBiblioFiles = reffiles, + writerIgnoreNotes = False, + writerNumberSections = numberSections, + writerSectionDivs = sectionDivs, + writerReferenceLinks = referenceLinks, + writerWrapText = wrap, + writerColumns = columns, + writerEmailObfuscation = obfuscationMethod, + writerIdentifierPrefix = idPrefix, + writerSourceDirectory = sourceDir, + writerUserDataDir = datadir, + writerHtml5 = html5, + writerChapters = chapters, + writerListings = listings, + writerBeamer = False, + writerSlideLevel = slideLevel, + writerHighlight = highlight, + writerHighlightStyle = highlightStyle, + writerSetextHeaders = setextHeaders, + writerTeXLigatures = texLigatures, + writerEpubStylesheet = epubStylesheet, + writerEpubFonts = epubFonts, + writerReferenceODT = referenceODT, + writerReferenceDocx = referenceDocx + } + + when (not (isTextFormat writerName') && outputFile == "-") $ err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++ "Specify an output file using the -o option." @@ -1009,12 +1009,12 @@ main = do then handleIncludes else return - doc <- (reader startParserState) `fmap` (readSources sources >>= + doc <- (reader readerOpts) `fmap` (readSources sources >>= handleIncludes' . convertTabs . intercalate "\n") let doc0 = foldr ($) doc transforms - doc1 <- if writerName' == "rtf" + doc1 <- if "rtf" `isPrefixOf` writerName' then bottomUpM rtfEmbedImage doc0 else return doc0 @@ -1042,31 +1042,25 @@ main = do writerFn "-" = UTF8.putStr writerFn f = UTF8.writeFile f - case lookup writerName' writers of - Nothing - | writerName' == "epub" -> - writeEPUB epubStylesheet epubFonts writerOptions doc2 - >>= writeBinary - | writerName' == "odt" -> - writeODT referenceODT writerOptions doc2 >>= writeBinary - | writerName' == "docx" -> - writeDocx referenceDocx writerOptions doc2 >>= writeBinary - | otherwise -> err 9 ("Unknown writer: " ++ writerName') - Just w - | pdfOutput -> do - res <- tex2pdf latexEngine $ w writerOptions doc2 + case getWriter writerName' of + Left e -> err 9 e + Right (IOStringWriter f) -> f writerOptions doc2 >>= writerFn outputFile + Right (IOByteStringWriter f) -> f writerOptions doc2 >>= writeBinary + Right (PureStringWriter f) + | pdfOutput -> do + res <- tex2pdf latexEngine $ f writerOptions doc2 case res of Right pdf -> writeBinary pdf Left err' -> err 43 $ toString err' - Just w - | htmlFormat && ascii -> - writerFn outputFile =<< selfcontain (toEntities result) - | otherwise -> - writerFn outputFile =<< selfcontain result - where result = w writerOptions doc2 ++ ['\n' | not standalone'] - htmlFormat = writerName' `elem` + | otherwise -> selfcontain (f writerOptions doc2 ++ + ['\n' | not standalone']) + >>= writerFn outputFile . handleEntities + where htmlFormat = writerName' `elem` ["html","html+lhs","html5","html5+lhs", "s5","slidy","slideous","dzslides"] selfcontain = if selfContained && htmlFormat then makeSelfContained datadir else return + handleEntities = if htmlFormat && ascii + then toEntities + else id |