diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-11-03 22:14:03 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-11-03 22:14:03 +0000 |
commit | 4a841bfc5464907adea4cdd655485565565b40ae (patch) | |
tree | 36c0a21e3639614c8d25b5fb1909c32d0ab11dcd /Main.hs | |
parent | 3116d30133196e1bb258f7e74e03d4a85f3b21ae (diff) | |
download | pandoc-4a841bfc5464907adea4cdd655485565565b40ae.tar.gz |
Use template haskell to avoid the need for templates:
+ Added library Text.Pandoc.Include, with a template haskell
function $(includeStrFrom fname) to include a file as a string
constant at compile time.
+ This removes the need for the 'templates' directory or Makefile
target. These have been removed.
+ The base source directory has been changed from src to .
+ A new 'data' directory has been added, containing the ASCIIMathML.js
script, writer headers, and S5 files.
+ The src/wrappers directory has been moved to 'wrappers'.
+ The Text.Pandoc.ASCIIMathML library is no longer needed, since
Text.Pandoc.Writers.HTML can use includeStrFrom to include the
ASCIIMathML.js code directly. It has been removed.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1063 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Main.hs')
-rw-r--r-- | Main.hs | 512 |
1 files changed, 512 insertions, 0 deletions
diff --git a/Main.hs b/Main.hs new file mode 100644 index 000000000..ae9f61f7f --- /dev/null +++ b/Main.hs @@ -0,0 +1,512 @@ +{- +Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Main + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley@edu> + Stability : alpha + Portability : portable + +Parses command-line options and calls the appropriate readers and +writers. +-} +module Main where +import Text.Pandoc +import Text.Pandoc.UTF8 +import Text.Pandoc.Shared ( joinWithSep ) +import Text.Regex ( mkRegex, matchRegex ) +import System.Environment ( getArgs, getProgName, getEnvironment ) +import System.Exit ( exitWith, ExitCode (..) ) +import System.Console.GetOpt +import System.IO +import Data.Maybe ( fromMaybe ) +import Data.List ( isPrefixOf ) +import Data.Char ( toLower ) +import Control.Monad ( (>>=) ) + +copyrightMessage :: String +copyrightMessage = "\nCopyright (C) 2006-7 John MacFarlane\n\ + \Web: http://johnmacfarlane.net/pandoc\n\ + \This is free software; see the source for copying conditions. There is no\n\ + \warranty, not even for merchantability or fitness for a particular purpose." + +-- | Association list of formats and readers. +readers :: [(String, ParserState -> String -> Pandoc)] +readers = [("native" , readPandoc) + ,("markdown" , readMarkdown) + ,("rst" , readRST) + ,("html" , readHtml) + ,("latex" , readLaTeX) + ] + +-- | Reader for native Pandoc format. +readPandoc :: ParserState -> String -> Pandoc +readPandoc state input = read input + +-- | Association list of formats and pairs of writers and default headers. +writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ] +writers = [("native" , (writeDoc, "")) + ,("html" , (writeHtmlString, "")) + ,("s5" , (writeS5String, defaultS5Header)) + ,("docbook" , (writeDocbook, defaultDocbookHeader)) + ,("latex" , (writeLaTeX, defaultLaTeXHeader)) + ,("context" , (writeConTeXt, defaultConTeXtHeader)) + ,("man" , (writeMan, "")) + ,("markdown" , (writeMarkdown, "")) + ,("rst" , (writeRST, "")) + ,("rtf" , (writeRTF, defaultRTFHeader)) + ] + +-- | Writer for Pandoc native format. +writeDoc :: WriterOptions -> Pandoc -> String +writeDoc options = prettyPandoc + +-- | Data structure for command line options. +data Opt = Opt + { optPreserveTabs :: Bool -- ^ Convert tabs to spaces + , optTabStop :: Int -- ^ Number of spaces per tab + , optStandalone :: Bool -- ^ Include header, footer + , optReader :: String -- ^ Reader format + , optWriter :: String -- ^ Writer format + , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX + , optCSS :: String -- ^ CSS file to link to + , optTableOfContents :: Bool -- ^ Include table of contents + , optIncludeInHeader :: String -- ^ File to include in header + , optIncludeBeforeBody :: String -- ^ File to include at top of body + , optIncludeAfterBody :: String -- ^ File to include at end of body + , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" + , optTitlePrefix :: String -- ^ Optional prefix for HTML title + , optOutputFile :: String -- ^ Name of output file + , optNumberSections :: Bool -- ^ Number sections in LaTeX + , optIncremental :: Bool -- ^ Use incremental lists in S5 + , optSmart :: Bool -- ^ Use smart typography + , optUseASCIIMathML :: Bool -- ^ Use ASCIIMathML + , optASCIIMathMLURL :: Maybe String -- ^ URL to ASCIIMathML.js + , optDumpArgs :: Bool -- ^ Output command-line arguments + , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments + , optStrict :: Bool -- ^ Use strict markdown syntax + , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , optWrapText :: Bool -- ^ Wrap text + } + +-- | Defaults for command-line options. +defaultOpts :: Opt +defaultOpts = Opt + { optPreserveTabs = False + , optTabStop = 4 + , optStandalone = False + , optReader = "" -- null for default reader + , optWriter = "" -- null for default writer + , optParseRaw = False + , optCSS = "" + , optTableOfContents = False + , optIncludeInHeader = "" + , optIncludeBeforeBody = "" + , optIncludeAfterBody = "" + , optCustomHeader = "DEFAULT" + , optTitlePrefix = "" + , optOutputFile = "-" -- "-" means stdout + , optNumberSections = False + , optIncremental = False + , optSmart = False + , optUseASCIIMathML = False + , optASCIIMathMLURL = Nothing + , optDumpArgs = False + , optIgnoreArgs = False + , optStrict = False + , optReferenceLinks = False + , optWrapText = True + } + +-- | A list of functions, each transforming the options data structure +-- in response to a command-line option. +options :: [OptDescr (Opt -> IO Opt)] +options = + [ Option "fr" ["from","read"] + (ReqArg + (\arg opt -> return opt { optReader = map toLower arg }) + "FORMAT") + "" -- ("(" ++ (joinWithSep ", " $ map fst readers) ++ ")") + + , Option "tw" ["to","write"] + (ReqArg + (\arg opt -> return opt { optWriter = map toLower arg }) + "FORMAT") + "" -- ("(" ++ (joinWithSep ", " $ map fst writers) ++ ")") + + , Option "s" ["standalone"] + (NoArg + (\opt -> return opt { optStandalone = True })) + "" -- "Include needed header and footer on output" + + , Option "o" ["output"] + (ReqArg + (\arg opt -> return opt { optOutputFile = arg }) + "FILENAME") + "" -- "Name of output file" + + , Option "p" ["preserve-tabs"] + (NoArg + (\opt -> return opt { optPreserveTabs = True })) + "" -- "Preserve tabs instead of converting to spaces" + + , Option "" ["tab-stop"] + (ReqArg + (\arg opt -> return opt { optTabStop = (read arg) } ) + "TABSTOP") + "" -- "Tab stop (default 4)" + + , Option "" ["strict"] + (NoArg + (\opt -> return opt { optStrict = True } )) + "" -- "Disable markdown syntax extensions" + + , Option "" ["reference-links"] + (NoArg + (\opt -> return opt { optReferenceLinks = True } )) + "" -- "Use reference links in parsing HTML" + + , Option "R" ["parse-raw"] + (NoArg + (\opt -> return opt { optParseRaw = True })) + "" -- "Parse untranslatable HTML codes and LaTeX environments as raw" + + , Option "S" ["smart"] + (NoArg + (\opt -> return opt { optSmart = True })) + "" -- "Use smart quotes, dashes, and ellipses" + + , Option "m" ["asciimathml"] + (OptArg + (\arg opt -> return opt { optUseASCIIMathML = True, + optASCIIMathMLURL = arg, + optStandalone = True }) + "URL") + "" -- "Use ASCIIMathML script in html output" + + , Option "i" ["incremental"] + (NoArg + (\opt -> return opt { optIncremental = True })) + "" -- "Make list items display incrementally in S5" + + , Option "N" ["number-sections"] + (NoArg + (\opt -> return opt { optNumberSections = True })) + "" -- "Number sections in LaTeX" + + , Option "" ["no-wrap"] + (NoArg + (\opt -> return opt { optWrapText = False })) + "" -- "Do not wrap text in output" + + , Option "" ["toc", "table-of-contents"] + (NoArg + (\opt -> return opt { optTableOfContents = True })) + "" -- "Include table of contents" + + , Option "c" ["css"] + (ReqArg + (\arg opt -> return opt { optCSS = arg, + optStandalone = True }) + "CSS") + "" -- "Link to CSS style sheet" + + , Option "H" ["include-in-header"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optIncludeInHeader = fromUTF8 text, + optStandalone = True }) + "FILENAME") + "" -- "File to include at end of header (implies -s)" + + , Option "B" ["include-before-body"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optIncludeBeforeBody = fromUTF8 text }) + "FILENAME") + "" -- "File to include before document body" + + , Option "A" ["include-after-body"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optIncludeAfterBody = fromUTF8 text }) + "FILENAME") + "" -- "File to include after document body" + + , Option "C" ["custom-header"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optCustomHeader = fromUTF8 text, + optStandalone = True }) + "FILENAME") + "" -- "File to use for custom header (implies -s)" + + , Option "T" ["title-prefix"] + (ReqArg + (\arg opt -> return opt { optTitlePrefix = arg, + optStandalone = True }) + "STRING") + "" -- "String to prefix to HTML window title" + + , Option "D" ["print-default-header"] + (ReqArg + (\arg opt -> do + let header = case (lookup arg writers) of + Just (writer, head) -> head + Nothing -> error ("Unknown reader: " ++ arg) + hPutStr stdout header + exitWith ExitSuccess) + "FORMAT") + "" -- "Print default header for FORMAT" + + , Option "" ["dump-args"] + (NoArg + (\opt -> return opt { optDumpArgs = True })) + "" -- "Print output filename and arguments to stdout." + + , Option "" ["ignore-args"] + (NoArg + (\opt -> return opt { optIgnoreArgs = True })) + "" -- "Ignore command-line arguments." + + , Option "v" ["version"] + (NoArg + (\_ -> do + prg <- getProgName + hPutStrLn stderr (prg ++ " " ++ pandocVersion ++ + copyrightMessage) + exitWith $ ExitFailure 4)) + "" -- "Print version" + + , Option "h" ["help"] + (NoArg + (\_ -> do + prg <- getProgName + hPutStr stderr (usageMessage prg options) + exitWith $ ExitFailure 2)) + "" -- "Show help" + ] + +-- Returns usage message +usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String +usageMessage programName options = usageInfo + (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ + (joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++ + (joinWithSep ", " $ map fst writers) ++ "\nOptions:") + options + +-- Determine default reader based on source file extensions +defaultReaderName :: [String] -> String +defaultReaderName [] = "markdown" +defaultReaderName (x:xs) = + let x' = map toLower x in + case (matchRegex (mkRegex ".*\\.(.*)") x') of + Nothing -> defaultReaderName xs -- no extension + Just ["xhtml"] -> "html" + Just ["html"] -> "html" + Just ["htm"] -> "html" + Just ["tex"] -> "latex" + Just ["latex"] -> "latex" + Just ["ltx"] -> "latex" + Just ["rst"] -> "rst" + Just ["native"] -> "native" + Just _ -> "markdown" + +-- Determine default writer based on output file extension +defaultWriterName :: String -> String +defaultWriterName "-" = "html" -- no output file +defaultWriterName x = + let x' = map toLower x in + case (matchRegex (mkRegex ".*\\.(.*)") x') of + Nothing -> "markdown" -- no extension + Just [""] -> "markdown" -- empty extension + Just ["tex"] -> "latex" + Just ["latex"] -> "latex" + Just ["ltx"] -> "latex" + Just ["context"] -> "context" + Just ["ctx"] -> "context" + Just ["rtf"] -> "rtf" + Just ["rst"] -> "rst" + Just ["s5"] -> "s5" + Just ["native"] -> "native" + Just ["txt"] -> "markdown" + Just ["text"] -> "markdown" + Just ["md"] -> "markdown" + Just ["markdown"] -> "markdown" + Just ["db"] -> "docbook" + Just ["xml"] -> "docbook" + Just ["sgml"] -> "docbook" + Just [[x]] | x `elem` ['1'..'9'] -> "man" + Just _ -> "html" + +main = do + + rawArgs <- getArgs + prg <- getProgName + let compatMode = (prg == "hsmarkdown") + + let (actions, args, errors) = if compatMode + then ([], rawArgs, []) + else getOpt Permute options rawArgs + + if (not (null errors)) + then do + name <- getProgName + mapM (\e -> hPutStrLn stderr e) errors + hPutStr stderr (usageMessage name options) + exitWith $ ExitFailure 2 + else + return () + + let defaultOpts' = if compatMode + then defaultOpts { optReader = "markdown" + , optWriter = "html" + , optStrict = True } + else defaultOpts + + -- thread option data structure through all supplied option actions + opts <- foldl (>>=) (return defaultOpts') actions + + let Opt { optPreserveTabs = preserveTabs + , optTabStop = tabStop + , optStandalone = standalone + , optReader = readerName + , optWriter = writerName + , optParseRaw = parseRaw + , optCSS = css + , optTableOfContents = toc + , optIncludeInHeader = includeHeader + , optIncludeBeforeBody = includeBefore + , optIncludeAfterBody = includeAfter + , optCustomHeader = customHeader + , optTitlePrefix = titlePrefix + , optOutputFile = outputFile + , optNumberSections = numberSections + , optIncremental = incremental + , optSmart = smart + , optUseASCIIMathML = useAsciiMathML + , optASCIIMathMLURL = asciiMathMLURL + , optDumpArgs = dumpArgs + , optIgnoreArgs = ignoreArgs + , optStrict = strict + , optReferenceLinks = referenceLinks + , optWrapText = wrap + } = opts + + if dumpArgs + then do + hPutStrLn stdout outputFile + mapM (\arg -> hPutStrLn stdout arg) args + exitWith $ ExitSuccess + else return () + + let sources = if ignoreArgs then [] else args + + -- assign reader and writer based on options and filenames + let readerName' = if null readerName + then defaultReaderName sources + else readerName + + let writerName' = if null writerName + then defaultWriterName outputFile + else writerName + + reader <- case (lookup readerName' readers) of + Just r -> return r + Nothing -> error ("Unknown reader: " ++ readerName') + + (writer, defaultHeader) <- case (lookup writerName' writers) of + Just (w,h) -> return (w, h) + Nothing -> error ("Unknown writer: " ++ writerName') + + output <- if (outputFile == "-") + then return stdout + else openFile outputFile WriteMode + + environment <- getEnvironment + let columns = case lookup "COLUMNS" environment of + Just cols -> read cols + Nothing -> stateColumns defaultParserState + + let tabFilter _ [] = "" + tabFilter _ ('\n':xs) = '\n':(tabFilter tabStop xs) + -- remove DOS line endings + tabFilter _ ('\r':'\n':xs) = '\n':(tabFilter tabStop xs) + tabFilter _ ('\r':xs) = '\n':(tabFilter tabStop xs) + tabFilter spsToNextStop ('\t':xs) = + if preserveTabs + then '\t':(tabFilter tabStop xs) + else replicate spsToNextStop ' ' ++ tabFilter tabStop xs + tabFilter 1 (x:xs) = + x:(tabFilter tabStop xs) + tabFilter spsToNextStop (x:xs) = + x:(tabFilter (spsToNextStop - 1) xs) + + let startParserState = + defaultParserState { stateParseRaw = parseRaw, + stateTabStop = tabStop, + stateStandalone = standalone && (not strict), + stateSmart = smart || writerName' `elem` + ["latex", "context"], + stateColumns = columns, + stateStrict = strict } + let csslink = if (css == "") + then "" + else "<link rel=\"stylesheet\" href=\"" ++ css ++ + "\" type=\"text/css\" media=\"all\" />\n" + let header = (if (customHeader == "DEFAULT") + then defaultHeader + else customHeader) ++ csslink ++ includeHeader + let writerOptions = WriterOptions { writerStandalone = standalone && + (not strict), + writerHeader = header, + writerTitlePrefix = titlePrefix, + writerTabStop = tabStop, + writerTableOfContents = toc && + (not strict) && + writerName/="s5", + writerUseASCIIMathML = useAsciiMathML, + writerASCIIMathMLURL = asciiMathMLURL, + writerS5 = (writerName=="s5"), + writerIgnoreNotes = False, + writerIncremental = incremental, + writerNumberSections = numberSections, + writerIncludeBefore = includeBefore, + writerIncludeAfter = includeAfter, + writerStrictMarkdown = strict, + writerReferenceLinks = referenceLinks, + writerWrapText = wrap } + + (readSources sources) >>= (hPutStrLn output . toUTF8 . + (writer writerOptions) . + (reader startParserState) . tabFilter tabStop . + fromUTF8 . (joinWithSep "\n")) >> + hClose output + + where + readSources [] = mapM readSource ["-"] + readSources sources = mapM readSource sources + readSource "-" = getContents + readSource source = readFile source |