aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Main.hs681
-rw-r--r--src/Text/Pandoc.hs114
-rw-r--r--src/Text/Pandoc/Biblio.hs66
-rw-r--r--src/Text/Pandoc/Blocks.hs146
-rw-r--r--src/Text/Pandoc/CharacterReferences.hs327
-rw-r--r--src/Text/Pandoc/DefaultHeaders.hs69
-rw-r--r--src/Text/Pandoc/Definition.hs150
-rw-r--r--src/Text/Pandoc/Highlighting.hs64
-rw-r--r--src/Text/Pandoc/LaTeXMathML.hs14
-rw-r--r--src/Text/Pandoc/ODT.hs88
-rw-r--r--src/Text/Pandoc/Plugins.hs69
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs675
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs774
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs1243
-rw-r--r--src/Text/Pandoc/Readers/RST.hs707
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs233
-rw-r--r--src/Text/Pandoc/Shared.hs953
-rw-r--r--src/Text/Pandoc/TH.hs65
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs302
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs262
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs557
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs331
-rw-r--r--src/Text/Pandoc/Writers/Man.hs301
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs396
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs396
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs568
-rw-r--r--src/Text/Pandoc/Writers/RST.hs346
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs291
-rw-r--r--src/Text/Pandoc/Writers/S5.hs157
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs474
-rw-r--r--src/Text/Pandoc/XML.hs88
31 files changed, 10907 insertions, 0 deletions
diff --git a/src/Main.hs b/src/Main.hs
new file mode 100644
index 000000000..e498b3c0a
--- /dev/null
+++ b/src/Main.hs
@@ -0,0 +1,681 @@
+{-# LANGUAGE CPP #-}
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Main
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley@edu>
+ Stability : alpha
+ Portability : portable
+
+Parses command-line options and calls the appropriate readers and
+writers.
+-}
+module Main where
+import Text.Pandoc
+import Text.Pandoc.ODT
+import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy, ObfuscationMethod (..) )
+import Text.Pandoc.Highlighting ( languages )
+import System.Environment ( getArgs, getProgName, getEnvironment )
+import System.Exit ( exitWith, ExitCode (..) )
+import System.FilePath
+import System.Console.GetOpt
+import Data.Maybe ( fromMaybe )
+import Data.Char ( toLower )
+import Data.List ( intercalate, isSuffixOf )
+import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
+import System.IO ( stdout, stderr )
+import System.IO.UTF8
+#ifdef _CITEPROC
+import Text.CSL
+import Text.Pandoc.Biblio
+#endif
+import Text.Pandoc.Plugins (getPlugin)
+import Control.Monad (foldM, when, unless)
+
+copyrightMessage :: String
+copyrightMessage = "\nCopyright (C) 2006-8 John MacFarlane\n" ++
+ "Web: http://johnmacfarlane.net/pandoc\n" ++
+ "This is free software; see the source for copying conditions. There is no\n" ++
+ "warranty, not even for merchantability or fitness for a particular purpose."
+
+compileInfo :: String
+compileInfo =
+#ifdef _CITEPROC
+ " +citeproc" ++
+#else
+ " -citeproc" ++
+#endif
+#ifdef _HIGHLIGHTING
+ " +highlighting" ++
+#else
+ " -highlighting" ++
+#endif
+ if null languages
+ then "\n"
+ else "\nCompiled with syntax highlighting support for:\n" ++ wrapWords 78 languages
+
+-- | Converts a list of strings into a single string with the items printed as
+-- comma separated words in lines with a maximum line length.
+wrapWords :: Int -> [String] -> String
+wrapWords c = wrap' c c where
+ wrap' _ _ [] = ""
+ wrap' cols remaining (x:xs) = if remaining == cols
+ then x ++ wrap' cols (remaining - length x) xs
+ else if (length x + 1) > remaining
+ then ",\n" ++ x ++ wrap' cols (cols - length x) xs
+ else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs
+
+-- | Association list of formats and readers.
+readers :: [(String, ParserState -> String -> Pandoc)]
+readers = [("native" , readPandoc)
+ ,("markdown" , readMarkdown)
+ ,("markdown+lhs" , readMarkdown)
+ ,("rst" , readRST)
+ ,("rst+lhs" , readRST)
+ ,("html" , readHtml)
+ ,("latex" , readLaTeX)
+ ,("latex+lhs" , readLaTeX)
+ ]
+
+-- | Reader for native Pandoc format.
+readPandoc :: ParserState -> String -> Pandoc
+readPandoc _ = read
+
+-- | Association list of formats and pairs of writers and default headers.
+writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ]
+writers = [("native" , (writeDoc, ""))
+ ,("html" , (writeHtmlString, ""))
+ ,("html+lhs" , (writeHtmlString, ""))
+ ,("s5" , (writeS5String, defaultS5Header))
+ ,("docbook" , (writeDocbook, defaultDocbookHeader))
+ ,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader))
+ ,("odt" , (writeOpenDocument, defaultOpenDocumentHeader))
+ ,("latex" , (writeLaTeX, defaultLaTeXHeader))
+ ,("latex+lhs" , (writeLaTeX, defaultLaTeXHeader))
+ ,("context" , (writeConTeXt, defaultConTeXtHeader))
+ ,("texinfo" , (writeTexinfo, ""))
+ ,("man" , (writeMan, ""))
+ ,("markdown" , (writeMarkdown, ""))
+ ,("markdown+lhs" , (writeMarkdown, ""))
+ ,("rst" , (writeRST, ""))
+ ,("rst+lhs" , (writeRST, ""))
+ ,("mediawiki" , (writeMediaWiki, ""))
+ ,("rtf" , (writeRTF, defaultRTFHeader))
+ ]
+
+isNonTextOutput :: String -> Bool
+isNonTextOutput = (`elem` ["odt"])
+
+-- | Writer for Pandoc native format.
+writeDoc :: WriterOptions -> Pandoc -> String
+writeDoc _ = prettyPandoc
+
+-- | Data structure for command line options.
+data Opt = Opt
+ { optPreserveTabs :: Bool -- ^ Convert tabs to spaces
+ , optTabStop :: Int -- ^ Number of spaces per tab
+ , optStandalone :: Bool -- ^ Include header, footer
+ , optReader :: String -- ^ Reader format
+ , optWriter :: String -- ^ Writer format
+ , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX
+ , optCSS :: [String] -- ^ CSS file to link to
+ , optTableOfContents :: Bool -- ^ Include table of contents
+ , optIncludeInHeader :: String -- ^ File to include in header
+ , optIncludeBeforeBody :: String -- ^ File to include at top of body
+ , optIncludeAfterBody :: String -- ^ File to include at end of body
+ , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT"
+ , optTitlePrefix :: String -- ^ Optional prefix for HTML title
+ , optOutputFile :: String -- ^ Name of output file
+ , optNumberSections :: Bool -- ^ Number sections in LaTeX
+ , optIncremental :: Bool -- ^ Use incremental lists in S5
+ , optSmart :: Bool -- ^ Use smart typography
+ , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
+ , optDumpArgs :: Bool -- ^ Output command-line arguments
+ , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
+ , optStrict :: Bool -- ^ Use strict markdown syntax
+ , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , optWrapText :: Bool -- ^ Wrap text
+ , optSanitizeHTML :: Bool -- ^ Sanitize HTML
+ , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
+ , optEmailObfuscation :: ObfuscationMethod
+#ifdef _CITEPROC
+ , optBiblioFile :: String
+ , optBiblioFormat :: String
+ , optCslFile :: String
+#endif
+ }
+
+-- | Defaults for command-line options.
+defaultOpts :: Opt
+defaultOpts = Opt
+ { optPreserveTabs = False
+ , optTabStop = 4
+ , optStandalone = False
+ , optReader = "" -- null for default reader
+ , optWriter = "" -- null for default writer
+ , optParseRaw = False
+ , optCSS = []
+ , optTableOfContents = False
+ , optIncludeInHeader = ""
+ , optIncludeBeforeBody = ""
+ , optIncludeAfterBody = ""
+ , optCustomHeader = "DEFAULT"
+ , optTitlePrefix = ""
+ , optOutputFile = "-" -- "-" means stdout
+ , optNumberSections = False
+ , optIncremental = False
+ , optSmart = False
+ , optHTMLMathMethod = PlainMath
+ , optDumpArgs = False
+ , optIgnoreArgs = False
+ , optStrict = False
+ , optReferenceLinks = False
+ , optWrapText = True
+ , optSanitizeHTML = False
+ , optPlugins = []
+ , optEmailObfuscation = JavascriptObfuscation
+#ifdef _CITEPROC
+ , optBiblioFile = []
+ , optBiblioFormat = []
+ , optCslFile = []
+#endif
+ }
+
+-- | A list of functions, each transforming the options data structure
+-- in response to a command-line option.
+options :: [OptDescr (Opt -> IO Opt)]
+options =
+ [ Option "fr" ["from","read"]
+ (ReqArg
+ (\arg opt -> return opt { optReader = map toLower arg })
+ "FORMAT")
+ "" -- ("(" ++ (intercalate ", " $ map fst readers) ++ ")")
+
+ , Option "tw" ["to","write"]
+ (ReqArg
+ (\arg opt -> return opt { optWriter = map toLower arg })
+ "FORMAT")
+ "" -- ("(" ++ (intercalate ", " $ map fst writers) ++ ")")
+
+ , Option "s" ["standalone"]
+ (NoArg
+ (\opt -> return opt { optStandalone = True }))
+ "" -- "Include needed header and footer on output"
+
+ , Option "o" ["output"]
+ (ReqArg
+ (\arg opt -> return opt { optOutputFile = arg })
+ "FILENAME")
+ "" -- "Name of output file"
+
+ , Option "p" ["preserve-tabs"]
+ (NoArg
+ (\opt -> return opt { optPreserveTabs = True }))
+ "" -- "Preserve tabs instead of converting to spaces"
+
+ , Option "" ["tab-stop"]
+ (ReqArg
+ (\arg opt -> return opt { optTabStop = (read arg) } )
+ "TABSTOP")
+ "" -- "Tab stop (default 4)"
+
+ , Option "" ["strict"]
+ (NoArg
+ (\opt -> return opt { optStrict = True } ))
+ "" -- "Disable markdown syntax extensions"
+
+ , Option "" ["reference-links"]
+ (NoArg
+ (\opt -> return opt { optReferenceLinks = True } ))
+ "" -- "Use reference links in parsing HTML"
+
+ , Option "R" ["parse-raw"]
+ (NoArg
+ (\opt -> return opt { optParseRaw = True }))
+ "" -- "Parse untranslatable HTML codes and LaTeX environments as raw"
+
+ , Option "S" ["smart"]
+ (NoArg
+ (\opt -> return opt { optSmart = True }))
+ "" -- "Use smart quotes, dashes, and ellipses"
+
+ , Option "m" ["latexmathml", "asciimathml"]
+ (OptArg
+ (\arg opt -> return opt { optHTMLMathMethod =
+ LaTeXMathML arg })
+ "URL")
+ "" -- "Use LaTeXMathML script in html output"
+
+ , Option "" ["mimetex"]
+ (OptArg
+ (\arg opt -> return opt { optHTMLMathMethod = MimeTeX
+ (fromMaybe "/cgi-bin/mimetex.cgi" arg)})
+ "URL")
+ "" -- "Use mimetex for HTML math"
+
+ , Option "" ["jsmath"]
+ (OptArg
+ (\arg opt -> return opt { optHTMLMathMethod = JsMath arg})
+ "URL")
+ "" -- "Use jsMath for HTML math"
+
+ , Option "" ["gladtex"]
+ (NoArg
+ (\opt -> return opt { optHTMLMathMethod = GladTeX }))
+ "" -- "Use gladtex for HTML math"
+
+ , Option "i" ["incremental"]
+ (NoArg
+ (\opt -> return opt { optIncremental = True }))
+ "" -- "Make list items display incrementally in S5"
+
+ , Option "N" ["number-sections"]
+ (NoArg
+ (\opt -> return opt { optNumberSections = True }))
+ "" -- "Number sections in LaTeX"
+
+ , Option "" ["no-wrap"]
+ (NoArg
+ (\opt -> return opt { optWrapText = False }))
+ "" -- "Do not wrap text in output"
+
+ , Option "" ["sanitize-html"]
+ (NoArg
+ (\opt -> return opt { optSanitizeHTML = True }))
+ "" -- "Sanitize HTML"
+
+ , Option "" ["email-obfuscation"]
+ (ReqArg
+ (\arg opt -> do
+ method <- case arg of
+ "references" -> return ReferenceObfuscation
+ "javascript" -> return JavascriptObfuscation
+ "none" -> return NoObfuscation
+ _ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
+ exitWith (ExitFailure 6)
+ return opt { optEmailObfuscation = method })
+ "none|javascript|references")
+ "" -- "Method for obfuscating email in HTML"
+
+ , Option "" ["toc", "table-of-contents"]
+ (NoArg
+ (\opt -> return opt { optTableOfContents = True }))
+ "" -- "Include table of contents"
+
+ , Option "c" ["css"]
+ (ReqArg
+ (\arg opt -> do
+ let old = optCSS opt
+ return opt { optCSS = old ++ [arg],
+ optStandalone = True })
+ "CSS")
+ "" -- "Link to CSS style sheet"
+
+ , Option "H" ["include-in-header"]
+ (ReqArg
+ (\arg opt -> do
+ let old = optIncludeInHeader opt
+ text <- readFile arg
+ return opt { optIncludeInHeader = old ++ text,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "File to include at end of header (implies -s)"
+
+ , Option "B" ["include-before-body"]
+ (ReqArg
+ (\arg opt -> do
+ let old = optIncludeBeforeBody opt
+ text <- readFile arg
+ return opt { optIncludeBeforeBody = old ++ text })
+ "FILENAME")
+ "" -- "File to include before document body"
+
+ , Option "A" ["include-after-body"]
+ (ReqArg
+ (\arg opt -> do
+ let old = optIncludeAfterBody opt
+ text <- readFile arg
+ return opt { optIncludeAfterBody = old ++ text })
+ "FILENAME")
+ "" -- "File to include after document body"
+
+ , Option "C" ["custom-header"]
+ (ReqArg
+ (\arg opt -> do
+ text <- readFile arg
+ return opt { optCustomHeader = text,
+ optStandalone = True })
+ "FILENAME")
+ "" -- "File to use for custom header (implies -s)"
+
+ , Option "P" ["plugins"]
+ (ReqArg
+ (\arg opt -> do
+ let pluginModules = splitBy ',' arg
+ plugins <- mapM getPlugin pluginModules
+ return opt { optPlugins = plugins })
+ "MODULE[,MODULE...]")
+ "" -- "Haskell modules"
+
+ , Option "T" ["title-prefix"]
+ (ReqArg
+ (\arg opt -> return opt { optTitlePrefix = arg,
+ optStandalone = True })
+ "STRING")
+ "" -- "String to prefix to HTML window title"
+
+ , Option "D" ["print-default-header"]
+ (ReqArg
+ (\arg _ -> do
+ let header = case (lookup arg writers) of
+ Just (_, h) -> h
+ Nothing -> error ("Unknown reader: " ++ arg)
+ hPutStr stdout header
+ exitWith ExitSuccess)
+ "FORMAT")
+ "" -- "Print default header for FORMAT"
+#ifdef _CITEPROC
+ , Option "" ["biblio"]
+ (ReqArg
+ (\arg opt -> return opt { optBiblioFile = arg} )
+ "FILENAME")
+ ""
+ , Option "" ["biblio-format"]
+ (ReqArg
+ (\arg opt -> return opt { optBiblioFormat = arg} )
+ "STRING")
+ ""
+ , Option "" ["csl"]
+ (ReqArg
+ (\arg opt -> return opt { optCslFile = arg} )
+ "FILENAME")
+ ""
+#endif
+ , Option "" ["dump-args"]
+ (NoArg
+ (\opt -> return opt { optDumpArgs = True }))
+ "" -- "Print output filename and arguments to stdout."
+
+ , Option "" ["ignore-args"]
+ (NoArg
+ (\opt -> return opt { optIgnoreArgs = True }))
+ "" -- "Ignore command-line arguments."
+
+ , Option "v" ["version"]
+ (NoArg
+ (\_ -> do
+ prg <- getProgName
+ hPutStrLn stderr (prg ++ " " ++ pandocVersion ++ compileInfo ++
+ copyrightMessage)
+ exitWith $ ExitFailure 4))
+ "" -- "Print version"
+
+ , Option "h" ["help"]
+ (NoArg
+ (\_ -> do
+ prg <- getProgName
+ hPutStr stderr (usageMessage prg options)
+ exitWith $ ExitFailure 2))
+ "" -- "Show help"
+ ]
+
+-- Returns usage message
+usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
+usageMessage programName = usageInfo
+ (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
+ (intercalate ", " $ map fst readers) ++ "\nOutput formats: " ++
+ (intercalate ", " $ map fst writers) ++ "\nOptions:")
+
+-- Determine default reader based on source file extensions
+defaultReaderName :: [FilePath] -> String
+defaultReaderName [] = "markdown"
+defaultReaderName (x:xs) =
+ case takeExtension (map toLower x) of
+ ".xhtml" -> "html"
+ ".html" -> "html"
+ ".htm" -> "html"
+ ".tex" -> "latex"
+ ".latex" -> "latex"
+ ".ltx" -> "latex"
+ ".rst" -> "rst"
+ ".lhs" -> "markdown+lhs"
+ ".native" -> "native"
+ _ -> defaultReaderName xs
+
+-- Returns True if extension of first source is .lhs
+lhsExtension :: [FilePath] -> Bool
+lhsExtension (x:_) = takeExtension x == ".lhs"
+lhsExtension _ = False
+
+-- Determine default writer based on output file extension
+defaultWriterName :: FilePath -> String
+defaultWriterName "-" = "html" -- no output file
+defaultWriterName x =
+ case takeExtension (map toLower x) of
+ "" -> "markdown" -- empty extension
+ ".tex" -> "latex"
+ ".latex" -> "latex"
+ ".ltx" -> "latex"
+ ".context" -> "context"
+ ".ctx" -> "context"
+ ".rtf" -> "rtf"
+ ".rst" -> "rst"
+ ".s5" -> "s5"
+ ".native" -> "native"
+ ".txt" -> "markdown"
+ ".text" -> "markdown"
+ ".md" -> "markdown"
+ ".markdown" -> "markdown"
+ ".lhs" -> "markdown+lhs"
+ ".texi" -> "texinfo"
+ ".texinfo" -> "texinfo"
+ ".db" -> "docbook"
+ ".odt" -> "odt"
+ ['.',y] | y `elem` ['1'..'9'] -> "man"
+ _ -> "html"
+
+main :: IO ()
+main = do
+
+ rawArgs <- getArgs
+ prg <- getProgName
+ let compatMode = (prg == "hsmarkdown")
+
+ let (actions, args, errors) = if compatMode
+ then ([], rawArgs, [])
+ else getOpt Permute options rawArgs
+
+ unless (null errors) $
+ do name <- getProgName
+ mapM_ (\e -> hPutStrLn stderr e) errors
+ hPutStr stderr (usageMessage name options)
+ exitWith $ ExitFailure 2
+
+ let defaultOpts' = if compatMode
+ then defaultOpts { optReader = "markdown"
+ , optWriter = "html"
+ , optStrict = True }
+ else defaultOpts
+
+ -- thread option data structure through all supplied option actions
+ opts <- foldl (>>=) (return defaultOpts') actions
+
+ let Opt { optPreserveTabs = preserveTabs
+ , optTabStop = tabStop
+ , optStandalone = standalone
+ , optReader = readerName
+ , optWriter = writerName
+ , optParseRaw = parseRaw
+ , optCSS = css
+ , optTableOfContents = toc
+ , optIncludeInHeader = includeHeader
+ , optIncludeBeforeBody = includeBefore
+ , optIncludeAfterBody = includeAfter
+ , optCustomHeader = customHeader
+ , optTitlePrefix = titlePrefix
+ , optOutputFile = outputFile
+ , optNumberSections = numberSections
+ , optIncremental = incremental
+ , optSmart = smart
+ , optHTMLMathMethod = mathMethod
+ , optDumpArgs = dumpArgs
+ , optIgnoreArgs = ignoreArgs
+ , optStrict = strict
+ , optReferenceLinks = referenceLinks
+ , optWrapText = wrap
+ , optSanitizeHTML = sanitize
+ , optPlugins = plugins
+ , optEmailObfuscation = obfuscationMethod
+#ifdef _CITEPROC
+ , optBiblioFile = biblioFile
+ , optBiblioFormat = biblioFormat
+ , optCslFile = cslFile
+#endif
+ } = opts
+
+ when dumpArgs $
+ do hPutStrLn stdout outputFile
+ mapM_ (\arg -> hPutStrLn stdout arg) args
+ exitWith ExitSuccess
+
+ let sources = if ignoreArgs then [] else args
+
+ -- assign reader and writer based on options and filenames
+ let readerName' = if null readerName
+ then defaultReaderName sources
+ else readerName
+
+ let writerName' = if null writerName
+ then defaultWriterName outputFile
+ else writerName
+
+ reader <- case (lookup readerName' readers) of
+ Just r -> return r
+ Nothing -> error ("Unknown reader: " ++ readerName')
+
+ (writer, defaultHeader) <- case (lookup writerName' writers) of
+ Just (w,h) -> return (w, h)
+ Nothing -> error ("Unknown writer: " ++ writerName')
+
+ environment <- getEnvironment
+ let columns = case lookup "COLUMNS" environment of
+ Just cols -> read cols
+ Nothing -> stateColumns defaultParserState
+
+ let tabFilter _ [] = ""
+ tabFilter _ ('\n':xs) = '\n' : tabFilter tabStop xs
+ -- remove DOS line endings
+ tabFilter _ ('\r':'\n':xs) = '\n' : tabFilter tabStop xs
+ tabFilter _ ('\r':xs) = '\n' : tabFilter tabStop xs
+ tabFilter spsToNextStop ('\t':xs) =
+ if preserveTabs
+ then '\t' : tabFilter tabStop xs
+ else replicate spsToNextStop ' ' ++ tabFilter tabStop xs
+ tabFilter 1 (x:xs) =
+ x : tabFilter tabStop xs
+ tabFilter spsToNextStop (x:xs) =
+ x : tabFilter (spsToNextStop - 1) xs
+
+ let standalone' = (standalone && not strict) || isNonTextOutput writerName'
+
+#ifdef _CITEPROC
+ refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat
+#endif
+
+ let startParserState =
+ defaultParserState { stateParseRaw = parseRaw,
+ stateTabStop = tabStop,
+ stateSanitizeHTML = sanitize,
+ stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' ||
+ lhsExtension sources,
+ stateStandalone = standalone',
+#ifdef _CITEPROC
+ stateCitations = map citeKey refs,
+#endif
+ stateSmart = smart || writerName' `elem`
+ ["latex", "context"],
+ stateColumns = columns,
+ stateStrict = strict }
+ let csslink = if null css
+ then ""
+ else concatMap
+ (\f -> "<link rel=\"stylesheet\" href=\"" ++
+ f ++ "\" type=\"text/css\" media=\"all\" />\n")
+ css
+ let header = (if customHeader == "DEFAULT"
+ then defaultHeader
+ else customHeader) ++ csslink ++ includeHeader
+ let writerOptions = WriterOptions { writerStandalone = standalone',
+ writerHeader = header,
+ writerTitlePrefix = titlePrefix,
+ writerTabStop = tabStop,
+ writerTableOfContents = toc &&
+ not strict &&
+ writerName' /= "s5",
+ writerHTMLMathMethod = mathMethod,
+ writerS5 = (writerName' == "s5"),
+ writerIgnoreNotes = False,
+ writerIncremental = incremental,
+ writerNumberSections = numberSections,
+ writerIncludeBefore = includeBefore,
+ writerIncludeAfter = includeAfter,
+ writerStrictMarkdown = strict,
+ writerReferenceLinks = referenceLinks,
+ writerWrapText = wrap,
+ writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' ||
+ lhsExtension [outputFile],
+ writerEmailObfuscation = if strict
+ then ReferenceObfuscation
+ else obfuscationMethod }
+
+ when (isNonTextOutput writerName' && outputFile == "-") $
+ do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
+ "Specify an output file using the -o option.")
+ exitWith $ ExitFailure 5
+
+ let sourceDirRelative = if null sources
+ then ""
+ else takeDirectory (head sources)
+
+ let readSources [] = mapM readSource ["-"]
+ readSources srcs = mapM readSource srcs
+ readSource "-" = getContents
+ readSource src = readFile src
+
+ doc <- fmap (reader startParserState . tabFilter tabStop . intercalate "\n") (readSources sources)
+
+ doc' <- do
+#ifdef _CITEPROC
+ processBiblio cslFile refs doc
+#else
+ return doc
+#endif
+
+ doc'' <- foldM (flip ($)) doc' plugins
+
+ let writerOutput = writer writerOptions doc'' ++ "\n"
+
+ case writerName' of
+ "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative writerOutput
+ _ -> if outputFile == "-"
+ then putStr writerOutput
+ else writeFile outputFile writerOutput
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
new file mode 100644
index 000000000..e97103f97
--- /dev/null
+++ b/src/Text/Pandoc.hs
@@ -0,0 +1,114 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This helper module exports the main writers, readers, and data
+structure definitions from the Pandoc libraries.
+
+A typical application will chain together a reader and a writer
+to convert strings from one format to another. For example, the
+following simple program will act as a filter converting markdown
+fragments to reStructuredText, using reference-style links instead of
+inline links:
+
+> module Main where
+> import Text.Pandoc
+> import qualified System.IO.UTF8 as U
+>
+> markdownToRST :: String -> String
+> markdownToRST =
+> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
+> readMarkdown defaultParserState
+>
+> main = U.getContents >>= U.putStrLn . markdownToRST
+
+-}
+
+module Text.Pandoc
+ (
+ -- * Definitions
+ module Text.Pandoc.Definition
+ -- * Readers: converting /to/ Pandoc format
+ , readMarkdown
+ , readRST
+ , readLaTeX
+ , readHtml
+ -- * Parser state used in readers
+ , ParserState (..)
+ , defaultParserState
+ , ParserContext (..)
+ , QuoteContext (..)
+ , KeyTable
+ , NoteTable
+ , HeaderType (..)
+ -- * Writers: converting /from/ Pandoc format
+ , writeMarkdown
+ , writeRST
+ , writeLaTeX
+ , writeConTeXt
+ , writeTexinfo
+ , writeHtml
+ , writeHtmlString
+ , writeS5
+ , writeS5String
+ , writeDocbook
+ , writeOpenDocument
+ , writeMan
+ , writeMediaWiki
+ , writeRTF
+ , prettyPandoc
+ -- * Writer options used in writers
+ , WriterOptions (..)
+ , defaultWriterOptions
+ -- * Default headers for various output formats
+ , module Text.Pandoc.DefaultHeaders
+ -- * Version
+ , pandocVersion
+ ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Readers.Markdown
+import Text.Pandoc.Readers.RST
+import Text.Pandoc.Readers.LaTeX
+import Text.Pandoc.Readers.HTML
+import Text.Pandoc.Writers.Markdown
+import Text.Pandoc.Writers.RST
+import Text.Pandoc.Writers.LaTeX
+import Text.Pandoc.Writers.ConTeXt
+import Text.Pandoc.Writers.Texinfo
+import Text.Pandoc.Writers.HTML
+import Text.Pandoc.Writers.S5
+import Text.Pandoc.Writers.Docbook
+import Text.Pandoc.Writers.OpenDocument
+import Text.Pandoc.Writers.Man
+import Text.Pandoc.Writers.RTF
+import Text.Pandoc.Writers.MediaWiki
+import Text.Pandoc.DefaultHeaders
+import Text.Pandoc.Shared
+
+-- | Version number of pandoc library.
+pandocVersion :: String
+pandocVersion = "1.1"
diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
new file mode 100644
index 000000000..1d93f19c1
--- /dev/null
+++ b/src/Text/Pandoc/Biblio.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE PatternGuards #-}
+{-
+Copyright (C) 2008 Andrea Rossato <andrea.rossato@ing.unitn.it>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Biblio
+ Copyright : Copyright (C) 2008 Andrea Rossato
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it>
+ Stability : alpha
+ Portability : portable
+-}
+
+module Text.Pandoc.Biblio ( processBiblio ) where
+
+import Control.Monad ( when )
+import Data.List
+import Text.CSL
+import Text.Pandoc.Definition
+
+-- | Process a 'Pandoc' document by adding citations formatted
+-- according to a CSL style, using 'citeproc' from citeproc-hs.
+processBiblio :: String -> [Reference] -> Pandoc -> IO Pandoc
+processBiblio cf r p
+ = if null r then return p
+ else do
+ when (null cf) $ error "Missing the needed citation style file"
+ csl <- readCSLFile cf
+ let groups = queryPandoc getCite p
+ result = citeproc csl r groups
+ cits_map = zip groups (citations result)
+ biblioList = map (read . renderPandoc' csl) (bibliography result)
+ Pandoc m b = processPandoc (processCite csl cits_map) p
+ return $ Pandoc m $ b ++ biblioList
+
+-- | Substitute 'Cite' elements with formatted citations.
+processCite :: Style -> [([Target],[FormattedOutput])] -> Inline -> Inline
+processCite s cs il
+ | Cite t _ <- il = Cite t (process t)
+ | otherwise = il
+ where
+ process t = case elemIndex t (map fst cs) of
+ Just i -> read . renderPandoc s $ snd (cs !! i)
+ Nothing -> [Str ("Error processing " ++ show t)]
+
+-- | Retrieve all citations from a 'Pandoc' docuument. To be used with
+-- 'queryPandoc'.
+getCite :: Inline -> [[(String,String)]]
+getCite i | Cite t _ <- i = [t]
+ | otherwise = []
diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs
new file mode 100644
index 000000000..122931773
--- /dev/null
+++ b/src/Text/Pandoc/Blocks.hs
@@ -0,0 +1,146 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Blocks
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for the manipulation of fixed-width blocks of text.
+These are used in the construction of plain-text tables.
+-}
+
+module Text.Pandoc.Blocks
+ (
+ TextBlock (..),
+ docToBlock,
+ blockToDoc,
+ widthOfBlock,
+ heightOfBlock,
+ hcatBlocks,
+ hsepBlocks,
+ centerAlignBlock,
+ leftAlignBlock,
+ rightAlignBlock
+ )
+where
+import Text.PrettyPrint
+import Data.List ( intersperse )
+
+-- | A fixed-width block of text. Parameters are width of block,
+-- height of block, and list of lines.
+data TextBlock = TextBlock Int Int [String]
+instance Show TextBlock where
+ show x = show $ blockToDoc x
+
+-- | Break lines in a list of lines so that none are greater than
+-- a given width.
+breakLines :: Int -- ^ Maximum length of lines.
+ -> [String] -- ^ List of lines.
+ -> [String]
+breakLines _ [] = []
+breakLines width (l:ls) =
+ if length l > width
+ then (take width l):(breakLines width ((drop width l):ls))
+ else l:(breakLines width ls)
+
+-- | Convert a @Doc@ element into a @TextBlock@ with a specified width.
+docToBlock :: Int -- ^ Width of text block.
+ -> Doc -- ^ @Doc@ to convert.
+ -> TextBlock
+docToBlock width doc =
+ let rendered = renderStyle (style {lineLength = width,
+ ribbonsPerLine = 1}) doc
+ lns = breakLines width $ lines rendered
+ in TextBlock width (length lns) lns
+
+-- | Convert a @TextBlock@ to a @Doc@ element.
+blockToDoc :: TextBlock -> Doc
+blockToDoc (TextBlock _ _ lns) =
+ if null lns
+ then empty
+ else vcat $ map text lns
+
+-- | Returns width of a @TextBlock@ (number of columns).
+widthOfBlock :: TextBlock -> Int
+widthOfBlock (TextBlock width _ _) = width
+
+-- | Returns height of a @TextBlock@ (number of rows).
+heightOfBlock :: TextBlock -> Int
+heightOfBlock (TextBlock _ height _) = height
+
+-- | Pads a string out to a given width using spaces.
+hPad :: Int -- ^ Desired width.
+ -> String -- ^ String to pad.
+ -> String
+hPad width line =
+ let linelen = length line
+ in if linelen <= width
+ then line ++ replicate (width - linelen) ' '
+ else take width line
+
+-- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in
+-- which they appear side by side.
+hcatBlocks :: [TextBlock] -> TextBlock
+hcatBlocks [] = TextBlock 0 0 []
+hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd.
+hcatBlocks ((TextBlock width1 height1 lns1):xs) =
+ let (TextBlock width2 height2 lns2) = hcatBlocks xs
+ height = max height1 height2
+ width = width1 + width2
+ lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) ""
+ lns2' = lns2 ++ replicate (height - height2) ""
+ lns = zipWith (++) lns1' lns2'
+ in TextBlock width height lns
+
+-- | Like @hcatBlocks@, but inserts space between the @TextBlock@s.
+hsepBlocks :: [TextBlock] -> TextBlock
+hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "]))
+
+isWhitespace :: Char -> Bool
+isWhitespace x = x `elem` " \t"
+
+-- | Left-aligns the contents of a @TextBlock@ within the block.
+leftAlignBlock :: TextBlock -> TextBlock
+leftAlignBlock (TextBlock width height lns) =
+ TextBlock width height $ map (dropWhile isWhitespace) lns
+
+-- | Right-aligns the contents of a @TextBlock@ within the block.
+rightAlignBlock :: TextBlock -> TextBlock
+rightAlignBlock (TextBlock width height lns) =
+ let rightAlignLine ln =
+ let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln
+ in reverse (rest ++ spaces)
+ in TextBlock width height $ map rightAlignLine lns
+
+-- | Centers the contents of a @TextBlock@ within the block.
+centerAlignBlock :: TextBlock -> TextBlock
+centerAlignBlock (TextBlock width height lns) =
+ let centerAlignLine ln =
+ let ln' = hPad width ln
+ (startSpaces, rest) = span isWhitespace ln'
+ endSpaces = takeWhile isWhitespace (reverse ln')
+ numSpaces = length (startSpaces ++ endSpaces)
+ startSpaces' = replicate (quot numSpaces 2) ' '
+ in startSpaces' ++ rest
+ in TextBlock width height $ map centerAlignLine lns
+
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
new file mode 100644
index 000000000..b0f4f6019
--- /dev/null
+++ b/src/Text/Pandoc/CharacterReferences.hs
@@ -0,0 +1,327 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.CharacterReferences
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for parsing character references.
+-}
+module Text.Pandoc.CharacterReferences (
+ characterReference,
+ decodeCharacterReferences,
+ ) where
+import Data.Char ( chr )
+import Text.ParserCombinators.Parsec
+import qualified Data.Map as Map
+
+-- | Parse character entity.
+characterReference :: GenParser Char st Char
+characterReference = try $ do
+ char '&'
+ character <- numRef <|> entity
+ char ';'
+ return character
+
+numRef :: GenParser Char st Char
+numRef = do
+ char '#'
+ num <- hexNum <|> decNum
+ return $ chr $ num
+
+hexNum :: GenParser Char st Int
+hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++)
+
+decNum :: GenParser Char st Int
+decNum = many1 digit >>= return . read
+
+entity :: GenParser Char st Char
+entity = do
+ body <- many1 alphaNum
+ return $ Map.findWithDefault '?' body entityTable
+
+-- | Convert entities in a string to characters.
+decodeCharacterReferences :: String -> String
+decodeCharacterReferences str =
+ case parse (many (characterReference <|> anyChar)) str str of
+ Left err -> error $ "\nError: " ++ show err
+ Right result -> result
+
+entityTable :: Map.Map String Char
+entityTable = Map.fromList entityTableList
+
+entityTableList :: [(String, Char)]
+entityTableList = [
+ ("quot", chr 34),
+ ("amp", chr 38),
+ ("lt", chr 60),
+ ("gt", chr 62),
+ ("nbsp", chr 160),
+ ("iexcl", chr 161),
+ ("cent", chr 162),
+ ("pound", chr 163),
+ ("curren", chr 164),
+ ("yen", chr 165),
+ ("brvbar", chr 166),
+ ("sect", chr 167),
+ ("uml", chr 168),
+ ("copy", chr 169),
+ ("ordf", chr 170),
+ ("laquo", chr 171),
+ ("not", chr 172),
+ ("shy", chr 173),
+ ("reg", chr 174),
+ ("macr", chr 175),
+ ("deg", chr 176),
+ ("plusmn", chr 177),
+ ("sup2", chr 178),
+ ("sup3", chr 179),
+ ("acute", chr 180),
+ ("micro", chr 181),
+ ("para", chr 182),
+ ("middot", chr 183),
+ ("cedil", chr 184),
+ ("sup1", chr 185),
+ ("ordm", chr 186),
+ ("raquo", chr 187),
+ ("frac14", chr 188),
+ ("frac12", chr 189),
+ ("frac34", chr 190),
+ ("iquest", chr 191),
+ ("Agrave", chr 192),
+ ("Aacute", chr 193),
+ ("Acirc", chr 194),
+ ("Atilde", chr 195),
+ ("Auml", chr 196),
+ ("Aring", chr 197),
+ ("AElig", chr 198),
+ ("Ccedil", chr 199),
+ ("Egrave", chr 200),
+ ("Eacute", chr 201),
+ ("Ecirc", chr 202),
+ ("Euml", chr 203),
+ ("Igrave", chr 204),
+ ("Iacute", chr 205),
+ ("Icirc", chr 206),
+ ("Iuml", chr 207),
+ ("ETH", chr 208),
+ ("Ntilde", chr 209),
+ ("Ograve", chr 210),
+ ("Oacute", chr 211),
+ ("Ocirc", chr 212),
+ ("Otilde", chr 213),
+ ("Ouml", chr 214),
+ ("times", chr 215),
+ ("Oslash", chr 216),
+ ("Ugrave", chr 217),
+ ("Uacute", chr 218),
+ ("Ucirc", chr 219),
+ ("Uuml", chr 220),
+ ("Yacute", chr 221),
+ ("THORN", chr 222),
+ ("szlig", chr 223),
+ ("agrave", chr 224),
+ ("aacute", chr 225),
+ ("acirc", chr 226),
+ ("atilde", chr 227),
+ ("auml", chr 228),
+ ("aring", chr 229),
+ ("aelig", chr 230),
+ ("ccedil", chr 231),
+ ("egrave", chr 232),
+ ("eacute", chr 233),
+ ("ecirc", chr 234),
+ ("euml", chr 235),
+ ("igrave", chr 236),
+ ("iacute", chr 237),
+ ("icirc", chr 238),
+ ("iuml", chr 239),
+ ("eth", chr 240),
+ ("ntilde", chr 241),
+ ("ograve", chr 242),
+ ("oacute", chr 243),
+ ("ocirc", chr 244),
+ ("otilde", chr 245),
+ ("ouml", chr 246),
+ ("divide", chr 247),
+ ("oslash", chr 248),
+ ("ugrave", chr 249),
+ ("uacute", chr 250),
+ ("ucirc", chr 251),
+ ("uuml", chr 252),
+ ("yacute", chr 253),
+ ("thorn", chr 254),
+ ("yuml", chr 255),
+ ("OElig", chr 338),
+ ("oelig", chr 339),
+ ("Scaron", chr 352),
+ ("scaron", chr 353),
+ ("Yuml", chr 376),
+ ("fnof", chr 402),
+ ("circ", chr 710),
+ ("tilde", chr 732),
+ ("Alpha", chr 913),
+ ("Beta", chr 914),
+ ("Gamma", chr 915),
+ ("Delta", chr 916),
+ ("Epsilon", chr 917),
+ ("Zeta", chr 918),
+ ("Eta", chr 919),
+ ("Theta", chr 920),
+ ("Iota", chr 921),
+ ("Kappa", chr 922),
+ ("Lambda", chr 923),
+ ("Mu", chr 924),
+ ("Nu", chr 925),
+ ("Xi", chr 926),
+ ("Omicron", chr 927),
+ ("Pi", chr 928),
+ ("Rho", chr 929),
+ ("Sigma", chr 931),
+ ("Tau", chr 932),
+ ("Upsilon", chr 933),
+ ("Phi", chr 934),
+ ("Chi", chr 935),
+ ("Psi", chr 936),
+ ("Omega", chr 937),
+ ("alpha", chr 945),
+ ("beta", chr 946),
+ ("gamma", chr 947),
+ ("delta", chr 948),
+ ("epsilon", chr 949),
+ ("zeta", chr 950),
+ ("eta", chr 951),
+ ("theta", chr 952),
+ ("iota", chr 953),
+ ("kappa", chr 954),
+ ("lambda", chr 955),
+ ("mu", chr 956),
+ ("nu", chr 957),
+ ("xi", chr 958),
+ ("omicron", chr 959),
+ ("pi", chr 960),
+ ("rho", chr 961),
+ ("sigmaf", chr 962),
+ ("sigma", chr 963),
+ ("tau", chr 964),
+ ("upsilon", chr 965),
+ ("phi", chr 966),
+ ("chi", chr 967),
+ ("psi", chr 968),
+ ("omega", chr 969),
+ ("thetasym", chr 977),
+ ("upsih", chr 978),
+ ("piv", chr 982),
+ ("ensp", chr 8194),
+ ("emsp", chr 8195),
+ ("thinsp", chr 8201),
+ ("zwnj", chr 8204),
+ ("zwj", chr 8205),
+ ("lrm", chr 8206),
+ ("rlm", chr 8207),
+ ("ndash", chr 8211),
+ ("mdash", chr 8212),
+ ("lsquo", chr 8216),
+ ("rsquo", chr 8217),
+ ("sbquo", chr 8218),
+ ("ldquo", chr 8220),
+ ("rdquo", chr 8221),
+ ("bdquo", chr 8222),
+ ("dagger", chr 8224),
+ ("Dagger", chr 8225),
+ ("bull", chr 8226),
+ ("hellip", chr 8230),
+ ("permil", chr 8240),
+ ("prime", chr 8242),
+ ("Prime", chr 8243),
+ ("lsaquo", chr 8249),
+ ("rsaquo", chr 8250),
+ ("oline", chr 8254),
+ ("frasl", chr 8260),
+ ("euro", chr 8364),
+ ("image", chr 8465),
+ ("weierp", chr 8472),
+ ("real", chr 8476),
+ ("trade", chr 8482),
+ ("alefsym", chr 8501),
+ ("larr", chr 8592),
+ ("uarr", chr 8593),
+ ("rarr", chr 8594),
+ ("darr", chr 8595),
+ ("harr", chr 8596),
+ ("crarr", chr 8629),
+ ("lArr", chr 8656),
+ ("uArr", chr 8657),
+ ("rArr", chr 8658),
+ ("dArr", chr 8659),
+ ("hArr", chr 8660),
+ ("forall", chr 8704),
+ ("part", chr 8706),
+ ("exist", chr 8707),
+ ("empty", chr 8709),
+ ("nabla", chr 8711),
+ ("isin", chr 8712),
+ ("notin", chr 8713),
+ ("ni", chr 8715),
+ ("prod", chr 8719),
+ ("sum", chr 8721),
+ ("minus", chr 8722),
+ ("lowast", chr 8727),
+ ("radic", chr 8730),
+ ("prop", chr 8733),
+ ("infin", chr 8734),
+ ("ang", chr 8736),
+ ("and", chr 8743),
+ ("or", chr 8744),
+ ("cap", chr 8745),
+ ("cup", chr 8746),
+ ("int", chr 8747),
+ ("there4", chr 8756),
+ ("sim", chr 8764),
+ ("cong", chr 8773),
+ ("asymp", chr 8776),
+ ("ne", chr 8800),
+ ("equiv", chr 8801),
+ ("le", chr 8804),
+ ("ge", chr 8805),
+ ("sub", chr 8834),
+ ("sup", chr 8835),
+ ("nsub", chr 8836),
+ ("sube", chr 8838),
+ ("supe", chr 8839),
+ ("oplus", chr 8853),
+ ("otimes", chr 8855),
+ ("perp", chr 8869),
+ ("sdot", chr 8901),
+ ("lceil", chr 8968),
+ ("rceil", chr 8969),
+ ("lfloor", chr 8970),
+ ("rfloor", chr 8971),
+ ("lang", chr 9001),
+ ("rang", chr 9002),
+ ("loz", chr 9674),
+ ("spades", chr 9824),
+ ("clubs", chr 9827),
+ ("hearts", chr 9829),
+ ("diams", chr 9830)
+ ]
diff --git a/src/Text/Pandoc/DefaultHeaders.hs b/src/Text/Pandoc/DefaultHeaders.hs
new file mode 100644
index 000000000..e9c1f17e5
--- /dev/null
+++ b/src/Text/Pandoc/DefaultHeaders.hs
@@ -0,0 +1,69 @@
+{-# LANGUAGE CPP, TemplateHaskell #-}
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.DefaultHeaders
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Default headers for Pandoc writers.
+-}
+module Text.Pandoc.DefaultHeaders (
+ defaultLaTeXHeader,
+ defaultConTeXtHeader,
+ defaultDocbookHeader,
+ defaultOpenDocumentHeader,
+ defaultS5Header,
+ defaultRTFHeader
+ ) where
+import Text.Pandoc.Writers.S5
+import System.FilePath ( (</>) )
+import Text.Pandoc.TH ( contentsOf )
+
+defaultLaTeXHeader :: String
+#ifndef __HADDOCK__
+defaultLaTeXHeader = $(contentsOf $ "data" </> "headers" </> "LaTeX.header")
+#endif
+
+defaultConTeXtHeader :: String
+#ifndef __HADDOCK__
+defaultConTeXtHeader = $(contentsOf $ "data" </> "headers" </> "ConTeXt.header")
+#endif
+
+defaultDocbookHeader :: String
+#ifndef __HADDOCK__
+defaultDocbookHeader = $(contentsOf $ "data" </> "headers" </> "Docbook.header")
+#endif
+
+defaultOpenDocumentHeader :: String
+#ifndef __HADDOCK__
+defaultOpenDocumentHeader = $(contentsOf $ "data" </> "headers" </> "OpenDocument.header")
+#endif
+
+defaultS5Header :: String
+defaultS5Header = s5Meta ++ s5CSS ++ s5Javascript
+
+defaultRTFHeader :: String
+#ifndef __HADDOCK__
+defaultRTFHeader = $(contentsOf $ "data" </> "headers" </> "RTF.header")
+#endif
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
new file mode 100644
index 000000000..92ce094d4
--- /dev/null
+++ b/src/Text/Pandoc/Definition.hs
@@ -0,0 +1,150 @@
+{-# OPTIONS_GHC -fglasgow-exts #-} -- for deriving Typeable
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Definition
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definition of 'Pandoc' data structure for format-neutral representation
+of documents.
+-}
+module Text.Pandoc.Definition where
+
+import Data.Generics
+
+data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show, Typeable, Data)
+
+-- | Bibliographic information for the document: title (list of 'Inline'),
+-- authors (list of strings), date (string).
+data Meta = Meta [Inline] -- title
+ [String] -- authors
+ String -- date
+ deriving (Eq, Show, Read, Typeable, Data)
+
+-- | Alignment of a table column.
+data Alignment = AlignLeft
+ | AlignRight
+ | AlignCenter
+ | AlignDefault deriving (Eq, Show, Read, Typeable, Data)
+
+-- | List attributes.
+type ListAttributes = (Int, ListNumberStyle, ListNumberDelim)
+
+-- | Style of list numbers.
+data ListNumberStyle = DefaultStyle
+ | Decimal
+ | LowerRoman
+ | UpperRoman
+ | LowerAlpha
+ | UpperAlpha deriving (Eq, Show, Read, Typeable, Data)
+
+-- | Delimiter of list numbers.
+data ListNumberDelim = DefaultDelim
+ | Period
+ | OneParen
+ | TwoParens deriving (Eq, Show, Read, Typeable, Data)
+
+-- | Attributes: identifier, classes, key-value pairs
+type Attr = (String, [String], [(String, String)])
+
+-- | Block element.
+data Block
+ = Plain [Inline] -- ^ Plain text, not a paragraph
+ | Para [Inline] -- ^ Paragraph
+ | CodeBlock Attr String -- ^ Code block (literal) with attributes
+ | RawHtml String -- ^ Raw HTML block (literal)
+ | BlockQuote [Block] -- ^ Block quote (list of blocks)
+ | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes
+ -- and a list of items, each a list of blocks)
+ | BulletList [[Block]] -- ^ Bullet list (list of items, each
+ -- a list of blocks)
+ | DefinitionList [([Inline],[Block])] -- ^ Definition list
+ -- (list of items, each a pair of an inline list,
+ -- the term, and a block list)
+ | Header Int [Inline] -- ^ Header - level (integer) and text (inlines)
+ | HorizontalRule -- ^ Horizontal rule
+ | Table [Inline] [Alignment] [Double] [[Block]] [[[Block]]] -- ^ Table,
+ -- with caption, column alignments,
+ -- relative column widths, column headers
+ -- (each a list of blocks), and rows
+ -- (each a list of lists of blocks)
+ | Null -- ^ Nothing
+ deriving (Eq, Read, Show, Typeable, Data)
+
+-- | Type of quotation marks to use in Quoted inline.
+data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read, Typeable, Data)
+
+-- | Link target (URL, title).
+type Target = (String, String)
+
+-- | Type of math element (display or inline).
+data MathType = DisplayMath | InlineMath deriving (Show, Eq, Read, Typeable, Data)
+
+-- | Inline elements.
+data Inline
+ = Str String -- ^ Text (string)
+ | Emph [Inline] -- ^ Emphasized text (list of inlines)
+ | Strong [Inline] -- ^ Strongly emphasized text (list of inlines)
+ | Strikeout [Inline] -- ^ Strikeout text (list of inlines)
+ | Superscript [Inline] -- ^ Superscripted text (list of inlines)
+ | Subscript [Inline] -- ^ Subscripted text (list of inlines)
+ | SmallCaps [Inline] -- ^ Small caps text (list of inlines)
+ | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines)
+ | Cite [Target] [Inline] -- ^ Citation (list of inlines)
+ | Code String -- ^ Inline code (literal)
+ | Space -- ^ Inter-word space
+ | EmDash -- ^ Em dash
+ | EnDash -- ^ En dash
+ | Apostrophe -- ^ Apostrophe
+ | Ellipses -- ^ Ellipses
+ | LineBreak -- ^ Hard line break
+ | Math MathType String -- ^ TeX math (literal)
+ | TeX String -- ^ LaTeX code (literal)
+ | HtmlInline String -- ^ HTML code (literal)
+ | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
+ | Image [Inline] Target -- ^ Image: alt text (list of inlines), target
+ -- and target
+ | Note [Block] -- ^ Footnote or endnote
+ deriving (Show, Eq, Read, Typeable, Data)
+
+-- | Applies a transformation on @a@s to matching elements in a @b@.
+processWith :: (Data a, Data b) => (a -> a) -> b -> b
+processWith f = everywhere (mkT f)
+
+-- | Like 'processWith', but with monadic transformations.
+processWithM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b
+processWithM f = everywhereM (mkM f)
+
+-- | Runs a query on matching @a@ elements in a @c@.
+queryWith :: (Data a, Data c) => (a -> [b]) -> c -> [b]
+queryWith f = everything (++) ([] `mkQ` f)
+
+{-# DEPRECATED processPandoc "Use processWith instead" #-}
+processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc
+processPandoc = processWith
+
+{-# DEPRECATED queryPandoc "Use queryWith instead" #-}
+queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b]
+queryPandoc = queryWith
+
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
new file mode 100644
index 000000000..6726f1a42
--- /dev/null
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -0,0 +1,64 @@
+{-# LANGUAGE CPP #-}
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Highlighting
+ Copyright : Copyright (C) 2008 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Exports functions for syntax highlighting.
+-}
+
+module Text.Pandoc.Highlighting ( languages, highlightHtml, defaultHighlightingCss ) where
+import Text.XHtml
+import Text.Pandoc.Definition
+#ifdef _HIGHLIGHTING
+import Text.Highlighting.Kate ( languages, highlightAs, formatAsXHtml, FormatOption (..), defaultHighlightingCss )
+import Data.List (find, lookup)
+import Data.Maybe (fromMaybe)
+import Data.Char (toLower)
+
+highlightHtml :: Attr -> String -> Either String Html
+highlightHtml (_, classes, keyvals) rawCode =
+ let firstNum = read $ fromMaybe "1" $ lookup "startFrom" keyvals
+ fmtOpts = [OptNumberFrom firstNum] ++
+ case find (`elem` ["number","numberLines","number-lines"]) classes of
+ Nothing -> []
+ Just _ -> [OptNumberLines]
+ lcLanguages = map (map toLower) languages
+ in case find (\c -> (map toLower c) `elem` lcLanguages) classes of
+ Nothing -> Left "Unknown or unsupported language"
+ Just language -> case highlightAs language rawCode of
+ Left err -> Left err
+ Right hl -> Right $ formatAsXHtml fmtOpts language hl
+
+#else
+defaultHighlightingCss :: String
+defaultHighlightingCss = ""
+
+languages :: [String]
+languages = []
+
+highlightHtml :: Attr -> String -> Either String Html
+highlightHtml _ _ = Left "Pandoc was not compiled with support for highlighting"
+#endif
diff --git a/src/Text/Pandoc/LaTeXMathML.hs b/src/Text/Pandoc/LaTeXMathML.hs
new file mode 100644
index 000000000..1eb3c23cc
--- /dev/null
+++ b/src/Text/Pandoc/LaTeXMathML.hs
@@ -0,0 +1,14 @@
+{-# LANGUAGE CPP, TemplateHaskell #-}
+-- | Definitions for use of LaTeXMathML in HTML.
+-- (See <http://math.etsu.edu/LaTeXMathML/>)
+module Text.Pandoc.LaTeXMathML ( latexMathMLScript ) where
+import Text.Pandoc.TH ( contentsOf )
+import System.FilePath ( (</>) )
+
+-- | String containing LaTeXMathML javascript.
+latexMathMLScript :: String
+#ifndef __HADDOCK__
+latexMathMLScript = "<script type=\"text/javascript\">\n" ++
+ $(contentsOf $ "data" </> "LaTeXMathML.js.comment") ++
+ $(contentsOf $ "data" </> "LaTeXMathML.js.packed") ++ "</script>\n"
+#endif
diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs
new file mode 100644
index 000000000..f9e4dd8f1
--- /dev/null
+++ b/src/Text/Pandoc/ODT.hs
@@ -0,0 +1,88 @@
+{-# LANGUAGE TemplateHaskell #-}
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.ODT
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for producing an ODT file from OpenDocument XML.
+-}
+module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
+import Text.Pandoc.TH ( makeZip )
+import Data.List ( find )
+import System.FilePath ( (</>), takeFileName )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
+import Prelude hiding ( writeFile, readFile )
+import Codec.Archive.Zip
+import Control.Applicative ( (<$>) )
+import Text.ParserCombinators.Parsec
+import System.Time
+
+-- | Produce an ODT file from OpenDocument XML.
+saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced.
+ -> FilePath -- ^ Relative directory of source file.
+ -> String -- ^ OpenDocument XML contents.
+ -> IO ()
+saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do
+ let refArchive = read $(makeZip $ "data" </> "odt-styles")
+ -- handle pictures
+ let (newContents, pics) =
+ case runParser pPictures [] "OpenDocument XML contents" xml of
+ Left err -> error $ show err
+ Right x -> x
+ picEntries <- mapM (makePictureEntry sourceDirRelative) pics
+ (TOD epochTime _) <- getClockTime
+ let contentEntry = toEntry "content.xml" epochTime $ fromString newContents
+ let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
+ B.writeFile destinationODTPath $ fromArchive archive
+
+makePictureEntry :: FilePath -- ^ Relative directory of source file
+ -> (FilePath, String) -- ^ Path and new path of picture
+ -> IO Entry
+makePictureEntry sourceDirRelative (path, newPath) = do
+ entry <- readEntry [] $ sourceDirRelative </> path
+ return (entry { eRelativePath = newPath })
+
+pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)])
+pPictures = do
+ contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<")
+ pics <- getState
+ return (contents, pics)
+
+pPicture :: GenParser Char [(FilePath, String)] [Char]
+pPicture = try $ do
+ string "<draw:image xlink:href=\""
+ path <- manyTill anyChar (char '"')
+ let filename = takeFileName path
+ pics <- getState
+ newPath <- case find (\(o, _) -> o == path) pics of
+ Just (_, new) -> return new
+ Nothing -> do
+ -- get a unique name
+ let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics
+ let new = "Pictures/" ++ replicate dups '0' ++ filename
+ updateState ((path, new) :)
+ return new
+ return $ "<draw:image xlink:href=\"" ++ newPath ++ "\""
diff --git a/src/Text/Pandoc/Plugins.hs b/src/Text/Pandoc/Plugins.hs
new file mode 100644
index 000000000..cb8ad1e11
--- /dev/null
+++ b/src/Text/Pandoc/Plugins.hs
@@ -0,0 +1,69 @@
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Pluigns
+ Copyright : Copyright (C) 2008 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Support for plugins.
+-}
+
+module Text.Pandoc.Plugins (getPlugin)
+where
+
+import Language.Haskell.Interpreter
+import Text.Pandoc
+import Control.Monad (unless, liftM)
+import Control.Monad.Error (throwError)
+import Data.List (isInfixOf)
+
+-- | Returns the function named @transform@ in the specified
+-- module. The module may be identified either by module name
+-- or by path name. The @transform@ function should have type
+-- @a -> a@ or @a -> IO a@, where @a@ is an instance of 'Data':
+-- for example, @Pandoc -> Pandoc@, @Inline -> IO Inline@,
+-- @Block -> Block@, or @[Inline] -> IO [Inline]@.
+getPlugin :: String -> IO (Pandoc -> IO Pandoc)
+getPlugin modsrc = do
+ res <- runInterpreter (evaluatePlugin modsrc)
+ case res of
+ Right func -> return func
+ Left (WontCompile xs) -> error $ "WontCompile error for plugin '" ++ modsrc ++ "'\n" ++ unlines (map errMsg xs)
+ Left (NotAllowed x) -> error $ "NotAllowed error for plugin '" ++ modsrc ++ "'\n" ++ x
+ Left (UnknownError x) -> error $ "UnknownError for plugin '" ++ modsrc ++ "'\n" ++ x
+ Left (GhcException x) -> error $ "GhcException for plugin '" ++ modsrc ++ "'\n" ++ x
+
+evaluatePlugin :: String -> Interpreter (Pandoc -> IO Pandoc)
+evaluatePlugin modsrc = do
+ set [installedModulesInScope := False]
+ loadModules [modsrc]
+ modnames <- getLoadedModules
+ setTopLevelModules modnames
+ setImports ["Prelude", "Text.Pandoc", "Text.Pandoc.Definition"]
+ exports <- liftM concat $ mapM getModuleExports modnames
+ unless ((Fun "transform") `elem` exports) $
+ throwError $ UnknownError $ "The plugin module must define a function 'transform'."
+ transformType <- typeOf "transform"
+ if "-> IO" `isInfixOf` transformType
+ then interpret "processWithM transform" (as :: Pandoc -> IO Pandoc)
+ else interpret "return . (processWith transform)" (as :: Pandoc -> IO Pandoc)
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
new file mode 100644
index 000000000..65e512b5e
--- /dev/null
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -0,0 +1,675 @@
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.HTML
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of HTML to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.HTML (
+ readHtml,
+ rawHtmlInline,
+ rawHtmlBlock,
+ anyHtmlBlockTag,
+ anyHtmlInlineTag,
+ anyHtmlTag,
+ anyHtmlEndTag,
+ htmlEndTag,
+ extractTagType,
+ htmlBlockElement,
+ unsanitaryURI
+ ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Data.Maybe ( fromMaybe )
+import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf, intercalate )
+import Data.Char ( toLower, isAlphaNum )
+import Network.URI ( parseURIReference, URI (..) )
+
+-- | Convert HTML-formatted string to 'Pandoc' document.
+readHtml :: ParserState -- ^ Parser state
+ -> String -- ^ String to parse
+ -> Pandoc
+readHtml = readWith parseHtml
+
+--
+-- Constants
+--
+
+eitherBlockOrInline :: [[Char]]
+eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
+ "map", "area", "object"]
+
+{-
+inlineHtmlTags :: [[Char]]
+inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
+ "br", "cite", "code", "dfn", "em", "font", "i", "img",
+ "input", "kbd", "label", "q", "s", "samp", "select",
+ "small", "span", "strike", "strong", "sub", "sup",
+ "textarea", "tt", "u", "var"] ++ eitherBlockOrInline
+-}
+
+blockHtmlTags :: [[Char]]
+blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
+ "dl", "fieldset", "form", "h1", "h2", "h3", "h4",
+ "h5", "h6", "hr", "html", "isindex", "menu", "noframes",
+ "noscript", "ol", "p", "pre", "table", "ul", "dd",
+ "dt", "frameset", "li", "tbody", "td", "tfoot",
+ "th", "thead", "tr", "script"] ++ eitherBlockOrInline
+
+sanitaryTags :: [[Char]]
+sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big",
+ "blockquote", "br", "button", "caption", "center",
+ "cite", "code", "col", "colgroup", "dd", "del", "dfn",
+ "dir", "div", "dl", "dt", "em", "fieldset", "font",
+ "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
+ "i", "img", "input", "ins", "kbd", "label", "legend",
+ "li", "map", "menu", "ol", "optgroup", "option", "p",
+ "pre", "q", "s", "samp", "select", "small", "span",
+ "strike", "strong", "sub", "sup", "table", "tbody",
+ "td", "textarea", "tfoot", "th", "thead", "tr", "tt",
+ "u", "ul", "var"]
+
+sanitaryAttributes :: [[Char]]
+sanitaryAttributes = ["abbr", "accept", "accept-charset",
+ "accesskey", "action", "align", "alt", "axis",
+ "border", "cellpadding", "cellspacing", "char",
+ "charoff", "charset", "checked", "cite", "class",
+ "clear", "cols", "colspan", "color", "compact",
+ "coords", "datetime", "dir", "disabled",
+ "enctype", "for", "frame", "headers", "height",
+ "href", "hreflang", "hspace", "id", "ismap",
+ "label", "lang", "longdesc", "maxlength", "media",
+ "method", "multiple", "name", "nohref", "noshade",
+ "nowrap", "prompt", "readonly", "rel", "rev",
+ "rows", "rowspan", "rules", "scope", "selected",
+ "shape", "size", "span", "src", "start",
+ "summary", "tabindex", "target", "title", "type",
+ "usemap", "valign", "value", "vspace", "width"]
+
+--
+-- HTML utility functions
+--
+
+-- | Returns @True@ if sanitization is specified and the specified tag is
+-- not on the sanitized tag list.
+unsanitaryTag :: [Char]
+ -> GenParser tok ParserState Bool
+unsanitaryTag tag = do
+ st <- getState
+ return $ stateSanitizeHTML st && tag `notElem` sanitaryTags
+
+-- | returns @True@ if sanitization is specified and the specified attribute
+-- is not on the sanitized attribute list.
+unsanitaryAttribute :: ([Char], String, t)
+ -> GenParser tok ParserState Bool
+unsanitaryAttribute (attr, val, _) = do
+ st <- getState
+ return $ stateSanitizeHTML st &&
+ (attr `notElem` sanitaryAttributes ||
+ (attr `elem` ["href","src"] && unsanitaryURI val))
+
+-- | Returns @True@ if the specified URI is potentially a security risk.
+unsanitaryURI :: String -> Bool
+unsanitaryURI u =
+ let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:",
+ "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
+ "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
+ "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
+ "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
+ "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
+ "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
+ "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
+ "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
+ "snews:", "webcal:", "ymsgr:"]
+ in case parseURIReference u of
+ Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes
+ Nothing -> True
+
+-- | Read blocks until end tag.
+blocksTilEnd :: String -> GenParser Char ParserState [Block]
+blocksTilEnd tag = do
+ blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
+ return $ filter (/= Null) blocks
+
+-- | Read inlines until end tag.
+inlinesTilEnd :: String -> GenParser Char ParserState [Inline]
+inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
+
+-- | Parse blocks between open and close tag.
+blocksIn :: String -> GenParser Char ParserState [Block]
+blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
+
+-- | Parse inlines between open and close tag.
+inlinesIn :: String -> GenParser Char ParserState [Inline]
+inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
+
+-- | Extract type from a tag: e.g. @br@ from @\<br\>@
+extractTagType :: String -> String
+extractTagType ('<':rest) =
+ let isSpaceOrSlash c = c `elem` "/ \n\t" in
+ map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
+extractTagType _ = ""
+
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
+anyHtmlTag :: GenParser Char ParserState [Char]
+anyHtmlTag = try $ do
+ char '<'
+ spaces
+ tag <- many1 alphaNum
+ attribs <- many htmlAttribute
+ spaces
+ ender <- option "" (string "/")
+ let ender' = if null ender then "" else " /"
+ spaces
+ char '>'
+ let result = "<" ++ tag ++
+ concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
+ unsanitary <- unsanitaryTag tag
+ if unsanitary
+ then return $ "<!-- unsafe HTML removed -->"
+ else return result
+
+anyHtmlEndTag :: GenParser Char ParserState [Char]
+anyHtmlEndTag = try $ do
+ char '<'
+ spaces
+ char '/'
+ spaces
+ tag <- many1 alphaNum
+ spaces
+ char '>'
+ let result = "</" ++ tag ++ ">"
+ unsanitary <- unsanitaryTag tag
+ if unsanitary
+ then return $ "<!-- unsafe HTML removed -->"
+ else return result
+
+htmlTag :: String -> GenParser Char ParserState (String, [(String, String)])
+htmlTag tag = try $ do
+ char '<'
+ spaces
+ stringAnyCase tag
+ attribs <- many htmlAttribute
+ spaces
+ optional (string "/")
+ spaces
+ char '>'
+ return (tag, (map (\(name, content, _) -> (name, content)) attribs))
+
+-- parses a quoted html attribute value
+quoted :: Char -> GenParser Char st (String, String)
+quoted quoteChar = do
+ result <- between (char quoteChar) (char quoteChar)
+ (many (noneOf [quoteChar]))
+ return (result, [quoteChar])
+
+nullAttribute :: ([Char], [Char], [Char])
+nullAttribute = ("", "", "")
+
+htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char])
+htmlAttribute = do
+ attr <- htmlRegularAttribute <|> htmlMinimizedAttribute
+ unsanitary <- unsanitaryAttribute attr
+ if unsanitary
+ then return nullAttribute
+ else return attr
+
+-- minimized boolean attribute
+htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char])
+htmlMinimizedAttribute = try $ do
+ many1 space
+ name <- many1 (choice [letter, oneOf ".-_:"])
+ return (name, name, name)
+
+htmlRegularAttribute :: GenParser Char st ([Char], [Char], [Char])
+htmlRegularAttribute = try $ do
+ many1 space
+ name <- many1 (choice [letter, oneOf ".-_:"])
+ spaces
+ char '='
+ spaces
+ (content, quoteStr) <- choice [ (quoted '\''),
+ (quoted '"'),
+ (do
+ a <- many (alphaNum <|> (oneOf "-._:"))
+ return (a,"")) ]
+ return (name, content,
+ (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
+
+-- | Parse an end tag of type 'tag'
+htmlEndTag :: [Char] -> GenParser Char st [Char]
+htmlEndTag tag = try $ do
+ char '<'
+ spaces
+ char '/'
+ spaces
+ stringAnyCase tag
+ spaces
+ char '>'
+ return $ "</" ++ tag ++ ">"
+
+{-
+-- | Returns @True@ if the tag is (or can be) an inline tag.
+isInline :: String -> Bool
+isInline tag = (extractTagType tag) `elem` inlineHtmlTags
+-}
+
+-- | Returns @True@ if the tag is (or can be) a block tag.
+isBlock :: String -> Bool
+isBlock tag = (extractTagType tag) `elem` blockHtmlTags
+
+anyHtmlBlockTag :: GenParser Char ParserState [Char]
+anyHtmlBlockTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isBlock tag then return tag else fail "not a block tag"
+
+anyHtmlInlineTag :: GenParser Char ParserState [Char]
+anyHtmlInlineTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if not (isBlock tag) then return tag else fail "not an inline tag"
+
+-- | Parses material between script tags.
+-- Scripts must be treated differently, because they can contain '<>' etc.
+htmlScript :: GenParser Char ParserState [Char]
+htmlScript = try $ do
+ open <- string "<script"
+ rest <- manyTill anyChar (htmlEndTag "script")
+ st <- getState
+ if stateSanitizeHTML st && not ("script" `elem` sanitaryTags)
+ then return "<!-- unsafe HTML removed -->"
+ else return $ open ++ rest ++ "</script>"
+
+-- | Parses material between style tags.
+-- Style tags must be treated differently, because they can contain CSS
+htmlStyle :: GenParser Char ParserState [Char]
+htmlStyle = try $ do
+ open <- string "<style"
+ rest <- manyTill anyChar (htmlEndTag "style")
+ st <- getState
+ if stateSanitizeHTML st && not ("style" `elem` sanitaryTags)
+ then return "<!-- unsafe HTML removed -->"
+ else return $ open ++ rest ++ "</style>"
+
+htmlBlockElement :: GenParser Char ParserState [Char]
+htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ]
+
+rawHtmlBlock :: GenParser Char ParserState Block
+rawHtmlBlock = try $ do
+ body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag
+ state <- getState
+ if stateParseRaw state then return (RawHtml body) else return Null
+
+-- This is a block whose contents should be passed through verbatim, not interpreted.
+rawVerbatimBlock :: GenParser Char ParserState [Char]
+rawVerbatimBlock = try $ do
+ start <- anyHtmlBlockTag
+ let tagtype = extractTagType start
+ if tagtype `elem` ["pre"]
+ then do
+ contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar)
+ end <- htmlEndTag tagtype
+ return $ start ++ contents ++ end
+ else fail "Not a verbatim block"
+
+-- We don't want to parse </body> or </html> as raw HTML, since these
+-- are handled in parseHtml.
+rawHtmlBlock' :: GenParser Char ParserState Block
+rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
+ rawHtmlBlock
+
+-- | Parses an HTML comment.
+htmlComment :: GenParser Char st [Char]
+htmlComment = try $ do
+ string "<!--"
+ comment <- manyTill anyChar (try (string "-->"))
+ return $ "<!--" ++ comment ++ "-->"
+
+--
+-- parsing documents
+--
+
+xmlDec :: GenParser Char st [Char]
+xmlDec = try $ do
+ string "<?"
+ rest <- manyTill anyChar (char '>')
+ return $ "<?" ++ rest ++ ">"
+
+definition :: GenParser Char st [Char]
+definition = try $ do
+ string "<!"
+ rest <- manyTill anyChar (char '>')
+ return $ "<!" ++ rest ++ ">"
+
+nonTitleNonHead :: GenParser Char ParserState Char
+nonTitleNonHead = try $ do
+ notFollowedBy $ (htmlTag "title" >> return ' ') <|>
+ (htmlEndTag "head" >> return ' ')
+ (rawHtmlBlock >> return ' ') <|> anyChar
+
+parseTitle :: GenParser Char ParserState [Inline]
+parseTitle = try $ do
+ (tag, _) <- htmlTag "title"
+ contents <- inlinesTilEnd tag
+ spaces
+ return contents
+
+-- parse header and return meta-information (for now, just title)
+parseHead :: GenParser Char ParserState ([Inline], [a], [Char])
+parseHead = try $ do
+ htmlTag "head"
+ spaces
+ skipMany nonTitleNonHead
+ contents <- option [] parseTitle
+ skipMany nonTitleNonHead
+ htmlEndTag "head"
+ return (contents, [], "")
+
+skipHtmlTag :: String -> GenParser Char ParserState ()
+skipHtmlTag tag = optional (htmlTag tag)
+
+-- h1 class="title" representation of title in body
+bodyTitle :: GenParser Char ParserState [Inline]
+bodyTitle = try $ do
+ (_, attribs) <- htmlTag "h1"
+ case (extractAttribute "class" attribs) of
+ Just "title" -> return ""
+ _ -> fail "not title"
+ inlinesTilEnd "h1"
+
+parseHtml :: GenParser Char ParserState Pandoc
+parseHtml = do
+ sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
+ skipHtmlTag "html"
+ spaces
+ (title, authors, date) <- option ([], [], "") parseHead
+ spaces
+ skipHtmlTag "body"
+ spaces
+ optional bodyTitle -- skip title in body, because it's represented in meta
+ blocks <- parseBlocks
+ spaces
+ optional (htmlEndTag "body")
+ spaces
+ optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html>
+ eof
+ return $ Pandoc (Meta title authors date) blocks
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
+
+block :: GenParser Char ParserState Block
+block = choice [ codeBlock
+ , header
+ , hrule
+ , list
+ , blockQuote
+ , para
+ , plain
+ , rawHtmlBlock'
+ ] <?> "block"
+
+--
+-- header blocks
+--
+
+header :: GenParser Char ParserState Block
+header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
+
+headerLevel :: Int -> GenParser Char ParserState Block
+headerLevel n = try $ do
+ let level = "h" ++ show n
+ htmlTag level
+ contents <- inlinesTilEnd level
+ return $ Header n (normalizeSpaces contents)
+
+--
+-- hrule block
+--
+
+hrule :: GenParser Char ParserState Block
+hrule = try $ do
+ (_, attribs) <- htmlTag "hr"
+ state <- getState
+ if not (null attribs) && stateParseRaw state
+ then unexpected "attributes in hr" -- parse as raw in this case
+ else return HorizontalRule
+
+--
+-- code blocks
+--
+
+-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
+-- skipped, because they are not portable to output formats other than HTML.
+codeBlock :: GenParser Char ParserState Block
+codeBlock = try $ do
+ htmlTag "pre"
+ result <- manyTill
+ (many1 (satisfy (/= '<')) <|>
+ ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+ (htmlEndTag "pre")
+ let result' = concat result
+ -- drop leading newline if any
+ let result'' = if "\n" `isPrefixOf` result'
+ then drop 1 result'
+ else result'
+ -- drop trailing newline if any
+ let result''' = if "\n" `isSuffixOf` result''
+ then init result''
+ else result''
+ return $ CodeBlock ("",[],[]) $ decodeCharacterReferences result'''
+
+--
+-- block quotes
+--
+
+blockQuote :: GenParser Char ParserState Block
+blockQuote = try $ htmlTag "blockquote" >> spaces >>
+ blocksTilEnd "blockquote" >>= (return . BlockQuote)
+
+--
+-- list blocks
+--
+
+list :: GenParser Char ParserState Block
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+orderedList :: GenParser Char ParserState Block
+orderedList = try $ do
+ (_, attribs) <- htmlTag "ol"
+ (start, style) <- option (1, DefaultStyle) $
+ do failIfStrict
+ let sta = fromMaybe "1" $
+ lookup "start" attribs
+ let sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ let sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
+ return (read sta, sty')
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ol"
+ return $ OrderedList (start, style, DefaultDelim) items
+
+bulletList :: GenParser Char ParserState Block
+bulletList = try $ do
+ htmlTag "ul"
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ul"
+ return $ BulletList items
+
+definitionList :: GenParser Char ParserState Block
+definitionList = try $ do
+ failIfStrict -- def lists not part of standard markdown
+ htmlTag "dl"
+ spaces
+ items <- sepEndBy1 definitionListItem spaces
+ htmlEndTag "dl"
+ return $ DefinitionList items
+
+definitionListItem :: GenParser Char ParserState ([Inline], [Block])
+definitionListItem = try $ do
+ terms <- sepEndBy1 (inlinesIn "dt") spaces
+ defs <- sepEndBy1 (blocksIn "dd") spaces
+ let term = intercalate [LineBreak] terms
+ return (term, concat defs)
+
+--
+-- paragraph block
+--
+
+para :: GenParser Char ParserState Block
+para = try $ htmlTag "p" >> inlinesTilEnd "p" >>=
+ return . Para . normalizeSpaces
+
+--
+-- plain block
+--
+
+plain :: GenParser Char ParserState Block
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- inline
+--
+
+inline :: GenParser Char ParserState Inline
+inline = choice [ charRef
+ , strong
+ , emph
+ , superscript
+ , subscript
+ , strikeout
+ , spanStrikeout
+ , code
+ , str
+ , linebreak
+ , whitespace
+ , link
+ , image
+ , rawHtmlInline
+ ] <?> "inline"
+
+code :: GenParser Char ParserState Inline
+code = try $ do
+ htmlTag "code"
+ result <- manyTill anyChar (htmlEndTag "code")
+ -- remove internal line breaks, leading and trailing space,
+ -- and decode character references
+ return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
+ intercalate " " $ lines result
+
+rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline = do
+ result <- htmlScript <|> htmlStyle <|> htmlComment <|> anyHtmlInlineTag
+ state <- getState
+ if stateParseRaw state then return (HtmlInline result) else return (Str "")
+
+betweenTags :: [Char] -> GenParser Char ParserState [Inline]
+betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
+ return . normalizeSpaces
+
+emph :: GenParser Char ParserState Inline
+emph = (betweenTags "em" <|> betweenTags "i") >>= return . Emph
+
+strong :: GenParser Char ParserState Inline
+strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
+
+superscript :: GenParser Char ParserState Inline
+superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
+
+subscript :: GenParser Char ParserState Inline
+subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
+
+strikeout :: GenParser Char ParserState Inline
+strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
+ return . Strikeout
+
+spanStrikeout :: GenParser Char ParserState Inline
+spanStrikeout = try $ do
+ failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
+ (_, attributes) <- htmlTag "span"
+ result <- case (extractAttribute "class" attributes) of
+ Just "strikeout" -> inlinesTilEnd "span"
+ _ -> fail "not a strikeout"
+ return $ Strikeout result
+
+whitespace :: GenParser Char st Inline
+whitespace = many1 space >> return Space
+
+-- hard line break
+linebreak :: GenParser Char ParserState Inline
+linebreak = htmlTag "br" >> optional newline >> return LineBreak
+
+str :: GenParser Char st Inline
+str = many1 (noneOf "<& \t\n") >>= return . Str
+
+--
+-- links and images
+--
+
+-- extract contents of attribute (attribute names are case-insensitive)
+extractAttribute :: [Char] -> [([Char], String)] -> Maybe String
+extractAttribute _ [] = Nothing
+extractAttribute name ((attrName, contents):rest) =
+ let name' = map toLower name
+ attrName' = map toLower attrName
+ in if attrName' == name'
+ then Just (decodeCharacterReferences contents)
+ else extractAttribute name rest
+
+link :: GenParser Char ParserState Inline
+link = try $ do
+ (_, attributes) <- htmlTag "a"
+ url <- case (extractAttribute "href" attributes) of
+ Just url -> return url
+ Nothing -> fail "no href"
+ let title = fromMaybe "" $ extractAttribute "title" attributes
+ lab <- inlinesTilEnd "a"
+ return $ Link (normalizeSpaces lab) (url, title)
+
+image :: GenParser Char ParserState Inline
+image = try $ do
+ (_, attributes) <- htmlTag "img"
+ url <- case (extractAttribute "src" attributes) of
+ Just url -> return url
+ Nothing -> fail "no src"
+ let title = fromMaybe "" $ extractAttribute "title" attributes
+ let alt = fromMaybe "" (extractAttribute "alt" attributes)
+ return $ Image [Str alt] (url, title)
+
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
new file mode 100644
index 000000000..f35ab4f29
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -0,0 +1,774 @@
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of LaTeX to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.LaTeX (
+ readLaTeX,
+ rawLaTeXInline,
+ rawLaTeXEnvironment'
+ ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Data.Maybe ( fromMaybe )
+import Data.Char ( chr )
+import Data.List ( isPrefixOf, isSuffixOf )
+
+-- | Parse LaTeX from string and return 'Pandoc' document.
+readLaTeX :: ParserState -- ^ Parser state, including options for parser
+ -> String -- ^ String to parse
+ -> Pandoc
+readLaTeX = readWith parseLaTeX
+
+-- characters with special meaning
+specialChars :: [Char]
+specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
+
+--
+-- utility functions
+--
+
+-- | Returns text between brackets and its matching pair.
+bracketedText :: Char -> Char -> GenParser Char st [Char]
+bracketedText openB closeB = do
+ result <- charsInBalanced' openB closeB
+ return $ [openB] ++ result ++ [closeB]
+
+-- | Returns an option or argument of a LaTeX command.
+optOrArg :: GenParser Char st [Char]
+optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
+
+-- | True if the string begins with '{'.
+isArg :: [Char] -> Bool
+isArg ('{':_) = True
+isArg _ = False
+
+-- | Returns list of options and arguments of a LaTeX command.
+commandArgs :: GenParser Char st [[Char]]
+commandArgs = many optOrArg
+
+-- | Parses LaTeX command, returns (name, star, list of options or arguments).
+command :: GenParser Char st ([Char], [Char], [[Char]])
+command = do
+ char '\\'
+ name <- many1 letter
+ star <- option "" (string "*") -- some commands have starred versions
+ args <- commandArgs
+ return (name, star, args)
+
+begin :: [Char] -> GenParser Char st [Char]
+begin name = try $ do
+ string $ "\\begin{" ++ name ++ "}"
+ optional commandArgs
+ spaces
+ return name
+
+end :: [Char] -> GenParser Char st [Char]
+end name = try $ do
+ string $ "\\end{" ++ name ++ "}"
+ return name
+
+-- | Returns a list of block elements containing the contents of an
+-- environment.
+environment :: [Char] -> GenParser Char ParserState [Block]
+environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces
+
+anyEnvironment :: GenParser Char ParserState Block
+anyEnvironment = try $ do
+ string "\\begin{"
+ name <- many letter
+ star <- option "" (string "*") -- some environments have starred variants
+ char '}'
+ optional commandArgs
+ spaces
+ contents <- manyTill block (end (name ++ star))
+ spaces
+ return $ BlockQuote contents
+
+--
+-- parsing documents
+--
+
+-- | Process LaTeX preamble, extracting metadata.
+processLaTeXPreamble :: GenParser Char ParserState ()
+processLaTeXPreamble = try $ manyTill
+ (choice [bibliographic, comment, unknownCommand, nullBlock])
+ (try (string "\\begin{document}")) >>
+ spaces
+
+-- | Parse LaTeX and return 'Pandoc'.
+parseLaTeX :: GenParser Char ParserState Pandoc
+parseLaTeX = do
+ optional processLaTeXPreamble -- preamble might not be present (fragment)
+ spaces
+ blocks <- parseBlocks
+ spaces
+ optional $ try (string "\\end{document}" >> many anyChar)
+ -- might not be present (fragment)
+ spaces
+ eof
+ state <- getState
+ let blocks' = filter (/= Null) blocks
+ let title' = stateTitle state
+ let authors' = stateAuthors state
+ let date' = stateDate state
+ return $ Pandoc (Meta title' authors' date') blocks'
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks = spaces >> many block
+
+block :: GenParser Char ParserState Block
+block = choice [ hrule
+ , codeBlock
+ , header
+ , list
+ , blockQuote
+ , comment
+ , bibliographic
+ , para
+ , itemBlock
+ , unknownEnvironment
+ , ignore
+ , unknownCommand ] <?> "block"
+
+--
+-- header blocks
+--
+
+header :: GenParser Char ParserState Block
+header = try $ do
+ char '\\'
+ subs <- many (try (string "sub"))
+ string "section"
+ optional (char '*')
+ char '{'
+ title' <- manyTill inline (char '}')
+ spaces
+ return $ Header (length subs + 1) (normalizeSpaces title')
+
+--
+-- hrule block
+--
+
+hrule :: GenParser Char st Block
+hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
+ "\\newpage" ] >> spaces >> return HorizontalRule
+
+--
+-- code blocks
+--
+
+codeBlock :: GenParser Char ParserState Block
+codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> lhsCodeBlock
+-- Note: Verbatim is from fancyvrb.
+
+codeBlockWith :: String -> GenParser Char st Block
+codeBlockWith env = try $ do
+ string ("\\begin{" ++ env ++ "}") -- don't use begin function because it
+ -- gobbles whitespace
+ optional blanklines -- we want to gobble blank lines, but not
+ -- leading space
+ contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
+ spaces
+ let classes = if env == "code" then ["haskell"] else []
+ return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents)
+
+lhsCodeBlock :: GenParser Char ParserState Block
+lhsCodeBlock = do
+ failUnlessLHS
+ (CodeBlock (_,_,_) cont) <- codeBlockWith "code"
+ return $ CodeBlock ("", ["sourceCode","haskell"], []) cont
+
+--
+-- block quotes
+--
+
+blockQuote :: GenParser Char ParserState Block
+blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
+ return . BlockQuote
+
+--
+-- list blocks
+--
+
+list :: GenParser Char ParserState Block
+list = bulletList <|> orderedList <|> definitionList <?> "list"
+
+listItem :: GenParser Char ParserState ([Inline], [Block])
+listItem = try $ do
+ ("item", _, args) <- command
+ spaces
+ state <- getState
+ let oldParserContext = stateParserContext state
+ updateState (\s -> s {stateParserContext = ListItemState})
+ blocks <- many block
+ updateState (\s -> s {stateParserContext = oldParserContext})
+ opt <- case args of
+ ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
+ parseFromString (many inline) $ tail $ init x
+ _ -> return []
+ return (opt, blocks)
+
+orderedList :: GenParser Char ParserState Block
+orderedList = try $ do
+ string "\\begin{enumerate}"
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+ try $ do failIfStrict
+ char '['
+ res <- anyOrderedListMarker
+ char ']'
+ return res
+ spaces
+ option "" $ try $ do string "\\setlength{\\itemindent}"
+ char '{'
+ manyTill anyChar (char '}')
+ spaces
+ start <- option 1 $ try $ do failIfStrict
+ string "\\setcounter{enum"
+ many1 (oneOf "iv")
+ string "}{"
+ num <- many1 digit
+ char '}'
+ spaces
+ return $ (read num) + 1
+ items <- many listItem
+ end "enumerate"
+ spaces
+ return $ OrderedList (start, style, delim) $ map snd items
+
+bulletList :: GenParser Char ParserState Block
+bulletList = try $ do
+ begin "itemize"
+ spaces
+ items <- many listItem
+ end "itemize"
+ spaces
+ return (BulletList $ map snd items)
+
+definitionList :: GenParser Char ParserState Block
+definitionList = try $ do
+ begin "description"
+ spaces
+ items <- many listItem
+ end "description"
+ spaces
+ return (DefinitionList items)
+
+--
+-- paragraph block
+--
+
+para :: GenParser Char ParserState Block
+para = do
+ res <- many1 inline
+ spaces
+ return $ if null (filter (`notElem` [Str "", Space]) res)
+ then Null
+ else Para $ normalizeSpaces res
+
+--
+-- title authors date
+--
+
+bibliographic :: GenParser Char ParserState Block
+bibliographic = choice [ maketitle, title, authors, date ]
+
+maketitle :: GenParser Char st Block
+maketitle = try (string "\\maketitle") >> spaces >> return Null
+
+title :: GenParser Char ParserState Block
+title = try $ do
+ string "\\title{"
+ tit <- manyTill inline (char '}')
+ spaces
+ updateState (\state -> state { stateTitle = tit })
+ return Null
+
+authors :: GenParser Char ParserState Block
+authors = try $ do
+ string "\\author{"
+ authors' <- manyTill anyChar (char '}')
+ spaces
+ let authors'' = map removeLeadingTrailingSpace $ lines $
+ substitute "\\\\" "\n" authors'
+ updateState (\s -> s { stateAuthors = authors'' })
+ return Null
+
+date :: GenParser Char ParserState Block
+date = try $ do
+ string "\\date{"
+ date' <- manyTill anyChar (char '}')
+ spaces
+ updateState (\state -> state { stateDate = date' })
+ return Null
+
+--
+-- item block
+-- for use in unknown environments that aren't being parsed as raw latex
+--
+
+-- this forces items to be parsed in different blocks
+itemBlock :: GenParser Char ParserState Block
+itemBlock = try $ do
+ ("item", _, args) <- command
+ state <- getState
+ if stateParserContext state == ListItemState
+ then fail "item should be handled by list block"
+ else if null args
+ then return Null
+ else return $ Plain [Str (stripFirstAndLast (head args))]
+
+--
+-- raw LaTeX
+--
+
+-- | Parse any LaTeX environment and return a Para block containing
+-- the whole literal environment as raw TeX.
+rawLaTeXEnvironment :: GenParser Char st Block
+rawLaTeXEnvironment = do
+ contents <- rawLaTeXEnvironment'
+ spaces
+ return $ Para [TeX contents]
+
+-- | Parse any LaTeX environment and return a string containing
+-- the whole literal environment as raw TeX.
+rawLaTeXEnvironment' :: GenParser Char st String
+rawLaTeXEnvironment' = try $ do
+ string "\\begin{"
+ name <- many1 letter
+ star <- option "" (string "*") -- for starred variants
+ let name' = name ++ star
+ char '}'
+ args <- option [] commandArgs
+ let argStr = concat args
+ contents <- manyTill (choice [ (many1 (noneOf "\\")),
+ rawLaTeXEnvironment',
+ string "\\" ])
+ (end name')
+ return $ "\\begin{" ++ name' ++ "}" ++ argStr ++
+ concat contents ++ "\\end{" ++ name' ++ "}"
+
+unknownEnvironment :: GenParser Char ParserState Block
+unknownEnvironment = try $ do
+ state <- getState
+ result <- if stateParseRaw state -- check whether we should include raw TeX
+ then rawLaTeXEnvironment -- if so, get whole raw environment
+ else anyEnvironment -- otherwise just the contents
+ return result
+
+-- \ignore{} is used conventionally in literate haskell for definitions
+-- that are to be processed by the compiler but not printed.
+ignore :: GenParser Char ParserState Block
+ignore = try $ do
+ ("ignore", _, _) <- command
+ spaces
+ return Null
+
+unknownCommand :: GenParser Char ParserState Block
+unknownCommand = try $ do
+ notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
+ "document"]
+ state <- getState
+ if stateParserContext state == ListItemState
+ then notFollowedBy' $ string "\\item"
+ else return ()
+ if stateParseRaw state
+ then do
+ (name, star, args) <- command
+ spaces
+ return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)]
+ else do -- skip unknown command, leaving arguments to be parsed
+ char '\\'
+ letter
+ many (letter <|> digit)
+ optional (try $ string "{}")
+ spaces
+ return Null
+
+-- latex comment
+comment :: GenParser Char st Block
+comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
+
+--
+-- inline
+--
+
+inline :: GenParser Char ParserState Inline
+inline = choice [ str
+ , endline
+ , whitespace
+ , quoted
+ , apostrophe
+ , spacer
+ , strong
+ , math
+ , ellipses
+ , emDash
+ , enDash
+ , hyphen
+ , emph
+ , strikeout
+ , superscript
+ , subscript
+ , ref
+ , lab
+ , code
+ , url
+ , link
+ , image
+ , footnote
+ , linebreak
+ , accentedChar
+ , specialChar
+ , rawLaTeXInline
+ , escapedChar
+ , unescapedChar
+ ] <?> "inline"
+
+accentedChar :: GenParser Char st Inline
+accentedChar = normalAccentedChar <|> specialAccentedChar
+
+normalAccentedChar :: GenParser Char st Inline
+normalAccentedChar = try $ do
+ char '\\'
+ accent <- oneOf "'`^\"~"
+ character <- (try $ char '{' >> letter >>~ char '}') <|> letter
+ let table = fromMaybe [] $ lookup character accentTable
+ let result = case lookup accent table of
+ Just num -> chr num
+ Nothing -> '?'
+ return $ Str [result]
+
+-- an association list of letters and association list of accents
+-- and decimal character numbers.
+accentTable :: [(Char, [(Char, Int)])]
+accentTable =
+ [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
+ ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
+ ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
+ ('N', [('~', 209)]),
+ ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
+ ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
+ ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
+ ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
+ ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
+ ('n', [('~', 241)]),
+ ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
+ ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
+
+specialAccentedChar :: GenParser Char st Inline
+specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
+ oslash, pound, euro, copyright, sect ]
+
+ccedil :: GenParser Char st Inline
+ccedil = try $ do
+ char '\\'
+ letter' <- oneOfStrings ["cc", "cC"]
+ let num = if letter' == "cc" then 231 else 199
+ return $ Str [chr num]
+
+aring :: GenParser Char st Inline
+aring = try $ do
+ char '\\'
+ letter' <- oneOfStrings ["aa", "AA"]
+ let num = if letter' == "aa" then 229 else 197
+ return $ Str [chr num]
+
+iuml :: GenParser Char st Inline
+iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 239])
+
+szlig :: GenParser Char st Inline
+szlig = try (string "\\ss") >> return (Str [chr 223])
+
+oslash :: GenParser Char st Inline
+oslash = try $ do
+ char '\\'
+ letter' <- choice [char 'o', char 'O']
+ let num = if letter' == 'o' then 248 else 216
+ return $ Str [chr num]
+
+aelig :: GenParser Char st Inline
+aelig = try $ do
+ char '\\'
+ letter' <- oneOfStrings ["ae", "AE"]
+ let num = if letter' == "ae" then 230 else 198
+ return $ Str [chr num]
+
+pound :: GenParser Char st Inline
+pound = try (string "\\pounds") >> return (Str [chr 163])
+
+euro :: GenParser Char st Inline
+euro = try (string "\\euro") >> return (Str [chr 8364])
+
+copyright :: GenParser Char st Inline
+copyright = try (string "\\copyright") >> return (Str [chr 169])
+
+sect :: GenParser Char st Inline
+sect = try (string "\\S") >> return (Str [chr 167])
+
+escapedChar :: GenParser Char st Inline
+escapedChar = do
+ result <- escaped (oneOf " $%&_#{}\n")
+ return $ if result == Str "\n" then Str " " else result
+
+-- nonescaped special characters
+unescapedChar :: GenParser Char st Inline
+unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c])
+
+specialChar :: GenParser Char st Inline
+specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
+
+backslash :: GenParser Char st Inline
+backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\")
+
+tilde :: GenParser Char st Inline
+tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
+
+caret :: GenParser Char st Inline
+caret = try (string "\\^{}") >> return (Str "^")
+
+bar :: GenParser Char st Inline
+bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\")
+
+lt :: GenParser Char st Inline
+lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<")
+
+gt :: GenParser Char st Inline
+gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">")
+
+doubleQuote :: GenParser Char st Inline
+doubleQuote = char '"' >> return (Str "\"")
+
+code :: GenParser Char ParserState Inline
+code = code1 <|> code2 <|> lhsInlineCode
+
+code1 :: GenParser Char st Inline
+code1 = try $ do
+ string "\\verb"
+ marker <- anyChar
+ result <- manyTill anyChar (char marker)
+ return $ Code $ removeLeadingTrailingSpace result
+
+code2 :: GenParser Char st Inline
+code2 = try $ do
+ string "\\texttt{"
+ result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
+ return $ Code result
+
+lhsInlineCode :: GenParser Char ParserState Inline
+lhsInlineCode = try $ do
+ failUnlessLHS
+ char '|'
+ result <- manyTill (noneOf "|\n") (char '|')
+ return $ Code result
+
+emph :: GenParser Char ParserState Inline
+emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
+ manyTill inline (char '}') >>= return . Emph
+
+strikeout :: GenParser Char ParserState Inline
+strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
+ return . Strikeout
+
+superscript :: GenParser Char ParserState Inline
+superscript = try $ string "\\textsuperscript{" >>
+ manyTill inline (char '}') >>= return . Superscript
+
+-- note: \textsubscript isn't a standard latex command, but we use
+-- a defined version in pandoc.
+subscript :: GenParser Char ParserState Inline
+subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
+ return . Subscript
+
+apostrophe :: GenParser Char ParserState Inline
+apostrophe = char '\'' >> return Apostrophe
+
+quoted :: GenParser Char ParserState Inline
+quoted = doubleQuoted <|> singleQuoted
+
+singleQuoted :: GenParser Char ParserState Inline
+singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
+ return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted :: GenParser Char ParserState Inline
+doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
+ return . Quoted DoubleQuote . normalizeSpaces
+
+singleQuoteStart :: GenParser Char st Char
+singleQuoteStart = char '`'
+
+singleQuoteEnd :: GenParser Char st ()
+singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
+
+doubleQuoteStart :: CharParser st String
+doubleQuoteStart = string "``"
+
+doubleQuoteEnd :: CharParser st String
+doubleQuoteEnd = try $ string "''"
+
+ellipses :: GenParser Char st Inline
+ellipses = try $ string "\\ldots" >> optional (try $ string "{}") >>
+ return Ellipses
+
+enDash :: GenParser Char st Inline
+enDash = try (string "--") >> return EnDash
+
+emDash :: GenParser Char st Inline
+emDash = try (string "---") >> return EmDash
+
+hyphen :: GenParser Char st Inline
+hyphen = char '-' >> return (Str "-")
+
+lab :: GenParser Char st Inline
+lab = try $ do
+ string "\\label{"
+ result <- manyTill anyChar (char '}')
+ return $ Str $ "(" ++ result ++ ")"
+
+ref :: GenParser Char st Inline
+ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
+
+strong :: GenParser Char ParserState Inline
+strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
+ return . Strong
+
+whitespace :: GenParser Char st Inline
+whitespace = many1 (oneOf "~ \t") >> return Space
+
+-- hard line break
+linebreak :: GenParser Char st Inline
+linebreak = try (string "\\\\") >> return LineBreak
+
+spacer :: GenParser Char st Inline
+spacer = try (string "\\,") >> return (Str "")
+
+str :: GenParser Char st Inline
+str = many1 (noneOf specialChars) >>= return . Str
+
+-- endline internal to paragraph
+endline :: GenParser Char st Inline
+endline = try $ newline >> notFollowedBy blankline >> return Space
+
+-- math
+math :: GenParser Char st Inline
+math = (math3 >>= return . Math DisplayMath)
+ <|> (math1 >>= return . Math InlineMath)
+ <|> (math2 >>= return . Math InlineMath)
+ <|> (math4 >>= return . Math DisplayMath)
+ <|> (math5 >>= return . Math DisplayMath)
+ <|> (math6 >>= return . Math DisplayMath)
+ <?> "math"
+
+math1 :: GenParser Char st String
+math1 = try $ char '$' >> manyTill anyChar (char '$')
+
+math2 :: GenParser Char st String
+math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)")
+
+math3 :: GenParser Char st String
+math3 = try $ char '$' >> math1 >>~ char '$'
+
+math4 :: GenParser Char st String
+math4 = try $ do
+ name <- begin "equation" <|> begin "equation*" <|> begin "displaymath" <|> begin "displaymath*"
+ spaces
+ manyTill anyChar (end name)
+
+math5 :: GenParser Char st String
+math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]")
+
+math6 :: GenParser Char st String
+math6 = try $ do
+ name <- begin "eqnarray" <|> begin "eqnarray*"
+ spaces
+ res <- manyTill anyChar (end name)
+ return $ filter (/= '&') res -- remove eqnarray alignment codes
+
+--
+-- links and images
+--
+
+url :: GenParser Char ParserState Inline
+url = try $ do
+ string "\\url"
+ url' <- charsInBalanced '{' '}'
+ return $ Link [Code url'] (url', "")
+
+link :: GenParser Char ParserState Inline
+link = try $ do
+ string "\\href{"
+ url' <- manyTill anyChar (char '}')
+ char '{'
+ label' <- manyTill inline (char '}')
+ return $ Link (normalizeSpaces label') (url', "")
+
+image :: GenParser Char ParserState Inline
+image = try $ do
+ ("includegraphics", _, args) <- command
+ let args' = filter isArg args -- filter out options
+ let src = if null args' then
+ ("", "")
+ else
+ (stripFirstAndLast (head args'), "")
+ return $ Image [Str "image"] src
+
+footnote :: GenParser Char ParserState Inline
+footnote = try $ do
+ (name, _, (contents:[])) <- command
+ if ((name == "footnote") || (name == "thanks"))
+ then string ""
+ else fail "not a footnote or thanks command"
+ let contents' = stripFirstAndLast contents
+ -- parse the extracted block, which may contain various block elements:
+ rest <- getInput
+ setInput $ contents'
+ blocks <- parseBlocks
+ setInput rest
+ return $ Note blocks
+
+-- | Parse any LaTeX command and return it in a raw TeX inline element.
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = try $ do
+ notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"]
+ state <- getState
+ if stateParseRaw state
+ then do
+ (name, star, args) <- command
+ return $ TeX ("\\" ++ name ++ star ++ concat args)
+ else do -- skip unknown command, leaving arguments to be parsed
+ char '\\'
+ letter
+ many (letter <|> digit)
+ optional (try $ string "{}")
+ return $ Str ""
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
new file mode 100644
index 000000000..896f5832e
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -0,0 +1,1243 @@
+{-# LANGUAGE CPP #-}
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Markdown
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of markdown-formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Markdown (
+ readMarkdown
+ ) where
+
+import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex, intercalate )
+import Data.Ord ( comparing )
+import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit, isUpper )
+import Data.Maybe
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
+import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
+ anyHtmlInlineTag, anyHtmlTag,
+ anyHtmlEndTag, htmlEndTag, extractTagType,
+ htmlBlockElement, unsanitaryURI )
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.ParserCombinators.Parsec
+import Control.Monad (when)
+
+-- | Read markdown from an input string and return a Pandoc document.
+readMarkdown :: ParserState -> String -> Pandoc
+readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+--
+
+spaceChars :: [Char]
+spaceChars = " \t"
+
+bulletListMarkers :: [Char]
+bulletListMarkers = "*+-"
+
+hruleChars :: [Char]
+hruleChars = "*-_"
+
+setextHChars :: [Char]
+setextHChars = "=-"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221"
+
+--
+-- auxiliary functions
+--
+
+indentSpaces :: GenParser Char ParserState [Char]
+indentSpaces = try $ do
+ state <- getState
+ let tabStop = stateTabStop state
+ try (count tabStop (char ' ')) <|>
+ (many (char ' ') >> string "\t") <?> "indentation"
+
+nonindentSpaces :: GenParser Char ParserState [Char]
+nonindentSpaces = do
+ state <- getState
+ let tabStop = stateTabStop state
+ sps <- many (char ' ')
+ if length sps < tabStop
+ then return sps
+ else unexpected "indented line"
+
+-- | Fail unless we're at beginning of a line.
+failUnlessBeginningOfLine :: GenParser tok st ()
+failUnlessBeginningOfLine = do
+ pos <- getPosition
+ if sourceColumn pos == 1 then return () else fail "not beginning of line"
+
+-- | Fail unless we're in "smart typography" mode.
+failUnlessSmart :: GenParser tok ParserState ()
+failUnlessSmart = do
+ state <- getState
+ if stateSmart state then return () else fail "Smart typography feature"
+
+-- | Parse a sequence of inline elements between square brackets,
+-- including inlines between balanced pairs of square brackets.
+inlinesInBalancedBrackets :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState [Inline]
+inlinesInBalancedBrackets parser = try $ do
+ char '['
+ result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
+ if res == "["
+ then return ()
+ else pzero
+ bal <- inlinesInBalancedBrackets parser
+ return $ [Str "["] ++ bal ++ [Str "]"])
+ <|> (count 1 parser))
+ (char ']')
+ return $ concat result
+
+--
+-- document structure
+--
+
+titleLine :: GenParser Char ParserState [Inline]
+titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
+
+authorsLine :: GenParser Char st [String]
+authorsLine = try $ do
+ char '%'
+ skipSpaces
+ authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
+ newline
+ return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
+
+dateLine :: GenParser Char st String
+dateLine = try $ do
+ char '%'
+ skipSpaces
+ date <- many (noneOf "\n")
+ newline
+ return $ decodeCharacterReferences $ removeTrailingSpace date
+
+titleBlock :: GenParser Char ParserState ([Inline], [String], [Char])
+titleBlock = try $ do
+ failIfStrict
+ title <- option [] titleLine
+ author <- option [] authorsLine
+ date <- option "" dateLine
+ optional blanklines
+ return (title, author, date)
+
+parseMarkdown :: GenParser Char ParserState Pandoc
+parseMarkdown = do
+ -- markdown allows raw HTML
+ updateState (\state -> state { stateParseRaw = True })
+ startPos <- getPosition
+ -- go through once just to get list of reference keys
+ -- docMinusKeys is the raw document with blanks where the keys were...
+ docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
+ return . concat
+ setInput docMinusKeys
+ setPosition startPos
+ st <- getState
+ -- go through again for notes unless strict...
+ if stateStrict st
+ then return ()
+ else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
+ return . concat
+ st' <- getState
+ let reversedNotes = stateNotes st'
+ updateState $ \s -> s { stateNotes = reverse reversedNotes }
+ setInput docMinusNotes
+ setPosition startPos
+ -- now parse it for real...
+ (title, author, date) <- option ([],[],"") titleBlock
+ blocks <- parseBlocks
+ return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
+
+--
+-- initial pass for references and notes
+--
+
+referenceKey :: GenParser Char ParserState [Char]
+referenceKey = try $ do
+ startPos <- getPosition
+ nonindentSpaces
+ lab <- reference
+ char ':'
+ skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
+ let sourceURL excludes = many $
+ optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' '))
+ src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
+ tit <- option "" referenceTitle
+ blanklines
+ endPos <- getPosition
+ let newkey = (lab, (intercalate "%20" $ words $ removeTrailingSpace src, tit))
+ st <- getState
+ let oldkeys = stateKeys st
+ updateState $ \s -> s { stateKeys = newkey : oldkeys }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+referenceTitle :: GenParser Char st String
+referenceTitle = try $ do
+ skipSpaces >> optional newline >> skipSpaces
+ tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
+ <|> do delim <- char '\'' <|> char '"'
+ manyTill anyChar (try (char delim >> skipSpaces >>
+ notFollowedBy (noneOf ")\n")))
+ return $ decodeCharacterReferences tit
+
+noteMarker :: GenParser Char st [Char]
+noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
+
+rawLine :: GenParser Char ParserState [Char]
+rawLine = do
+ notFollowedBy blankline
+ notFollowedBy' noteMarker
+ contents <- many1 nonEndline
+ end <- option "" (newline >> optional indentSpaces >> return "\n")
+ return $ contents ++ end
+
+rawLines :: GenParser Char ParserState [Char]
+rawLines = many1 rawLine >>= return . concat
+
+noteBlock :: GenParser Char ParserState [Char]
+noteBlock = try $ do
+ startPos <- getPosition
+ ref <- noteMarker
+ char ':'
+ optional blankline
+ optional indentSpaces
+ raw <- sepBy rawLines (try (blankline >> indentSpaces))
+ optional blanklines
+ endPos <- getPosition
+ -- parse the extracted text, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
+ let newnote = (ref, contents)
+ st <- getState
+ let oldnotes = stateNotes st
+ updateState $ \s -> s { stateNotes = newnote : oldnotes }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks = manyTill block eof
+
+block :: GenParser Char ParserState Block
+block = do
+ st <- getState
+ choice (if stateStrict st
+ then [ header
+ , codeBlockIndented
+ , blockQuote
+ , hrule
+ , bulletList
+ , orderedList
+ , htmlBlock
+ , para
+ , plain
+ , nullBlock ]
+ else [ codeBlockDelimited
+ , header
+ , table
+ , codeBlockIndented
+ , lhsCodeBlock
+ , blockQuote
+ , hrule
+ , bulletList
+ , orderedList
+ , definitionList
+ , para
+ , rawHtmlBlocks
+ , plain
+ , nullBlock ]) <?> "block"
+
+--
+-- header blocks
+--
+
+header :: GenParser Char ParserState Block
+header = setextHeader <|> atxHeader <?> "header"
+
+atxHeader :: GenParser Char ParserState Block
+atxHeader = try $ do
+ level <- many1 (char '#') >>= return . length
+ notFollowedBy (char '.' <|> char ')') -- this would be a list
+ skipSpaces
+ text <- manyTill inline atxClosing >>= return . normalizeSpaces
+ return $ Header level text
+
+atxClosing :: GenParser Char st [Char]
+atxClosing = try $ skipMany (char '#') >> blanklines
+
+setextHeader :: GenParser Char ParserState Block
+setextHeader = try $ do
+ text <- many1Till inline newline
+ underlineChar <- oneOf setextHChars
+ many (char underlineChar)
+ blanklines
+ let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
+ return $ Header level (normalizeSpaces text)
+
+--
+-- hrule block
+--
+
+hrule :: GenParser Char st Block
+hrule = try $ do
+ skipSpaces
+ start <- oneOf hruleChars
+ count 2 (skipSpaces >> char start)
+ skipMany (oneOf spaceChars <|> char start)
+ newline
+ optional blanklines
+ return HorizontalRule
+
+--
+-- code blocks
+--
+
+indentedLine :: GenParser Char ParserState [Char]
+indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
+
+codeBlockDelimiter :: Maybe Int
+ -> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])]))
+codeBlockDelimiter len = try $ do
+ size <- case len of
+ Just l -> count l (char '~') >> many (char '~') >> return l
+ Nothing -> count 3 (char '~') >> many (char '~') >>=
+ return . (+ 3) . length
+ many spaceChar
+ attr <- option ([],[],[]) attributes
+ blankline
+ return (size, attr)
+
+attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
+attributes = try $ do
+ char '{'
+ many spaceChar
+ attrs <- many (attribute >>~ many spaceChar)
+ char '}'
+ let (ids, classes, keyvals) = unzip3 attrs
+ let id' = if null ids then "" else head ids
+ return (id', concat classes, concat keyvals)
+
+attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
+attribute = identifierAttr <|> classAttr <|> keyValAttr
+
+identifier :: GenParser Char st [Char]
+identifier = do
+ first <- letter
+ rest <- many alphaNum
+ return (first:rest)
+
+identifierAttr :: GenParser Char st ([Char], [a], [a1])
+identifierAttr = try $ do
+ char '#'
+ result <- identifier
+ return (result,[],[])
+
+classAttr :: GenParser Char st ([Char], [[Char]], [a])
+classAttr = try $ do
+ char '.'
+ result <- identifier
+ return ("",[result],[])
+
+keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])])
+keyValAttr = try $ do
+ key <- identifier
+ char '='
+ char '"'
+ val <- manyTill (noneOf "\n") (char '"')
+ return ("",[],[(key,val)])
+
+codeBlockDelimited :: GenParser Char st Block
+codeBlockDelimited = try $ do
+ (size, attr) <- codeBlockDelimiter Nothing
+ contents <- manyTill anyLine (codeBlockDelimiter (Just size))
+ blanklines
+ return $ CodeBlock attr $ intercalate "\n" contents
+
+codeBlockIndented :: GenParser Char ParserState Block
+codeBlockIndented = do
+ contents <- many1 (indentedLine <|>
+ try (do b <- blanklines
+ l <- indentedLine
+ return $ b ++ l))
+ optional blanklines
+ return $ CodeBlock ("",[],[]) $ stripTrailingNewlines $ concat contents
+
+lhsCodeBlock :: GenParser Char ParserState Block
+lhsCodeBlock = do
+ failUnlessLHS
+ contents <- lhsCodeBlockBird <|> lhsCodeBlockLaTeX
+ return $ CodeBlock ("",["sourceCode","haskell"],[]) contents
+
+lhsCodeBlockLaTeX :: GenParser Char ParserState String
+lhsCodeBlockLaTeX = try $ do
+ string "\\begin{code}"
+ manyTill spaceChar newline
+ contents <- many1Till anyChar (try $ string "\\end{code}")
+ blanklines
+ return $ stripTrailingNewlines contents
+
+lhsCodeBlockBird :: GenParser Char ParserState String
+lhsCodeBlockBird = try $ do
+ pos <- getPosition
+ when (sourceColumn pos /= 1) $ fail "Not in first column"
+ lns <- many1 birdTrackLine
+ -- if (as is normal) there is always a space after >, drop it
+ let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
+ then map (drop 1) lns
+ else lns
+ blanklines
+ return $ intercalate "\n" lns'
+
+birdTrackLine :: GenParser Char st [Char]
+birdTrackLine = do
+ char '>'
+ manyTill anyChar newline
+
+
+--
+-- block quotes
+--
+
+emailBlockQuoteStart :: GenParser Char ParserState Char
+emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
+
+emailBlockQuote :: GenParser Char ParserState [[Char]]
+emailBlockQuote = try $ do
+ emailBlockQuoteStart
+ raw <- sepBy (many (nonEndline <|>
+ (try (endline >> notFollowedBy emailBlockQuoteStart >>
+ return '\n'))))
+ (try (newline >> emailBlockQuoteStart))
+ newline <|> (eof >> return '\n')
+ optional blanklines
+ return raw
+
+blockQuote :: GenParser Char ParserState Block
+blockQuote = do
+ raw <- emailBlockQuote
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n"
+ return $ BlockQuote contents
+
+--
+-- list blocks
+--
+
+bulletListStart :: GenParser Char ParserState ()
+bulletListStart = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ nonindentSpaces
+ notFollowedBy' hrule -- because hrules start out just like lists
+ oneOf bulletListMarkers
+ spaceChar
+ skipSpaces
+
+anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim)
+anyOrderedListStart = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ nonindentSpaces
+ notFollowedBy $ string "p." >> spaceChar >> digit -- page number
+ state <- getState
+ if stateStrict state
+ then do many1 digit
+ char '.'
+ spaceChar
+ return (1, DefaultStyle, DefaultDelim)
+ else do (num, style, delim) <- anyOrderedListMarker
+ -- if it could be an abbreviated first name, insist on more than one space
+ if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper))
+ else spaceChar
+ skipSpaces
+ return (num, style, delim)
+
+listStart :: GenParser Char ParserState ()
+listStart = bulletListStart <|> (anyOrderedListStart >> return ())
+
+-- parse a line of a list item (start = parser for beginning of list item)
+listLine :: GenParser Char ParserState [Char]
+listLine = try $ do
+ notFollowedBy' listStart
+ notFollowedBy blankline
+ notFollowedBy' (do indentSpaces
+ many (spaceChar)
+ listStart)
+ line <- manyTill anyChar newline
+ return $ line ++ "\n"
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem :: GenParser Char ParserState [Char]
+rawListItem = try $ do
+ listStart
+ result <- many1 listLine
+ blanks <- many blankline
+ return $ concat result ++ blanks
+
+-- continuation of a list item - indented and separated by blankline
+-- or (in compact lists) endline.
+-- note: nested lists are parsed as continuations
+listContinuation :: GenParser Char ParserState [Char]
+listContinuation = try $ do
+ lookAhead indentSpaces
+ result <- many1 listContinuationLine
+ blanks <- many blankline
+ return $ concat result ++ blanks
+
+listContinuationLine :: GenParser Char ParserState [Char]
+listContinuationLine = try $ do
+ notFollowedBy blankline
+ notFollowedBy' listStart
+ optional indentSpaces
+ result <- manyTill anyChar newline
+ return $ result ++ "\n"
+
+listItem :: GenParser Char ParserState [Block]
+listItem = try $ do
+ first <- rawListItem
+ continuations <- many listContinuation
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state {stateParserContext = ListItemState}
+ -- parse the extracted block, which may contain various block elements:
+ let raw = concat (first:continuations)
+ contents <- parseFromString parseBlocks raw
+ updateState (\st -> st {stateParserContext = oldContext})
+ return contents
+
+orderedList :: GenParser Char ParserState Block
+orderedList = try $ do
+ (start, style, delim) <- lookAhead anyOrderedListStart
+ items <- many1 listItem
+ return $ OrderedList (start, style, delim) $ compactify items
+
+bulletList :: GenParser Char ParserState Block
+bulletList = try $ do
+ lookAhead bulletListStart
+ many1 listItem >>= return . BulletList . compactify
+
+-- definition lists
+
+definitionListItem :: GenParser Char ParserState ([Inline], [Block])
+definitionListItem = try $ do
+ notFollowedBy blankline
+ notFollowedBy' indentSpaces
+ -- first, see if this has any chance of being a definition list:
+ lookAhead (anyLine >> char ':')
+ term <- manyTill inline newline
+ raw <- many1 defRawBlock
+ state <- getState
+ let oldContext = stateParserContext state
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ concat raw
+ updateState (\st -> st {stateParserContext = oldContext})
+ return ((normalizeSpaces term), contents)
+
+defRawBlock :: GenParser Char ParserState [Char]
+defRawBlock = try $ do
+ char ':'
+ state <- getState
+ let tabStop = stateTabStop state
+ try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
+ firstline <- anyLine
+ rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
+ trailing <- option "" blanklines
+ return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
+
+definitionList :: GenParser Char ParserState Block
+definitionList = do
+ items <- many1 definitionListItem
+ let (terms, defs) = unzip items
+ let defs' = compactify defs
+ let items' = zip terms defs'
+ return $ DefinitionList items'
+
+--
+-- paragraph block
+--
+
+isHtmlOrBlank :: Inline -> Bool
+isHtmlOrBlank (HtmlInline _) = True
+isHtmlOrBlank (Space) = True
+isHtmlOrBlank (LineBreak) = True
+isHtmlOrBlank _ = False
+
+para :: GenParser Char ParserState Block
+para = try $ do
+ result <- many1 inline
+ if all isHtmlOrBlank result
+ then fail "treat as raw HTML"
+ else return ()
+ newline
+ blanklines <|> do st <- getState
+ if stateStrict st
+ then lookAhead (blockQuote <|> header) >> return ""
+ else pzero
+ return $ Para $ normalizeSpaces result
+
+plain :: GenParser Char ParserState Block
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- raw html
+--
+
+htmlElement :: GenParser Char ParserState [Char]
+htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
+
+htmlBlock :: GenParser Char ParserState Block
+htmlBlock = try $ do
+ failUnlessBeginningOfLine
+ first <- htmlElement
+ finalSpace <- many (oneOf spaceChars)
+ finalNewlines <- many newline
+ return $ RawHtml $ first ++ finalSpace ++ finalNewlines
+
+-- True if tag is self-closing
+isSelfClosing :: [Char] -> Bool
+isSelfClosing tag =
+ isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
+
+strictHtmlBlock :: GenParser Char ParserState [Char]
+strictHtmlBlock = try $ do
+ tag <- anyHtmlBlockTag
+ let tag' = extractTagType tag
+ if isSelfClosing tag || tag' == "hr"
+ then return tag
+ else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
+ (htmlElement <|> (count 1 anyChar)))
+ end <- htmlEndTag tag'
+ return $ tag ++ concat contents ++ end
+
+rawHtmlBlocks :: GenParser Char ParserState Block
+rawHtmlBlocks = do
+ htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock
+ sps <- do sp1 <- many spaceChar
+ sp2 <- option "" (blankline >> return "\n")
+ sp3 <- many spaceChar
+ sp4 <- option "" blanklines
+ return $ sp1 ++ sp2 ++ sp3 ++ sp4
+ -- note: we want raw html to be able to
+ -- precede a code block, when separated
+ -- by a blank line
+ return $ blk ++ sps
+ let combined = concat htmlBlocks
+ let combined' = if last combined == '\n' then init combined else combined
+ return $ RawHtml combined'
+
+--
+-- Tables
+--
+
+-- Parse a dashed line with optional trailing spaces; return its length
+-- and the length including trailing space.
+dashedLine :: Char
+ -> GenParser Char st (Int, Int)
+dashedLine ch = do
+ dashes <- many1 (char ch)
+ sp <- many spaceChar
+ return $ (length dashes, length $ dashes ++ sp)
+
+-- Parse a table header with dashed lines of '-' preceded by
+-- one line of text.
+simpleTableHeader :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
+simpleTableHeader = try $ do
+ rawContent <- anyLine
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines') = unzip dashes
+ let indices = scanl (+) (length initSp) lines'
+ let rawHeads = tail $ splitByIndices (init indices) rawContent
+ let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ return (rawHeads, aligns, indices)
+
+-- Parse a table footer - dashed lines followed by blank line.
+tableFooter :: GenParser Char ParserState [Char]
+tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
+
+-- Parse a table separator - dashed line.
+tableSep :: GenParser Char ParserState String
+tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
+
+-- Parse a raw line and split it into chunks by indices.
+rawTableLine :: [Int]
+ -> GenParser Char ParserState [String]
+rawTableLine indices = do
+ notFollowedBy' (blanklines <|> tableFooter)
+ line <- many1Till anyChar newline
+ return $ map removeLeadingTrailingSpace $ tail $
+ splitByIndices (init indices) line
+
+-- Parse a table line and return a list of lists of blocks (columns).
+tableLine :: [Int]
+ -> GenParser Char ParserState [[Block]]
+tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
+
+-- Parse a multiline table row and return a list of blocks (columns).
+multilineRow :: [Int]
+ -> GenParser Char ParserState [[Block]]
+multilineRow indices = do
+ colLines <- many1 (rawTableLine indices)
+ optional blanklines
+ let cols = map unlines $ transpose colLines
+ mapM (parseFromString (many plain)) cols
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int -- Number of columns on terminal
+ -> [Int] -- Indices
+ -> [Double] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices =
+ let lengths = zipWith (-) indices (0:indices)
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
+
+-- Parses a table caption: inlines beginning with 'Table:'
+-- and followed by blank lines.
+tableCaption :: GenParser Char ParserState [Inline]
+tableCaption = try $ do
+ nonindentSpaces
+ string "Table:"
+ result <- many1 inline
+ blanklines
+ return $ normalizeSpaces result
+
+-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
+tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> ([Int] -> GenParser Char ParserState [[Block]])
+ -> GenParser Char ParserState end
+ -> GenParser Char ParserState Block
+tableWith headerParser lineParser footerParser = try $ do
+ (rawHeads, aligns, indices) <- headerParser
+ lines' <- many1Till (lineParser indices) footerParser
+ caption <- option [] tableCaption
+ heads <- mapM (parseFromString (many plain)) rawHeads
+ state <- getState
+ let numColumns = stateColumns state
+ let widths = widthsFromIndices numColumns indices
+ return $ Table caption aligns widths heads lines'
+
+-- Parse a simple table with '---' header and one line per row.
+simpleTable :: GenParser Char ParserState Block
+simpleTable = tableWith simpleTableHeader tableLine blanklines
+
+-- Parse a multiline table: starts with row of '-' on top, then header
+-- (which may be multiline), then the rows,
+-- which may be multiline, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+multilineTable :: GenParser Char ParserState Block
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+
+multilineTableHeader :: GenParser Char ParserState ([String], [Alignment], [Int])
+multilineTableHeader = try $ do
+ tableSep
+ rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines') = unzip dashes
+ let indices = scanl (+) (length initSp) lines'
+ let rawHeadsList = transpose $ map
+ (\ln -> tail $ splitByIndices (init indices) ln)
+ rawContent
+ let rawHeads = map (intercalate " ") rawHeadsList
+ let aligns = zipWith alignType rawHeadsList lengths
+ return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String]
+ -> Int
+ -> Alignment
+alignType [] _ = AlignDefault
+alignType strLst len =
+ let s = head $ sortBy (comparing length) $
+ map removeTrailingSpace strLst
+ leftSpace = if null s then False else (s !! 0) `elem` " \t"
+ rightSpace = length s < len || (s !! (len - 1)) `elem` " \t"
+ in case (leftSpace, rightSpace) of
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
+
+table :: GenParser Char ParserState Block
+table = simpleTable <|> multilineTable <?> "table"
+
+--
+-- inline
+--
+
+inline :: GenParser Char ParserState Inline
+inline = choice inlineParsers <?> "inline"
+
+inlineParsers :: [GenParser Char ParserState Inline]
+inlineParsers = [ abbrev
+ , str
+ , smartPunctuation
+ , whitespace
+ , endline
+ , code
+ , charRef
+ , strong
+ , emph
+ , note
+ , inlineNote
+ , link
+#ifdef _CITEPROC
+ , inlineCitation
+#endif
+ , image
+ , math
+ , strikeout
+ , superscript
+ , subscript
+ , autoLink
+ , rawHtmlInline'
+ , rawLaTeXInline'
+ , escapedChar
+ , symbol
+ , ltSign ]
+
+inlineNonLink :: GenParser Char ParserState Inline
+inlineNonLink = (choice $
+ map (\parser -> try (parser >>= failIfLink)) inlineParsers)
+ <?> "inline (non-link)"
+
+failIfLink :: Inline -> GenParser tok st Inline
+failIfLink (Link _ _) = pzero
+failIfLink elt = return elt
+
+escapedChar :: GenParser Char ParserState Inline
+escapedChar = do
+ char '\\'
+ state <- getState
+ result <- option '\\' $ if stateStrict state
+ then oneOf "\\`*_{}[]()>#+-.!~"
+ else satisfy (not . isAlphaNum)
+ let result' = if result == ' '
+ then '\160' -- '\ ' is a nonbreaking space
+ else result
+ return $ Str [result']
+
+ltSign :: GenParser Char ParserState Inline
+ltSign = do
+ st <- getState
+ if stateStrict st
+ then char '<'
+ else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
+ return $ Str ['<']
+
+specialCharsMinusLt :: [Char]
+specialCharsMinusLt = filter (/= '<') specialChars
+
+symbol :: GenParser Char ParserState Inline
+symbol = do
+ result <- oneOf specialCharsMinusLt
+ return $ Str [result]
+
+-- parses inline code, between n `s and n `s
+code :: GenParser Char ParserState Inline
+code = try $ do
+ starts <- many1 (char '`')
+ skipSpaces
+ result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ (char '\n' >> return " "))
+ (try (skipSpaces >> count (length starts) (char '`') >>
+ notFollowedBy (char '`')))
+ return $ Code $ removeLeadingTrailingSpace $ concat result
+
+mathWord :: GenParser Char st [Char]
+mathWord = many1 ((noneOf " \t\n\\$") <|>
+ (try (char '\\') >>~ notFollowedBy (char '$')))
+
+math :: GenParser Char ParserState Inline
+math = (mathDisplay >>= return . Math DisplayMath)
+ <|> (mathInline >>= return . Math InlineMath)
+
+mathDisplay :: GenParser Char ParserState String
+mathDisplay = try $ do
+ failIfStrict
+ string "$$"
+ many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$")
+
+mathInline :: GenParser Char ParserState String
+mathInline = try $ do
+ failIfStrict
+ char '$'
+ notFollowedBy space
+ words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline)))
+ char '$'
+ notFollowedBy digit
+ return $ intercalate " " words'
+
+emph :: GenParser Char ParserState Inline
+emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|>
+ (enclosed (char '_') (notFollowedBy' strong >> char '_' >>
+ notFollowedBy alphaNum) inline)) >>=
+ return . Emph . normalizeSpaces
+
+strong :: GenParser Char ParserState Inline
+strong = ((enclosed (string "**") (try $ string "**") inline) <|>
+ (enclosed (string "__") (try $ string "__") inline)) >>=
+ return . Strong . normalizeSpaces
+
+strikeout :: GenParser Char ParserState Inline
+strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
+ return . Strikeout . normalizeSpaces
+
+superscript :: GenParser Char ParserState Inline
+superscript = failIfStrict >> enclosed (char '^') (char '^')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Superscript
+
+subscript :: GenParser Char ParserState Inline
+subscript = failIfStrict >> enclosed (char '~') (char '~')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Subscript
+
+abbrev :: GenParser Char ParserState Inline
+abbrev = failUnlessSmart >>
+ (assumedAbbrev <|> knownAbbrev) >>= return . Str . (++ ".\160")
+
+-- an string of letters followed by a period that does not end a sentence
+-- is assumed to be an abbreviation. It is assumed that sentences don't
+-- start with lowercase letters or numerals.
+assumedAbbrev :: GenParser Char ParserState [Char]
+assumedAbbrev = try $ do
+ result <- many1 $ satisfy isAlpha
+ string ". "
+ lookAhead $ satisfy (\x -> isLower x || isDigit x)
+ return result
+
+-- these strings are treated as abbreviations even if they are followed
+-- by a capital letter (such as a name).
+knownAbbrev :: GenParser Char ParserState [Char]
+knownAbbrev = try $ do
+ result <- oneOfStrings [ "Mr", "Mrs", "Ms", "Capt", "Dr", "Prof", "Gen",
+ "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs",
+ "Sen", "Rep", "Pres", "Hon", "Rev" ]
+ string ". "
+ return result
+
+smartPunctuation :: GenParser Char ParserState Inline
+smartPunctuation = failUnlessSmart >>
+ choice [ quoted, apostrophe, dash, ellipses ]
+
+apostrophe :: GenParser Char ParserState Inline
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+
+quoted :: GenParser Char ParserState Inline
+quoted = doubleQuoted <|> singleQuoted
+
+withQuoteContext :: QuoteContext
+ -> (GenParser Char ParserState Inline)
+ -> GenParser Char ParserState Inline
+withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+
+singleQuoted :: GenParser Char ParserState Inline
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
+ return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted :: GenParser Char ParserState Inline
+doubleQuoted = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
+ return . Quoted DoubleQuote . normalizeSpaces
+
+failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
+failIfInQuoteContext context = do
+ st <- getState
+ if stateQuoteContext st == context
+ then fail "already inside quotes"
+ else return ()
+
+singleQuoteStart :: GenParser Char ParserState Char
+singleQuoteStart = do
+ failIfInQuoteContext InSingleQuote
+ char '\8216' <|>
+ (try $ do char '\''
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum)))
+ -- possess/contraction
+ return '\'')
+
+singleQuoteEnd :: GenParser Char st Char
+singleQuoteEnd = try $ do
+ char '\8217' <|> char '\''
+ notFollowedBy alphaNum
+ return '\''
+
+doubleQuoteStart :: GenParser Char ParserState Char
+doubleQuoteStart = do
+ failIfInQuoteContext InDoubleQuote
+ char '\8220' <|>
+ (try $ do char '"'
+ notFollowedBy (oneOf " \t\n")
+ return '"')
+
+doubleQuoteEnd :: GenParser Char st Char
+doubleQuoteEnd = char '\8221' <|> char '"'
+
+ellipses :: GenParser Char st Inline
+ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
+
+dash :: GenParser Char st Inline
+dash = enDash <|> emDash
+
+enDash :: GenParser Char st Inline
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
+
+emDash :: GenParser Char st Inline
+emDash = oneOfStrings ["---", "--"] >> return EmDash
+
+whitespace :: GenParser Char ParserState Inline
+whitespace = do
+ sps <- many1 (oneOf spaceChars)
+ if length sps >= 2
+ then option Space (endline >> return LineBreak)
+ else return Space <?> "whitespace"
+
+nonEndline :: GenParser Char st Char
+nonEndline = satisfy (/='\n')
+
+strChar :: GenParser Char st Char
+strChar = noneOf (specialChars ++ spaceChars ++ "\n")
+
+str :: GenParser Char st Inline
+str = many1 strChar >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline :: GenParser Char ParserState Inline
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ st <- getState
+ if stateStrict st
+ then do notFollowedBy emailBlockQuoteStart
+ notFollowedBy (char '#') -- atx header
+ else return ()
+ -- parse potential list-starts differently if in a list:
+ if stateParserContext st == ListItemState
+ then notFollowedBy' (bulletListStart <|>
+ (anyOrderedListStart >> return ()))
+ else return ()
+ return Space
+
+--
+-- links
+--
+
+-- a reference label for a link
+reference :: GenParser Char ParserState [Inline]
+reference = do notFollowedBy' (string "[^") -- footnote reference
+ result <- inlinesInBalancedBrackets inlineNonLink
+ return $ normalizeSpaces result
+
+-- source for a link, with optional title
+source :: GenParser Char st (String, [Char])
+source =
+ (try $ charsInBalanced '(' ')' >>= parseFromString source') <|>
+ -- the following is needed for cases like: [ref](/url(a).
+ (enclosed (char '(') (char ')') anyChar >>=
+ parseFromString source')
+
+-- auxiliary function for source
+source' :: GenParser Char st (String, [Char])
+source' = do
+ skipSpaces
+ let sourceURL excludes = many $
+ optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' '))
+ src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
+ tit <- option "" linkTitle
+ skipSpaces
+ eof
+ return (intercalate "%20" $ words $ removeTrailingSpace src, tit)
+
+linkTitle :: GenParser Char st String
+linkTitle = try $ do
+ (many1 spaceChar >> option '\n' newline) <|> newline
+ skipSpaces
+ delim <- oneOf "'\""
+ tit <- manyTill (optional (char '\\') >> anyChar)
+ (try (char delim >> skipSpaces >> eof))
+ return $ decodeCharacterReferences tit
+
+link :: GenParser Char ParserState Inline
+link = try $ do
+ lab <- reference
+ src <- source <|> referenceLink lab
+ sanitize <- getState >>= return . stateSanitizeHTML
+ if sanitize && unsanitaryURI (fst src)
+ then fail "Unsanitary URI"
+ else return $ Link lab src
+
+-- a link like [this][ref] or [this][] or [this]
+referenceLink :: [Inline]
+ -> GenParser Char ParserState (String, [Char])
+referenceLink lab = do
+ ref <- option [] (try (optional (char ' ') >>
+ optional (newline >> skipSpaces) >> reference))
+ let ref' = if null ref then lab else ref
+ state <- getState
+ case lookupKeySrc (stateKeys state) ref' of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+
+autoLink :: GenParser Char ParserState Inline
+autoLink = try $ do
+ char '<'
+ src <- uri <|> (emailAddress >>= (return . ("mailto:" ++)))
+ char '>'
+ let src' = if "mailto:" `isPrefixOf` src
+ then drop 7 src
+ else src
+ st <- getState
+ let sanitize = stateSanitizeHTML st
+ if sanitize && unsanitaryURI src
+ then fail "Unsanitary URI"
+ else return $ if stateStrict st
+ then Link [Str src'] (src, "")
+ else Link [Code src'] (src, "")
+
+image :: GenParser Char ParserState Inline
+image = try $ do
+ char '!'
+ (Link lab src) <- link
+ return $ Image lab src
+
+note :: GenParser Char ParserState Inline
+note = try $ do
+ failIfStrict
+ ref <- noteMarker
+ state <- getState
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just contents -> return $ Note contents
+
+inlineNote :: GenParser Char ParserState Inline
+inlineNote = try $ do
+ failIfStrict
+ char '^'
+ contents <- inlinesInBalancedBrackets inline
+ return $ Note [Para contents]
+
+rawLaTeXInline' :: GenParser Char ParserState Inline
+rawLaTeXInline' = do
+ failIfStrict
+ (rawConTeXtEnvironment' >>= return . TeX)
+ <|> (rawLaTeXEnvironment' >>= return . TeX)
+ <|> rawLaTeXInline
+
+rawConTeXtEnvironment' :: GenParser Char st String
+rawConTeXtEnvironment' = try $ do
+ string "\\start"
+ completion <- inBrackets (letter <|> digit <|> spaceChar)
+ <|> (many1 letter)
+ contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar))
+ (try $ string "\\stop" >> string completion)
+ return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
+
+inBrackets :: (GenParser Char st Char) -> GenParser Char st String
+inBrackets parser = do
+ char '['
+ contents <- many parser
+ char ']'
+ return $ "[" ++ contents ++ "]"
+
+rawHtmlInline' :: GenParser Char ParserState Inline
+rawHtmlInline' = do
+ st <- getState
+ result <- if stateStrict st
+ then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
+ else anyHtmlInlineTag
+ return $ HtmlInline result
+
+#ifdef _CITEPROC
+inlineCitation :: GenParser Char ParserState Inline
+inlineCitation = try $ do
+ failIfStrict
+ cit <- citeMarker
+ let citations = readWith parseCitation defaultParserState cit
+ mr <- mapM chkCit citations
+ if catMaybes mr /= []
+ then return $ Cite citations []
+ else fail "no citation found"
+
+chkCit :: Target -> GenParser Char ParserState (Maybe Target)
+chkCit t = do
+ st <- getState
+ case lookupKeySrc (stateKeys st) [Str $ fst t] of
+ Just _ -> fail "This is a link"
+ Nothing -> if elem (fst t) $ stateCitations st
+ then return $ Just t
+ else return $ Nothing
+
+citeMarker :: GenParser Char ParserState String
+citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']')
+
+parseCitation :: GenParser Char ParserState [(String,String)]
+parseCitation = try $ sepBy (parseLabel) (oneOf ";")
+
+parseLabel :: GenParser Char ParserState (String,String)
+parseLabel = try $ do
+ res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@")
+ case res of
+ [lab,loc] -> return (lab, loc)
+ [lab] -> return (lab, "" )
+ _ -> return ("" , "" )
+
+#endif
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
new file mode 100644
index 000000000..255054c10
--- /dev/null
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -0,0 +1,707 @@
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.RST
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion from reStructuredText to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.RST (
+ readRST
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.ParserCombinators.Parsec
+import Control.Monad ( when )
+import Data.List ( findIndex, delete, intercalate )
+
+-- | Parse reStructuredText string and return Pandoc document.
+readRST :: ParserState -> String -> Pandoc
+readRST state s = (readWith parseRST) state (s ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+---
+
+bulletListMarkers :: [Char]
+bulletListMarkers = "*+-"
+
+underlineChars :: [Char]
+underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars :: [Char]
+specialChars = "\\`|*_<>$:[-"
+
+--
+-- parsing documents
+--
+
+isHeader :: Int -> Block -> Bool
+isHeader n (Header x _) = x == n
+isHeader _ _ = False
+
+-- | Promote all headers in a list of blocks. (Part of
+-- title transformation for RST.)
+promoteHeaders :: Int -> [Block] -> [Block]
+promoteHeaders num ((Header level text):rest) =
+ (Header (level - num) text):(promoteHeaders num rest)
+promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
+promoteHeaders _ [] = []
+
+-- | If list of blocks starts with a header (or a header and subheader)
+-- of level that are not found elsewhere, return it as a title and
+-- promote all the other headers.
+titleTransform :: [Block] -- ^ list of blocks
+ -> ([Block], [Inline]) -- ^ modified list of blocks, title
+titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
+ if (any (isHeader 1) rest) || (any (isHeader 2) rest)
+ then ((Header 1 head1):(Header 2 head2):rest, [])
+ else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
+titleTransform ((Header 1 head1):rest) = -- title, no subtitle
+ if (any (isHeader 1) rest)
+ then ((Header 1 head1):rest, [])
+ else ((promoteHeaders 1 rest), head1)
+titleTransform blocks = (blocks, [])
+
+parseRST :: GenParser Char ParserState Pandoc
+parseRST = do
+ startPos <- getPosition
+ -- go through once just to get list of reference keys
+ -- docMinusKeys is the raw document with blanks where the keys were...
+ docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
+ setInput docMinusKeys
+ setPosition startPos
+ st <- getState
+ let reversedKeys = stateKeys st
+ updateState $ \s -> s { stateKeys = reverse reversedKeys }
+ -- now parse it for real...
+ blocks <- parseBlocks
+ let blocks' = filter (/= Null) blocks
+ state <- getState
+ let (blocks'', title) = if stateStandalone state
+ then titleTransform blocks'
+ else (blocks', [])
+ let authors = stateAuthors state
+ let date = stateDate state
+ let title' = if (null title) then (stateTitle state) else title
+ return $ Pandoc (Meta title' authors date) blocks''
+
+--
+-- parsing blocks
+--
+
+parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks = manyTill block eof
+
+block :: GenParser Char ParserState Block
+block = choice [ codeBlock
+ , rawHtmlBlock
+ , rawLaTeXBlock
+ , fieldList
+ , blockQuote
+ , imageBlock
+ , unknownDirective
+ , header
+ , hrule
+ , list
+ , lineBlock
+ , lhsCodeBlock
+ , para
+ , plain
+ , nullBlock ] <?> "block"
+
+--
+-- field list
+--
+
+fieldListItem :: String -> GenParser Char st ([Char], [Char])
+fieldListItem indent = try $ do
+ string indent
+ char ':'
+ name <- many1 alphaNum
+ string ": "
+ skipSpaces
+ first <- manyTill anyChar newline
+ rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>
+ indentedBlock
+ return (name, intercalate " " (first:(lines rest)))
+
+fieldList :: GenParser Char ParserState Block
+fieldList = try $ do
+ indent <- lookAhead $ many (oneOf " \t")
+ items <- many1 $ fieldListItem indent
+ blanklines
+ let authors = case lookup "Authors" items of
+ Just auth -> [auth]
+ Nothing -> map snd (filter (\(x,_) -> x == "Author") items)
+ if null authors
+ then return ()
+ else updateState $ \st -> st {stateAuthors = authors}
+ case (lookup "Date" items) of
+ Just dat -> updateState $ \st -> st {stateDate = dat}
+ Nothing -> return ()
+ case (lookup "Title" items) of
+ Just tit -> parseFromString (many inline) tit >>=
+ \t -> updateState $ \st -> st {stateTitle = t}
+ Nothing -> return ()
+ let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") &&
+ (x /= "Date") && (x /= "Title")) items
+ if null remaining
+ then return Null
+ else do terms <- mapM (return . (:[]) . Str . fst) remaining
+ defs <- mapM (parseFromString (many block) . snd)
+ remaining
+ return $ DefinitionList $ zip terms defs
+
+--
+-- line block
+--
+
+lineBlockLine :: GenParser Char ParserState [Inline]
+lineBlockLine = try $ do
+ string "| "
+ white <- many (oneOf " \t")
+ line <- manyTill inline newline
+ return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
+
+lineBlock :: GenParser Char ParserState Block
+lineBlock = try $ do
+ lines' <- many1 lineBlockLine
+ blanklines
+ return $ Para (concat lines')
+
+--
+-- paragraph block
+--
+
+para :: GenParser Char ParserState Block
+para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
+
+codeBlockStart :: GenParser Char st Char
+codeBlockStart = string "::" >> blankline >> blankline
+
+-- paragraph that ends in a :: starting a code block
+paraBeforeCodeBlock :: GenParser Char ParserState Block
+paraBeforeCodeBlock = try $ do
+ result <- many1 (notFollowedBy' codeBlockStart >> inline)
+ lookAhead (string "::")
+ return $ Para $ if last result == Space
+ then normalizeSpaces result
+ else (normalizeSpaces result) ++ [Str ":"]
+
+-- regular paragraph
+paraNormal :: GenParser Char ParserState Block
+paraNormal = try $ do
+ result <- many1 inline
+ newline
+ blanklines
+ return $ Para $ normalizeSpaces result
+
+plain :: GenParser Char ParserState Block
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- image block
+--
+
+imageBlock :: GenParser Char st Block
+imageBlock = try $ do
+ string ".. image:: "
+ src <- manyTill anyChar newline
+ fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
+ many1 $ fieldListItem indent
+ optional blanklines
+ case lookup "alt" fields of
+ Just alt -> return $ Plain [Image [Str alt] (src, alt)]
+ Nothing -> return $ Plain [Image [Str "image"] (src, "")]
+--
+-- header blocks
+--
+
+header :: GenParser Char ParserState Block
+header = doubleHeader <|> singleHeader <?> "header"
+
+-- a header with lines on top and bottom
+doubleHeader :: GenParser Char ParserState Block
+doubleHeader = try $ do
+ c <- oneOf underlineChars
+ rest <- many (char c) -- the top line
+ let lenTop = length (c:rest)
+ skipSpaces
+ newline
+ txt <- many1 (notFollowedBy blankline >> inline)
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ if (len > lenTop) then fail "title longer than border" else return ()
+ blankline -- spaces and newline
+ count lenTop (char c) -- the bottom line
+ blanklines
+ -- check to see if we've had this kind of header before.
+ -- if so, get appropriate level. if not, add to list.
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return $ Header level (normalizeSpaces txt)
+
+-- a header with line on the bottom only
+singleHeader :: GenParser Char ParserState Block
+singleHeader = try $ do
+ notFollowedBy' whitespace
+ txt <- many1 (do {notFollowedBy blankline; inline})
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ blankline
+ c <- oneOf underlineChars
+ count (len - 1) (char c)
+ many (char c)
+ blanklines
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return $ Header level (normalizeSpaces txt)
+
+--
+-- hrule block
+--
+
+hrule :: GenParser Char st Block
+hrule = try $ do
+ chr <- oneOf underlineChars
+ count 3 (char chr)
+ skipMany (char chr)
+ blankline
+ blanklines
+ return HorizontalRule
+
+--
+-- code blocks
+--
+
+-- read a line indented by a given string
+indentedLine :: String -> GenParser Char st [Char]
+indentedLine indents = try $ do
+ string indents
+ result <- manyTill anyChar newline
+ return $ result ++ "\n"
+
+-- two or more indented lines, possibly separated by blank lines.
+-- any amount of indentation will work.
+indentedBlock :: GenParser Char st [Char]
+indentedBlock = do
+ indents <- lookAhead $ many1 (oneOf " \t")
+ lns <- many $ choice $ [ indentedLine indents,
+ try $ do b <- blanklines
+ l <- indentedLine indents
+ return (b ++ l) ]
+ optional blanklines
+ return $ concat lns
+
+codeBlock :: GenParser Char st Block
+codeBlock = try $ do
+ codeBlockStart
+ result <- indentedBlock
+ return $ CodeBlock ("",[],[]) $ stripTrailingNewlines result
+
+lhsCodeBlock :: GenParser Char ParserState Block
+lhsCodeBlock = try $ do
+ failUnlessLHS
+ pos <- getPosition
+ when (sourceColumn pos /= 1) $ fail "Not in first column"
+ lns <- many1 birdTrackLine
+ -- if (as is normal) there is always a space after >, drop it
+ let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
+ then map (drop 1) lns
+ else lns
+ blanklines
+ return $ CodeBlock ("", ["sourceCode", "haskell"], []) $ intercalate "\n" lns'
+
+birdTrackLine :: GenParser Char st [Char]
+birdTrackLine = do
+ char '>'
+ manyTill anyChar newline
+
+--
+-- raw html
+--
+
+rawHtmlBlock :: GenParser Char st Block
+rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
+ indentedBlock >>= return . RawHtml
+
+--
+-- raw latex
+--
+
+rawLaTeXBlock :: GenParser Char st Block
+rawLaTeXBlock = try $ do
+ string ".. raw:: latex"
+ blanklines
+ result <- indentedBlock
+ return $ Para [(TeX result)]
+
+--
+-- block quotes
+--
+
+blockQuote :: GenParser Char ParserState Block
+blockQuote = do
+ raw <- indentedBlock
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ return $ BlockQuote contents
+
+--
+-- list blocks
+--
+
+list :: GenParser Char ParserState Block
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+definitionListItem :: GenParser Char ParserState ([Inline], [Block])
+definitionListItem = try $ do
+ -- avoid capturing a directive or comment
+ notFollowedBy (try $ char '.' >> char '.')
+ term <- many1Till inline endline
+ raw <- indentedBlock
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ return (normalizeSpaces term, contents)
+
+definitionList :: GenParser Char ParserState Block
+definitionList = many1 definitionListItem >>= return . DefinitionList
+
+-- parses bullet list start and returns its length (inc. following whitespace)
+bulletListStart :: GenParser Char st Int
+bulletListStart = try $ do
+ notFollowedBy' hrule -- because hrules start out just like lists
+ marker <- oneOf bulletListMarkers
+ white <- many1 spaceChar
+ return $ length (marker:white)
+
+-- parses ordered list start and returns its length (inc following whitespace)
+orderedListStart :: ListNumberStyle
+ -> ListNumberDelim
+ -> GenParser Char st Int
+orderedListStart style delim = try $ do
+ (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
+ white <- many1 spaceChar
+ return $ markerLen + length white
+
+-- parse a line of a list item
+listLine :: Int -> GenParser Char ParserState [Char]
+listLine markerLength = try $ do
+ notFollowedBy blankline
+ indentWith markerLength
+ line <- manyTill anyChar newline
+ return $ line ++ "\n"
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith :: Int -> GenParser Char ParserState [Char]
+indentWith num = do
+ state <- getState
+ let tabStop = stateTabStop state
+ if (num < tabStop)
+ then count num (char ' ')
+ else choice [ try (count num (char ' ')),
+ (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem :: GenParser Char ParserState Int
+ -> GenParser Char ParserState (Int, [Char])
+rawListItem start = try $ do
+ markerLength <- start
+ firstLine <- manyTill anyChar newline
+ restLines <- many (listLine markerLength)
+ return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+
+-- continuation of a list item - indented and separated by blankline or
+-- (in compact lists) endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation :: Int -> GenParser Char ParserState [Char]
+listContinuation markerLength = try $ do
+ blanks <- many1 blankline
+ result <- many1 (listLine markerLength)
+ return $ blanks ++ concat result
+
+listItem :: GenParser Char ParserState Int
+ -> GenParser Char ParserState [Block]
+listItem start = try $ do
+ (markerLength, first) <- rawListItem start
+ rest <- many (listContinuation markerLength)
+ blanks <- choice [ try (many blankline >>~ lookAhead start),
+ many1 blankline ] -- whole list must end with blank.
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state {stateParserContext = ListItemState}
+ -- parse the extracted block, which may itself contain block elements
+ parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
+ updateState (\st -> st {stateParserContext = oldContext})
+ return parsed
+
+orderedList :: GenParser Char ParserState Block
+orderedList = try $ do
+ (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
+ items <- many1 (listItem (orderedListStart style delim))
+ let items' = compactify items
+ return $ OrderedList (start, style, delim) items'
+
+bulletList :: GenParser Char ParserState Block
+bulletList = many1 (listItem bulletListStart) >>=
+ return . BulletList . compactify
+
+--
+-- unknown directive (e.g. comment)
+--
+
+unknownDirective :: GenParser Char st Block
+unknownDirective = try $ do
+ string ".."
+ notFollowedBy (noneOf " \t\n")
+ manyTill anyChar newline
+ many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline)
+ return Null
+
+--
+-- reference key
+--
+
+quotedReferenceName :: GenParser Char ParserState [Inline]
+quotedReferenceName = try $ do
+ char '`' >> notFollowedBy (char '`') -- `` means inline code!
+ label' <- many1Till inline (char '`')
+ return label'
+
+unquotedReferenceName :: GenParser Char ParserState [Inline]
+unquotedReferenceName = try $ do
+ label' <- many1Till inline (lookAhead $ char ':')
+ return label'
+
+isolated :: Char -> GenParser Char st Char
+isolated ch = try $ char ch >>~ notFollowedBy (char ch)
+
+simpleReferenceName :: GenParser Char st [Inline]
+simpleReferenceName = do
+ raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|>
+ (try $ char '_' >>~ lookAhead alphaNum))
+ return [Str raw]
+
+referenceName :: GenParser Char ParserState [Inline]
+referenceName = quotedReferenceName <|>
+ (try $ simpleReferenceName >>~ lookAhead (char ':')) <|>
+ unquotedReferenceName
+
+referenceKey :: GenParser Char ParserState [Char]
+referenceKey = do
+ startPos <- getPosition
+ key <- choice [imageKey, anonymousKey, regularKey]
+ st <- getState
+ let oldkeys = stateKeys st
+ updateState $ \s -> s { stateKeys = key : oldkeys }
+ optional blanklines
+ endPos <- getPosition
+ -- return enough blanks to replace key
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+targetURI :: GenParser Char st [Char]
+targetURI = do
+ skipSpaces
+ optional newline
+ contents <- many1 (try (many spaceChar >> newline >>
+ many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
+ blanklines
+ return contents
+
+imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
+imageKey = try $ do
+ string ".. |"
+ ref <- manyTill inline (char '|')
+ skipSpaces
+ string "image::"
+ src <- targetURI
+ return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
+anonymousKey = try $ do
+ oneOfStrings [".. __:", "__"]
+ src <- targetURI
+ return ([Str "_"], (removeLeadingTrailingSpace src, ""))
+
+regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
+regularKey = try $ do
+ string ".. _"
+ ref <- referenceName
+ char ':'
+ src <- targetURI
+ return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+ --
+ -- inline
+ --
+
+inline :: GenParser Char ParserState Inline
+inline = choice [ link
+ , str
+ , whitespace
+ , endline
+ , strong
+ , emph
+ , code
+ , image
+ , hyphens
+ , superscript
+ , subscript
+ , escapedChar
+ , symbol ] <?> "inline"
+
+hyphens :: GenParser Char ParserState Inline
+hyphens = do
+ result <- many1 (char '-')
+ option Space endline
+ -- don't want to treat endline after hyphen or dash as a space
+ return $ Str result
+
+escapedChar :: GenParser Char st Inline
+escapedChar = escaped anyChar
+
+symbol :: GenParser Char ParserState Inline
+symbol = do
+ result <- oneOf specialChars
+ return $ Str [result]
+
+-- parses inline code, between codeStart and codeEnd
+code :: GenParser Char ParserState Inline
+code = try $ do
+ string "``"
+ result <- manyTill anyChar (try (string "``"))
+ return $ Code $ removeLeadingTrailingSpace $ intercalate " " $ lines result
+
+emph :: GenParser Char ParserState Inline
+emph = enclosed (char '*') (char '*') inline >>=
+ return . Emph . normalizeSpaces
+
+strong :: GenParser Char ParserState Inline
+strong = enclosed (string "**") (try $ string "**") inline >>=
+ return . Strong . normalizeSpaces
+
+interpreted :: [Char] -> GenParser Char st [Inline]
+interpreted role = try $ do
+ optional $ try $ string "\\ "
+ result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
+ try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
+ return [Str result]
+
+superscript :: GenParser Char ParserState Inline
+superscript = interpreted "sup" >>= (return . Superscript)
+
+subscript :: GenParser Char ParserState Inline
+subscript = interpreted "sub" >>= (return . Subscript)
+
+whitespace :: GenParser Char ParserState Inline
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
+
+str :: GenParser Char ParserState Inline
+str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline :: GenParser Char ParserState Inline
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ -- parse potential list-starts at beginning of line differently in a list:
+ st <- getState
+ if (stateParserContext st) == ListItemState
+ then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
+ notFollowedBy' bulletListStart
+ else return ()
+ return Space
+
+--
+-- links
+--
+
+link :: GenParser Char ParserState Inline
+link = choice [explicitLink, referenceLink, autoLink] <?> "link"
+
+explicitLink :: GenParser Char ParserState Inline
+explicitLink = try $ do
+ char '`'
+ notFollowedBy (char '`') -- `` marks start of inline code
+ label' <- manyTill (notFollowedBy (char '`') >> inline)
+ (try (spaces >> char '<'))
+ src <- manyTill (noneOf ">\n ") (char '>')
+ skipSpaces
+ string "`_"
+ return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "")
+
+referenceLink :: GenParser Char ParserState Inline
+referenceLink = try $ do
+ label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
+ key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable key of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ -- if anonymous link, remove first anon key so it won't be used again
+ let keyTable' = if (key == [Str "_"]) -- anonymous link?
+ then delete ([Str "_"], src) keyTable -- remove first anon key
+ else keyTable
+ setState $ state { stateKeys = keyTable' }
+ return $ Link (normalizeSpaces label') src
+
+autoURI :: GenParser Char ParserState Inline
+autoURI = do
+ src <- uri
+ return $ Link [Str src] (src, "")
+
+autoEmail :: GenParser Char ParserState Inline
+autoEmail = do
+ src <- emailAddress
+ return $ Link [Str src] ("mailto:" ++ src, "")
+
+autoLink :: GenParser Char ParserState Inline
+autoLink = autoURI <|> autoEmail
+
+-- For now, we assume that all substitution references are for images.
+image :: GenParser Char ParserState Inline
+image = try $ do
+ char '|'
+ ref <- manyTill inline (char '|')
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable ref of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ return $ Image (normalizeSpaces ref) src
+
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
new file mode 100644
index 000000000..04b0f3b8f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -0,0 +1,233 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.TeXMath
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of TeX math to a list of 'Pandoc' inline elements.
+-}
+module Text.Pandoc.Readers.TeXMath (
+ readTeXMath
+ ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+
+-- | Converts a string of raw TeX math to a list of 'Pandoc' inlines.
+readTeXMath :: String -> [Inline]
+readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of
+ Left _ -> [Str inp] -- if unparseable, just include original
+ Right res -> res
+
+teXMath :: GenParser Char st [Inline]
+teXMath = manyTill mathPart eof >>= return . concat
+
+mathPart :: GenParser Char st [Inline]
+mathPart = whitespace <|> superscript <|> subscript <|> symbol <|>
+ argument <|> digits <|> letters <|> misc
+
+whitespace :: GenParser Char st [Inline]
+whitespace = many1 space >> return []
+
+symbol :: GenParser Char st [Inline]
+symbol = try $ do
+ char '\\'
+ res <- many1 letter
+ case lookup res teXsymbols of
+ Just m -> return [Str m]
+ Nothing -> return [Str $ "\\" ++ res]
+
+argument :: GenParser Char st [Inline]
+argument = try $ do
+ char '{'
+ res <- many mathPart
+ char '}'
+ return $ if null res
+ then [Str " "]
+ else [Str "{"] ++ concat res ++ [Str "}"]
+
+digits :: GenParser Char st [Inline]
+digits = do
+ res <- many1 digit
+ return [Str res]
+
+letters :: GenParser Char st [Inline]
+letters = do
+ res <- many1 letter
+ return [Emph [Str res]]
+
+misc :: GenParser Char st [Inline]
+misc = do
+ res <- noneOf "}"
+ return [Str [res]]
+
+scriptArg :: GenParser Char st [Inline]
+scriptArg = try $ do
+ (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r}))
+ <|> symbol
+ <|> (do{c <- (letter <|> digit); return [Str [c]]})
+
+superscript :: GenParser Char st [Inline]
+superscript = try $ do
+ char '^'
+ arg <- scriptArg
+ return [Superscript arg]
+
+subscript :: GenParser Char st [Inline]
+subscript = try $ do
+ char '_'
+ arg <- scriptArg
+ return [Subscript arg]
+
+withThinSpace :: String -> String
+withThinSpace str = "\x2009" ++ str ++ "\x2009"
+
+teXsymbols :: [(String, String)]
+teXsymbols =
+ [("alpha","\x3B1")
+ ,("beta", "\x3B2")
+ ,("chi", "\x3C7")
+ ,("delta", "\x3B4")
+ ,("Delta", "\x394")
+ ,("epsilon", "\x3B5")
+ ,("varepsilon", "\x25B")
+ ,("eta", "\x3B7")
+ ,("gamma", "\x3B3")
+ ,("Gamma", "\x393")
+ ,("iota", "\x3B9")
+ ,("kappa", "\x3BA")
+ ,("lambda", "\x3BB")
+ ,("Lambda", "\x39B")
+ ,("mu", "\x3BC")
+ ,("nu", "\x3BD")
+ ,("omega", "\x3C9")
+ ,("Omega", "\x3A9")
+ ,("phi", "\x3C6")
+ ,("varphi", "\x3D5")
+ ,("Phi", "\x3A6")
+ ,("pi", "\x3C0")
+ ,("Pi", "\x3A0")
+ ,("psi", "\x3C8")
+ ,("Psi", "\x3A8")
+ ,("rho", "\x3C1")
+ ,("sigma", "\x3C3")
+ ,("Sigma", "\x3A3")
+ ,("tau", "\x3C4")
+ ,("theta", "\x3B8")
+ ,("vartheta", "\x3D1")
+ ,("Theta", "\x398")
+ ,("upsilon", "\x3C5")
+ ,("xi", "\x3BE")
+ ,("Xi", "\x39E")
+ ,("zeta", "\x3B6")
+ ,("ne", "\x2260")
+ ,("lt", withThinSpace "<")
+ ,("le", withThinSpace "\x2264")
+ ,("leq", withThinSpace "\x2264")
+ ,("ge", withThinSpace "\x2265")
+ ,("geq", withThinSpace "\x2265")
+ ,("prec", withThinSpace "\x227A")
+ ,("succ", withThinSpace "\x227B")
+ ,("preceq", withThinSpace "\x2AAF")
+ ,("succeq", withThinSpace "\x2AB0")
+ ,("in", withThinSpace "\x2208")
+ ,("notin", withThinSpace "\x2209")
+ ,("subset", withThinSpace "\x2282")
+ ,("supset", withThinSpace "\x2283")
+ ,("subseteq", withThinSpace "\x2286")
+ ,("supseteq", withThinSpace "\x2287")
+ ,("equiv", withThinSpace "\x2261")
+ ,("cong", withThinSpace "\x2245")
+ ,("approx", withThinSpace "\x2248")
+ ,("propto", withThinSpace "\x221D")
+ ,("cdot", withThinSpace "\x22C5")
+ ,("star", withThinSpace "\x22C6")
+ ,("backslash", "\\")
+ ,("times", withThinSpace "\x00D7")
+ ,("divide", withThinSpace "\x00F7")
+ ,("circ", withThinSpace "\x2218")
+ ,("oplus", withThinSpace "\x2295")
+ ,("otimes", withThinSpace "\x2297")
+ ,("odot", withThinSpace "\x2299")
+ ,("sum", "\x2211")
+ ,("prod", "\x220F")
+ ,("wedge", withThinSpace "\x2227")
+ ,("bigwedge", withThinSpace "\x22C0")
+ ,("vee", withThinSpace "\x2228")
+ ,("bigvee", withThinSpace "\x22C1")
+ ,("cap", withThinSpace "\x2229")
+ ,("bigcap", withThinSpace "\x22C2")
+ ,("cup", withThinSpace "\x222A")
+ ,("bigcup", withThinSpace "\x22C3")
+ ,("neg", "\x00AC")
+ ,("implies", withThinSpace "\x21D2")
+ ,("iff", withThinSpace "\x21D4")
+ ,("forall", "\x2200")
+ ,("exists", "\x2203")
+ ,("bot", "\x22A5")
+ ,("top", "\x22A4")
+ ,("vdash", "\x22A2")
+ ,("models", withThinSpace "\x22A8")
+ ,("uparrow", "\x2191")
+ ,("downarrow", "\x2193")
+ ,("rightarrow", withThinSpace "\x2192")
+ ,("to", withThinSpace "\x2192")
+ ,("rightarrowtail", "\x21A3")
+ ,("twoheadrightarrow", withThinSpace "\x21A0")
+ ,("twoheadrightarrowtail", withThinSpace "\x2916")
+ ,("mapsto", withThinSpace "\x21A6")
+ ,("leftarrow", withThinSpace "\x2190")
+ ,("leftrightarrow", withThinSpace "\x2194")
+ ,("Rightarrow", withThinSpace "\x21D2")
+ ,("Leftarrow", withThinSpace "\x21D0")
+ ,("Leftrightarrow", withThinSpace "\x21D4")
+ ,("partial", "\x2202")
+ ,("nabla", "\x2207")
+ ,("pm", "\x00B1")
+ ,("emptyset", "\x2205")
+ ,("infty", "\x221E")
+ ,("aleph", "\x2135")
+ ,("ldots", "...")
+ ,("therefore", "\x2234")
+ ,("angle", "\x2220")
+ ,("quad", "\x00A0\x00A0")
+ ,("cdots", "\x22EF")
+ ,("vdots", "\x22EE")
+ ,("ddots", "\x22F1")
+ ,("diamond", "\x22C4")
+ ,("Box", "\x25A1")
+ ,("lfloor", "\x230A")
+ ,("rfloor", "\x230B")
+ ,("lceiling", "\x2308")
+ ,("rceiling", "\x2309")
+ ,("langle", "\x2329")
+ ,("rangle", "\x232A")
+ ,("{", "{")
+ ,("}", "}")
+ ,("[", "[")
+ ,("]", "]")
+ ,("|", "|")
+ ,("||", "||")
+ ]
+
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
new file mode 100644
index 000000000..6854e5ae6
--- /dev/null
+++ b/src/Text/Pandoc/Shared.hs
@@ -0,0 +1,953 @@
+{-# LANGUAGE CPP #-}
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Shared
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions and definitions used by the various Pandoc modules.
+-}
+module Text.Pandoc.Shared (
+ -- * List processing
+ splitBy,
+ splitByIndices,
+ substitute,
+ -- * Text processing
+ backslashEscapes,
+ escapeStringUsing,
+ stripTrailingNewlines,
+ removeLeadingTrailingSpace,
+ removeLeadingSpace,
+ removeTrailingSpace,
+ stripFirstAndLast,
+ camelCaseToHyphenated,
+ toRomanNumeral,
+ wrapped,
+ wrapIfNeeded,
+ wrappedTeX,
+ wrapTeXIfNeeded,
+ BlockWrapper (..),
+ wrappedBlocksToDoc,
+ -- * Parsing
+ (>>~),
+ anyLine,
+ many1Till,
+ notFollowedBy',
+ oneOfStrings,
+ spaceChar,
+ skipSpaces,
+ blankline,
+ blanklines,
+ enclosed,
+ stringAnyCase,
+ parseFromString,
+ lineClump,
+ charsInBalanced,
+ charsInBalanced',
+ romanNumeral,
+ emailAddress,
+ uri,
+ withHorizDisplacement,
+ nullBlock,
+ failIfStrict,
+ failUnlessLHS,
+ escaped,
+ anyOrderedListMarker,
+ orderedListMarker,
+ charRef,
+ readWith,
+ testStringWith,
+ ParserState (..),
+ defaultParserState,
+ HeaderType (..),
+ ParserContext (..),
+ QuoteContext (..),
+ NoteTable,
+ KeyTable,
+ lookupKeySrc,
+ refsMatch,
+ -- * Prettyprinting
+ hang',
+ prettyPandoc,
+ -- * Pandoc block and inline list processing
+ orderedListMarkers,
+ normalizeSpaces,
+ compactify,
+ Element (..),
+ hierarchicalize,
+ isHeaderBlock,
+ -- * Writer options
+ HTMLMathMethod (..),
+ ObfuscationMethod (..),
+ WriterOptions (..),
+ defaultWriterOptions,
+ -- * File handling
+ inDirectory
+ ) where
+
+import Text.Pandoc.Definition
+import Text.ParserCombinators.Parsec
+import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
+import qualified Text.PrettyPrint.HughesPJ as PP
+import Text.Pandoc.CharacterReferences ( characterReference )
+import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
+import Data.List ( find, isPrefixOf, intercalate )
+import Control.Monad ( join )
+import Network.URI ( parseURI, URI (..), isAllowedInURI )
+import System.Directory
+import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
+import System.IO.UTF8
+
+--
+-- List processing
+--
+
+-- | Split list by groups of one or more sep.
+splitBy :: (Eq a) => a -> [a] -> [[a]]
+splitBy _ [] = []
+splitBy sep lst =
+ let (first, rest) = break (== sep) lst
+ rest' = dropWhile (== sep) rest
+ in first:(splitBy sep rest')
+
+-- | Split list into chunks divided at specified indices.
+splitByIndices :: [Int] -> [a] -> [[a]]
+splitByIndices [] lst = [lst]
+splitByIndices (x:xs) lst =
+ let (first, rest) = splitAt x lst in
+ first:(splitByIndices (map (\y -> y - x) xs) rest)
+
+-- | Replace each occurrence of one sublist in a list with another.
+substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
+substitute _ _ [] = []
+substitute [] _ lst = lst
+substitute target replacement lst =
+ if target `isPrefixOf` lst
+ then replacement ++ (substitute target replacement $ drop (length target) lst)
+ else (head lst):(substitute target replacement $ tail lst)
+
+--
+-- Text processing
+--
+
+-- | Returns an association list of backslash escapes for the
+-- designated characters.
+backslashEscapes :: [Char] -- ^ list of special characters to escape
+ -> [(Char, String)]
+backslashEscapes = map (\ch -> (ch, ['\\',ch]))
+
+-- | Escape a string of characters, using an association list of
+-- characters and strings.
+escapeStringUsing :: [(Char, String)] -> String -> String
+escapeStringUsing _ [] = ""
+escapeStringUsing escapeTable (x:xs) =
+ case (lookup x escapeTable) of
+ Just str -> str ++ rest
+ Nothing -> x:rest
+ where rest = escapeStringUsing escapeTable xs
+
+-- | Strip trailing newlines from string.
+stripTrailingNewlines :: String -> String
+stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
+
+-- | Remove leading and trailing space (including newlines) from string.
+removeLeadingTrailingSpace :: String -> String
+removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
+
+-- | Remove leading space (including newlines) from string.
+removeLeadingSpace :: String -> String
+removeLeadingSpace = dropWhile (`elem` " \n\t")
+
+-- | Remove trailing space (including newlines) from string.
+removeTrailingSpace :: String -> String
+removeTrailingSpace = reverse . removeLeadingSpace . reverse
+
+-- | Strip leading and trailing characters from string
+stripFirstAndLast :: String -> String
+stripFirstAndLast str =
+ drop 1 $ take ((length str) - 1) str
+
+-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
+camelCaseToHyphenated :: String -> String
+camelCaseToHyphenated [] = ""
+camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
+ a:'-':(toLower b):(camelCaseToHyphenated rest)
+camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
+
+-- | Convert number < 4000 to uppercase roman numeral.
+toRomanNumeral :: Int -> String
+toRomanNumeral x =
+ if x >= 4000 || x < 0
+ then "?"
+ else case x of
+ _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
+ _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
+ _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
+ _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
+ _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
+ _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
+ _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
+ _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
+ _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
+ _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
+ _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
+ _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
+ _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
+ _ -> ""
+
+-- | Wrap inlines to line length.
+wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
+wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
+ return . fsep
+
+-- | Wrap inlines if the text wrap option is selected.
+wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
+ [Inline] -> m Doc
+wrapIfNeeded opts = if writerWrapText opts
+ then wrapped
+ else ($)
+
+-- auxiliary function for wrappedTeX
+isNote :: Inline -> Bool
+isNote (Note _) = True
+isNote _ = False
+
+-- | Wrap inlines to line length, treating footnotes in a way that
+-- makes sense in LaTeX and ConTeXt.
+wrappedTeX :: Monad m
+ => Bool
+ -> ([Inline] -> m Doc)
+ -> [Inline]
+ -> m Doc
+wrappedTeX includePercent listWriter sect = do
+ let (firstpart, rest) = break isNote sect
+ firstpartWrapped <- wrapped listWriter firstpart
+ if null rest
+ then return firstpartWrapped
+ else do let (note:rest') = rest
+ let (rest1, rest2) = break (== Space) rest'
+ -- rest1 is whatever comes between the note and a Space.
+ -- if the note is followed directly by a Space, rest1 is null.
+ -- rest1 is printed after the note but before the line break,
+ -- to avoid spurious blank space the note and immediately
+ -- following punctuation.
+ rest1Out <- if null rest1
+ then return empty
+ else listWriter rest1
+ rest2Wrapped <- if null rest2
+ then return empty
+ else wrappedTeX includePercent listWriter (tail rest2)
+ noteText <- listWriter [note]
+ return $ (firstpartWrapped <> if includePercent then PP.char '%' else empty) $$
+ (noteText <> rest1Out) $$
+ rest2Wrapped
+
+-- | Wrap inlines if the text wrap option is selected, specialized
+-- for LaTeX and ConTeXt.
+wrapTeXIfNeeded :: Monad m
+ => WriterOptions
+ -> Bool
+ -> ([Inline] -> m Doc)
+ -> [Inline]
+ -> m Doc
+wrapTeXIfNeeded opts includePercent = if writerWrapText opts
+ then wrappedTeX includePercent
+ else ($)
+
+-- | Indicates whether block should be surrounded by blank lines (@Pad@) or not (@Reg@).
+data BlockWrapper = Pad Doc | Reg Doc
+
+-- | Converts a list of wrapped blocks to a Doc, with appropriate spaces around blocks.
+wrappedBlocksToDoc :: [BlockWrapper] -> Doc
+wrappedBlocksToDoc = foldr addBlock empty
+ where addBlock (Pad d) accum | isEmpty accum = d
+ addBlock (Pad d) accum = d $$ text "" $$ accum
+ addBlock (Reg d) accum = d $$ accum
+
+--
+-- Parsing
+--
+
+-- | Like >>, but returns the operation on the left.
+-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
+(>>~) :: (Monad m) => m a -> m b -> m a
+a >>~ b = a >>= \x -> b >> return x
+
+-- | Parse any line of text
+anyLine :: GenParser Char st [Char]
+anyLine = manyTill anyChar newline
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: GenParser tok st a
+ -> GenParser tok st end
+ -> GenParser tok st [a]
+many1Till p end = do
+ first <- p
+ rest <- manyTill p end
+ return (first:rest)
+
+-- | A more general form of @notFollowedBy@. This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' p = try $ join $ do a <- try p
+ return (unexpected (show a))
+ <|>
+ return (return ())
+-- (This version due to Andrew Pimlott on the Haskell mailing list.)
+
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> GenParser Char st String
+oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+
+-- | Parses a space or tab.
+spaceChar :: CharParser st Char
+spaceChar = char ' ' <|> char '\t'
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: GenParser Char st ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: GenParser Char st Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: GenParser Char st [Char]
+blanklines = many1 blankline
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: GenParser Char st t -- ^ start parser
+ -> GenParser Char st end -- ^ end parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+enclosed start end parser = try $
+ start >> notFollowedBy space >> many1Till parser end
+
+-- | Parse string, case insensitive.
+stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase [] = string ""
+stringAnyCase (x:xs) = do
+ firstChar <- char (toUpper x) <|> char (toLower x)
+ rest <- stringAnyCase xs
+ return (firstChar:rest)
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString parser str = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ setInput oldInput
+ setPosition oldPos
+ return result
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: GenParser Char st String
+lineClump = blanklines
+ <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close, which must be different. For example,
+-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
+-- and return "hello (there)". Stop if a blank line is
+-- encountered.
+charsInBalanced :: Char -> Char -> GenParser Char st String
+charsInBalanced open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close, '\n']))
+ <|> (do res <- charsInBalanced open close
+ return $ [open] ++ res ++ [close])
+ <|> try (string "\n" >>~ notFollowedBy' blanklines)
+ char close
+ return $ concat raw
+
+-- | Like @charsInBalanced@, but allow blank lines in the content.
+charsInBalanced' :: Char -> Char -> GenParser Char st String
+charsInBalanced' open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close]))
+ <|> (do res <- charsInBalanced' open close
+ return $ [open] ++ res ++ [close])
+ char close
+ return $ concat raw
+
+-- Auxiliary functions for romanNumeral:
+
+lowercaseRomanDigits :: [Char]
+lowercaseRomanDigits = ['i','v','x','l','c','d','m']
+
+uppercaseRomanDigits :: [Char]
+uppercaseRomanDigits = map toUpper lowercaseRomanDigits
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: Bool -- ^ Uppercase if true
+ -> GenParser Char st Int
+romanNumeral upperCase = do
+ let romanDigits = if upperCase
+ then uppercaseRomanDigits
+ else lowercaseRomanDigits
+ lookAhead $ oneOf romanDigits
+ let [one, five, ten, fifty, hundred, fivehundred, thousand] =
+ map char romanDigits
+ thousands <- many thousand >>= (return . (1000 *) . length)
+ ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+ fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+ hundreds <- many hundred >>= (return . (100 *) . length)
+ nineties <- option 0 $ try $ ten >> hundred >> return 90
+ fifties <- many fifty >>= (return . (50 *) . length)
+ forties <- option 0 $ try $ ten >> fifty >> return 40
+ tens <- many ten >>= (return . (10 *) . length)
+ nines <- option 0 $ try $ one >> ten >> return 9
+ fives <- many five >>= (return . (5 *) . length)
+ fours <- option 0 $ try $ one >> five >> return 4
+ ones <- many one >>= (return . length)
+ let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+ hundreds + nineties + fifties + forties + tens + nines +
+ fives + fours + ones
+ if total == 0
+ then fail "not a roman numeral"
+ else return total
+
+-- Parsers for email addresses and URIs
+
+emailChar :: GenParser Char st Char
+emailChar = alphaNum <|> oneOf "-+_."
+
+domainChar :: GenParser Char st Char
+domainChar = alphaNum <|> char '-'
+
+domain :: GenParser Char st [Char]
+domain = do
+ first <- many1 domainChar
+ dom <- many1 $ try (char '.' >> many1 domainChar )
+ return $ intercalate "." (first:dom)
+
+-- | Parses an email address; returns string.
+emailAddress :: GenParser Char st [Char]
+emailAddress = try $ do
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ return $ addr ++ '@':dom
+
+-- | Parses a URI.
+uri :: GenParser Char st String
+uri = try $ do
+ str <- many1 $ satisfy isAllowedInURI
+ case parseURI str of
+ Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:",
+ "file:", "mailto:",
+ "news:", "telnet:" ]
+ then return $ show uri'
+ else fail "not a URI"
+ Nothing -> fail "not a URI"
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
+ -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+ pos1 <- getPosition
+ result <- parser
+ pos2 <- getPosition
+ return (result, sourceColumn pos2 - sourceColumn pos1)
+
+-- | Parses a character and returns 'Null' (so that the parser can move on
+-- if it gets stuck).
+nullBlock :: GenParser Char st Block
+nullBlock = anyChar >> return Null
+
+-- | Fail if reader is in strict markdown syntax mode.
+failIfStrict :: GenParser Char ParserState ()
+failIfStrict = do
+ state <- getState
+ if stateStrict state then fail "strict mode" else return ()
+
+-- | Fail unless we're in literate haskell mode.
+failUnlessLHS :: GenParser tok ParserState ()
+failUnlessLHS = do
+ state <- getState
+ if stateLiterateHaskell state then return () else fail "Literate haskell feature"
+
+-- | Parses backslash, then applies character parser.
+escaped :: GenParser Char st Char -- ^ Parser for character to escape
+ -> GenParser Char st Inline
+escaped parser = try $ do
+ char '\\'
+ result <- parser
+ return (Str [result])
+
+-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
+upperRoman :: GenParser Char st (ListNumberStyle, Int)
+upperRoman = do
+ num <- romanNumeral True
+ return (UpperRoman, num)
+
+-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
+lowerRoman :: GenParser Char st (ListNumberStyle, Int)
+lowerRoman = do
+ num <- romanNumeral False
+ return (LowerRoman, num)
+
+-- | Parses a decimal numeral and returns (Decimal, number).
+decimal :: GenParser Char st (ListNumberStyle, Int)
+decimal = do
+ num <- many1 digit
+ return (Decimal, read num)
+
+-- | Parses a '#' returns (DefaultStyle, 1).
+defaultNum :: GenParser Char st (ListNumberStyle, Int)
+defaultNum = do
+ char '#'
+ return (DefaultStyle, 1)
+
+-- | Parses a lowercase letter and returns (LowerAlpha, number).
+lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
+lowerAlpha = do
+ ch <- oneOf ['a'..'z']
+ return (LowerAlpha, ord ch - ord 'a' + 1)
+
+-- | Parses an uppercase letter and returns (UpperAlpha, number).
+upperAlpha :: GenParser Char st (ListNumberStyle, Int)
+upperAlpha = do
+ ch <- oneOf ['A'..'Z']
+ return (UpperAlpha, ord ch - ord 'A' + 1)
+
+-- | Parses a roman numeral i or I
+romanOne :: GenParser Char st (ListNumberStyle, Int)
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+ (char 'I' >> return (UpperRoman, 1))
+
+-- | Parses an ordered list marker and returns list attributes.
+anyOrderedListMarker :: GenParser Char st ListAttributes
+anyOrderedListMarker = choice $
+ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+ numParser <- [decimal, defaultNum, romanOne,
+ lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+
+-- | Parses a list number (num) followed by a period, returns list attributes.
+inPeriod :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inPeriod num = try $ do
+ (style, start) <- num
+ char '.'
+ let delim = if style == DefaultStyle
+ then DefaultDelim
+ else Period
+ return (start, style, delim)
+
+-- | Parses a list number (num) followed by a paren, returns list attributes.
+inOneParen :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inOneParen num = try $ do
+ (style, start) <- num
+ char ')'
+ return (start, style, OneParen)
+
+-- | Parses a list number (num) enclosed in parens, returns list attributes.
+inTwoParens :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inTwoParens num = try $ do
+ char '('
+ (style, start) <- num
+ char ')'
+ return (start, style, TwoParens)
+
+-- | Parses an ordered list marker with a given style and delimiter,
+-- returns number.
+orderedListMarker :: ListNumberStyle
+ -> ListNumberDelim
+ -> GenParser Char st Int
+orderedListMarker style delim = do
+ let num = case style of
+ DefaultStyle -> decimal <|> defaultNum
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ let context = case delim of
+ DefaultDelim -> inPeriod
+ Period -> inPeriod
+ OneParen -> inOneParen
+ TwoParens -> inTwoParens
+ (start, _, _) <- context num
+ return start
+
+-- | Parses a character reference and returns a Str element.
+charRef :: GenParser Char st Inline
+charRef = do
+ c <- characterReference
+ return $ Str [c]
+
+-- | Parse a string with a given parser and state.
+readWith :: GenParser Char ParserState a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> String -- ^ input string
+ -> a
+readWith parser state input =
+ case runParser parser state "source" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> result
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a) => GenParser Char ParserState a
+ -> String
+ -> IO ()
+testStringWith parser str = putStrLn $ show $
+ readWith parser defaultParserState str
+
+-- | Parsing options.
+data ParserState = ParserState
+ { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateSanitizeHTML :: Bool, -- ^ Sanitize HTML?
+ stateKeys :: KeyTable, -- ^ List of reference keys
+#ifdef _CITEPROC
+ stateCitations :: [String], -- ^ List of available citations
+#endif
+ stateNotes :: NoteTable, -- ^ List of notes
+ stateTabStop :: Int, -- ^ Tab stop
+ stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateTitle :: [Inline], -- ^ Title of document
+ stateAuthors :: [String], -- ^ Authors of document
+ stateDate :: String, -- ^ Date of document
+ stateStrict :: Bool, -- ^ Use strict markdown syntax?
+ stateSmart :: Bool, -- ^ Use smart typography?
+ stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
+ stateColumns :: Int, -- ^ Number of columns in terminal
+ stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
+ }
+ deriving Show
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateParseRaw = False,
+ stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
+ stateSanitizeHTML = False,
+ stateKeys = [],
+#ifdef _CITEPROC
+ stateCitations = [],
+#endif
+ stateNotes = [],
+ stateTabStop = 4,
+ stateStandalone = False,
+ stateTitle = [],
+ stateAuthors = [],
+ stateDate = [],
+ stateStrict = False,
+ stateSmart = False,
+ stateLiterateHaskell = False,
+ stateColumns = 80,
+ stateHeaderTable = [] }
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data QuoteContext
+ = InSingleQuote -- ^ Used when parsing inside single quotes
+ | InDoubleQuote -- ^ Used when parsing inside double quotes
+ | NoQuote -- ^ Used when not parsing inside quotes
+ deriving (Eq, Show)
+
+type NoteTable = [(String, [Block])]
+
+type KeyTable = [([Inline], Target)]
+
+-- | Look up key in key table and return target object.
+lookupKeySrc :: KeyTable -- ^ Key table
+ -> [Inline] -- ^ Key
+ -> Maybe Target
+lookupKeySrc table key = case find (refsMatch key . fst) table of
+ Nothing -> Nothing
+ Just (_, src) -> Just src
+
+-- | Returns @True@ if keys match (case insensitive).
+refsMatch :: [Inline] -> [Inline] -> Bool
+refsMatch ((Str x):restx) ((Str y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Emph x):restx) ((Emph y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strong x):restx) ((Strong y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Superscript x):restx) ((Superscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Subscript x):restx) ((Subscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
+ t == u && refsMatch x y && refsMatch restx resty
+refsMatch ((Code x):restx) ((Code y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Math t x):restx) ((Math u y):resty) =
+ ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty
+refsMatch ((TeX x):restx) ((TeX y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
+refsMatch [] x = null x
+refsMatch x [] = null x
+
+--
+-- Prettyprinting
+--
+
+-- | A version of hang that works like the version in pretty-1.0.0.0
+hang' :: Doc -> Int -> Doc -> Doc
+hang' d1 n d2 = d1 $$ (nest n d2)
+
+-- | Indent string as a block.
+indentBy :: Int -- ^ Number of spaces to indent the block
+ -> Int -- ^ Number of spaces (rel to block) to indent first line
+ -> String -- ^ Contents of block to indent
+ -> String
+indentBy _ _ [] = ""
+indentBy num first str =
+ let (firstLine:restLines) = lines str
+ firstLineIndent = num + first
+ in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
+ (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines)
+
+-- | Prettyprint list of Pandoc blocks elements.
+prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
+ -> [Block] -- ^ List of blocks
+ -> String
+prettyBlockList indent [] = indentBy indent 0 "[]"
+prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
+ (intercalate "\n, " (map prettyBlock blocks)) ++ " ]"
+
+-- | Prettyprint Pandoc block element.
+prettyBlock :: Block -> String
+prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
+ (prettyBlockList 2 blocks)
+prettyBlock (OrderedList attribs blockLists) =
+ "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
+ (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks)
+ blockLists)) ++ " ]"
+prettyBlock (BulletList blockLists) = "BulletList\n" ++
+ indentBy 2 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++
+ indentBy 2 0 ("[" ++ (intercalate ",\n"
+ (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
+ indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
+prettyBlock (Table caption aligns widths header rows) =
+ "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
+ show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
+ (intercalate ",\n" (map prettyRow rows)) ++ " ]"
+ where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks)
+ cols))) ++ " ]"
+prettyBlock block = show block
+
+-- | Prettyprint Pandoc document.
+prettyPandoc :: Pandoc -> String
+prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
+ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
+
+--
+-- Pandoc block and inline list processing
+--
+
+-- | Generate infinite lazy list of markers for an ordered list,
+-- depending on list attributes.
+orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
+orderedListMarkers (start, numstyle, numdelim) =
+ let singleton c = [c]
+ nums = case numstyle of
+ DefaultStyle -> map show [start..]
+ Decimal -> map show [start..]
+ UpperAlpha -> drop (start - 1) $ cycle $
+ map singleton ['A'..'Z']
+ LowerAlpha -> drop (start - 1) $ cycle $
+ map singleton ['a'..'z']
+ UpperRoman -> map toRomanNumeral [start..]
+ LowerRoman -> map (map toLower . toRomanNumeral) [start..]
+ inDelim str = case numdelim of
+ DefaultDelim -> str ++ "."
+ Period -> str ++ "."
+ OneParen -> str ++ ")"
+ TwoParens -> "(" ++ str ++ ")"
+ in map inDelim nums
+
+-- | Normalize a list of inline elements: remove leading and trailing
+-- @Space@ elements, collapse double @Space@s into singles, and
+-- remove empty Str elements.
+normalizeSpaces :: [Inline] -> [Inline]
+normalizeSpaces [] = []
+normalizeSpaces list =
+ let removeDoubles [] = []
+ removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
+ removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
+ removeDoubles ((Str ""):rest) = removeDoubles rest
+ removeDoubles (x:rest) = x:(removeDoubles rest)
+ removeLeading (Space:xs) = removeLeading xs
+ removeLeading x = x
+ removeTrailing [] = []
+ removeTrailing lst = if (last lst == Space)
+ then init lst
+ else lst
+ in removeLeading $ removeTrailing $ removeDoubles list
+
+-- | Change final list item from @Para@ to @Plain@ if the list should
+-- be compact.
+compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
+ -> [[Block]]
+compactify [] = []
+compactify items =
+ let final = last items
+ others = init items
+ in case last final of
+ Para a -> if all endsWithPlain others && not (null final)
+ then others ++ [init final ++ [Plain a]]
+ else items
+ _ -> items
+
+endsWithPlain :: [Block] -> Bool
+endsWithPlain [] = False
+endsWithPlain blocks =
+ case last blocks of
+ Plain _ -> True
+ (BulletList (x:xs)) -> endsWithPlain $ last (x:xs)
+ (OrderedList _ (x:xs)) -> endsWithPlain $ last (x:xs)
+ (DefinitionList (x:xs)) -> endsWithPlain $ last $ map snd (x:xs)
+ _ -> False
+
+-- | Data structure for defining hierarchical Pandoc documents
+data Element = Blk Block
+ | Sec [Inline] [Element] deriving (Eq, Read, Show)
+
+-- | Returns @True@ on Header block with at least the specified level
+headerAtLeast :: Int -> Block -> Bool
+headerAtLeast level (Header x _) = x <= level
+headerAtLeast _ _ = False
+
+-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
+hierarchicalize :: [Block] -> [Element]
+hierarchicalize [] = []
+hierarchicalize (block:rest) =
+ case block of
+ (Header level title) ->
+ let (thisSection, rest') = break (headerAtLeast level) rest
+ in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
+ x -> (Blk x):(hierarchicalize rest)
+
+-- | True if block is a Header block.
+isHeaderBlock :: Block -> Bool
+isHeaderBlock (Header _ _) = True
+isHeaderBlock _ = False
+
+--
+-- Writer options
+--
+
+data HTMLMathMethod = PlainMath
+ | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
+ | JsMath (Maybe String) -- url of jsMath load script
+ | GladTeX
+ | MimeTeX String -- url of mimetex.cgi
+ deriving (Show, Read, Eq)
+
+-- | Methods for obfuscating email addresses in HTML.
+data ObfuscationMethod = NoObfuscation
+ | ReferenceObfuscation
+ | JavascriptObfuscation
+ deriving (Show, Read, Eq)
+
+-- | Options for writers
+data WriterOptions = WriterOptions
+ { writerStandalone :: Bool -- ^ Include header and footer
+ , writerHeader :: String -- ^ Header for the document
+ , writerTitlePrefix :: String -- ^ Prefix for HTML titles
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerS5 :: Bool -- ^ We're writing S5
+ , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
+ , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
+ , writerIncremental :: Bool -- ^ Incremental S5 lists
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerIncludeBefore :: String -- ^ String to include before the body
+ , writerIncludeAfter :: String -- ^ String to include after the body
+ , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , writerWrapText :: Bool -- ^ Wrap text to line length
+ , writerLiterateHaskell :: Bool -- ^ Write as literate haskell
+ , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
+ } deriving Show
+
+-- | Default writer options.
+defaultWriterOptions :: WriterOptions
+defaultWriterOptions =
+ WriterOptions { writerStandalone = False
+ , writerHeader = ""
+ , writerTitlePrefix = ""
+ , writerTabStop = 4
+ , writerTableOfContents = False
+ , writerS5 = False
+ , writerHTMLMathMethod = PlainMath
+ , writerIgnoreNotes = False
+ , writerIncremental = False
+ , writerNumberSections = False
+ , writerIncludeBefore = ""
+ , writerIncludeAfter = ""
+ , writerStrictMarkdown = False
+ , writerReferenceLinks = False
+ , writerWrapText = True
+ , writerLiterateHaskell = False
+ , writerEmailObfuscation = JavascriptObfuscation
+ }
+
+--
+-- File handling
+--
+
+-- | Perform an IO action in a directory, returning to starting directory.
+inDirectory :: FilePath -> IO a -> IO a
+inDirectory path action = do
+ oldDir <- getCurrentDirectory
+ setCurrentDirectory path
+ result <- action
+ setCurrentDirectory oldDir
+ return result
diff --git a/src/Text/Pandoc/TH.hs b/src/Text/Pandoc/TH.hs
new file mode 100644
index 000000000..0dc5a6719
--- /dev/null
+++ b/src/Text/Pandoc/TH.hs
@@ -0,0 +1,65 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.TH
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Template haskell functions used by Pandoc modules.
+-}
+module Text.Pandoc.TH (
+ contentsOf,
+ binaryContentsOf,
+ makeZip
+ ) where
+
+import Language.Haskell.TH
+import Language.Haskell.TH.Syntax (Lift (..))
+import qualified Data.ByteString as B
+import Data.ByteString.Internal ( w2c )
+import Prelude hiding ( readFile )
+import System.IO.UTF8
+import Codec.Archive.Zip
+import Text.Pandoc.Shared ( inDirectory )
+
+-- | Insert contents of text file into a template.
+contentsOf :: FilePath -> ExpQ
+contentsOf p = lift =<< (runIO $ readFile p)
+
+-- | Insert contents of binary file into a template.
+-- Note that @Data.ByteString.readFile@ uses binary mode on windows.
+binaryContentsOf :: FilePath -> ExpQ
+binaryContentsOf p = lift =<< (runIO $ B.readFile p)
+
+instance Lift B.ByteString where
+ lift x = return (LitE (StringL $ map w2c $ B.unpack x))
+
+instance Lift Archive where
+ lift x = return (LitE (StringL $ show x ))
+
+-- | Construct zip file from files in a directory, and
+-- insert into a template.
+makeZip :: FilePath -> ExpQ
+makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive ["."])
+
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
new file mode 100644
index 000000000..014751968
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -0,0 +1,302 @@
+{-
+Copyright (C) 2007-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.ConTeXt
+ Copyright : Copyright (C) 2007-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into ConTeXt.
+-}
+module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( isSuffixOf, intercalate )
+import Control.Monad.State
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+data WriterState =
+ WriterState { stNextRef :: Int -- number of next URL reference
+ , stOrderedListLevel :: Int -- level of ordered list
+ , stOptions :: WriterOptions -- writer options
+ }
+
+orderedListStyles :: [[Char]]
+orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"]
+
+-- | Convert Pandoc to ConTeXt.
+writeConTeXt :: WriterOptions -> Pandoc -> String
+writeConTeXt options document =
+ let defaultWriterState = WriterState { stNextRef = 1
+ , stOrderedListLevel = 0
+ , stOptions = options
+ }
+ in render $
+ evalState (pandocToConTeXt options document) defaultWriterState
+
+pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToConTeXt options (Pandoc meta blocks) = do
+ main <- blockListToConTeXt blocks
+ let before = if null (writerIncludeBefore options)
+ then empty
+ else text $ writerIncludeBefore options
+ let after = if null (writerIncludeAfter options)
+ then empty
+ else text $ writerIncludeAfter options
+ let body = before $$ main $$ after
+ head' <- if writerStandalone options
+ then contextHeader options meta
+ else return empty
+ let toc = if writerTableOfContents options
+ then text "\\placecontent\n"
+ else empty
+ let foot = if writerStandalone options
+ then text "\\stoptext\n"
+ else empty
+ return $ head' $$ toc $$ body $$ foot
+
+-- | Insert bibliographic information into ConTeXt header.
+contextHeader :: WriterOptions -- ^ Options, including ConTeXt header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState Doc
+contextHeader options (Meta title authors date) = do
+ titletext <- if null title
+ then return empty
+ else inlineListToConTeXt title
+ let authorstext = if null authors
+ then ""
+ else if length authors == 1
+ then stringToConTeXt $ head authors
+ else stringToConTeXt $ (intercalate ", " $
+ init authors) ++ " & " ++ last authors
+ let datetext = if date == ""
+ then ""
+ else stringToConTeXt date
+ let titleblock = text "\\doctitle{" <> titletext <> char '}' $$
+ text ("\\author{" ++ authorstext ++ "}") $$
+ text ("\\date{" ++ datetext ++ "}")
+ let header = text $ writerHeader options
+ return $ header $$ titleblock $$ text "\\starttext\n\\maketitle\n"
+
+-- escape things as needed for ConTeXt
+
+escapeCharForConTeXt :: Char -> String
+escapeCharForConTeXt ch =
+ case ch of
+ '{' -> "\\letteropenbrace{}"
+ '}' -> "\\letterclosebrace{}"
+ '\\' -> "\\letterbackslash{}"
+ '$' -> "\\$"
+ '|' -> "\\letterbar{}"
+ '^' -> "\\letterhat{}"
+ '%' -> "\\%"
+ '~' -> "\\lettertilde{}"
+ '&' -> "\\&"
+ '#' -> "\\#"
+ '<' -> "\\letterless{}"
+ '>' -> "\\lettermore{}"
+ '_' -> "\\letterunderscore{}"
+ '\160' -> "~"
+ x -> [x]
+
+-- | Escape string for ConTeXt
+stringToConTeXt :: String -> String
+stringToConTeXt = concatMap escapeCharForConTeXt
+
+-- | Convert Pandoc block element to ConTeXt.
+blockToConTeXt :: Block
+ -> State WriterState BlockWrapper
+blockToConTeXt Null = return $ Reg empty
+blockToConTeXt (Plain lst) = do
+ st <- get
+ let options = stOptions st
+ contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst
+ return $ Reg contents
+blockToConTeXt (Para lst) = do
+ st <- get
+ let options = stOptions st
+ contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst
+ return $ Pad contents
+blockToConTeXt (BlockQuote lst) = do
+ contents <- blockListToConTeXt lst
+ return $ Pad $ text "\\startblockquote" $$ contents $$ text "\\stopblockquote"
+blockToConTeXt (CodeBlock _ str) =
+ return $ Reg $ text $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n"
+ -- \n because \stoptyping can't have anything after it, inc. }
+blockToConTeXt (RawHtml _) = return $ Reg empty
+blockToConTeXt (BulletList lst) = do
+ contents <- mapM listItemToConTeXt lst
+ return $ Pad $ text "\\startitemize" $$ vcat contents $$ text "\\stopitemize"
+blockToConTeXt (OrderedList (start, style', delim) lst) = do
+ st <- get
+ let level = stOrderedListLevel st
+ put $ st {stOrderedListLevel = level + 1}
+ contents <- mapM listItemToConTeXt lst
+ put $ st {stOrderedListLevel = level}
+ let start' = if start == 1 then "" else "start=" ++ show start
+ let delim' = case delim of
+ DefaultDelim -> ""
+ Period -> "stopper=."
+ OneParen -> "stopper=)"
+ TwoParens -> "left=(,stopper=)"
+ let width = maximum $ map length $ take (length contents)
+ (orderedListMarkers (start, style', delim))
+ let width' = (toEnum width + 1) / 2
+ let width'' = if width' > (1.5 :: Double)
+ then "width=" ++ show width' ++ "em"
+ else ""
+ let specs2Items = filter (not . null) [start', delim', width'']
+ let specs2 = if null specs2Items
+ then ""
+ else "[" ++ intercalate "," specs2Items ++ "]"
+ let style'' = case style' of
+ DefaultStyle -> orderedListStyles !! level
+ Decimal -> "[n]"
+ LowerRoman -> "[r]"
+ UpperRoman -> "[R]"
+ LowerAlpha -> "[a]"
+ UpperAlpha -> "[A]"
+ let specs = style'' ++ specs2
+ return $ Pad $ text ("\\startitemize" ++ specs) $$ vcat contents $$
+ text "\\stopitemize"
+blockToConTeXt (DefinitionList lst) =
+ mapM defListItemToConTeXt lst >>= return . Pad . wrappedBlocksToDoc
+blockToConTeXt HorizontalRule = return $ Pad $ text "\\thinrule"
+blockToConTeXt (Header level lst) = do
+ contents <- inlineListToConTeXt lst
+ st <- get
+ let opts = stOptions st
+ let base = if writerNumberSections opts then "section" else "subject"
+ return $ Pad $ if level >= 1 && level <= 5
+ then char '\\' <> text (concat (replicate (level - 1) "sub")) <>
+ text base <> char '{' <> contents <> char '}'
+ else contents
+blockToConTeXt (Table caption aligns widths heads rows) = do
+ let colWidths = map printDecimal widths
+ let colDescriptor colWidth alignment = (case alignment of
+ AlignLeft -> 'l'
+ AlignRight -> 'r'
+ AlignCenter -> 'c'
+ AlignDefault -> 'l'):
+ "p(" ++ colWidth ++ "\\textwidth)|"
+ let colDescriptors = "|" ++ (concat $
+ zipWith colDescriptor colWidths aligns)
+ headers <- tableRowToConTeXt heads
+ captionText <- inlineListToConTeXt caption
+ let captionText' = if null caption then text "none" else captionText
+ rows' <- mapM tableRowToConTeXt rows
+ return $ Pad $ text "\\placetable[here]{" <> captionText' <> char '}' $$
+ text "\\starttable[" <> text colDescriptors <> char ']' $$
+ text "\\HL" $$ headers $$ text "\\HL" $$
+ vcat rows' $$ text "\\HL\n\\stoptable"
+
+printDecimal :: Double -> String
+printDecimal = printf "%.2f"
+
+tableRowToConTeXt :: [[Block]] -> State WriterState Doc
+tableRowToConTeXt cols = do
+ cols' <- mapM blockListToConTeXt cols
+ return $ (vcat (map (text "\\NC " <>) cols')) $$
+ text "\\NC\\AR"
+
+listItemToConTeXt :: [Block] -> State WriterState Doc
+listItemToConTeXt list = blockListToConTeXt list >>=
+ return . (text "\\item" $$) . (nest 2)
+
+defListItemToConTeXt :: ([Inline], [Block]) -> State WriterState BlockWrapper
+defListItemToConTeXt (term, def) = do
+ term' <- inlineListToConTeXt term
+ def' <- blockListToConTeXt def
+ return $ Pad $ text "\\startdescr{" <> term' <> char '}' $$ def' $$ text "\\stopdescr"
+
+-- | Convert list of block elements to ConTeXt.
+blockListToConTeXt :: [Block] -> State WriterState Doc
+blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . wrappedBlocksToDoc
+
+-- | Convert list of inline elements to ConTeXt.
+inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
+ -> State WriterState Doc
+inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . hcat
+
+-- | Convert inline element to ConTeXt
+inlineToConTeXt :: Inline -- ^ Inline to convert
+ -> State WriterState Doc
+inlineToConTeXt (Emph lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ text "{\\em " <> contents <> char '}'
+inlineToConTeXt (Strong lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ text "{\\bf " <> contents <> char '}'
+inlineToConTeXt (Strikeout lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ text "\\overstrikes{" <> contents <> char '}'
+inlineToConTeXt (Superscript lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ text "\\high{" <> contents <> char '}'
+inlineToConTeXt (Subscript lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ text "\\low{" <> contents <> char '}'
+inlineToConTeXt (SmallCaps lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ text "{\\sc " <> contents <> char '}'
+inlineToConTeXt (Code str) = return $ text $ "\\type{" ++ str ++ "}"
+inlineToConTeXt (Quoted SingleQuote lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ text "\\quote{" <> contents <> char '}'
+inlineToConTeXt (Quoted DoubleQuote lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ text "\\quotation{" <> contents <> char '}'
+inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst
+inlineToConTeXt Apostrophe = return $ char '\''
+inlineToConTeXt EmDash = return $ text "---"
+inlineToConTeXt EnDash = return $ text "--"
+inlineToConTeXt Ellipses = return $ text "\\ldots{}"
+inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str
+inlineToConTeXt (Math InlineMath str) = return $ char '$' <> text str <> char '$'
+inlineToConTeXt (Math DisplayMath str) = return $ text "\\startformula " <> text str <> text " \\stopformula"
+inlineToConTeXt (TeX str) = return $ text str
+inlineToConTeXt (HtmlInline _) = return empty
+inlineToConTeXt (LineBreak) = return $ text "\\crlf\n"
+inlineToConTeXt Space = return $ char ' '
+inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own
+ inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links...
+inlineToConTeXt (Link txt (src, _)) = do
+ st <- get
+ let next = stNextRef st
+ put $ st {stNextRef = next + 1}
+ let ref = show next
+ label <- inlineListToConTeXt txt
+ return $ text "\\useURL[" <> text ref <> text "][" <> text src <>
+ text "][][" <> label <> text "]\\from[" <> text ref <> char ']'
+inlineToConTeXt (Image alternate (src, tit)) = do
+ alt <- inlineListToConTeXt alternate
+ return $ text "\\placefigure\n[]\n[fig:" <> alt <> text "]\n{" <>
+ text tit <> text "}\n{\\externalfigure[" <> text src <> text "]}"
+inlineToConTeXt (Note contents) = do
+ contents' <- blockListToConTeXt contents
+ let rawnote = stripTrailingNewlines $ render contents'
+ -- note: a \n before } is needed when note ends with a \stoptyping
+ let optNewline = "\\stoptyping" `isSuffixOf` rawnote
+ return $ text "\\footnote{" <>
+ text rawnote <> (if optNewline then char '\n' else empty) <> char '}'
+
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
new file mode 100644
index 000000000..3e535a87e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -0,0 +1,262 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docbook
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Docbook XML.
+-}
+module Text.Pandoc.Writers.Docbook ( writeDocbook) where
+import Text.Pandoc.Definition
+import Text.Pandoc.XML
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.TeXMath
+import Data.List ( isPrefixOf, drop, intercalate )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+-- | Convert list of authors to a docbook <author> section
+authorToDocbook :: [Char] -> Doc
+authorToDocbook name = inTagsIndented "author" $
+ if ',' `elem` name
+ then -- last name first
+ let (lastname, rest) = break (==',') name
+ firstname = removeLeadingSpace rest in
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ else -- last name last
+ let namewords = words name
+ lengthname = length namewords
+ (firstname, lastname) = case lengthname of
+ 0 -> ("","")
+ 1 -> ("", name)
+ n -> (intercalate " " (take (n-1) namewords), last namewords)
+ in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+
+-- | Convert Pandoc document to string in Docbook format.
+writeDocbook :: WriterOptions -> Pandoc -> String
+writeDocbook opts (Pandoc (Meta title authors date) blocks) =
+ let head' = if writerStandalone opts
+ then text (writerHeader opts)
+ else empty
+ meta = if writerStandalone opts
+ then inTagsIndented "articleinfo" $
+ (inTagsSimple "title" (wrap opts title)) $$
+ (vcat (map authorToDocbook authors)) $$
+ (inTagsSimple "date" (text $ escapeStringForXML date))
+ else empty
+ elements = hierarchicalize blocks
+ before = writerIncludeBefore opts
+ after = writerIncludeAfter opts
+ body = (if null before then empty else text before) $$
+ vcat (map (elementToDocbook opts) elements) $$
+ (if null after then empty else text after)
+ body' = if writerStandalone opts
+ then inTagsIndented "article" (meta $$ body)
+ else body
+ in render $ head' $$ body' $$ text ""
+
+-- | Convert an Element to Docbook.
+elementToDocbook :: WriterOptions -> Element -> Doc
+elementToDocbook opts (Blk block) = blockToDocbook opts block
+elementToDocbook opts (Sec title elements) =
+ -- Docbook doesn't allow sections with no content, so insert some if needed
+ let elements' = if null elements
+ then [Blk (Para [])]
+ else elements
+ in inTagsIndented "section" $
+ inTagsSimple "title" (wrap opts title) $$
+ vcat (map (elementToDocbook opts) elements')
+
+-- | Convert a list of Pandoc blocks to Docbook.
+blocksToDocbook :: WriterOptions -> [Block] -> Doc
+blocksToDocbook opts = vcat . map (blockToDocbook opts)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara :: Block -> Block
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+-- | Convert a list of pairs of terms and definitions into a list of
+-- Docbook varlistentrys.
+deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc
+deflistItemsToDocbook opts items =
+ vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items
+
+-- | Convert a term and a list of blocks into a Docbook varlistentry.
+deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc
+deflistItemToDocbook opts term def =
+ let def' = map plainToPara def
+ in inTagsIndented "varlistentry" $
+ inTagsIndented "term" (inlinesToDocbook opts term) $$
+ inTagsIndented "listitem" (blocksToDocbook opts def')
+
+-- | Convert a list of lists of blocks to a list of Docbook list items.
+listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
+listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items
+
+-- | Convert a list of blocks into a Docbook list item.
+listItemToDocbook :: WriterOptions -> [Block] -> Doc
+listItemToDocbook opts item =
+ inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item
+
+-- | Convert a Pandoc block element to Docbook.
+blockToDocbook :: WriterOptions -> Block -> Doc
+blockToDocbook _ Null = empty
+blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize
+blockToDocbook opts (Plain lst) = wrap opts lst
+blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
+blockToDocbook opts (BlockQuote blocks) =
+ inTagsIndented "blockquote" $ blocksToDocbook opts blocks
+blockToDocbook _ (CodeBlock _ str) =
+ text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>"
+blockToDocbook opts (BulletList lst) =
+ inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
+blockToDocbook _ (OrderedList _ []) = empty
+blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
+ let attribs = case numstyle of
+ DefaultStyle -> []
+ Decimal -> [("numeration", "arabic")]
+ UpperAlpha -> [("numeration", "upperalpha")]
+ LowerAlpha -> [("numeration", "loweralpha")]
+ UpperRoman -> [("numeration", "upperroman")]
+ LowerRoman -> [("numeration", "lowerroman")]
+ items = if start == 1
+ then listItemsToDocbook opts (first:rest)
+ else (inTags True "listitem" [("override",show start)]
+ (blocksToDocbook opts $ map plainToPara first)) $$
+ listItemsToDocbook opts rest
+ in inTags True "orderedlist" attribs items
+blockToDocbook opts (DefinitionList lst) =
+ inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
+blockToDocbook _ (RawHtml str) = text str -- raw XML block
+blockToDocbook _ HorizontalRule = empty -- not semantic
+blockToDocbook opts (Table caption aligns widths headers rows) =
+ let alignStrings = map alignmentToString aligns
+ captionDoc = if null caption
+ then empty
+ else inTagsIndented "caption"
+ (inlinesToDocbook opts caption)
+ tableType = if isEmpty captionDoc then "informaltable" else "table"
+ in inTagsIndented tableType $ captionDoc $$
+ (colHeadsToDocbook opts alignStrings widths headers) $$
+ (vcat $ map (tableRowToDocbook opts alignStrings) rows)
+
+colHeadsToDocbook :: WriterOptions
+ -> [[Char]]
+ -> [Double]
+ -> [[Block]]
+ -> Doc
+colHeadsToDocbook opts alignStrings widths headers =
+ let heads = zipWith3 (\align width item ->
+ tableItemToDocbook opts "th" align width item)
+ alignStrings widths headers
+ in inTagsIndented "tr" $ vcat heads
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToDocbook :: WriterOptions -> [[Char]] -> [[Block]] -> Doc
+tableRowToDocbook opts aligns cols = inTagsIndented "tr" $
+ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
+
+tableItemToDocbook :: WriterOptions
+ -> [Char]
+ -> [Char]
+ -> Double
+ -> [Block]
+ -> Doc
+tableItemToDocbook opts tag align width item =
+ let attrib = [("align", align)] ++
+ if width /= 0
+ then [("style", "{width: " ++
+ show (truncate (100*width) :: Integer) ++ "%;}")]
+ else []
+ in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
+
+-- | Take list of inline elements and return wrapped doc.
+wrap :: WriterOptions -> [Inline] -> Doc
+wrap opts lst = if writerWrapText opts
+ then fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
+ else inlinesToDocbook opts lst
+
+-- | Convert a list of inline elements to Docbook.
+inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
+inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
+
+-- | Convert an inline element to Docbook.
+inlineToDocbook :: WriterOptions -> Inline -> Doc
+inlineToDocbook _ (Str str) = text $ escapeStringForXML str
+inlineToDocbook opts (Emph lst) =
+ inTagsSimple "emphasis" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Strong lst) =
+ inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
+inlineToDocbook opts (Strikeout lst) =
+ inTags False "emphasis" [("role", "strikethrough")] $
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Superscript lst) =
+ inTagsSimple "superscript" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Subscript lst) =
+ inTagsSimple "subscript" $ inlinesToDocbook opts lst
+inlineToDocbook opts (SmallCaps lst) =
+ inTags False "emphasis" [("role", "smallcaps")] $
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Quoted _ lst) =
+ inTagsSimple "quote" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Cite _ lst) =
+ inlinesToDocbook opts lst
+inlineToDocbook _ Apostrophe = char '\''
+inlineToDocbook _ Ellipses = text "&#8230;"
+inlineToDocbook _ EmDash = text "&#8212;"
+inlineToDocbook _ EnDash = text "&#8211;"
+inlineToDocbook _ (Code str) =
+ inTagsSimple "literal" $ text (escapeStringForXML str)
+inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str
+inlineToDocbook _ (TeX _) = empty
+inlineToDocbook _ (HtmlInline _) = empty
+inlineToDocbook _ LineBreak = text $ "<literallayout></literallayout>"
+inlineToDocbook _ Space = char ' '
+inlineToDocbook opts (Link txt (src, _)) =
+ if isPrefixOf "mailto:" src
+ then let src' = drop 7 src
+ emailLink = inTagsSimple "email" $ text $
+ escapeStringForXML $ src'
+ in if txt == [Code src']
+ then emailLink
+ else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
+ char ')'
+ else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
+inlineToDocbook _ (Image _ (src, tit)) =
+ let titleDoc = if null tit
+ then empty
+ else inTagsIndented "objectinfo" $
+ inTagsIndented "title" (text $ escapeStringForXML tit)
+ in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
+ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
+inlineToDocbook opts (Note contents) =
+ inTagsIndented "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
new file mode 100644
index 000000000..fb7320e92
--- /dev/null
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -0,0 +1,557 @@
+{-# OPTIONS_GHC -fno-warn-deprecations #-}
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.HTML
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to HTML.
+-}
+module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.LaTeXMathML
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.TeXMath
+import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss )
+import Numeric ( showHex )
+import Data.Char ( ord, toLower, isAlpha )
+import Data.List ( isPrefixOf, intercalate )
+import qualified Data.Set as S
+import Control.Monad.State
+import Text.XHtml.Transitional hiding ( stringToHtml )
+
+data WriterState = WriterState
+ { stNotes :: [Html] -- ^ List of notes
+ , stIds :: [String] -- ^ List of header identifiers
+ , stMath :: Bool -- ^ Math is used in document
+ , stCSS :: S.Set String -- ^ CSS to include in header
+ } deriving Show
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState {stNotes= [], stIds = [],
+ stMath = False, stCSS = S.empty}
+
+-- Helpers to render HTML with the appropriate function.
+
+render :: (HTML html) => WriterOptions -> html -> String
+render opts = if writerWrapText opts then renderHtml else showHtml
+
+renderFragment :: (HTML html) => WriterOptions -> html -> String
+renderFragment opts = if writerWrapText opts
+ then renderHtmlFragment
+ else showHtmlFragment
+
+-- | Slightly modified version of Text.XHtml's stringToHtml.
+-- Only uses numerical entities for 0xff and greater.
+-- Adds &nbsp;.
+stringToHtml :: String -> Html
+stringToHtml = primHtml . concatMap fixChar
+ where
+ fixChar '<' = "&lt;"
+ fixChar '>' = "&gt;"
+ fixChar '&' = "&amp;"
+ fixChar '"' = "&quot;"
+ fixChar '\160' = "&nbsp;"
+ fixChar c | ord c < 0xff = [c]
+ fixChar c = "&#" ++ show (ord c) ++ ";"
+
+-- | Convert Pandoc document to Html string.
+writeHtmlString :: WriterOptions -> Pandoc -> String
+writeHtmlString opts =
+ if writerStandalone opts
+ then render opts . writeHtml opts
+ else renderFragment opts . writeHtml opts
+
+-- | Convert Pandoc document to Html structure.
+writeHtml :: WriterOptions -> Pandoc -> Html
+writeHtml opts (Pandoc (Meta tit authors date) blocks) =
+ let titlePrefix = writerTitlePrefix opts
+ topTitle = evalState (inlineListToHtml opts tit) defaultWriterState
+ topTitle' = if null titlePrefix
+ then topTitle
+ else if null tit
+ then stringToHtml titlePrefix
+ else titlePrefix +++ " - " +++ topTitle
+ metadata = thetitle topTitle' +++
+ meta ! [httpequiv "Content-Type",
+ content "text/html; charset=UTF-8"] +++
+ meta ! [name "generator", content "pandoc"] +++
+ (toHtmlFromList $
+ map (\a -> meta ! [name "author", content a]) authors) +++
+ (if null date
+ then noHtml
+ else meta ! [name "date", content date])
+ titleHeader = if writerStandalone opts && not (null tit) &&
+ not (writerS5 opts)
+ then h1 ! [theclass "title"] $ topTitle
+ else noHtml
+ headerBlocks = filter isHeaderBlock blocks
+ ids = uniqueIdentifiers $
+ map (\(Header _ lst) -> lst) headerBlocks
+ toc = if writerTableOfContents opts
+ then tableOfContents opts headerBlocks ids
+ else noHtml
+ (blocks', newstate) =
+ runState (blockListToHtml opts blocks)
+ (defaultWriterState {stIds = ids})
+ cssLines = stCSS newstate
+ css = if S.null cssLines
+ then noHtml
+ else style ! [thetype "text/css"] $ primHtml $
+ '\n':(unlines $ S.toList cssLines)
+ math = if stMath newstate
+ then case writerHTMLMathMethod opts of
+ LaTeXMathML Nothing ->
+ primHtml latexMathMLScript
+ LaTeXMathML (Just url) ->
+ script !
+ [src url, thetype "text/javascript"] $
+ noHtml
+ JsMath (Just url) ->
+ script !
+ [src url, thetype "text/javascript"] $
+ noHtml
+ _ -> noHtml
+ else noHtml
+ head' = header $ metadata +++ math +++ css +++
+ primHtml (writerHeader opts)
+ notes = reverse (stNotes newstate)
+ before = primHtml $ writerIncludeBefore opts
+ after = primHtml $ writerIncludeAfter opts
+ thebody = before +++ titleHeader +++ toc +++ blocks' +++
+ footnoteSection notes +++ after
+ in if writerStandalone opts
+ then head' +++ body thebody
+ else thebody
+
+-- | Construct table of contents from list of header blocks and identifiers.
+-- Assumes there are as many identifiers as header blocks.
+tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
+tableOfContents _ [] _ = noHtml
+tableOfContents opts headers ids =
+ let opts' = opts { writerIgnoreNotes = True }
+ contentsTree = hierarchicalize headers
+ contents = evalState (mapM (elementToListItem opts') contentsTree)
+ (defaultWriterState {stIds = ids})
+ in thediv ! [identifier "toc"] $ unordList contents
+
+-- | Converts an Element to a list item for a table of contents,
+-- retrieving the appropriate identifier from state.
+elementToListItem :: WriterOptions -> Element -> State WriterState Html
+elementToListItem _ (Blk _) = return noHtml
+elementToListItem opts (Sec headerText subsecs) = do
+ st <- get
+ let ids = stIds st
+ let (id', rest) = if null ids
+ then ("", [])
+ else (head ids, tail ids)
+ put $ st {stIds = rest}
+ txt <- inlineListToHtml opts headerText
+ subHeads <- mapM (elementToListItem opts) subsecs
+ let subList = if null subHeads
+ then noHtml
+ else unordList subHeads
+ return $ (anchor ! [href ("#" ++ id'), identifier ("TOC-" ++ id')] $ txt) +++
+ subList
+
+-- | Convert list of Note blocks to a footnote <div>.
+-- Assumes notes are sorted.
+footnoteSection :: [Html] -> Html
+footnoteSection notes =
+ if null notes
+ then noHtml
+ else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes)
+
+
+-- | Parse a mailto link; return Just (name, domain) or Nothing.
+parseMailto :: String -> Maybe (String, String)
+parseMailto ('m':'a':'i':'l':'t':'o':':':addr) =
+ let (name', rest) = span (/='@') addr
+ domain = drop 1 rest
+ in Just (name', domain)
+parseMailto _ = Nothing
+
+-- | Obfuscate a "mailto:" link.
+obfuscateLink :: WriterOptions -> String -> String -> Html
+obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
+ anchor ! [href s] << txt
+obfuscateLink opts txt s =
+ let meth = writerEmailObfuscation opts
+ s' = map toLower s
+ in case parseMailto s' of
+ (Just (name', domain)) ->
+ let domain' = substitute "." " dot " domain
+ at' = obfuscateChar '@'
+ (linkText, altText) =
+ if txt == drop 7 s' -- autolink
+ then ("'<code>'+e+'</code>'", name' ++ " at " ++ domain')
+ else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++
+ domain' ++ ")")
+ in case meth of
+ ReferenceObfuscation ->
+ -- need to use primHtml or &'s are escaped to &amp; in URL
+ primHtml $ "<a href=\"" ++ (obfuscateString s')
+ ++ "\">" ++ (obfuscateString txt) ++ "</a>"
+ JavascriptObfuscation ->
+ (script ! [thetype "text/javascript"] $
+ primHtml ("\n<!--\nh='" ++
+ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
+ obfuscateString name' ++ "';e=n+a+h;\n" ++
+ "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
+ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
+ noscript (primHtml $ obfuscateString altText)
+ _ -> error $ "Unknown obfuscation method: " ++ show meth
+ _ -> anchor ! [href s] $ primHtml txt -- malformed email
+
+-- | Obfuscate character as entity.
+obfuscateChar :: Char -> String
+obfuscateChar char =
+ let num = ord char
+ numstr = if even num then show num else "x" ++ showHex num ""
+ in "&#" ++ numstr ++ ";"
+
+-- | Obfuscate string using entities.
+obfuscateString :: String -> String
+obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+ let c' = ord c
+ in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+ c' >= 0xE000 && c' <= 0xE0FF
+ then True
+ else False
+
+-- | Add CSS for document header.
+addToCSS :: String -> State WriterState ()
+addToCSS item = do
+ st <- get
+ let current = stCSS st
+ put $ st {stCSS = S.insert item current}
+
+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
+
+inlineListToIdentifier' :: [Inline] -> [Char]
+inlineListToIdentifier' [] = ""
+inlineListToIdentifier' (x:xs) =
+ xAsText ++ inlineListToIdentifier' xs
+ where xAsText = case x of
+ Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
+ intercalate "-" $ words $ map toLower s
+ Emph lst -> inlineListToIdentifier' lst
+ Strikeout lst -> inlineListToIdentifier' lst
+ Superscript lst -> inlineListToIdentifier' lst
+ SmallCaps lst -> inlineListToIdentifier' lst
+ Subscript lst -> inlineListToIdentifier' lst
+ Strong lst -> inlineListToIdentifier' lst
+ Quoted _ lst -> inlineListToIdentifier' lst
+ Cite _ lst -> inlineListToIdentifier' lst
+ Code s -> s
+ Space -> "-"
+ EmDash -> "-"
+ EnDash -> "-"
+ Apostrophe -> ""
+ Ellipses -> ""
+ LineBreak -> "-"
+ Math _ _ -> ""
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> inlineListToIdentifier' lst
+ Image lst _ -> inlineListToIdentifier' lst
+ Note _ -> ""
+
+-- | Return unique identifiers for list of inline lists.
+uniqueIdentifiers :: [[Inline]] -> [String]
+uniqueIdentifiers ls =
+ let addIdentifier (nonuniqueIds, uniqueIds) l =
+ let new = inlineListToIdentifier l
+ matches = length $ filter (== new) nonuniqueIds
+ new' = (if null new then "section" else new) ++
+ if matches > 0 then ("-" ++ show matches) else ""
+ in (new:nonuniqueIds, new':uniqueIds)
+ in reverse $ snd $ foldl addIdentifier ([],[]) ls
+
+-- | Convert Pandoc block element to HTML.
+blockToHtml :: WriterOptions -> Block -> State WriterState Html
+blockToHtml _ Null = return $ noHtml
+blockToHtml opts (Plain lst) = inlineListToHtml opts lst
+blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph)
+blockToHtml _ (RawHtml str) = return $ primHtml str
+blockToHtml _ (HorizontalRule) = return $ hr
+blockToHtml opts (CodeBlock (_,classes,_) rawCode) | "haskell" `elem` classes &&
+ writerLiterateHaskell opts =
+ let classes' = map (\c -> if c == "haskell" then "literatehaskell" else c) classes
+ in blockToHtml opts $ CodeBlock ("",classes',[]) $ intercalate "\n" $ map ("> " ++) $ lines rawCode
+blockToHtml _ (CodeBlock attr@(_,classes,_) rawCode) = do
+ case highlightHtml attr rawCode of
+ Left _ -> -- change leading newlines into <br /> tags, because some
+ -- browsers ignore leading newlines in pre blocks
+ let (leadingBreaks, rawCode') = span (=='\n') rawCode
+ in return $ pre ! (if null classes
+ then []
+ else [theclass $ unwords classes]) $ thecode <<
+ (replicate (length leadingBreaks) br +++
+ [stringToHtml $ rawCode' ++ "\n"])
+ Right h -> addToCSS defaultHighlightingCss >> return h
+blockToHtml opts (BlockQuote blocks) =
+ -- in S5, treat list in blockquote specially
+ -- if default is incremental, make it nonincremental;
+ -- otherwise incremental
+ if writerS5 opts
+ then let inc = not (writerIncremental opts) in
+ case blocks of
+ [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
+ (BulletList lst)
+ [OrderedList attribs lst] ->
+ blockToHtml (opts {writerIncremental = inc})
+ (OrderedList attribs lst)
+ _ -> blockListToHtml opts blocks >>=
+ (return . blockquote)
+ else blockListToHtml opts blocks >>= (return . blockquote)
+blockToHtml opts (Header level lst) = do
+ contents <- inlineListToHtml opts lst
+ st <- get
+ let ids = stIds st
+ let (id', rest) = if null ids
+ then ("", [])
+ else (head ids, tail ids)
+ put $ st {stIds = rest}
+ let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts)
+ then []
+ else [identifier id']
+ let contents' = if writerTableOfContents opts
+ then anchor ! [href ("#TOC-" ++ id')] $ contents
+ else contents
+ return $ case level of
+ 1 -> h1 contents' ! attribs
+ 2 -> h2 contents' ! attribs
+ 3 -> h3 contents' ! attribs
+ 4 -> h4 contents' ! attribs
+ 5 -> h5 contents' ! attribs
+ 6 -> h6 contents' ! attribs
+ _ -> paragraph contents' ! attribs
+blockToHtml opts (BulletList lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ unordList ! attribs $ contents
+blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ let attribs = (if writerIncremental opts
+ then [theclass "incremental"]
+ else []) ++
+ (if startnum /= 1
+ then [start startnum]
+ else []) ++
+ (if numstyle /= DefaultStyle
+ then [thestyle $ "list-style-type: " ++ numstyle' ++ ";"]
+ else [])
+ return $ ordList ! attribs $ contents
+blockToHtml opts (DefinitionList lst) = do
+ contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term
+ def' <- blockListToHtml opts def
+ return $ (term', def')) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ defList ! attribs $ contents
+blockToHtml opts (Table capt aligns widths headers rows') = do
+ let alignStrings = map alignmentToString aligns
+ captionDoc <- if null capt
+ then return noHtml
+ else inlineListToHtml opts capt >>= return . caption
+ colHeads <- colHeadsToHtml opts alignStrings
+ widths headers
+ rows'' <- zipWithM (tableRowToHtml opts alignStrings) (cycle ["odd", "even"]) rows'
+ return $ table $ captionDoc +++ colHeads +++ rows''
+
+colHeadsToHtml :: WriterOptions
+ -> [[Char]]
+ -> [Double]
+ -> [[Block]]
+ -> State WriterState Html
+colHeadsToHtml opts alignStrings widths headers = do
+ heads <- sequence $ zipWith3
+ (\alignment columnwidth item -> tableItemToHtml opts th alignment columnwidth item)
+ alignStrings widths headers
+ return $ tr ! [theclass "header"] $ toHtmlFromList heads
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToHtml :: WriterOptions
+ -> [[Char]]
+ -> String
+ -> [[Block]]
+ -> State WriterState Html
+tableRowToHtml opts aligns rowclass columns =
+ (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) columns) >>=
+ return . (tr ! [theclass rowclass]) . toHtmlFromList
+
+tableItemToHtml :: WriterOptions
+ -> (Html -> Html)
+ -> [Char]
+ -> Double
+ -> [Block]
+ -> State WriterState Html
+tableItemToHtml opts tag' align' width' item = do
+ contents <- blockListToHtml opts item
+ let attrib = [align align'] ++
+ if width' /= 0
+ then [thestyle ("width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;")]
+ else []
+ return $ tag' ! attrib $ contents
+
+blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
+blockListToHtml opts lst =
+ mapM (blockToHtml opts) lst >>= return . toHtmlFromList
+
+-- | Convert list of Pandoc inline elements to HTML.
+inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
+inlineListToHtml opts lst =
+ mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
+
+-- | Convert Pandoc inline element to HTML.
+inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
+inlineToHtml opts inline =
+ case inline of
+ (Str str) -> return $ stringToHtml str
+ (Space) -> return $ stringToHtml " "
+ (LineBreak) -> return $ br
+ (EmDash) -> return $ primHtmlChar "mdash"
+ (EnDash) -> return $ primHtmlChar "ndash"
+ (Ellipses) -> return $ primHtmlChar "hellip"
+ (Apostrophe) -> return $ primHtmlChar "rsquo"
+ (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
+ (Strong lst) -> inlineListToHtml opts lst >>= return . strong
+ (Code str) -> return $ thecode << str
+ (Strikeout lst) -> inlineListToHtml opts lst >>=
+ return . (thespan ! [thestyle "text-decoration: line-through;"])
+ (SmallCaps lst) -> inlineListToHtml opts lst >>=
+ return . (thespan ! [thestyle "font-variant: small-caps;"])
+ (Superscript lst) -> inlineListToHtml opts lst >>= return . sup
+ (Subscript lst) -> inlineListToHtml opts lst >>= return . sub
+ (Quoted quoteType lst) ->
+ let (leftQuote, rightQuote) = case quoteType of
+ SingleQuote -> (primHtmlChar "lsquo",
+ primHtmlChar "rsquo")
+ DoubleQuote -> (primHtmlChar "ldquo",
+ primHtmlChar "rdquo")
+ in do contents <- inlineListToHtml opts lst
+ return $ leftQuote +++ contents +++ rightQuote
+ (Math t str) ->
+ modify (\st -> st {stMath = True}) >>
+ (case writerHTMLMathMethod opts of
+ LaTeXMathML _ ->
+ -- putting LaTeXMathML in container with class "LaTeX" prevents
+ -- non-math elements on the page from being treated as math by
+ -- the javascript
+ return $ thespan ! [theclass "LaTeX"] $
+ if t == InlineMath
+ then primHtml ("$" ++ str ++ "$")
+ else primHtml ("$$" ++ str ++ "$$")
+ JsMath _ ->
+ return $ if t == InlineMath
+ then thespan ! [theclass "math"] $ primHtml str
+ else thediv ! [theclass "math"] $ primHtml str
+ MimeTeX url ->
+ return $ image ! [src (url ++ "?" ++ str),
+ alt str, title str]
+ GladTeX ->
+ return $ primHtml $ "<EQ>" ++ str ++ "</EQ>"
+ PlainMath ->
+ inlineListToHtml opts (readTeXMath str) >>=
+ return . (thespan ! [theclass "math"]))
+ (TeX str) -> case writerHTMLMathMethod opts of
+ LaTeXMathML _ -> do modify (\st -> st {stMath = True})
+ return $ primHtml str
+ _ -> return noHtml
+ (HtmlInline str) -> return $ primHtml str
+ (Link [Code str] (s,_)) | "mailto:" `isPrefixOf` s ->
+ return $ obfuscateLink opts str s
+ (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do
+ linkText <- inlineListToHtml opts txt
+ return $ obfuscateLink opts (show linkText) s
+ (Link txt (s,tit)) -> do
+ linkText <- inlineListToHtml opts txt
+ return $ anchor ! ([href s] ++
+ if null tit then [] else [title tit]) $
+ linkText
+ (Image txt (s,tit)) -> do
+ alternate <- inlineListToHtml opts txt
+ let alternate' = renderFragment opts alternate
+ let attributes = [src s] ++
+ (if null tit
+ then []
+ else [title tit]) ++
+ if null txt
+ then []
+ else [alt alternate']
+ return $ image ! attributes
+ -- note: null title included, as in Markdown.pl
+ (Note contents) -> do
+ st <- get
+ let notes = stNotes st
+ let number = (length notes) + 1
+ let ref = show number
+ htmlContents <- blockListToNote opts ref contents
+ -- push contents onto front of notes
+ put $ st {stNotes = (htmlContents:notes)}
+ return $ anchor ! [href ("#fn" ++ ref),
+ theclass "footnoteRef",
+ identifier ("fnref" ++ ref)] <<
+ sup << ref
+ (Cite _ il) -> inlineListToHtml opts il
+
+blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
+blockListToNote opts ref blocks =
+ -- If last block is Para or Plain, include the backlink at the end of
+ -- that block. Otherwise, insert a new Plain block with the backlink.
+ let backlink = [HtmlInline $ " <a href=\"#fnref" ++ ref ++
+ "\" class=\"footnoteBackLink\"" ++
+ " title=\"Jump back to footnote " ++ ref ++ "\">&#8617;</a>"]
+ blocks' = if null blocks
+ then []
+ else let lastBlock = last blocks
+ otherBlocks = init blocks
+ in case lastBlock of
+ (Para lst) -> otherBlocks ++
+ [Para (lst ++ backlink)]
+ (Plain lst) -> otherBlocks ++
+ [Plain (lst ++ backlink)]
+ _ -> otherBlocks ++ [lastBlock,
+ Plain backlink]
+ in do contents <- blockListToHtml opts blocks'
+ return $ li ! [identifier ("fn" ++ ref)] $ contents
+
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
new file mode 100644
index 000000000..f3cbf1acb
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -0,0 +1,331 @@
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.LaTeX
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into LaTeX.
+-}
+module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( (\\), isSuffixOf, intercalate )
+import Data.Char ( toLower )
+import qualified Data.Set as S
+import Control.Monad.State
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+data WriterState =
+ WriterState { stIncludes :: S.Set String -- strings to include in header
+ , stInNote :: Bool -- @True@ if we're in a note
+ , stOLLevel :: Int -- level of ordered list nesting
+ , stOptions :: WriterOptions -- writer options, so they don't have to be parameter
+ }
+
+-- | Add line to header.
+addToHeader :: String -> State WriterState ()
+addToHeader str = do
+ st <- get
+ let includes = stIncludes st
+ put st {stIncludes = S.insert str includes}
+
+-- | Convert Pandoc to LaTeX.
+writeLaTeX :: WriterOptions -> Pandoc -> String
+writeLaTeX options document =
+ render $ evalState (pandocToLaTeX options document) $
+ WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1, stOptions = options }
+
+pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToLaTeX options (Pandoc meta blocks) = do
+ main <- blockListToLaTeX blocks
+ head' <- if writerStandalone options
+ then latexHeader options meta
+ else return empty
+ let before = if null (writerIncludeBefore options)
+ then empty
+ else text (writerIncludeBefore options)
+ let after = if null (writerIncludeAfter options)
+ then empty
+ else text (writerIncludeAfter options)
+ let body = before $$ main $$ after
+ let toc = if writerTableOfContents options
+ then text "\\tableofcontents\n"
+ else empty
+ let foot = if writerStandalone options
+ then text "\\end{document}"
+ else empty
+ return $ head' $$ toc $$ body $$ foot
+
+-- | Insert bibliographic information into LaTeX header.
+latexHeader :: WriterOptions -- ^ Options, including LaTeX header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState Doc
+latexHeader options (Meta title authors date) = do
+ titletext <- if null title
+ then return empty
+ else inlineListToLaTeX title >>= return . inCmd "title"
+ headerIncludes <- get >>= return . S.toList . stIncludes
+ let extras = text $ unlines headerIncludes
+ let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes
+ then text "\\VerbatimFootnotes % allows verbatim text in footnotes"
+ else empty
+ let authorstext = text $ "\\author{" ++
+ intercalate "\\\\" (map stringToLaTeX authors) ++ "}"
+ let datetext = if date == ""
+ then empty
+ else text $ "\\date{" ++ stringToLaTeX date ++ "}"
+ let maketitle = if null title then empty else text "\\maketitle"
+ let secnumline = if (writerNumberSections options)
+ then empty
+ else text "\\setcounter{secnumdepth}{0}"
+ let baseHeader = text $ writerHeader options
+ let header = baseHeader $$ extras
+ return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$
+ datetext $$ text "\\begin{document}" $$ maketitle $$ text ""
+
+-- escape things as needed for LaTeX
+
+stringToLaTeX :: String -> String
+stringToLaTeX = escapeStringUsing latexEscapes
+ where latexEscapes = backslashEscapes "{}$%&_#" ++
+ [ ('^', "\\^{}")
+ , ('\\', "\\textbackslash{}")
+ , ('~', "\\ensuremath{\\sim}")
+ , ('|', "\\textbar{}")
+ , ('<', "\\textless{}")
+ , ('>', "\\textgreater{}")
+ , ('\160', "~")
+ ]
+
+-- | Puts contents into LaTeX command.
+inCmd :: String -> Doc -> Doc
+inCmd cmd contents = char '\\' <> text cmd <> braces contents
+
+-- | Remove all code elements from list of inline elements
+-- (because it's illegal to have verbatim inside some command arguments)
+deVerb :: [Inline] -> [Inline]
+deVerb [] = []
+deVerb ((Code str):rest) =
+ (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest)
+deVerb (other:rest) = other:(deVerb rest)
+
+-- | Convert Pandoc block element to LaTeX.
+blockToLaTeX :: Block -- ^ Block to convert
+ -> State WriterState Doc
+blockToLaTeX Null = return empty
+blockToLaTeX (Plain lst) = do
+ st <- get
+ let opts = stOptions st
+ wrapTeXIfNeeded opts True inlineListToLaTeX lst
+blockToLaTeX (Para lst) = do
+ st <- get
+ let opts = stOptions st
+ result <- wrapTeXIfNeeded opts True inlineListToLaTeX lst
+ return $ result <> char '\n'
+blockToLaTeX (BlockQuote lst) = do
+ contents <- blockListToLaTeX lst
+ return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}"
+blockToLaTeX (CodeBlock (_,classes,_) str) = do
+ st <- get
+ env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes
+ then return "code"
+ else if stInNote st
+ then do addToHeader "\\usepackage{fancyvrb}"
+ return "Verbatim"
+ else return "verbatim"
+ return $ text ("\\begin{" ++ env ++ "}\n") <> text str <>
+ text ("\n\\end{" ++ env ++ "}")
+blockToLaTeX (RawHtml _) = return empty
+blockToLaTeX (BulletList lst) = do
+ items <- mapM listItemToLaTeX lst
+ return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}"
+blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
+ st <- get
+ let oldlevel = stOLLevel st
+ put $ st {stOLLevel = oldlevel + 1}
+ items <- mapM listItemToLaTeX lst
+ modify (\s -> s {stOLLevel = oldlevel})
+ exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim
+ then do addToHeader "\\usepackage{enumerate}"
+ return $ char '[' <>
+ text (head (orderedListMarkers (1, numstyle,
+ numdelim))) <> char ']'
+ else return empty
+ let resetcounter = if start /= 1 && oldlevel <= 4
+ then text $ "\\setcounter{enum" ++
+ map toLower (toRomanNumeral oldlevel) ++
+ "}{" ++ show (start - 1) ++ "}"
+ else empty
+ return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$
+ vcat items $$ text "\\end{enumerate}"
+blockToLaTeX (DefinitionList lst) = do
+ items <- mapM defListItemToLaTeX lst
+ return $ text "\\begin{description}" $$ vcat items $$
+ text "\\end{description}"
+blockToLaTeX HorizontalRule = return $ text $
+ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n"
+blockToLaTeX (Header level lst) = do
+ txt <- inlineListToLaTeX (deVerb lst)
+ return $ if (level > 0) && (level <= 3)
+ then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++
+ "section{") <> txt <> text "}\n"
+ else txt <> char '\n'
+blockToLaTeX (Table caption aligns widths heads rows) = do
+ headers <- tableRowToLaTeX heads
+ captionText <- inlineListToLaTeX caption
+ rows' <- mapM tableRowToLaTeX rows
+ let colWidths = map (printf "%.2f") widths
+ let colDescriptors = concat $ zipWith
+ (\width align -> ">{\\PBS" ++
+ (case align of
+ AlignLeft -> "\\raggedright"
+ AlignRight -> "\\raggedleft"
+ AlignCenter -> "\\centering"
+ AlignDefault -> "\\raggedright") ++
+ "\\hspace{0pt}}p{" ++ width ++
+ "\\columnwidth}")
+ colWidths aligns
+ let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$
+ headers $$ text "\\hline" $$ vcat rows' $$
+ text "\\end{tabular}"
+ let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}"
+ addToHeader $ "\\usepackage{array}\n" ++
+ "% This is needed because raggedright in table elements redefines \\\\:\n" ++
+ "\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n" ++
+ "\\let\\PBS=\\PreserveBackslash"
+ return $ if isEmpty captionText
+ then centered tableBody <> char '\n'
+ else text "\\begin{table}[h]" $$ centered tableBody $$
+ inCmd "caption" captionText $$ text "\\end{table}\n"
+
+blockListToLaTeX :: [Block] -> State WriterState Doc
+blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat
+
+tableRowToLaTeX :: [[Block]] -> State WriterState Doc
+tableRowToLaTeX cols = mapM blockListToLaTeX cols >>=
+ return . ($$ text "\\\\") . foldl (\row item -> row $$
+ (if isEmpty row then text "" else text " & ") <> item) empty
+
+listItemToLaTeX :: [Block] -> State WriterState Doc
+listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) .
+ (nest 2)
+
+defListItemToLaTeX :: ([Inline], [Block]) -> State WriterState Doc
+defListItemToLaTeX (term, def) = do
+ term' <- inlineListToLaTeX $ deVerb term
+ def' <- blockListToLaTeX def
+ return $ text "\\item[" <> term' <> text "]" $$ def'
+
+-- | Convert list of inline elements to LaTeX.
+inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
+ -> State WriterState Doc
+inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat
+
+isQuoted :: Inline -> Bool
+isQuoted (Quoted _ _) = True
+isQuoted Apostrophe = True
+isQuoted _ = False
+
+-- | Convert inline element to LaTeX
+inlineToLaTeX :: Inline -- ^ Inline to convert
+ -> State WriterState Doc
+inlineToLaTeX (Emph lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph"
+inlineToLaTeX (Strong lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf"
+inlineToLaTeX (Strikeout lst) = do
+ contents <- inlineListToLaTeX $ deVerb lst
+ addToHeader "\\usepackage[normalem]{ulem}"
+ return $ inCmd "sout" contents
+inlineToLaTeX (Superscript lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript"
+inlineToLaTeX (Subscript lst) = do
+ contents <- inlineListToLaTeX $ deVerb lst
+ -- oddly, latex includes \textsuperscript but not \textsubscript
+ -- so we have to define it (using a different name so as not to conflict with memoir class):
+ addToHeader "\\newcommand{\\textsubscr}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}"
+ return $ inCmd "textsubscr" contents
+inlineToLaTeX (SmallCaps lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsc"
+inlineToLaTeX (Cite _ lst) =
+ inlineListToLaTeX lst
+inlineToLaTeX (Code str) = do
+ st <- get
+ if stInNote st
+ then do addToHeader "\\usepackage{fancyvrb}"
+ else return ()
+ let chr = ((enumFromTo '!' '~') \\ str) !! 0
+ return $ text $ "\\verb" ++ [chr] ++ str ++ [chr]
+inlineToLaTeX (Quoted SingleQuote lst) = do
+ contents <- inlineListToLaTeX lst
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then text "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then text "\\,"
+ else empty
+ return $ char '`' <> s1 <> contents <> s2 <> char '\''
+inlineToLaTeX (Quoted DoubleQuote lst) = do
+ contents <- inlineListToLaTeX lst
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then text "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then text "\\,"
+ else empty
+ return $ text "``" <> s1 <> contents <> s2 <> text "''"
+inlineToLaTeX Apostrophe = return $ char '\''
+inlineToLaTeX EmDash = return $ text "---"
+inlineToLaTeX EnDash = return $ text "--"
+inlineToLaTeX Ellipses = return $ text "\\ldots{}"
+inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str
+inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$'
+inlineToLaTeX (Math DisplayMath str) = return $ text "\\[" <> text str <> text "\\]"
+inlineToLaTeX (TeX str) = return $ text str
+inlineToLaTeX (HtmlInline _) = return empty
+inlineToLaTeX (LineBreak) = return $ text "\\\\"
+inlineToLaTeX Space = return $ char ' '
+inlineToLaTeX (Link txt (src, _)) = do
+ addToHeader "\\usepackage[breaklinks=true]{hyperref}"
+ case txt of
+ [Code x] | x == src -> -- autolink
+ do addToHeader "\\usepackage{url}"
+ return $ text $ "\\url{" ++ x ++ "}"
+ _ -> do contents <- inlineListToLaTeX $ deVerb txt
+ return $ text ("\\href{" ++ src ++ "}{") <> contents <>
+ char '}'
+inlineToLaTeX (Image _ (source, _)) = do
+ addToHeader "\\usepackage{graphicx}"
+ return $ text $ "\\includegraphics{" ++ source ++ "}"
+inlineToLaTeX (Note contents) = do
+ st <- get
+ put (st {stInNote = True})
+ contents' <- blockListToLaTeX contents
+ modify (\s -> s {stInNote = False})
+ let rawnote = stripTrailingNewlines $ render contents'
+ -- note: a \n before } is needed when note ends with a Verbatim environment
+ let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote
+ return $ text "\\footnote{" <>
+ text rawnote <> (if optNewline then char '\n' else empty) <> char '}'
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
new file mode 100644
index 000000000..210c7ed07
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -0,0 +1,301 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Man
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to groff man page format.
+
+-}
+module Text.Pandoc.Writers.Man ( writeMan) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( isPrefixOf, drop, nub, intersperse, intercalate )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Preprocessors = [String] -- e.g. "t" for tbl
+type WriterState = (Notes, Preprocessors)
+
+-- | Convert Pandoc to Man.
+writeMan :: WriterOptions -> Pandoc -> String
+writeMan opts document = render $ evalState (pandocToMan opts document) ([],[])
+
+-- | Return groff man representation of document.
+pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToMan opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ let before' = if null before then empty else text before
+ let after' = if null after then empty else text after
+ (head', foot) <- metaToMan opts meta
+ body <- blockListToMan opts blocks
+ (notes, preprocessors) <- get
+ let preamble = if null preprocessors || not (writerStandalone opts)
+ then empty
+ else text $ ".\\\" " ++ concat (nub preprocessors)
+ notes' <- notesToMan opts (reverse notes)
+ return $ preamble $$ head' $$ before' $$ body $$ notes' $$ foot $$ after'
+
+-- | Insert bibliographic information into Man header and footer.
+metaToMan :: WriterOptions -- ^ Options, including Man header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState (Doc, Doc)
+metaToMan options (Meta title authors date) = do
+ titleText <- inlineListToMan options title
+ let (cmdName, rest) = break (== ' ') $ render titleText
+ let (title', section) = case reverse cmdName of
+ (')':d:'(':xs) | d `elem` ['0'..'9'] ->
+ (text (reverse xs), char d)
+ xs -> (text (reverse xs), doubleQuotes empty)
+ let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $
+ splitBy '|' rest
+ let head' = (text ".TH") <+> title' <+> section <+>
+ doubleQuotes (text date) <+> hsep extras
+ let foot = case length authors of
+ 0 -> empty
+ 1 -> text ".SH AUTHOR" $$ (text $ intercalate ", " authors)
+ _ -> text ".SH AUTHORS" $$ (text $ intercalate ", " authors)
+ return $ if writerStandalone options
+ then (head', foot)
+ else (empty, empty)
+
+-- | Return man representation of notes.
+notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToMan opts notes =
+ if null notes
+ then return empty
+ else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
+ return . (text ".SH NOTES" $$) . vcat
+
+-- | Return man representation of a note.
+noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToMan opts num note = do
+ contents <- blockListToMan opts note
+ let marker = text "\n.SS [" <> text (show num) <> char ']'
+ return $ marker $$ contents
+
+-- | Association list of characters to escape.
+manEscapes :: [(Char, String)]
+manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes ".@\\"
+
+-- | Escape special characters for Man.
+escapeString :: String -> String
+escapeString = escapeStringUsing manEscapes
+
+-- | Escape a literal (code) section for Man.
+escapeCode :: String -> String
+escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ")
+
+-- | Convert Pandoc block element to man.
+blockToMan :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToMan _ Null = return empty
+blockToMan opts (Plain inlines) =
+ wrapIfNeeded opts (inlineListToMan opts) inlines
+blockToMan opts (Para inlines) = do
+ contents <- wrapIfNeeded opts (inlineListToMan opts) inlines
+ return $ text ".PP" $$ contents
+blockToMan _ (RawHtml str) = return $ text str
+blockToMan _ HorizontalRule = return $ text $ ".PP\n * * * * *"
+blockToMan opts (Header level inlines) = do
+ contents <- inlineListToMan opts inlines
+ let heading = case level of
+ 1 -> ".SH "
+ _ -> ".SS "
+ return $ text heading <> contents
+blockToMan _ (CodeBlock _ str) = return $
+ text ".PP" $$ text "\\f[CR]" $$
+ text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]"
+blockToMan opts (BlockQuote blocks) = do
+ contents <- blockListToMan opts blocks
+ return $ text ".RS" $$ contents $$ text ".RE"
+blockToMan opts (Table caption alignments widths headers rows) =
+ let aligncode AlignLeft = "l"
+ aligncode AlignRight = "r"
+ aligncode AlignCenter = "c"
+ aligncode AlignDefault = "l"
+ in do
+ caption' <- inlineListToMan opts caption
+ modify (\(notes, preprocessors) -> (notes, "t":preprocessors))
+ let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths
+ -- 78n default width - 8n indent = 70n
+ let coldescriptions = text $ intercalate " "
+ (zipWith (\align width -> aligncode align ++ width)
+ alignments iwidths) ++ "."
+ colheadings <- mapM (blockListToMan opts) headers
+ let makeRow cols = text "T{" $$
+ (vcat $ intersperse (text "T}@T{") cols) $$
+ text "T}"
+ let colheadings' = makeRow colheadings
+ body <- mapM (\row -> do
+ cols <- mapM (blockListToMan opts) row
+ return $ makeRow cols) rows
+ return $ text ".PP" $$ caption' $$
+ text ".TS" $$ text "tab(@);" $$ coldescriptions $$
+ colheadings' $$ char '_' $$ vcat body $$ text ".TE"
+
+blockToMan opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMan opts) items
+ return (vcat contents)
+blockToMan opts (OrderedList attribs items) = do
+ let markers = take (length items) $ orderedListMarkers attribs
+ let indent = 1 + (maximum $ map length markers)
+ contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
+ zip markers items
+ return (vcat contents)
+blockToMan opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMan opts) items
+ return (vcat contents)
+
+-- | Convert bullet list item (list of blocks) to man.
+bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToMan _ [] = return empty
+bulletListItemToMan opts ((Para first):rest) =
+ bulletListItemToMan opts ((Plain first):rest)
+bulletListItemToMan opts ((Plain first):rest) = do
+ first' <- blockToMan opts (Plain first)
+ rest' <- blockListToMan opts rest
+ let first'' = text ".IP \\[bu] 2" $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 2" $$ rest' $$ text ".RE"
+ return (first'' $$ rest'')
+bulletListItemToMan opts (first:rest) = do
+ first' <- blockToMan opts first
+ rest' <- blockListToMan opts rest
+ return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
+
+-- | Convert ordered list item (a list of blocks) to man.
+orderedListItemToMan :: WriterOptions -- ^ options
+ -> String -- ^ order marker for list item
+ -> Int -- ^ number of spaces to indent
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToMan _ _ _ [] = return empty
+orderedListItemToMan opts num indent ((Para first):rest) =
+ orderedListItemToMan opts num indent ((Plain first):rest)
+orderedListItemToMan opts num indent (first:rest) = do
+ first' <- blockToMan opts first
+ rest' <- blockListToMan opts rest
+ let num' = printf ("%" ++ show (indent - 1) ++ "s") num
+ let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 4" $$ rest' $$ text ".RE"
+ return $ first'' $$ rest''
+
+-- | Convert definition list item (label, list of blocks) to man.
+definitionListItemToMan :: WriterOptions
+ -> ([Inline],[Block])
+ -> State WriterState Doc
+definitionListItemToMan opts (label, items) = do
+ labelText <- inlineListToMan opts label
+ contents <- if null items
+ then return empty
+ else do
+ let (first, rest) = case items of
+ ((Para x):y) -> (Plain x,y)
+ (x:y) -> (x,y)
+ [] -> error "items is null"
+ rest' <- mapM (\item -> blockToMan opts item)
+ rest >>= (return . vcat)
+ first' <- blockToMan opts first
+ return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
+ return $ text ".TP\n.B " <> labelText $+$ contents
+
+-- | Convert list of Pandoc block elements to man.
+blockListToMan :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToMan opts blocks =
+ mapM (blockToMan opts) blocks >>= (return . vcat)
+
+-- | Convert list of Pandoc inline elements to man.
+inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
+
+-- | Convert Pandoc inline element to man.
+inlineToMan :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMan opts (Emph lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\f[I]" <> contents <> text "\\f[]"
+inlineToMan opts (Strong lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\f[B]" <> contents <> text "\\f[]"
+inlineToMan opts (Strikeout lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "[STRIKEOUT:" <> contents <> char ']'
+inlineToMan opts (Superscript lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '^' <> contents <> char '^'
+inlineToMan opts (Subscript lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '~' <> contents <> char '~'
+inlineToMan opts (SmallCaps lst) = inlineListToMan opts lst -- not supported
+inlineToMan opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '`' <> contents <> char '\''
+inlineToMan opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\[lq]" <> contents <> text "\\[rq]"
+inlineToMan opts (Cite _ lst) =
+ inlineListToMan opts lst
+inlineToMan _ EmDash = return $ text "\\[em]"
+inlineToMan _ EnDash = return $ text "\\[en]"
+inlineToMan _ Apostrophe = return $ char '\''
+inlineToMan _ Ellipses = return $ text "\\&..."
+inlineToMan _ (Code str) =
+ return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]"
+inlineToMan _ (Str str) = return $ text $ escapeString str
+inlineToMan opts (Math InlineMath str) = inlineToMan opts (Code str)
+inlineToMan opts (Math DisplayMath str) = do
+ contents <- inlineToMan opts (Code str)
+ return $ text ".RS" $$ contents $$ text ".RE"
+inlineToMan _ (TeX _) = return empty
+inlineToMan _ (HtmlInline str) = return $ text $ escapeCode str
+inlineToMan _ (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n"
+inlineToMan _ Space = return $ char ' '
+inlineToMan opts (Link txt (src, _)) = do
+ linktext <- inlineListToMan opts txt
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ return $ if txt == [Code srcSuffix]
+ then char '<' <> text srcSuffix <> char '>'
+ else linktext <> text " (" <> text src <> char ')'
+inlineToMan opts (Image alternate (source, tit)) = do
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate == [Str source]) -- to prevent autolinks
+ then [Str "image"]
+ else alternate
+ linkPart <- inlineToMan opts (Link txt (source, tit))
+ return $ char '[' <> text "IMAGE: " <> linkPart <> char ']'
+inlineToMan _ (Note contents) = do
+ modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state
+ (notes, _) <- get
+ let ref = show $ (length notes)
+ return $ char '[' <> text ref <> char ']'
+
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
new file mode 100644
index 000000000..70d1f0c91
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -0,0 +1,396 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Markdown
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to markdown-formatted plain text.
+
+Markdown: <http://daringfireball.net/projects/markdown/>
+-}
+module Text.Pandoc.Writers.Markdown ( writeMarkdown) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Blocks
+import Text.ParserCombinators.Parsec ( parse, GenParser )
+import Data.List ( group, isPrefixOf, drop, find, intersperse, intercalate )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Refs = KeyTable
+type WriterState = (Notes, Refs)
+
+-- | Convert Pandoc to Markdown.
+writeMarkdown :: WriterOptions -> Pandoc -> String
+writeMarkdown opts document =
+ render $ evalState (pandocToMarkdown opts document) ([],[])
+
+-- | Return markdown representation of document.
+pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToMarkdown opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ let before' = if null before then empty else text before
+ let after' = if null after then empty else text after
+ metaBlock <- metaToMarkdown opts meta
+ let head' = if writerStandalone opts
+ then metaBlock $+$ text (writerHeader opts)
+ else empty
+ let headerBlocks = filter isHeaderBlock blocks
+ let toc = if writerTableOfContents opts
+ then tableOfContents opts headerBlocks
+ else empty
+ body <- blockListToMarkdown opts blocks
+ (notes, _) <- get
+ notes' <- notesToMarkdown opts (reverse notes)
+ (_, refs) <- get -- note that the notes may contain refs
+ refs' <- keyTableToMarkdown opts (reverse refs)
+ return $ head' $+$ before' $+$ toc $+$ body $+$ text "" $+$
+ notes' $+$ text "" $+$ refs' $+$ after'
+
+-- | Return markdown representation of reference key table.
+keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
+keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
+
+-- | Return markdown representation of a reference key.
+keyToMarkdown :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+keyToMarkdown opts (label, (src, tit)) = do
+ label' <- inlineListToMarkdown opts label
+ let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ return $ text " " <> char '[' <> label' <> char ']' <> text ": " <>
+ text src <> tit'
+
+-- | Return markdown representation of notes.
+notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToMarkdown opts notes =
+ mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
+ return . vcat
+
+-- | Return markdown representation of a note.
+noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToMarkdown opts num blocks = do
+ contents <- blockListToMarkdown opts blocks
+ let marker = text "[^" <> text (show num) <> text "]:"
+ return $ hang' marker (writerTabStop opts) contents
+
+-- | Escape special characters for Markdown.
+escapeString :: String -> String
+escapeString = escapeStringUsing markdownEscapes
+ where markdownEscapes = backslashEscapes "`<\\*_^~"
+
+-- | Convert bibliographic information into Markdown header.
+metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc
+metaToMarkdown opts (Meta title authors date) = do
+ title' <- titleToMarkdown opts title
+ authors' <- authorsToMarkdown authors
+ date' <- dateToMarkdown date
+ return $ title' $+$ authors' $+$ date'
+
+titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+titleToMarkdown _ [] = return empty
+titleToMarkdown opts lst = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "% " <> contents
+
+authorsToMarkdown :: [String] -> State WriterState Doc
+authorsToMarkdown [] = return empty
+authorsToMarkdown lst = return $
+ text "% " <> text (intercalate ", " (map escapeString lst))
+
+dateToMarkdown :: String -> State WriterState Doc
+dateToMarkdown [] = return empty
+dateToMarkdown str = return $ text "% " <> text (escapeString str)
+
+-- | Construct table of contents from list of header blocks.
+tableOfContents :: WriterOptions -> [Block] -> Doc
+tableOfContents opts headers =
+ let opts' = opts { writerIgnoreNotes = True }
+ contents = BulletList $ map elementToListItem $ hierarchicalize headers
+ in evalState (blockToMarkdown opts' contents) ([],[])
+
+-- | Converts an Element to a list item for a table of contents,
+elementToListItem :: Element -> [Block]
+elementToListItem (Blk _) = []
+elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
+ if null subsecs
+ then []
+ else [BulletList $ map elementToListItem subsecs]
+
+-- | Ordered list start parser for use in Para below.
+olMarker :: GenParser Char st Char
+olMarker = do (start, style', delim) <- anyOrderedListMarker
+ if delim == Period &&
+ (style' == UpperAlpha || (style' == UpperRoman &&
+ start `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then spaceChar >> spaceChar
+ else spaceChar
+
+-- | True if string begins with an ordered list marker
+beginsWithOrderedListMarker :: String -> Bool
+beginsWithOrderedListMarker str =
+ case parse olMarker "para start" str of
+ Left _ -> False
+ Right _ -> True
+
+wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedMarkdown opts inlines = do
+ let chunks = splitBy LineBreak inlines
+ let chunks' = if null chunks
+ then []
+ else (map (++ [Str " "]) $ init chunks) ++ [last chunks]
+ lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks'
+ return $ vcat lns
+
+-- | Convert Pandoc block element to markdown.
+blockToMarkdown :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToMarkdown _ Null = return empty
+blockToMarkdown opts (Plain inlines) =
+ wrappedMarkdown opts inlines
+blockToMarkdown opts (Para inlines) = do
+ contents <- wrappedMarkdown opts inlines
+ -- escape if para starts with ordered list marker
+ let esc = if (not (writerStrictMarkdown opts)) &&
+ beginsWithOrderedListMarker (render contents)
+ then char '\\'
+ else empty
+ return $ esc <> contents <> text "\n"
+blockToMarkdown _ (RawHtml str) = return $ text str
+blockToMarkdown _ HorizontalRule = return $ text "\n* * * * *\n"
+blockToMarkdown opts (Header level inlines) = do
+ contents <- inlineListToMarkdown opts inlines
+ -- use setext style headers if in literate haskell mode.
+ -- ghc interprets '#' characters in column 1 as line number specifiers.
+ if writerLiterateHaskell opts
+ then let len = length $ render contents
+ in return $ contents <> text "\n" <>
+ case level of
+ 1 -> text $ replicate len '=' ++ "\n"
+ 2 -> text $ replicate len '-' ++ "\n"
+ _ -> empty
+ else return $ text ((replicate level '#') ++ " ") <> contents <> text "\n"
+blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes &&
+ writerLiterateHaskell opts =
+ return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n"
+blockToMarkdown opts (CodeBlock _ str) = return $
+ (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
+blockToMarkdown opts (BlockQuote blocks) = do
+ -- if we're writing literate haskell, put a space before the bird tracks
+ -- so they won't be interpreted as lhs...
+ let leader = if writerLiterateHaskell opts
+ then text . (" > " ++)
+ else text . ("> " ++)
+ contents <- blockListToMarkdown opts blocks
+ return $ (vcat $ map leader $ lines $ render contents) <>
+ text "\n"
+blockToMarkdown opts (Table caption aligns widths headers rows) = do
+ caption' <- inlineListToMarkdown opts caption
+ let caption'' = if null caption
+ then empty
+ else text "" $+$ (text "Table: " <> caption')
+ headers' <- mapM (blockListToMarkdown opts) headers
+ let widthsInChars = map (floor . (78 *)) widths
+ let alignHeader alignment = case alignment of
+ AlignLeft -> leftAlignBlock
+ AlignCenter -> centerAlignBlock
+ AlignRight -> rightAlignBlock
+ AlignDefault -> leftAlignBlock
+ let makeRow = hsepBlocks . (zipWith alignHeader aligns) .
+ (zipWith docToBlock widthsInChars)
+ let head' = makeRow headers'
+ rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row
+ return $ makeRow cols) rows
+ let maxRowHeight = maximum $ map heightOfBlock (head':rows')
+ let isMultilineTable = maxRowHeight > 1
+ let underline = hsep $
+ map (\width -> text $ replicate width '-') widthsInChars
+ let border = if isMultilineTable
+ then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-'
+ else empty
+ let spacer = if isMultilineTable
+ then text ""
+ else empty
+ let body = vcat $ intersperse spacer $ map blockToDoc rows'
+ return $ (nest 2 $ border $+$ (blockToDoc head') $+$ underline $+$ body $+$
+ border $+$ caption'') <> text "\n"
+blockToMarkdown opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMarkdown opts) items
+ return $ (vcat contents) <> text "\n"
+blockToMarkdown opts (OrderedList attribs items) = do
+ let markers = orderedListMarkers attribs
+ let markers' = map (\m -> if length m < 3
+ then m ++ replicate (3 - length m) ' '
+ else m) markers
+ contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
+ zip markers' items
+ return $ (vcat contents) <> text "\n"
+blockToMarkdown opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMarkdown opts) items
+ return $ (vcat contents) <> text "\n"
+
+-- | Convert bullet list item (list of blocks) to markdown.
+bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToMarkdown opts items = do
+ contents <- blockListToMarkdown opts items
+ return $ hang' (text "- ") (writerTabStop opts) contents
+
+-- | Convert ordered list item (a list of blocks) to markdown.
+orderedListItemToMarkdown :: WriterOptions -- ^ options
+ -> String -- ^ list item marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToMarkdown opts marker items = do
+ contents <- blockListToMarkdown opts items
+ return $ hsep [nest (min (3 - length marker) 0) (text marker),
+ nest (writerTabStop opts) contents]
+
+-- | Convert definition list item (label, list of blocks) to markdown.
+definitionListItemToMarkdown :: WriterOptions
+ -> ([Inline],[Block])
+ -> State WriterState Doc
+definitionListItemToMarkdown opts (label, items) = do
+ labelText <- inlineListToMarkdown opts label
+ let tabStop = writerTabStop opts
+ let leader = char ':'
+ contents <- mapM (\item -> blockToMarkdown opts item >>=
+ (\txt -> return (leader $$ nest tabStop txt)))
+ items >>= return . vcat
+ return $ labelText $+$ contents
+
+-- | Convert list of Pandoc block elements to markdown.
+blockListToMarkdown :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToMarkdown opts blocks =
+ mapM (blockToMarkdown opts) blocks >>= return . vcat
+
+-- | Get reference for target; if none exists, create unique one and return.
+-- Prefer label if possible; otherwise, generate a unique key.
+getReference :: [Inline] -> Target -> State WriterState [Inline]
+getReference label (src, tit) = do
+ (_,refs) <- get
+ case find ((== (src, tit)) . snd) refs of
+ Just (ref, _) -> return ref
+ Nothing -> do
+ let label' = case find ((== label) . fst) refs of
+ Just _ -> -- label is used; generate numerical label
+ case find (\n -> not (any (== [Str (show n)])
+ (map fst refs))) [1..(10000 :: Integer)] of
+ Just x -> [Str (show x)]
+ Nothing -> error "no unique label"
+ Nothing -> label
+ modify (\(notes, refs') -> (notes, (label', (src,tit)):refs'))
+ return label'
+
+-- | Convert list of Pandoc inline elements to markdown.
+inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToMarkdown opts lst =
+ mapM (inlineToMarkdown opts) lst >>= return . hcat
+
+-- | Convert Pandoc inline element to markdown.
+inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMarkdown opts (Emph lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '*' <> contents <> char '*'
+inlineToMarkdown opts (Strong lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "**" <> contents <> text "**"
+inlineToMarkdown opts (Strikeout lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "~~" <> contents <> text "~~"
+inlineToMarkdown opts (Superscript lst) = do
+ contents <- inlineListToMarkdown opts lst
+ let contents' = text $ substitute " " "\\ " $ render contents
+ return $ char '^' <> contents' <> char '^'
+inlineToMarkdown opts (Subscript lst) = do
+ contents <- inlineListToMarkdown opts lst
+ let contents' = text $ substitute " " "\\ " $ render contents
+ return $ char '~' <> contents' <> char '~'
+inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst
+inlineToMarkdown opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '\'' <> contents <> char '\''
+inlineToMarkdown opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '"' <> contents <> char '"'
+inlineToMarkdown _ EmDash = return $ text "--"
+inlineToMarkdown _ EnDash = return $ char '-'
+inlineToMarkdown _ Apostrophe = return $ char '\''
+inlineToMarkdown _ Ellipses = return $ text "..."
+inlineToMarkdown _ (Code str) =
+ let tickGroups = filter (\s -> '`' `elem` s) $ group str
+ longest = if null tickGroups
+ then 0
+ else maximum $ map length tickGroups
+ marker = replicate (longest + 1) '`'
+ spacer = if (longest == 0) then "" else " " in
+ return $ text (marker ++ spacer ++ str ++ spacer ++ marker)
+inlineToMarkdown _ (Str str) = return $ text $ escapeString str
+inlineToMarkdown _ (Math InlineMath str) = return $ char '$' <> text str <> char '$'
+inlineToMarkdown _ (Math DisplayMath str) = return $ text "$$" <> text str <> text "$$"
+inlineToMarkdown _ (TeX str) = return $ text str
+inlineToMarkdown _ (HtmlInline str) = return $ text str
+inlineToMarkdown _ (LineBreak) = return $ text " \n"
+inlineToMarkdown _ Space = return $ char ' '
+inlineToMarkdown _ (Cite cits _ ) = do
+ let format (a,b) xs = text a <>
+ (if b /= [] then char '@' else empty) <>
+ text b <>
+ (if isEmpty xs then empty else text "; ") <>
+ xs
+ return $ char '[' <> foldr format empty cits <> char ']'
+inlineToMarkdown opts (Link txt (src, tit)) = do
+ linktext <- inlineListToMarkdown opts txt
+ let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let useRefLinks = writerReferenceLinks opts
+ let useAuto = null tit && txt == [Code srcSuffix]
+ ref <- if useRefLinks then getReference txt (src, tit) else return []
+ reftext <- inlineListToMarkdown opts ref
+ return $ if useAuto
+ then char '<' <> text srcSuffix <> char '>'
+ else if useRefLinks
+ then let first = char '[' <> linktext <> char ']'
+ second = if txt == ref
+ then text "[]"
+ else char '[' <> reftext <> char ']'
+ in first <> second
+ else char '[' <> linktext <> char ']' <>
+ char '(' <> text src <> linktitle <> char ')'
+inlineToMarkdown opts (Image alternate (source, tit)) = do
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate == [Str source]) -- to prevent autolinks
+ then [Str "image"]
+ else alternate
+ linkPart <- inlineToMarkdown opts (Link txt (source, tit))
+ return $ char '!' <> linkPart
+inlineToMarkdown _ (Note contents) = do
+ modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state
+ (notes, _) <- get
+ let ref = show $ (length notes)
+ return $ text "[^" <> text ref <> char ']'
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
new file mode 100644
index 000000000..c5f6b3bf1
--- /dev/null
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -0,0 +1,396 @@
+{-
+Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.MediaWiki
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to MediaWiki markup.
+
+MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki>
+-}
+module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.XML ( escapeStringForXML )
+import Data.List ( intersect )
+import Network.URI ( isURI )
+import Control.Monad.State
+
+data WriterState = WriterState {
+ stNotes :: Bool -- True if there are notes
+ , stListLevel :: [Char] -- String at beginning of list items, e.g. "**"
+ , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ }
+
+-- | Convert Pandoc to MediaWiki.
+writeMediaWiki :: WriterOptions -> Pandoc -> String
+writeMediaWiki opts document =
+ evalState (pandocToMediaWiki opts document)
+ (WriterState { stNotes = False, stListLevel = [], stUseTags = False })
+
+-- | Return MediaWiki representation of document.
+pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String
+pandocToMediaWiki opts (Pandoc _ blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ let head' = if writerStandalone opts
+ then writerHeader opts
+ else ""
+ let toc = if writerTableOfContents opts
+ then "__TOC__\n"
+ else ""
+ body <- blockListToMediaWiki opts blocks
+ notesExist <- get >>= return . stNotes
+ let notes = if notesExist
+ then "\n== Notes ==\n<references />"
+ else ""
+ return $ head' ++ before ++ toc ++ body ++ after ++ notes
+
+-- | Escape special characters for MediaWiki.
+escapeString :: String -> String
+escapeString = escapeStringForXML
+
+-- | Convert Pandoc block element to MediaWiki.
+blockToMediaWiki :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState String
+
+blockToMediaWiki _ Null = return ""
+
+blockToMediaWiki opts (Plain inlines) =
+ inlineListToMediaWiki opts inlines
+
+blockToMediaWiki opts (Para inlines) = do
+ useTags <- get >>= return . stUseTags
+ listLevel <- get >>= return . stListLevel
+ contents <- inlineListToMediaWiki opts inlines
+ return $ if useTags
+ then "<p>" ++ contents ++ "</p>"
+ else contents ++ if null listLevel then "\n" else ""
+
+blockToMediaWiki _ (RawHtml str) = return str
+
+blockToMediaWiki _ HorizontalRule = return "\n-----\n"
+
+blockToMediaWiki opts (Header level inlines) = do
+ contents <- inlineListToMediaWiki opts inlines
+ let eqs = replicate (level + 1) '='
+ return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n"
+
+blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do
+ let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp",
+ "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm",
+ "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran",
+ "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5",
+ "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc",
+ "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql",
+ "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic",
+ "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl",
+ "visualfoxpro", "winbatch", "xml", "xpp", "z80"]
+ let (beg, end) = if null at
+ then ("<pre" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</pre>")
+ else ("<source lang=\"" ++ head at ++ "\">", "</source>")
+ return $ beg ++ escapeString str ++ end
+
+blockToMediaWiki opts (BlockQuote blocks) = do
+ contents <- blockListToMediaWiki opts blocks
+ return $ "<blockquote>" ++ contents ++ "</blockquote>"
+
+blockToMediaWiki opts (Table caption aligns widths headers rows) = do
+ let alignStrings = map alignmentToString aligns
+ captionDoc <- if null caption
+ then return ""
+ else do
+ c <- inlineListToMediaWiki opts caption
+ return $ "<caption>" ++ c ++ "</caption>"
+ colHeads <- colHeadsToMediaWiki opts alignStrings widths headers
+ rows' <- mapM (tableRowToMediaWiki opts alignStrings) rows
+ return $ "<table>\n" ++ captionDoc ++ colHeads ++ vcat rows' ++ "\n</table>"
+
+blockToMediaWiki opts x@(BulletList items) = do
+ oldUseTags <- get >>= return . stUseTags
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ modify $ \s -> s { stUseTags = True }
+ contents <- mapM (listItemToMediaWiki opts) items
+ modify $ \s -> s { stUseTags = oldUseTags }
+ return $ "<ul>\n" ++ vcat contents ++ "</ul>\n"
+ else do
+ modify $ \s -> s { stListLevel = stListLevel s ++ "*" }
+ contents <- mapM (listItemToMediaWiki opts) items
+ modify $ \s -> s { stListLevel = init (stListLevel s) }
+ return $ vcat contents
+
+blockToMediaWiki opts x@(OrderedList attribs items) = do
+ oldUseTags <- get >>= return . stUseTags
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ modify $ \s -> s { stUseTags = True }
+ contents <- mapM (listItemToMediaWiki opts) items
+ modify $ \s -> s { stUseTags = oldUseTags }
+ return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ "</ol>\n"
+ else do
+ modify $ \s -> s { stListLevel = stListLevel s ++ "#" }
+ contents <- mapM (listItemToMediaWiki opts) items
+ modify $ \s -> s { stListLevel = init (stListLevel s) }
+ return $ vcat contents
+
+blockToMediaWiki opts x@(DefinitionList items) = do
+ oldUseTags <- get >>= return . stUseTags
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ modify $ \s -> s { stUseTags = True }
+ contents <- mapM (definitionListItemToMediaWiki opts) items
+ modify $ \s -> s { stUseTags = oldUseTags }
+ return $ "<dl>\n" ++ vcat contents ++ "</dl>\n"
+ else do
+ modify $ \s -> s { stListLevel = stListLevel s ++ ";" }
+ contents <- mapM (definitionListItemToMediaWiki opts) items
+ modify $ \s -> s { stListLevel = init (stListLevel s) }
+ return $ vcat contents
+
+-- Auxiliary functions for lists:
+
+-- | Convert ordered list attributes to HTML attribute string
+listAttribsToString :: ListAttributes -> String
+listAttribsToString (startnum, numstyle, _) =
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ in (if startnum /= 1
+ then " start=\"" ++ show startnum ++ "\""
+ else "") ++
+ (if numstyle /= DefaultStyle
+ then " style=\"list-style-type: " ++ numstyle' ++ ";\""
+ else "")
+
+-- | Convert bullet or ordered list item (list of blocks) to MediaWiki.
+listItemToMediaWiki :: WriterOptions -> [Block] -> State WriterState String
+listItemToMediaWiki opts items = do
+ contents <- blockListToMediaWiki opts items
+ useTags <- get >>= return . stUseTags
+ if useTags
+ then return $ "<li>" ++ contents ++ "</li>"
+ else do
+ marker <- get >>= return . stListLevel
+ return $ marker ++ " " ++ contents
+
+-- | Convert definition list item (label, list of blocks) to MediaWiki.
+definitionListItemToMediaWiki :: WriterOptions
+ -> ([Inline],[Block])
+ -> State WriterState String
+definitionListItemToMediaWiki opts (label, items) = do
+ labelText <- inlineListToMediaWiki opts label
+ contents <- blockListToMediaWiki opts items
+ useTags <- get >>= return . stUseTags
+ if useTags
+ then return $ "<dt>" ++ labelText ++ "</dt>\n<dd>" ++ contents ++ "</dd>"
+ else do
+ marker <- get >>= return . stListLevel
+ return $ marker ++ " " ++ labelText ++ "\n" ++ (init marker ++ ": ") ++ contents
+
+-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
+isSimpleList :: Block -> Bool
+isSimpleList x =
+ case x of
+ BulletList items -> all isSimpleListItem items
+ OrderedList (num, sty, _) items -> all isSimpleListItem items &&
+ num == 1 && sty `elem` [DefaultStyle, Decimal]
+ DefinitionList items -> all isSimpleListItem $ map snd items
+ _ -> False
+
+-- | True if list item can be handled with the simple wiki syntax. False if
+-- HTML tags will be needed.
+isSimpleListItem :: [Block] -> Bool
+isSimpleListItem [] = True
+isSimpleListItem [x] =
+ case x of
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ DefinitionList _ -> isSimpleList x
+ _ -> False
+isSimpleListItem [x, y] | isPlainOrPara x =
+ case y of
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ DefinitionList _ -> isSimpleList y
+ _ -> False
+isSimpleListItem _ = False
+
+isPlainOrPara :: Block -> Bool
+isPlainOrPara (Plain _) = True
+isPlainOrPara (Para _) = True
+isPlainOrPara _ = False
+
+tr :: String -> String
+tr x = "<tr>\n" ++ x ++ "\n</tr>"
+
+-- | Concatenates strings with line breaks between them.
+vcat :: [String] -> String
+vcat [] = ""
+vcat [x] = x
+vcat (x:xs) = x ++ "\n" ++ vcat xs
+
+-- Auxiliary functions for tables:
+
+colHeadsToMediaWiki :: WriterOptions
+ -> [[Char]]
+ -> [Double]
+ -> [[Block]]
+ -> State WriterState String
+colHeadsToMediaWiki opts alignStrings widths headers = do
+ heads <- sequence $ zipWith3
+ (\alignment columnwidth item -> tableItemToMediaWiki opts "th" alignment columnwidth item)
+ alignStrings widths headers
+ return $ tr $ vcat heads
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToMediaWiki :: WriterOptions
+ -> [[Char]]
+ -> [[Block]]
+ -> State WriterState String
+tableRowToMediaWiki opts aligns columns =
+ (sequence $ zipWith3 (tableItemToMediaWiki opts "td") aligns (repeat 0) columns) >>=
+ return . tr . vcat
+
+tableItemToMediaWiki :: WriterOptions
+ -> [Char]
+ -> [Char]
+ -> Double
+ -> [Block]
+ -> State WriterState String
+tableItemToMediaWiki opts tag' align' width' item = do
+ contents <- blockListToMediaWiki opts item
+ let attrib = " align=\"" ++ align' ++ "\"" ++
+ if width' /= 0
+ then " style=\"width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;\""
+ else ""
+ return $ "<" ++ tag' ++ attrib ++ ">" ++ contents ++ "</" ++ tag' ++ ">"
+
+-- | Convert list of Pandoc block elements to MediaWiki.
+blockListToMediaWiki :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState String
+blockListToMediaWiki opts blocks =
+ mapM (blockToMediaWiki opts) blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to MediaWiki.
+inlineListToMediaWiki :: WriterOptions -> [Inline] -> State WriterState String
+inlineListToMediaWiki opts lst =
+ mapM (inlineToMediaWiki opts) lst >>= return . concat
+
+-- | Convert Pandoc inline element to MediaWiki.
+inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String
+
+inlineToMediaWiki opts (Emph lst) = do
+ contents <- inlineListToMediaWiki opts lst
+ return $ "''" ++ contents ++ "''"
+
+inlineToMediaWiki opts (Strong lst) = do
+ contents <- inlineListToMediaWiki opts lst
+ return $ "'''" ++ contents ++ "'''"
+
+inlineToMediaWiki opts (Strikeout lst) = do
+ contents <- inlineListToMediaWiki opts lst
+ return $ "<s>" ++ contents ++ "</s>"
+
+inlineToMediaWiki opts (Superscript lst) = do
+ contents <- inlineListToMediaWiki opts lst
+ return $ "<sup>" ++ contents ++ "</sup>"
+
+inlineToMediaWiki opts (Subscript lst) = do
+ contents <- inlineListToMediaWiki opts lst
+ return $ "<sub>" ++ contents ++ "</sub>"
+
+inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst
+
+inlineToMediaWiki opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMediaWiki opts lst
+ return $ "&lsquo;" ++ contents ++ "&rsquo;"
+
+inlineToMediaWiki opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMediaWiki opts lst
+ return $ "&ldquo;" ++ contents ++ "&rdquo;"
+
+inlineToMediaWiki opts (Cite _ lst) = inlineListToMediaWiki opts lst
+
+inlineToMediaWiki _ EmDash = return "&mdash;"
+
+inlineToMediaWiki _ EnDash = return "&ndash;"
+
+inlineToMediaWiki _ Apostrophe = return "&rsquo;"
+
+inlineToMediaWiki _ Ellipses = return "&hellip;"
+
+inlineToMediaWiki _ (Code str) =
+ return $ "<tt>" ++ (escapeString str) ++ "</tt>"
+
+inlineToMediaWiki _ (Str str) = return $ escapeString str
+
+inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>"
+ -- note: str should NOT be escaped
+
+inlineToMediaWiki _ (TeX _) = return ""
+
+inlineToMediaWiki _ (HtmlInline str) = return str
+
+inlineToMediaWiki _ (LineBreak) = return "<br />\n"
+
+inlineToMediaWiki _ Space = return " "
+
+inlineToMediaWiki opts (Link txt (src, _)) = do
+ link <- inlineListToMediaWiki opts txt
+ let useAuto = txt == [Code src]
+ let src' = if isURI src
+ then src
+ else if take 1 src == "/"
+ then "http://{{SERVERNAME}}" ++ src
+ else "http://{{SERVERNAME}}/" ++ src
+ return $ if useAuto
+ then src'
+ else "[" ++ src' ++ " " ++ link ++ "]"
+
+inlineToMediaWiki opts (Image alt (source, tit)) = do
+ alt' <- inlineListToMediaWiki opts alt
+ let txt = if (null tit)
+ then if null alt
+ then ""
+ else "|" ++ alt'
+ else "|" ++ tit
+ return $ "[[Image:" ++ source ++ txt ++ "]]"
+
+inlineToMediaWiki opts (Note contents) = do
+ contents' <- blockListToMediaWiki opts contents
+ modify (\s -> s { stNotes = True })
+ return $ "<ref>" ++ contents' ++ "</ref>"
+ -- note - may not work for notes with multiple blocks
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
new file mode 100644
index 000000000..52438f81e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -0,0 +1,568 @@
+{-# LANGUAGE PatternGuards #-}
+{-
+Copyright (C) 2008 Andrea Rossato <andrea.rossato@ing.unitn.it>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.OpenDocument
+ Copyright : Copyright (C) 2008 Andrea Rossato
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to OpenDocument XML.
+-}
+module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.XML
+import Text.Pandoc.Readers.TeXMath
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Text.Printf ( printf )
+import Control.Applicative ( (<$>) )
+import Control.Arrow ( (***), (>>>) )
+import Control.Monad.State hiding ( when )
+import Data.Char (chr)
+import Data.List (intercalate)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara :: Block -> Block
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+--
+-- OpenDocument writer
+--
+
+data WriterState =
+ WriterState { stNotes :: [Doc]
+ , stTableStyles :: [Doc]
+ , stParaStyles :: [Doc]
+ , stListStyles :: [(Int, [Doc])]
+ , stTextStyles :: [Doc]
+ , stTextStyleAttr :: [(TextStyle,[(String,String)])]
+ , stIndentPara :: Int
+ , stInDefinition :: Bool
+ , stTight :: Bool
+ }
+
+defaultWriterState :: WriterState
+defaultWriterState =
+ WriterState { stNotes = []
+ , stTableStyles = []
+ , stParaStyles = []
+ , stListStyles = []
+ , stTextStyles = []
+ , stTextStyleAttr = []
+ , stIndentPara = 0
+ , stInDefinition = False
+ , stTight = False
+ }
+
+when :: Bool -> Doc -> Doc
+when p a = if p then a else empty
+
+addTableStyle :: Doc -> State WriterState ()
+addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s }
+
+addNote :: Doc -> State WriterState ()
+addNote i = modify $ \s -> s { stNotes = i : stNotes s }
+
+addParaStyle :: Doc -> State WriterState ()
+addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s }
+
+addTextStyle :: Doc -> State WriterState ()
+addTextStyle i = modify $ \s -> s { stTextStyles = i : stTextStyles s }
+
+addTextStyleAttr :: (TextStyle, [(String,String)]) -> State WriterState ()
+addTextStyleAttr i = modify $ \s -> s { stTextStyleAttr = i : stTextStyleAttr s }
+
+rmTextStyleAttr :: State WriterState ()
+rmTextStyleAttr = modify $ \s -> s { stTextStyleAttr = rmHead (stTextStyleAttr s) }
+ where rmHead l = if l /= [] then tail l else []
+
+increaseIndent :: State WriterState ()
+increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s }
+
+resetIndent :: State WriterState ()
+resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 }
+
+inTightList :: State WriterState a -> State WriterState a
+inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r ->
+ modify (\s -> s { stTight = False }) >> return r
+
+setInDefinitionList :: Bool -> State WriterState ()
+setInDefinitionList b = modify $ \s -> s { stInDefinition = b }
+
+inParagraphTags :: Doc -> Doc
+inParagraphTags = inTags False "text:p" [("text:style-name", "Text_20_body")]
+
+inParagraphTagsWithStyle :: String -> Doc -> Doc
+inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)]
+
+inSpanTags :: String -> Doc -> Doc
+inSpanTags s = inTags False "text:span" [("text:style-name",s)]
+
+withTextStyle :: TextStyle -> State WriterState a -> State WriterState a
+withTextStyle s f = addTextStyleAttr (s,textStyleAttr s) >>
+ f >>= \r -> rmTextStyleAttr >> return r
+
+inTextStyle :: Doc -> State WriterState Doc
+inTextStyle d = do
+ at <- gets stTextStyleAttr
+ if at == []
+ then return d
+ else do
+ tn <- (+) 1 . length <$> gets stTextStyles
+ addTextStyle $ inTags False "style:style" [("style:name" , "T" ++ show tn)
+ ,("style:family", "text" )]
+ $ selfClosingTag "style:text-properties" (concatMap snd at)
+ return $ inTags False "text:span" [("text:style-name","T" ++ show tn)] d
+
+inHeaderTags :: Int -> Doc -> Doc
+inHeaderTags i = inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i)
+ , ("text:outline-level", show i)]
+
+inQuotes :: QuoteType -> Doc -> Doc
+inQuotes SingleQuote s = text "&#8216;" <> s <> text "&#8217;"
+inQuotes DoubleQuote s = text "&#8220;" <> s <> text "&#8221;"
+
+handleSpaces :: String -> Doc
+handleSpaces s
+ | ( ' ':_) <- s = genTag s
+ | ('\t':x) <- s = selfClosingTag "text:tab" [] <> rm x
+ | otherwise = rm s
+ where
+ genTag = span (==' ') >>> tag . length *** rm >>> uncurry (<>)
+ tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", show n)]
+ rm ( ' ':xs) = char ' ' <> genTag xs
+ rm ('\t':xs) = selfClosingTag "text:tab" [] <> genTag xs
+ rm ( x:xs) = char x <> rm xs
+ rm [] = empty
+
+-- | Convert list of authors to a docbook <author> section
+authorToOpenDocument :: [Char] -> Doc
+authorToOpenDocument name =
+ if ',' `elem` name
+ then -- last name first
+ let (lastname, rest) = break (==',') name
+ firstname = removeLeadingSpace rest
+ in inParagraphTagsWithStyle "Author" $
+ (text $ escapeStringForXML firstname) <+>
+ (text $ escapeStringForXML lastname)
+ else -- last name last
+ let namewords = words name
+ lengthname = length namewords
+ (firstname, lastname) = case lengthname of
+ 0 -> ("","")
+ 1 -> ("", name)
+ n -> (intercalate " " (take (n-1) namewords), last namewords)
+ in inParagraphTagsWithStyle "Author" $
+ (text $ escapeStringForXML firstname) <+>
+ (text $ escapeStringForXML lastname)
+
+-- | Convert Pandoc document to string in OpenDocument format.
+writeOpenDocument :: WriterOptions -> Pandoc -> String
+writeOpenDocument opts (Pandoc (Meta title authors date) blocks) =
+ let root = inTags True "office:document-content" openDocumentNameSpaces
+ header = when (writerStandalone opts) $ text (writerHeader opts)
+ title' = case runState (wrap opts title) defaultWriterState of
+ (t,_) -> if isEmpty t then empty else inHeaderTags 1 t
+ authors' = when (authors /= []) $ vcat (map authorToOpenDocument authors)
+ date' = when (date /= []) $
+ inParagraphTagsWithStyle "Date" (text $ escapeStringForXML date)
+ meta = when (writerStandalone opts) $ title' $$ authors' $$ date'
+ before = writerIncludeBefore opts
+ after = writerIncludeAfter opts
+ (doc, s) = runState (blocksToOpenDocument opts blocks) defaultWriterState
+ body = (if null before then empty else text before) $$
+ doc $$
+ (if null after then empty else text after)
+ body' = if writerStandalone opts
+ then inTagsIndented "office:body" $
+ inTagsIndented "office:text" (meta $$ body)
+ else body
+ styles = stTableStyles s ++ stParaStyles s ++ stTextStyles s
+ listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l)
+ listStyles = map listStyle (stListStyles s)
+ in render $ header $$ root (generateStyles (styles ++ listStyles) $$ body' $$ text "")
+
+withParagraphStyle :: WriterOptions -> String -> [Block] -> State WriterState Doc
+withParagraphStyle o s (b:bs)
+ | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l
+ | otherwise = go =<< blockToOpenDocument o b
+ where go i = ($$) i <$> withParagraphStyle o s bs
+withParagraphStyle _ _ [] = return empty
+
+inPreformattedTags :: String -> State WriterState Doc
+inPreformattedTags s = do
+ n <- paraStyle "Preformatted_20_Text" []
+ return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s
+
+orderedListToOpenDocument :: WriterOptions -> Int -> [[Block]] -> State WriterState Doc
+orderedListToOpenDocument o pn bs =
+ vcat . map (inTagsIndented "text:list-item") <$>
+ mapM (orderedItemToOpenDocument o pn . map plainToPara) bs
+
+orderedItemToOpenDocument :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+orderedItemToOpenDocument o n (b:bs)
+ | OrderedList a l <- b = newLevel a l
+ | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l
+ | otherwise = go =<< blockToOpenDocument o b
+ where
+ go i = ($$) i <$> orderedItemToOpenDocument o n bs
+ newLevel a l = do
+ nn <- length <$> gets stParaStyles
+ ls <- head <$> gets stListStyles
+ modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) }
+ inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l
+orderedItemToOpenDocument _ _ [] = return empty
+
+isTightList :: [[Block]] -> Bool
+isTightList [] = False
+isTightList (b:_)
+ | Plain {} : _ <- b = True
+ | otherwise = False
+
+newOrderedListStyle :: Bool -> ListAttributes -> State WriterState (Int,Int)
+newOrderedListStyle b a = do
+ ln <- (+) 1 . length <$> gets stListStyles
+ let nbs = orderedListLevelStyle a (ln, [])
+ pn <- if b then inTightList (paraListStyle ln) else paraListStyle ln
+ modify $ \s -> s { stListStyles = nbs : stListStyles s }
+ return (ln,pn)
+
+bulletListToOpenDocument :: WriterOptions -> [[Block]] -> State WriterState Doc
+bulletListToOpenDocument o b = do
+ ln <- (+) 1 . length <$> gets stListStyles
+ (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln
+ modify $ \s -> s { stListStyles = ns : stListStyles s }
+ is <- listItemsToOpenDocument ("P" ++ show pn) o b
+ return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is
+
+listItemsToOpenDocument :: String -> WriterOptions -> [[Block]] -> State WriterState Doc
+listItemsToOpenDocument s o is =
+ vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is
+
+deflistItemToOpenDocument :: WriterOptions -> ([Inline],[Block]) -> State WriterState Doc
+deflistItemToOpenDocument o (t,d) = do
+ let ts = if isTightList [d]
+ then "Definition_20_Term_20_Tight" else "Definition_20_Term"
+ ds = if isTightList [d]
+ then "Definition_20_Definition_20_Tight" else "Definition_20_Definition"
+ t' <- withParagraphStyle o ts [Para t]
+ d' <- withParagraphStyle o ds (map plainToPara d)
+ return $ t' $$ d'
+
+inBlockQuote :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+inBlockQuote o i (b:bs)
+ | BlockQuote l <- b = do increaseIndent
+ ni <- paraStyle "Quotations" []
+ go =<< inBlockQuote o ni (map plainToPara l)
+ | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l
+ | otherwise = do go =<< blockToOpenDocument o b
+ where go block = ($$) block <$> inBlockQuote o i bs
+inBlockQuote _ _ [] = resetIndent >> return empty
+
+-- | Convert a list of Pandoc blocks to OpenDocument.
+blocksToOpenDocument :: WriterOptions -> [Block] -> State WriterState Doc
+blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
+
+-- | Convert a Pandoc block element to OpenDocument.
+blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc
+blockToOpenDocument o bs
+ | Plain b <- bs = inParagraphTags <$> wrap o b
+ | Para b <- bs = inParagraphTags <$> wrap o b
+ | Header i b <- bs = inHeaderTags i <$> wrap o b
+ | BlockQuote b <- bs = mkBlockQuote b
+ | CodeBlock _ s <- bs = preformatted s
+ | RawHtml _ <- bs = return empty
+ | DefinitionList b <- bs = defList b
+ | BulletList b <- bs = bulletListToOpenDocument o b
+ | OrderedList a b <- bs = orderedList a b
+ | Table c a w h r <- bs = table c a w h r
+ | Null <- bs = return empty
+ | HorizontalRule <- bs = return $ selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]
+ | otherwise = return empty
+ where
+ defList b = do setInDefinitionList True
+ r <- vcat <$> mapM (deflistItemToOpenDocument o) b
+ setInDefinitionList False
+ return r
+ preformatted s = vcat <$> mapM (inPreformattedTags . escapeStringForXML) (lines s)
+ mkBlockQuote b = do increaseIndent
+ i <- paraStyle "Quotations" []
+ inBlockQuote o i (map plainToPara b)
+ orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a
+ inTags True "text:list" [ ("text:style-name", "L" ++ show ln)]
+ <$> orderedListToOpenDocument o pn b
+ table c a w h r = do
+ tn <- length <$> gets stTableStyles
+ pn <- length <$> gets stParaStyles
+ let genIds = map chr [65..]
+ name = "Table" ++ show (tn + 1)
+ columnIds = zip genIds w
+ mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name ++ "." ++ [fst n])]
+ columns = map mkColumn columnIds
+ paraHStyles = paraTableStyles "Heading" pn a
+ paraStyles = paraTableStyles "Contents" (pn + length (newPara paraHStyles)) a
+ newPara = map snd . filter (not . isEmpty . snd)
+ addTableStyle $ tableStyle tn columnIds
+ mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles
+ captionDoc <- if null c
+ then return empty
+ else withParagraphStyle o "Caption" [Para c]
+ th <- colHeadsToOpenDocument o name (map fst paraHStyles) h
+ tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r
+ return $ inTags True "table:table" [ ("table:name" , name)
+ , ("table:style-name", name)
+ ] (vcat columns $$ th $$ vcat tr) $$ captionDoc
+
+colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc
+colHeadsToOpenDocument o tn ns hs =
+ inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$>
+ mapM (tableItemToOpenDocument o tn) (zip ns hs)
+
+tableRowToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc
+tableRowToOpenDocument o tn ns cs =
+ inTagsIndented "table:table-row" . vcat <$>
+ mapM (tableItemToOpenDocument o tn) (zip ns cs)
+
+tableItemToOpenDocument :: WriterOptions -> String -> (String,[Block])-> State WriterState Doc
+tableItemToOpenDocument o tn (n,i) =
+ let a = [ ("table:style-name" , tn ++ ".A1" )
+ , ("office:value-type", "string" )
+ ]
+ in inTags True "table:table-cell" a <$>
+ withParagraphStyle o n (map plainToPara i)
+
+-- | Take list of inline elements and return wrapped doc.
+wrap :: WriterOptions -> [Inline] -> State WriterState Doc
+wrap o l = if writerWrapText o
+ then fsep <$> mapM (inlinesToOpenDocument o) (splitBy Space l)
+ else inlinesToOpenDocument o l
+
+-- | Convert a list of inline elements to OpenDocument.
+inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc
+inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l
+
+-- | Convert an inline element to OpenDocument.
+inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc
+inlineToOpenDocument o ils
+ | Ellipses <- ils = inTextStyle $ text "&#8230;"
+ | EmDash <- ils = inTextStyle $ text "&#8212;"
+ | EnDash <- ils = inTextStyle $ text "&#8211;"
+ | Apostrophe <- ils = inTextStyle $ text "&#8217;"
+ | Space <- ils = inTextStyle $ char ' '
+ | LineBreak <- ils = return $ selfClosingTag "text:line-break" []
+ | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s
+ | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l
+ | Strong l <- ils = withTextStyle Bold $ inlinesToOpenDocument o l
+ | Strikeout l <- ils = withTextStyle Strike $ inlinesToOpenDocument o l
+ | Superscript l <- ils = withTextStyle Sup $ inlinesToOpenDocument o l
+ | Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l
+ | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l
+ | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l
+ | Code s <- ils = preformatted s
+ | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s)
+ | Cite _ l <- ils = inlinesToOpenDocument o l
+ | TeX s <- ils = preformatted s
+ | HtmlInline s <- ils = preformatted s
+ | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
+ | Image _ (s,_) <- ils = return $ mkImg s
+ | Note l <- ils = mkNote l
+ | otherwise = return empty
+ where
+ preformatted = return . inSpanTags "Teletype" . handleSpaces . escapeStringForXML
+ mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple")
+ , ("xlink:href" , s )
+ , ("office:name", t )
+ ] . inSpanTags "Definition"
+ mkImg s = inTags False "draw:frame" [] $
+ selfClosingTag "draw:image" [ ("xlink:href" , s )
+ , ("xlink:type" , "simple")
+ , (" xlink:show" , "embed" )
+ , ("xlink:actuate", "onLoad")]
+ mkNote l = do
+ n <- length <$> gets stNotes
+ let footNote t = inTags False "text:note"
+ [ ("text:id" , "ftn" ++ show n)
+ , ("text:note-class", "footnote" )] $
+ inTagsSimple "text:note-citation" (text . show $ n + 1) $$
+ inTagsSimple "text:note-body" t
+ nn <- footNote <$> withParagraphStyle o "Footnote" l
+ addNote nn
+ return nn
+
+generateStyles :: [Doc] -> Doc
+generateStyles acc =
+ let scripts = selfClosingTag "office:scripts" []
+ fonts = inTagsIndented "office:font-face-decls"
+ (vcat $ map font ["Lucida Sans Unicode", "Tahoma", "Times New Roman"])
+ font fn = selfClosingTag "style:font-face"
+ [ ("style:name" , "&apos;" ++ fn ++ "&apos;")
+ , ("svg:font-family", fn )]
+ in scripts $$ fonts $$ inTagsIndented "office:automatic-styles" (vcat $ reverse acc)
+
+bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc]))
+bulletListStyle l =
+ let doStyles i = inTags True "text:list-level-style-bullet"
+ [ ("text:level" , show (i + 1) )
+ , ("text:style-name" , "Bullet_20_Symbols")
+ , ("style:num-suffix", "." )
+ , ("text:bullet-char", [bulletList !! i] )
+ ] (listLevelStyle (1 + i))
+ bulletList = map chr $ cycle [8226,8227,8259]
+ listElStyle = map doStyles [0..9]
+ in do pn <- paraListStyle l
+ return (pn, (l, listElStyle))
+
+orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc])
+orderedListLevelStyle (s,n, d) (l,ls) =
+ let suffix = case d of
+ OneParen -> [("style:num-suffix", ")")]
+ TwoParens -> [("style:num-prefix", "(")
+ ,("style:num-suffix", ")")]
+ _ -> [("style:num-suffix", ".")]
+ format = case n of
+ UpperAlpha -> "A"
+ LowerAlpha -> "a"
+ UpperRoman -> "I"
+ LowerRoman -> "i"
+ _ -> "1"
+ listStyle = inTags True "text:list-level-style-number"
+ ([ ("text:level" , show $ 1 + length ls )
+ , ("text:style-name" , "Numbering_20_Symbols")
+ , ("style:num-format", format )
+ , ("text:start-value", show s )
+ ] ++ suffix) (listLevelStyle (1 + length ls))
+ in (l, ls ++ [listStyle])
+
+listLevelStyle :: Int -> Doc
+listLevelStyle i =
+ let indent = show (0.25 * fromIntegral i :: Double) in
+ selfClosingTag "style:list-level-properties"
+ [ ("text:space-before" , indent ++ "in")
+ , ("text:min-label-width", "0.25in")]
+
+tableStyle :: Int -> [(Char,Double)] -> Doc
+tableStyle num wcs =
+ let tableId = "Table" ++ show (num + 1)
+ table = inTags True "style:style"
+ [("style:name", tableId)] $
+ selfClosingTag "style:table-properties"
+ [ ("style:rel-width", "100%" )
+ , ("table:align" , "center")]
+ colStyle (c,w) = inTags True "style:style"
+ [ ("style:name" , tableId ++ "." ++ [c])
+ , ("style:family", "table-column" )] $
+ selfClosingTag "style:table-column-properties"
+ [("style:column-width", printf "%.2f" (7 * w) ++ "in")]
+ cellStyle = inTags True "style:style"
+ [ ("style:name" , tableId ++ ".A1")
+ , ("style:family", "table-cell" )] $
+ selfClosingTag "style:table-cell-properties"
+ [ ("fo:border", "none")]
+ columnStyles = map colStyle wcs
+ in table $$ vcat columnStyles $$ cellStyle
+
+paraStyle :: String -> [(String,String)] -> State WriterState Int
+paraStyle parent attrs = do
+ pn <- (+) 1 . length <$> gets stParaStyles
+ i <- (*) 0.5 . fromIntegral <$> gets stIndentPara :: State WriterState Double
+ b <- gets stInDefinition
+ t <- gets stTight
+ let styleAttr = [ ("style:name" , "P" ++ show pn)
+ , ("style:family" , "paragraph" )
+ , ("style:parent-style-name", parent )]
+ indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i
+ tight = if t then [ ("fo:margin-top" , "0in" )
+ , ("fo:margin-bottom" , "0in" )]
+ else []
+ indent = when (i /= 0 || b || t) $
+ selfClosingTag "style:paragraph-properties" $
+ [ ("fo:margin-left" , indentVal)
+ , ("fo:margin-right" , "0in" )
+ , ("fo:text-indent" , "0in" )
+ , ("style:auto-text-indent" , "false" )]
+ ++ tight
+ addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) indent
+ return pn
+
+paraListStyle :: Int -> State WriterState Int
+paraListStyle l = paraStyle "Text_20_body" [("style:list-style-name", "L" ++ show l )]
+
+paraTableStyles :: String -> Int -> [Alignment] -> [(String, Doc)]
+paraTableStyles _ _ [] = []
+paraTableStyles t s (a:xs)
+ | AlignRight <- a = ( pName s, res s "end" ) : paraTableStyles t (s + 1) xs
+ | AlignCenter <- a = ( pName s, res s "center") : paraTableStyles t (s + 1) xs
+ | otherwise = ("Table_20_" ++ t, empty ) : paraTableStyles t s xs
+ where pName sn = "P" ++ show (sn + 1)
+ res sn x = inTags True "style:style"
+ [ ("style:name" , pName sn )
+ , ("style:family" , "paragraph" )
+ , ("style:parent-style-name", "Table_20_" ++ t)] $
+ selfClosingTag "style:paragraph-properties"
+ [ ("fo:text-align", x)
+ , ("style:justify-single-word", "false")]
+
+data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq )
+
+textStyleAttr :: TextStyle -> [(String,String)]
+textStyleAttr s
+ | Italic <- s = [("fo:font-style" ,"italic" )
+ ,("style:font-style-asian" ,"italic" )
+ ,("style:font-style-complex" ,"italic" )]
+ | Bold <- s = [("fo:font-weight" ,"bold" )
+ ,("style:font-weight-asian" ,"bold" )
+ ,("style:font-weight-complex" ,"bold" )]
+ | Strike <- s = [("style:text-line-through-style", "solid" )]
+ | Sub <- s = [("style:text-position" ,"sub 58%" )]
+ | Sup <- s = [("style:text-position" ,"super 58%" )]
+ | SmallC <- s = [("fo:font-variant" ,"small-caps")]
+ | otherwise = []
+
+openDocumentNameSpaces :: [(String, String)]
+openDocumentNameSpaces =
+ [ ("xmlns:office" , "urn:oasis:names:tc:opendocument:xmlns:office:1.0" )
+ , ("xmlns:style" , "urn:oasis:names:tc:opendocument:xmlns:style:1.0" )
+ , ("xmlns:text" , "urn:oasis:names:tc:opendocument:xmlns:text:1.0" )
+ , ("xmlns:table" , "urn:oasis:names:tc:opendocument:xmlns:table:1.0" )
+ , ("xmlns:draw" , "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" )
+ , ("xmlns:fo" , "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0")
+ , ("xmlns:xlink" , "http://www.w3.org/1999/xlink" )
+ , ("xmlns:dc" , "http://purl.org/dc/elements/1.1/" )
+ , ("xmlns:meta" , "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" )
+ , ("xmlns:number" , "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" )
+ , ("xmlns:svg" , "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" )
+ , ("xmlns:chart" , "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" )
+ , ("xmlns:dr3d" , "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" )
+ , ("xmlns:math" , "http://www.w3.org/1998/Math/MathML" )
+ , ("xmlns:form" , "urn:oasis:names:tc:opendocument:xmlns:form:1.0" )
+ , ("xmlns:script" , "urn:oasis:names:tc:opendocument:xmlns:script:1.0" )
+ , ("xmlns:ooo" , "http://openoffice.org/2004/office" )
+ , ("xmlns:ooow" , "http://openoffice.org/2004/writer" )
+ , ("xmlns:oooc" , "http://openoffice.org/2004/calc" )
+ , ("xmlns:dom" , "http://www.w3.org/2001/xml-events" )
+ , ("xmlns:xforms" , "http://www.w3.org/2002/xforms" )
+ , ("xmlns:xsd" , "http://www.w3.org/2001/XMLSchema" )
+ , ("xmlns:xsi" , "http://www.w3.org/2001/XMLSchema-instance" )
+ , ("office:version", "1.0" )
+ ]
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
new file mode 100644
index 000000000..91826cbcd
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -0,0 +1,346 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.RST
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to reStructuredText.
+
+reStructuredText: <http://docutils.sourceforge.net/rst.html>
+-}
+module Text.Pandoc.Writers.RST ( writeRST) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Blocks
+import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+import Control.Applicative ( (<$>) )
+
+data WriterState =
+ WriterState { stNotes :: [[Block]]
+ , stLinks :: KeyTable
+ , stImages :: KeyTable
+ , stIncludes :: [String]
+ , stOptions :: WriterOptions
+ }
+
+-- | Convert Pandoc to RST.
+writeRST :: WriterOptions -> Pandoc -> String
+writeRST opts document =
+ let st = WriterState { stNotes = [], stLinks = [],
+ stImages = [], stIncludes = [],
+ stOptions = opts }
+ in render $ evalState (pandocToRST document) st
+
+-- | Return RST representation of document.
+pandocToRST :: Pandoc -> State WriterState Doc
+pandocToRST (Pandoc meta blocks) = do
+ opts <- get >>= (return . stOptions)
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ before' = if null before then empty else text before
+ after' = if null after then empty else text after
+ metaBlock <- metaToRST opts meta
+ let head' = if (writerStandalone opts)
+ then metaBlock $+$ text (writerHeader opts)
+ else empty
+ body <- blockListToRST blocks
+ includes <- get >>= (return . concat . stIncludes)
+ let includes' = if null includes then empty else text includes
+ notes <- get >>= (notesToRST . reverse . stNotes)
+ -- note that the notes may contain refs, so we do them first
+ refs <- get >>= (keyTableToRST . reverse . stLinks)
+ pics <- get >>= (pictTableToRST . reverse . stImages)
+ return $ head' $+$ before' $+$ includes' $+$ body $+$ notes $+$ text "" $+$
+ refs $+$ pics $+$ after'
+
+-- | Return RST representation of reference key table.
+keyTableToRST :: KeyTable -> State WriterState Doc
+keyTableToRST refs = mapM keyToRST refs >>= return . vcat
+
+-- | Return RST representation of a reference key.
+keyToRST :: ([Inline], (String, String))
+ -> State WriterState Doc
+keyToRST (label, (src, _)) = do
+ label' <- inlineListToRST label
+ let label'' = if ':' `elem` (render label')
+ then char '`' <> label' <> char '`'
+ else label'
+ return $ text ".. _" <> label'' <> text ": " <> text src
+
+-- | Return RST representation of notes.
+notesToRST :: [[Block]] -> State WriterState Doc
+notesToRST notes =
+ mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>=
+ return . vcat
+
+-- | Return RST representation of a note.
+noteToRST :: Int -> [Block] -> State WriterState Doc
+noteToRST num note = do
+ contents <- blockListToRST note
+ let marker = text ".. [" <> text (show num) <> text "]"
+ return $ marker $$ nest 3 contents
+
+-- | Return RST representation of picture reference table.
+pictTableToRST :: KeyTable -> State WriterState Doc
+pictTableToRST refs = mapM pictToRST refs >>= return . vcat
+
+-- | Return RST representation of a picture substitution reference.
+pictToRST :: ([Inline], (String, String))
+ -> State WriterState Doc
+pictToRST (label, (src, _)) = do
+ label' <- inlineListToRST label
+ return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <>
+ text src
+
+-- | Take list of inline elements and return wrapped doc.
+wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedRST opts inlines = do
+ lineBreakDoc <- inlineToRST LineBreak
+ chunks <- mapM (wrapIfNeeded opts inlineListToRST)
+ (splitBy LineBreak inlines)
+ return $ vcat $ intersperse lineBreakDoc chunks
+
+-- | Escape special characters for RST.
+escapeString :: String -> String
+escapeString = escapeStringUsing (backslashEscapes "`\\|*_")
+
+-- | Convert bibliographic information into RST header.
+metaToRST :: WriterOptions -> Meta -> State WriterState Doc
+metaToRST opts (Meta title authors date) = do
+ title' <- titleToRST title
+ authors' <- authorsToRST authors
+ date' <- dateToRST date
+ let toc = if writerTableOfContents opts
+ then text "" $+$ text ".. contents::"
+ else empty
+ return $ title' $+$ authors' $+$ date' $+$ toc
+
+titleToRST :: [Inline] -> State WriterState Doc
+titleToRST [] = return empty
+titleToRST lst = do
+ contents <- inlineListToRST lst
+ let titleLength = length $ render contents
+ let border = text (replicate titleLength '=')
+ return $ border $+$ contents $+$ border <> text "\n"
+
+authorsToRST :: [String] -> State WriterState Doc
+authorsToRST [] = return empty
+authorsToRST (first:rest) = do
+ rest' <- authorsToRST rest
+ return $ (text ":Author: " <> text first) $+$ rest'
+
+dateToRST :: String -> State WriterState Doc
+dateToRST [] = return empty
+dateToRST str = return $ text ":Date: " <> text (escapeString str)
+
+-- | Convert Pandoc block element to RST.
+blockToRST :: Block -- ^ Block element
+ -> State WriterState Doc
+blockToRST Null = return empty
+blockToRST (Plain inlines) = do
+ opts <- get >>= (return . stOptions)
+ wrappedRST opts inlines
+blockToRST (Para inlines) = do
+ opts <- get >>= (return . stOptions)
+ contents <- wrappedRST opts inlines
+ return $ contents <> text "\n"
+blockToRST (RawHtml str) =
+ let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
+ return $ (text "\n.. raw:: html\n") $$ (nest 3 $ vcat $ map text (lines str'))
+blockToRST HorizontalRule = return $ text "--------------\n"
+blockToRST (Header level inlines) = do
+ contents <- inlineListToRST inlines
+ let headerLength = length $ render contents
+ let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
+ let border = text $ replicate headerLength headerChar
+ return $ contents $+$ border <> text "\n"
+blockToRST (CodeBlock (_,classes,_) str) = do
+ opts <- stOptions <$> get
+ let tabstop = writerTabStop opts
+ if "haskell" `elem` classes && writerLiterateHaskell opts
+ then return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n"
+ else return $ (text "::\n") $+$
+ (nest tabstop $ vcat $ map text (lines str)) <> text "\n"
+blockToRST (BlockQuote blocks) = do
+ tabstop <- get >>= (return . writerTabStop . stOptions)
+ contents <- blockListToRST blocks
+ return $ (nest tabstop contents) <> text "\n"
+blockToRST (Table caption _ widths headers rows) = do
+ caption' <- inlineListToRST caption
+ let caption'' = if null caption
+ then empty
+ else text "" $+$ (text "Table: " <> caption')
+ headers' <- mapM blockListToRST headers
+ let widthsInChars = map (floor . (78 *)) widths
+ let hpipeBlocks blocks = hcatBlocks [beg, middle, end]
+ where height = maximum (map heightOfBlock blocks)
+ sep' = TextBlock 3 height (replicate height " | ")
+ beg = TextBlock 2 height (replicate height "| ")
+ end = TextBlock 2 height (replicate height " |")
+ middle = hcatBlocks $ intersperse sep' blocks
+ let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars
+ let head' = makeRow headers'
+ rows' <- mapM (\row -> do cols <- mapM blockListToRST row
+ return $ makeRow cols) rows
+ let border ch = char '+' <> char ch <>
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ map (\l -> text $ replicate l ch) widthsInChars) <>
+ char ch <> char '+'
+ let body = vcat $ intersperse (border '-') $ map blockToDoc rows'
+ return $ border '-' $+$ blockToDoc head' $+$ border '=' $+$ body $+$
+ border '-' $$ caption'' $$ text ""
+blockToRST (BulletList items) = do
+ contents <- mapM bulletListItemToRST items
+ -- ensure that sublists have preceding blank line
+ return $ text "" $+$ vcat contents <> text "\n"
+blockToRST (OrderedList (start, style', delim) items) = do
+ let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
+ then take (length items) $ repeat "#."
+ else take (length items) $ orderedListMarkers
+ (start, style', delim)
+ let maxMarkerLength = maximum $ map length markers
+ let markers' = map (\m -> let s = maxMarkerLength - length m
+ in m ++ replicate s ' ') markers
+ contents <- mapM (\(item, num) -> orderedListItemToRST item num) $
+ zip markers' items
+ -- ensure that sublists have preceding blank line
+ return $ text "" $+$ vcat contents <> text "\n"
+blockToRST (DefinitionList items) = do
+ contents <- mapM definitionListItemToRST items
+ return $ (vcat contents) <> text "\n"
+
+-- | Convert bullet list item (list of blocks) to RST.
+bulletListItemToRST :: [Block] -> State WriterState Doc
+bulletListItemToRST items = do
+ contents <- blockListToRST items
+ return $ (text "- ") <> contents
+
+-- | Convert ordered list item (a list of blocks) to RST.
+orderedListItemToRST :: String -- ^ marker for list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToRST marker items = do
+ contents <- blockListToRST items
+ return $ (text marker <> char ' ') <> contents
+
+-- | Convert defintion list item (label, list of blocks) to RST.
+definitionListItemToRST :: ([Inline], [Block]) -> State WriterState Doc
+definitionListItemToRST (label, items) = do
+ label' <- inlineListToRST label
+ contents <- blockListToRST items
+ tabstop <- get >>= (return . writerTabStop . stOptions)
+ return $ label' $+$ nest tabstop contents
+
+-- | Convert list of Pandoc block elements to RST.
+blockListToRST :: [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToRST blocks = mapM blockToRST blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to RST.
+inlineListToRST :: [Inline] -> State WriterState Doc
+inlineListToRST lst = mapM inlineToRST lst >>= return . hcat
+
+-- | Convert Pandoc inline element to RST.
+inlineToRST :: Inline -> State WriterState Doc
+inlineToRST (Emph lst) = do
+ contents <- inlineListToRST lst
+ return $ char '*' <> contents <> char '*'
+inlineToRST (Strong lst) = do
+ contents <- inlineListToRST lst
+ return $ text "**" <> contents <> text "**"
+inlineToRST (Strikeout lst) = do
+ contents <- inlineListToRST lst
+ return $ text "[STRIKEOUT:" <> contents <> char ']'
+inlineToRST (Superscript lst) = do
+ contents <- inlineListToRST lst
+ return $ text "\\ :sup:`" <> contents <> text "`\\ "
+inlineToRST (Subscript lst) = do
+ contents <- inlineListToRST lst
+ return $ text "\\ :sub:`" <> contents <> text "`\\ "
+inlineToRST (SmallCaps lst) = inlineListToRST lst
+inlineToRST (Quoted SingleQuote lst) = do
+ contents <- inlineListToRST lst
+ return $ char '\'' <> contents <> char '\''
+inlineToRST (Quoted DoubleQuote lst) = do
+ contents <- inlineListToRST lst
+ return $ char '"' <> contents <> char '"'
+inlineToRST (Cite _ lst) =
+ inlineListToRST lst
+inlineToRST EmDash = return $ text "--"
+inlineToRST EnDash = return $ char '-'
+inlineToRST Apostrophe = return $ char '\''
+inlineToRST Ellipses = return $ text "..."
+inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``"
+inlineToRST (Str str) = return $ text $ escapeString str
+inlineToRST (Math t str) = do
+ includes <- get >>= (return . stIncludes)
+ let rawMathRole = ".. role:: math(raw)\n" ++
+ " :format: html latex\n"
+ if not (rawMathRole `elem` includes)
+ then modify $ \st -> st { stIncludes = rawMathRole : includes }
+ else return ()
+ return $ if t == InlineMath
+ then text $ ":math:`$" ++ str ++ "$`"
+ else text $ ":math:`$$" ++ str ++ "$$`"
+inlineToRST (TeX _) = return empty
+inlineToRST (HtmlInline _) = return empty
+inlineToRST (LineBreak) = do
+ return $ empty -- there's no line break in RST
+inlineToRST Space = return $ char ' '
+inlineToRST (Link [Code str] (src, _)) | src == str ||
+ src == "mailto:" ++ str = do
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ return $ text srcSuffix
+inlineToRST (Link txt (src, tit)) = do
+ useReferenceLinks <- get >>= (return . writerReferenceLinks . stOptions)
+ linktext <- inlineListToRST $ normalizeSpaces txt
+ if useReferenceLinks
+ then do refs <- get >>= (return . stLinks)
+ let refs' = if (txt, (src, tit)) `elem` refs
+ then refs
+ else (txt, (src, tit)):refs
+ modify $ \st -> st { stLinks = refs' }
+ return $ char '`' <> linktext <> text "`_"
+ else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_"
+inlineToRST (Image alternate (source, tit)) = do
+ pics <- get >>= (return . stImages)
+ let labelsUsed = map fst pics
+ let txt = if null alternate || alternate == [Str ""] ||
+ alternate `elem` labelsUsed
+ then [Str $ "image" ++ show (length pics)]
+ else alternate
+ let pics' = if (txt, (source, tit)) `elem` pics
+ then pics
+ else (txt, (source, tit)):pics
+ modify $ \st -> st { stImages = pics' }
+ label <- inlineListToRST txt
+ return $ char '|' <> label <> char '|'
+inlineToRST (Note contents) = do
+ -- add to notes in state
+ notes <- get >>= (return . stNotes)
+ modify $ \st -> st { stNotes = contents:notes }
+ let ref = show $ (length notes) + 1
+ return $ text " [" <> text ref <> text "]_"
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
new file mode 100644
index 000000000..fc6cd1bf0
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -0,0 +1,291 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.RTF
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to RTF (rich text format).
+-}
+module Text.Pandoc.Writers.RTF ( writeRTF ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.TeXMath
+import Data.List ( isSuffixOf, intercalate )
+import Data.Char ( ord, isDigit )
+
+-- | Convert Pandoc to a string in rich text format.
+writeRTF :: WriterOptions -> Pandoc -> String
+writeRTF options (Pandoc meta blocks) =
+ let head' = if writerStandalone options
+ then rtfHeader (writerHeader options) meta
+ else ""
+ toc = if writerTableOfContents options
+ then tableOfContents $ filter isHeaderBlock blocks
+ else ""
+ foot = if writerStandalone options then "\n}\n" else ""
+ body = writerIncludeBefore options ++
+ concatMap (blockToRTF 0 AlignDefault) blocks ++
+ writerIncludeAfter options
+ in head' ++ toc ++ body ++ foot
+
+-- | Construct table of contents from list of header blocks.
+tableOfContents :: [Block] -> String
+tableOfContents headers =
+ let contentsTree = hierarchicalize headers
+ in concatMap (blockToRTF 0 AlignDefault) $
+ [Header 1 [Str "Contents"],
+ BulletList (map elementToListItem contentsTree)]
+
+elementToListItem :: Element -> [Block]
+elementToListItem (Blk _) = []
+elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
+ if null subsecs
+ then []
+ else [BulletList (map elementToListItem subsecs)]
+
+-- | Convert unicode characters (> 127) into rich text format representation.
+handleUnicode :: String -> String
+handleUnicode [] = []
+handleUnicode (c:cs) =
+ if ord c > 127
+ then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
+ else c:(handleUnicode cs)
+
+-- | Escape special characters.
+escapeSpecial :: String -> String
+escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}"))
+
+-- | Escape strings as needed for rich text format.
+stringToRTF :: String -> String
+stringToRTF = handleUnicode . escapeSpecial
+
+-- | Escape things as needed for code block in RTF.
+codeStringToRTF :: String -> String
+codeStringToRTF str = intercalate "\\line\n" $ lines (stringToRTF str)
+
+-- | Make a paragraph with first-line indent, block indent, and space after.
+rtfParSpaced :: Int -- ^ space after (in twips)
+ -> Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfParSpaced spaceAfter indent firstLineIndent alignment content =
+ let alignString = case alignment of
+ AlignLeft -> "\\ql "
+ AlignRight -> "\\qr "
+ AlignCenter -> "\\qc "
+ AlignDefault -> "\\ql "
+ in "{\\pard " ++ alignString ++
+ "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
+ " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
+
+-- | Default paragraph.
+rtfPar :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfPar = rtfParSpaced 180
+
+-- | Compact paragraph (e.g. for compact list items).
+rtfCompact :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfCompact = rtfParSpaced 0
+
+-- number of twips to indent
+indentIncrement :: Int
+indentIncrement = 720
+
+listIncrement :: Int
+listIncrement = 360
+
+-- | Returns appropriate bullet list marker for indent level.
+bulletMarker :: Int -> String
+bulletMarker indent = case indent `mod` 720 of
+ 0 -> "\\bullet "
+ _ -> "\\endash "
+
+-- | Returns appropriate (list of) ordered list markers for indent level.
+orderedMarkers :: Int -> ListAttributes -> [String]
+orderedMarkers indent (start, style, delim) =
+ if style == DefaultStyle && delim == DefaultDelim
+ then case indent `mod` 720 of
+ 0 -> orderedListMarkers (start, Decimal, Period)
+ _ -> orderedListMarkers (start, LowerAlpha, Period)
+ else orderedListMarkers (start, style, delim)
+
+-- | Returns RTF header.
+rtfHeader :: String -- ^ header text
+ -> Meta -- ^ bibliographic information
+ -> String
+rtfHeader headerText (Meta title authors date) =
+ let titletext = if null title
+ then ""
+ else rtfPar 0 0 AlignCenter $
+ "\\b \\fs36 " ++ inlineListToRTF title
+ authorstext = if null authors
+ then ""
+ else rtfPar 0 0 AlignCenter (" " ++ (intercalate "\\" $
+ map stringToRTF authors))
+ datetext = if date == ""
+ then ""
+ else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
+ let spacer = if null (titletext ++ authorstext ++ datetext)
+ then ""
+ else rtfPar 0 0 AlignDefault "" in
+ headerText ++ titletext ++ authorstext ++ datetext ++ spacer
+
+-- | Convert Pandoc block element to RTF.
+blockToRTF :: Int -- ^ indent level
+ -> Alignment -- ^ alignment
+ -> Block -- ^ block to convert
+ -> String
+blockToRTF _ _ Null = ""
+blockToRTF indent alignment (Plain lst) =
+ rtfCompact indent 0 alignment $ inlineListToRTF lst
+blockToRTF indent alignment (Para lst) =
+ rtfPar indent 0 alignment $ inlineListToRTF lst
+blockToRTF indent alignment (BlockQuote lst) =
+ concatMap (blockToRTF (indent + indentIncrement) alignment) lst
+blockToRTF indent _ (CodeBlock _ str) =
+ rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
+blockToRTF _ _ (RawHtml _) = ""
+blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
+ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
+blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
+ zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
+blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
+ concatMap (definitionListItemToRTF alignment indent) lst
+blockToRTF indent _ HorizontalRule =
+ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
+blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $
+ "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
+blockToRTF indent alignment (Table caption aligns sizes headers rows) =
+ tableRowToRTF True indent aligns sizes headers ++
+ concatMap (tableRowToRTF False indent aligns sizes) rows ++
+ rtfPar indent 0 alignment (inlineListToRTF caption)
+
+tableRowToRTF :: Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> String
+tableRowToRTF header indent aligns sizes cols =
+ let columns = concat $ zipWith (tableItemToRTF indent) aligns cols
+ totalTwips = 6 * 1440 -- 6 inches
+ rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
+ (0 :: Integer) sizes
+ cellDefs = map (\edge -> (if header
+ then "\\clbrdrb\\brdrs"
+ else "") ++ "\\cellx" ++ show edge)
+ rightEdges
+ start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
+ "\\trkeep\\intbl\n{\n"
+ end = "}\n\\intbl\\row}\n"
+ in start ++ columns ++ end
+
+tableItemToRTF :: Int -> Alignment -> [Block] -> String
+tableItemToRTF indent alignment item =
+ let contents = concatMap (blockToRTF indent alignment) item
+ in "{\\intbl " ++ contents ++ "\\cell}\n"
+
+-- | Ensure that there's the same amount of space after compact
+-- lists as after regular lists.
+spaceAtEnd :: String -> String
+spaceAtEnd str =
+ if isSuffixOf "\\par}\n" str
+ then (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
+ else str
+
+-- | Convert list item (list of blocks) to RTF.
+listItemToRTF :: Alignment -- ^ alignment
+ -> Int -- ^ indent level
+ -> String -- ^ list start marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> [Char]
+listItemToRTF alignment indent marker [] =
+ rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
+ (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
+listItemToRTF alignment indent marker list =
+ let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list
+ listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++
+ show listIncrement ++ "\\tab"
+ insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d =
+ listMarker ++ dropWhile isDigit xs
+ insertListMarker ('\\':'f':'i':d:xs) | isDigit d =
+ listMarker ++ dropWhile isDigit xs
+ insertListMarker (x:xs) =
+ x : insertListMarker xs
+ insertListMarker [] = []
+ -- insert the list marker into the (processed) first block
+ in insertListMarker first ++ concat rest
+
+-- | Convert definition list item (label, list of blocks) to RTF.
+definitionListItemToRTF :: Alignment -- ^ alignment
+ -> Int -- ^ indent level
+ -> ([Inline],[Block]) -- ^ list item (list of blocks)
+ -> [Char]
+definitionListItemToRTF alignment indent (label, items) =
+ let labelText = blockToRTF indent alignment (Plain label)
+ itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items
+ in labelText ++ itemsText
+
+-- | Convert list of inline items to RTF.
+inlineListToRTF :: [Inline] -- ^ list of inlines to convert
+ -> String
+inlineListToRTF lst = concatMap inlineToRTF lst
+
+-- | Convert inline item to RTF.
+inlineToRTF :: Inline -- ^ inline to convert
+ -> String
+inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}"
+inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}"
+inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}"
+inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}"
+inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}"
+inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}"
+inlineToRTF (Quoted SingleQuote lst) =
+ "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
+inlineToRTF (Quoted DoubleQuote lst) =
+ "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
+inlineToRTF Apostrophe = "\\u8217'"
+inlineToRTF Ellipses = "\\u8230?"
+inlineToRTF EmDash = "\\u8212-"
+inlineToRTF EnDash = "\\u8211-"
+inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
+inlineToRTF (Str str) = stringToRTF str
+inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str
+inlineToRTF (Cite _ lst) = inlineListToRTF lst
+inlineToRTF (TeX _) = ""
+inlineToRTF (HtmlInline _) = ""
+inlineToRTF (LineBreak) = "\\line "
+inlineToRTF Space = " "
+inlineToRTF (Link text (src, _)) =
+ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
+ "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n"
+inlineToRTF (Image _ (source, _)) =
+ "{\\cf1 [image: " ++ source ++ "]\\cf0}"
+inlineToRTF (Note contents) =
+ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
+ (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"
diff --git a/src/Text/Pandoc/Writers/S5.hs b/src/Text/Pandoc/Writers/S5.hs
new file mode 100644
index 000000000..6f528503a
--- /dev/null
+++ b/src/Text/Pandoc/Writers/S5.hs
@@ -0,0 +1,157 @@
+{-# LANGUAGE CPP, TemplateHaskell #-}
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.S5
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definitions for creation of S5 powerpoint-like HTML.
+(See <http://meyerweb.com/eric/tools/s5/>.)
+-}
+module Text.Pandoc.Writers.S5 (
+ -- * Strings
+ s5Meta,
+ s5Javascript,
+ s5CSS,
+ s5Links,
+ -- * Functions
+ writeS5,
+ writeS5String,
+ insertS5Structure
+ ) where
+import Text.Pandoc.Shared ( WriterOptions )
+import Text.Pandoc.TH ( contentsOf )
+import Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString )
+import Text.Pandoc.Definition
+import Text.XHtml.Strict
+import System.FilePath ( (</>) )
+import Data.List ( intercalate )
+
+s5Meta :: String
+s5Meta = "<!-- configuration parameters -->\n<meta name=\"defaultView\" content=\"slideshow\" />\n<meta name=\"controlVis\" content=\"hidden\" />\n"
+
+s5Javascript :: String
+#ifndef __HADDOCK__
+s5Javascript = "<script type=\"text/javascript\">\n" ++
+ $(contentsOf $ "data" </> "ui" </> "default" </> "slides.js.comment") ++
+ $(contentsOf $ "data" </> "ui" </> "default" </> "slides.js.packed") ++ "</script>\n"
+#endif
+
+s5CoreCSS :: String
+#ifndef __HADDOCK__
+s5CoreCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "s5-core.css")
+#endif
+
+s5FramingCSS :: String
+#ifndef __HADDOCK__
+s5FramingCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "framing.css")
+#endif
+
+s5PrettyCSS :: String
+#ifndef __HADDOCK__
+s5PrettyCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "pretty.css")
+#endif
+
+s5OperaCSS :: String
+#ifndef __HADDOCK__
+s5OperaCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "opera.css")
+#endif
+
+s5OutlineCSS :: String
+#ifndef __HADDOCK__
+s5OutlineCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "outline.css")
+#endif
+
+s5PrintCSS :: String
+#ifndef __HADDOCK__
+s5PrintCSS = $(contentsOf $ "data" </> "ui" </> "default" </> "print.css")
+#endif
+
+s5CSS :: String
+s5CSS = "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n"
+
+s5Links :: String
+s5Links = "<!-- style sheet links -->\n<link rel=\"stylesheet\" href=\"ui/default/slides.css\" type=\"text/css\" media=\"projection\" id=\"slideProj\" />\n<link rel=\"stylesheet\" href=\"ui/default/outline.css\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" />\n<link rel=\"stylesheet\" href=\"ui/default/print.css\" type=\"text/css\" media=\"print\" id=\"slidePrint\" />\n<link rel=\"stylesheet\" href=\"ui/default/opera.css\" type=\"text/css\" media=\"projection\" id=\"operaFix\" />\n<!-- S5 JS -->\n<script src=\"ui/default/slides.js\" type=\"text/javascript\"></script>\n"
+
+-- | Converts Pandoc document to an S5 HTML presentation (Html structure).
+writeS5 :: WriterOptions -> Pandoc -> Html
+writeS5 options = (writeHtml options) . insertS5Structure
+
+-- | Converts Pandoc document to an S5 HTML presentation (string).
+writeS5String :: WriterOptions -> Pandoc -> String
+writeS5String options = (writeHtmlString options) . insertS5Structure
+
+-- | Inserts HTML needed for an S5 presentation (e.g. around slides).
+layoutDiv :: [Inline] -- ^ Title of document (for header or footer)
+ -> String -- ^ Date of document (for header or footer)
+ -> [Block] -- ^ List of block elements returned
+layoutDiv title' date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 [Str date]), (Header 2 title'), (RawHtml "</div>\n</div>\n")]
+
+presentationStart :: Block
+presentationStart = RawHtml "<div class=\"presentation\">\n\n"
+
+presentationEnd :: Block
+presentationEnd = RawHtml "</div>\n"
+
+slideStart :: Block
+slideStart = RawHtml "<div class=\"slide\">\n"
+
+slideEnd :: Block
+slideEnd = RawHtml "</div>\n"
+
+-- | Returns 'True' if block is a Header 1.
+isH1 :: Block -> Bool
+isH1 (Header 1 _) = True
+isH1 _ = False
+
+-- | Insert HTML around sections to make individual slides.
+insertSlides :: Bool -> [Block] -> [Block]
+insertSlides beginning blocks =
+ let (beforeHead, rest) = break isH1 blocks in
+ if (null rest) then
+ if beginning then
+ beforeHead
+ else
+ beforeHead ++ [slideEnd]
+ else
+ if beginning then
+ beforeHead ++
+ slideStart:(head rest):(insertSlides False (tail rest))
+ else
+ beforeHead ++
+ slideEnd:slideStart:(head rest):(insertSlides False (tail rest))
+
+-- | Insert blocks into 'Pandoc' for slide structure.
+insertS5Structure :: Pandoc -> Pandoc
+insertS5Structure (Pandoc meta' []) = Pandoc meta' []
+insertS5Structure (Pandoc (Meta title' authors date) blocks) =
+ let slides = insertSlides True blocks
+ firstSlide = if not (null title')
+ then [slideStart, (Header 1 title'),
+ (Header 3 [Str (intercalate ", " authors)]),
+ (Header 4 [Str date]), slideEnd]
+ else []
+ newBlocks = (layoutDiv title' date) ++ presentationStart:firstSlide ++
+ slides ++ [presentationEnd]
+ in Pandoc (Meta title' authors date) newBlocks
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
new file mode 100644
index 000000000..305a1a8d0
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -0,0 +1,474 @@
+{-
+Copyright (C) 2008 John MacFarlane and Peter Wang
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Texinfo
+ Copyright : Copyright (C) 2008 John MacFarlane and Peter Wang
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into Texinfo.
+-}
+module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.TeXMath
+import Text.Printf ( printf )
+import Data.List ( isSuffixOf )
+import Data.Char ( chr, ord )
+import qualified Data.Set as S
+import Control.Monad.State
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+data WriterState =
+ WriterState { stIncludes :: S.Set String -- strings to include in header
+ }
+
+{- TODO:
+ - internal cross references a la HTML
+ - generated .texi files don't work when run through texi2dvi
+ -}
+
+-- | Add line to header.
+addToHeader :: String -> State WriterState ()
+addToHeader str = do
+ st <- get
+ let includes = stIncludes st
+ put st {stIncludes = S.insert str includes}
+
+-- | Convert Pandoc to Texinfo.
+writeTexinfo :: WriterOptions -> Pandoc -> String
+writeTexinfo options document =
+ render $ evalState (pandocToTexinfo options $ wrapTop document) $
+ WriterState { stIncludes = S.empty }
+
+-- | Add a "Top" node around the document, needed by Texinfo.
+wrapTop :: Pandoc -> Pandoc
+wrapTop (Pandoc (Meta title authors date) blocks) =
+ Pandoc (Meta title authors date) (Header 0 title : blocks)
+
+pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToTexinfo options (Pandoc meta blocks) = do
+ main <- blockListToTexinfo blocks
+ head' <- if writerStandalone options
+ then texinfoHeader options meta
+ else return empty
+ let before = if null (writerIncludeBefore options)
+ then empty
+ else text (writerIncludeBefore options)
+ let after = if null (writerIncludeAfter options)
+ then empty
+ else text (writerIncludeAfter options)
+ let body = before $$ main $$ after
+ -- XXX toc untested
+ let toc = if writerTableOfContents options
+ then text "@contents"
+ else empty
+ let foot = if writerStandalone options
+ then text "@bye"
+ else empty
+ return $ head' $$ toc $$ body $$ foot
+
+-- | Insert bibliographic information into Texinfo header.
+texinfoHeader :: WriterOptions -- ^ Options, including Texinfo header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState Doc
+texinfoHeader options (Meta title authors date) = do
+ titletext <- if null title
+ then return empty
+ else do
+ t <- inlineListToTexinfo title
+ return $ text "@title " <> t
+ headerIncludes <- get >>= return . S.toList . stIncludes
+ let extras = text $ unlines headerIncludes
+ let authorstext = map makeAuthor authors
+ let datetext = if date == ""
+ then empty
+ else text $ stringToTexinfo date
+
+ let baseHeader = text $ writerHeader options
+ let header = baseHeader $$ extras
+ return $ text "\\input texinfo" $$
+ header $$
+ text "@ifnottex" $$
+ text "@paragraphindent 0" $$
+ text "@end ifnottex" $$
+ text "@titlepage" $$
+ titletext $$ vcat authorstext $$
+ datetext $$
+ text "@end titlepage"
+
+makeAuthor :: String -> Doc
+makeAuthor author = text $ "@author " ++ (stringToTexinfo author)
+
+-- | Escape things as needed for Texinfo.
+stringToTexinfo :: String -> String
+stringToTexinfo = escapeStringUsing texinfoEscapes
+ where texinfoEscapes = [ ('{', "@{")
+ , ('}', "@}")
+ , ('@', "@@")
+ , (',', "@comma{}") -- only needed in argument lists
+ , ('\160', "@ ")
+ ]
+
+-- | Puts contents into Texinfo command.
+inCmd :: String -> Doc -> Doc
+inCmd cmd contents = char '@' <> text cmd <> braces contents
+
+-- | Convert Pandoc block element to Texinfo.
+blockToTexinfo :: Block -- ^ Block to convert
+ -> State WriterState Doc
+
+blockToTexinfo Null = return empty
+
+blockToTexinfo (Plain lst) =
+ inlineListToTexinfo lst
+
+blockToTexinfo (Para lst) =
+ inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo
+
+blockToTexinfo (BlockQuote lst) = do
+ contents <- blockListToTexinfo lst
+ return $ text "@quotation" $$
+ contents $$
+ text "@end quotation"
+
+blockToTexinfo (CodeBlock _ str) = do
+ return $ text "@verbatim" $$
+ vcat (map text (lines str)) $$
+ text "@end verbatim\n"
+
+blockToTexinfo (RawHtml _) = return empty
+
+blockToTexinfo (BulletList lst) = do
+ items <- mapM listItemToTexinfo lst
+ return $ text "@itemize" $$
+ vcat items $$
+ text "@end itemize\n"
+
+blockToTexinfo (OrderedList (start, numstyle, _) lst) = do
+ items <- mapM listItemToTexinfo lst
+ return $ text "@enumerate " <> exemplar $$
+ vcat items $$
+ text "@end enumerate\n"
+ where
+ exemplar = case numstyle of
+ DefaultStyle -> decimal
+ Decimal -> decimal
+ UpperRoman -> decimal -- Roman numerals not supported
+ LowerRoman -> decimal
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ decimal = if start == 1
+ then empty
+ else text (show start)
+ upperAlpha = text [chr $ ord 'A' + start - 1]
+ lowerAlpha = text [chr $ ord 'a' + start - 1]
+
+blockToTexinfo (DefinitionList lst) = do
+ items <- mapM defListItemToTexinfo lst
+ return $ text "@table @asis" $$
+ vcat items $$
+ text "@end table\n"
+
+blockToTexinfo HorizontalRule =
+ -- XXX can't get the equivalent from LaTeX.hs to work
+ return $ text "@iftex" $$
+ text "@bigskip@hrule@bigskip" $$
+ text "@end iftex" $$
+ text "@ifnottex" $$
+ text (take 72 $ repeat '-') $$
+ text "@end ifnottex"
+
+blockToTexinfo (Header 0 lst) = do
+ txt <- if null lst
+ then return $ text "Top"
+ else inlineListToTexinfo lst
+ return $ text "@node Top" $$
+ text "@top " <> txt <> char '\n'
+
+blockToTexinfo (Header level lst) = do
+ node <- inlineListForNode lst
+ txt <- inlineListToTexinfo lst
+ return $ if (level > 0) && (level <= 4)
+ then text "\n@node " <> node <> char '\n' <>
+ text (seccmd level) <> txt
+ else txt
+ where
+ seccmd 1 = "@chapter "
+ seccmd 2 = "@section "
+ seccmd 3 = "@subsection "
+ seccmd 4 = "@subsubsection "
+ seccmd _ = error "illegal seccmd level"
+
+blockToTexinfo (Table caption aligns widths heads rows) = do
+ headers <- tableHeadToTexinfo aligns heads
+ captionText <- inlineListToTexinfo caption
+ rowsText <- mapM (tableRowToTexinfo aligns) rows
+ let colWidths = map (printf "%.2f ") widths
+ let colDescriptors = concat colWidths
+ let tableBody = text ("@multitable @columnfractions " ++ colDescriptors) $$
+ headers $$
+ vcat rowsText $$
+ text "@end multitable"
+ return $ if isEmpty captionText
+ then tableBody <> char '\n'
+ else text "@float" $$
+ tableBody $$
+ inCmd "caption" captionText $$
+ text "@end float"
+
+tableHeadToTexinfo :: [Alignment]
+ -> [[Block]]
+ -> State WriterState Doc
+tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem "
+
+tableRowToTexinfo :: [Alignment]
+ -> [[Block]]
+ -> State WriterState Doc
+tableRowToTexinfo = tableAnyRowToTexinfo "@item "
+
+tableAnyRowToTexinfo :: String
+ -> [Alignment]
+ -> [[Block]]
+ -> State WriterState Doc
+tableAnyRowToTexinfo itemtype aligns cols =
+ zipWithM alignedBlock aligns cols >>=
+ return . (text itemtype $$) . foldl (\row item -> row $$
+ (if isEmpty row then empty else text " @tab ") <> item) empty
+
+alignedBlock :: Alignment
+ -> [Block]
+ -> State WriterState Doc
+-- XXX @flushleft and @flushright text won't get word wrapped. Since word
+-- wrapping is more important than alignment, we ignore the alignment.
+alignedBlock _ = blockListToTexinfo
+{-
+alignedBlock AlignLeft col = do
+ b <- blockListToTexinfo col
+ return $ text "@flushleft" $$ b $$ text "@end flushleft"
+alignedBlock AlignRight col = do
+ b <- blockListToTexinfo col
+ return $ text "@flushright" $$ b $$ text "@end flushright"
+alignedBlock _ col = blockListToTexinfo col
+-}
+
+-- | Convert Pandoc block elements to Texinfo.
+blockListToTexinfo :: [Block]
+ -> State WriterState Doc
+blockListToTexinfo [] = return $ empty
+blockListToTexinfo (x:xs) = do
+ x' <- blockToTexinfo x
+ case x of
+ Header level _ -> do
+ -- We need need to insert a menu for this node.
+ let (before, after) = break isHeader xs
+ before' <- blockListToTexinfo before
+ let menu = if level < 4
+ then collectNodes (level + 1) after
+ else []
+ lines' <- mapM makeMenuLine menu
+ let menu' = if null lines'
+ then empty
+ else text "@menu" $$
+ vcat lines' $$
+ text "@end menu"
+ after' <- blockListToTexinfo after
+ return $ x' $$ before' $$ menu' $$ after'
+ Para _ -> do
+ xs' <- blockListToTexinfo xs
+ case xs of
+ ((CodeBlock _ _):_) -> return $ x' $$ xs'
+ _ -> return $ x' $$ text "" $$ xs'
+ _ -> do
+ xs' <- blockListToTexinfo xs
+ return $ x' $$ xs'
+
+isHeader :: Block -> Bool
+isHeader (Header _ _) = True
+isHeader _ = False
+
+collectNodes :: Int -> [Block] -> [Block]
+collectNodes _ [] = []
+collectNodes level (x:xs) =
+ case x of
+ (Header hl _) ->
+ if hl < level
+ then []
+ else if hl == level
+ then x : collectNodes level xs
+ else collectNodes level xs
+ _ ->
+ collectNodes level xs
+
+makeMenuLine :: Block
+ -> State WriterState Doc
+makeMenuLine (Header _ lst) = do
+ txt <- inlineListForNode lst
+ return $ text "* " <> txt <> text "::"
+makeMenuLine _ = error "makeMenuLine called with non-Header block"
+
+listItemToTexinfo :: [Block]
+ -> State WriterState Doc
+listItemToTexinfo lst = blockListToTexinfo lst >>=
+ return . (text "@item" $$)
+
+defListItemToTexinfo :: ([Inline], [Block])
+ -> State WriterState Doc
+defListItemToTexinfo (term, def) = do
+ term' <- inlineListToTexinfo term
+ def' <- blockListToTexinfo def
+ return $ text "@item " <> term' <> text "\n" $$ def'
+
+-- | Convert list of inline elements to Texinfo.
+inlineListToTexinfo :: [Inline] -- ^ Inlines to convert
+ -> State WriterState Doc
+inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat
+
+-- | Convert list of inline elements to Texinfo acceptable for a node name.
+inlineListForNode :: [Inline] -- ^ Inlines to convert
+ -> State WriterState Doc
+inlineListForNode lst = mapM inlineForNode lst >>= return . hcat
+
+inlineForNode :: Inline -> State WriterState Doc
+inlineForNode (Str str) = return $ text $ filter (not.disallowedInNode) str
+inlineForNode (Emph lst) = inlineListForNode lst
+inlineForNode (Strong lst) = inlineListForNode lst
+inlineForNode (Strikeout lst) = inlineListForNode lst
+inlineForNode (Superscript lst) = inlineListForNode lst
+inlineForNode (Subscript lst) = inlineListForNode lst
+inlineForNode (SmallCaps lst) = inlineListForNode lst
+inlineForNode (Quoted _ lst) = inlineListForNode lst
+inlineForNode (Cite _ lst) = inlineListForNode lst
+inlineForNode (Code str) = inlineForNode (Str str)
+inlineForNode Space = return $ char ' '
+inlineForNode EmDash = return $ text "---"
+inlineForNode EnDash = return $ text "--"
+inlineForNode Apostrophe = return $ char '\''
+inlineForNode Ellipses = return $ text "..."
+inlineForNode LineBreak = return empty
+inlineForNode (Math _ str) = inlineListForNode $ readTeXMath str
+inlineForNode (TeX _) = return empty
+inlineForNode (HtmlInline _) = return empty
+inlineForNode (Link lst _) = inlineListForNode lst
+inlineForNode (Image lst _) = inlineListForNode lst
+inlineForNode (Note _) = return empty
+
+-- periods, commas, colons, and parentheses are disallowed in node names
+disallowedInNode :: Char -> Bool
+disallowedInNode c = c `elem` ".,:()"
+
+-- | Convert inline element to Texinfo
+inlineToTexinfo :: Inline -- ^ Inline to convert
+ -> State WriterState Doc
+
+inlineToTexinfo (Emph lst) =
+ inlineListToTexinfo lst >>= return . inCmd "emph"
+
+inlineToTexinfo (Strong lst) =
+ inlineListToTexinfo lst >>= return . inCmd "strong"
+
+inlineToTexinfo (Strikeout lst) = do
+ addToHeader $ "@macro textstrikeout{text}\n" ++
+ "~~\\text\\~~\n" ++
+ "@end macro\n"
+ contents <- inlineListToTexinfo lst
+ return $ text "@textstrikeout{" <> contents <> text "}"
+
+inlineToTexinfo (Superscript lst) = do
+ addToHeader $ "@macro textsuperscript{text}\n" ++
+ "@iftex\n" ++
+ "@textsuperscript{\\text\\}\n" ++
+ "@end iftex\n" ++
+ "@ifnottex\n" ++
+ "^@{\\text\\@}\n" ++
+ "@end ifnottex\n" ++
+ "@end macro\n"
+ contents <- inlineListToTexinfo lst
+ return $ text "@textsuperscript{" <> contents <> char '}'
+
+inlineToTexinfo (Subscript lst) = do
+ addToHeader $ "@macro textsubscript{text}\n" ++
+ "@iftex\n" ++
+ "@textsubscript{\\text\\}\n" ++
+ "@end iftex\n" ++
+ "@ifnottex\n" ++
+ "_@{\\text\\@}\n" ++
+ "@end ifnottex\n" ++
+ "@end macro\n"
+ contents <- inlineListToTexinfo lst
+ return $ text "@textsubscript{" <> contents <> char '}'
+
+inlineToTexinfo (SmallCaps lst) =
+ inlineListToTexinfo lst >>= return . inCmd "sc"
+
+inlineToTexinfo (Code str) = do
+ return $ text $ "@code{" ++ stringToTexinfo str ++ "}"
+
+inlineToTexinfo (Quoted SingleQuote lst) = do
+ contents <- inlineListToTexinfo lst
+ return $ char '`' <> contents <> char '\''
+
+inlineToTexinfo (Quoted DoubleQuote lst) = do
+ contents <- inlineListToTexinfo lst
+ return $ text "``" <> contents <> text "''"
+
+inlineToTexinfo (Cite _ lst) =
+ inlineListToTexinfo lst
+inlineToTexinfo Apostrophe = return $ char '\''
+inlineToTexinfo EmDash = return $ text "---"
+inlineToTexinfo EnDash = return $ text "--"
+inlineToTexinfo Ellipses = return $ text "@dots{}"
+inlineToTexinfo (Str str) = return $ text (stringToTexinfo str)
+inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str
+inlineToTexinfo (TeX str) = return $ text "@tex" $$ text str $$ text "@end tex"
+inlineToTexinfo (HtmlInline _) = return empty
+inlineToTexinfo (LineBreak) = return $ text "@*"
+inlineToTexinfo Space = return $ char ' '
+
+inlineToTexinfo (Link txt (src, _)) = do
+ case txt of
+ [Code x] | x == src -> -- autolink
+ do return $ text $ "@url{" ++ x ++ "}"
+ _ -> do contents <- inlineListToTexinfo txt
+ let src1 = stringToTexinfo src
+ return $ text ("@uref{" ++ src1 ++ ",") <> contents <>
+ char '}'
+
+inlineToTexinfo (Image alternate (source, _)) = do
+ content <- inlineListToTexinfo alternate
+ return $ text ("@image{" ++ base ++ ",,,") <> content <> text "," <>
+ text (ext ++ "}")
+ where
+ (revext, revbase) = break (=='.') (reverse source)
+ ext = reverse revext
+ base = case revbase of
+ ('.' : rest) -> reverse rest
+ _ -> reverse revbase
+
+inlineToTexinfo (Note contents) = do
+ contents' <- blockListToTexinfo contents
+ let rawnote = stripTrailingNewlines $ render contents'
+ let optNewline = "@end verbatim" `isSuffixOf` rawnote
+ return $ text "@footnote{" <>
+ text rawnote <>
+ (if optNewline then char '\n' else empty) <>
+ char '}'
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
new file mode 100644
index 000000000..14e2eebbb
--- /dev/null
+++ b/src/Text/Pandoc/XML.hs
@@ -0,0 +1,88 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.XML
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for escaping and formatting XML.
+-}
+module Text.Pandoc.XML ( escapeCharForXML,
+ escapeStringForXML,
+ inTags,
+ selfClosingTag,
+ inTagsSimple,
+ inTagsIndented ) where
+import Text.PrettyPrint.HughesPJ
+
+-- | Escape one character as needed for XML.
+escapeCharForXML :: Char -> String
+escapeCharForXML x = case x of
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ '\160' -> "&#160;"
+ c -> [c]
+
+-- | True if the character needs to be escaped.
+needsEscaping :: Char -> Bool
+needsEscaping c = c `elem` "&<>\"\160"
+
+-- | Escape string as needed for XML. Entity references are not preserved.
+escapeStringForXML :: String -> String
+escapeStringForXML "" = ""
+escapeStringForXML str =
+ case break needsEscaping str of
+ (okay, "") -> okay
+ (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
+
+-- | Return a text object with a string of formatted XML attributes.
+attributeList :: [(String, String)] -> Doc
+attributeList = text . concatMap
+ (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++
+ escapeStringForXML b ++ "\"")
+
+-- | Put the supplied contents between start and end tags of tagType,
+-- with specified attributes and (if specified) indentation.
+inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
+inTags isIndented tagType attribs contents =
+ let openTag = char '<' <> text tagType <> attributeList attribs <>
+ char '>'
+ closeTag = text "</" <> text tagType <> char '>'
+ in if isIndented
+ then openTag $$ nest 2 contents $$ closeTag
+ else openTag <> contents <> closeTag
+
+-- | Return a self-closing tag of tagType with specified attributes
+selfClosingTag :: String -> [(String, String)] -> Doc
+selfClosingTag tagType attribs =
+ char '<' <> text tagType <> attributeList attribs <> text " />"
+
+-- | Put the supplied contents between start and end tags of tagType.
+inTagsSimple :: String -> Doc -> Doc
+inTagsSimple tagType = inTags False tagType []
+
+-- | Put the supplied contents in indented block btw start and end tags.
+inTagsIndented :: String -> Doc -> Doc
+inTagsIndented tagType = inTags True tagType []