From dc9c6450f3b16592d0ee865feafc17b670e4ad14 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Wed, 20 Dec 2006 06:50:14 +0000 Subject: + Added module data for haddock. + Reformatted code consistently. git-svn-id: https://pandoc.googlecode.com/svn/trunk@252 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Main.hs | 99 +++--- src/Text/Pandoc/Definition.hs | 34 +- src/Text/Pandoc/HtmlEntities.hs | 31 +- src/Text/Pandoc/Readers/HTML.hs | 85 ++--- src/Text/Pandoc/Readers/LaTeX.hs | 118 ++++--- src/Text/Pandoc/Readers/Markdown.hs | 601 +++++++++++++++++---------------- src/Text/Pandoc/Readers/RST.hs | 631 ++++++++++++++++++----------------- src/Text/Pandoc/Shared.hs | 267 +++++++++------ src/Text/Pandoc/UTF8.hs | 3 +- src/Text/Pandoc/Writers/HTML.hs | 311 +++++++++-------- src/Text/Pandoc/Writers/LaTeX.hs | 155 +++++---- src/Text/Pandoc/Writers/Markdown.hs | 148 ++++---- src/Text/Pandoc/Writers/RST.hs | 175 +++++----- src/Text/Pandoc/Writers/RTF.hs | 155 +++++---- src/Text/ParserCombinators/Pandoc.hs | 24 +- 15 files changed, 1571 insertions(+), 1266 deletions(-) (limited to 'src') diff --git a/src/Main.hs b/src/Main.hs index 0ca36f7ce..986ce9cf1 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,5 +1,15 @@ --- | Main Pandoc program. Parses command-line options and calls the --- appropriate readers and writers. +{- | + Module : Main + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : unstable + Portability : portable + +Parses command-line options and calls the appropriate readers and +writers. +-} module Main where import Text.Pandoc.UTF8 ( decodeUTF8, encodeUTF8 ) import Text.Pandoc.Readers.Markdown ( readMarkdown ) @@ -13,7 +23,8 @@ import Text.Pandoc.Writers.LaTeX ( writeLaTeX ) import Text.Pandoc.Readers.LaTeX ( readLaTeX ) import Text.Pandoc.Writers.RTF ( writeRTF ) import Text.Pandoc.Writers.Markdown ( writeMarkdown ) -import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader, defaultRTFHeader, defaultS5Header, defaultLaTeXHeader ) +import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader, + defaultRTFHeader, defaultS5Header, defaultLaTeXHeader ) import Text.Pandoc.Definition import Text.Pandoc.Shared import System ( exitWith, getArgs, getProgName ) @@ -58,23 +69,24 @@ writeDoc options = prettyPandoc -- | Data structure for command line options. data Opt = Opt - { optPreserveTabs :: Bool -- ^ If @False@, convert tabs to spaces - , optTabStop :: Int -- ^ Number of spaces per tab - , optStandalone :: Bool -- ^ If @True@, include header and footer - , optReader :: ParserState -> String -> Pandoc -- ^ Reader to use - , optWriter :: WriterOptions -> Pandoc -> String -- ^ Writer to use - , optParseRaw :: Bool -- ^ If @True@, parse unconvertable HTML and TeX - , optCSS :: String -- ^ CSS file to link to - , optIncludeInHeader :: String -- ^ File to include in header - , optIncludeBeforeBody :: String -- ^ File to include at beginning of body - , optIncludeAfterBody :: String -- ^ File to include at end of body - , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" - , optDefaultHeader :: String -- ^ Default header - , optTitlePrefix :: String -- ^ Optional prefix for HTML title - , optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX - , optIncremental :: Bool -- ^ If @True@, show lists incrementally in S5 - , optSmart :: Bool -- ^ If @True@, use smart quotes, dashes, ... - , optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML or S5 + { optPreserveTabs :: Bool -- ^ If @False@, convert tabs to spaces + , optTabStop :: Int -- ^ Number of spaces per tab + , optStandalone :: Bool -- ^ If @True@, include header, footer + , optReader :: ParserState -> String -> Pandoc -- ^ Read format + , optWriter :: WriterOptions -> Pandoc -> String -- ^ Write fmt + , optParseRaw :: Bool -- ^ If @True@, parse unconvertable + -- HTML and TeX + , optCSS :: String -- ^ CSS file to link to + , optIncludeInHeader :: String -- ^ File to include in header + , optIncludeBeforeBody :: String -- ^ File to include at top of body + , optIncludeAfterBody :: String -- ^ File to include at end of body + , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" + , optDefaultHeader :: String -- ^ Default header + , optTitlePrefix :: String -- ^ Optional prefix for HTML title + , optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX + , optIncremental :: Bool -- ^ If @True@, incremental lists in S5 + , optSmart :: Bool -- ^ If @True@, use smart typography + , optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML } -- | Defaults for command-line options. @@ -121,18 +133,19 @@ options = , Option "fr" ["from","read"] (ReqArg (\arg opt -> case (lookup (map toLower arg) readers) of - Just reader -> return opt { optReader = reader } - Nothing -> error ("Unknown reader: " ++ arg) ) + Just reader -> return opt { optReader = reader } + Nothing -> error ("Unknown reader: " ++ arg) ) "FORMAT") - ("Source format (" ++ (concatMap (\(name, fn) -> " " ++ name) readers) ++ " )") + ("Source format (" ++ + (concatMap (\(name, fn) -> " " ++ name) readers) ++ " )") , Option "tw" ["to","write"] (ReqArg (\arg opt -> case (lookup (map toLower arg) writers) of - Just (writer, defaultHeader) -> - return opt { optWriter = writer, - optDefaultHeader = defaultHeader } - Nothing -> error ("Unknown writer: " ++ arg) ) + Just (writer, defaultHeader) -> + return opt { optWriter = writer, + optDefaultHeader = defaultHeader } + Nothing -> error ("Unknown writer: " ++ arg) ) "FORMAT") ("Output format (" ++ (concatMap (\(name, fn) -> " " ++ name) writers) ++ " )") @@ -164,7 +177,8 @@ options = , Option "m" ["asciimathml"] (NoArg - (\opt -> return opt { optASCIIMathML = True, optStandalone = True })) + (\opt -> return opt { optASCIIMathML = True, + optStandalone = True })) "Use ASCIIMathML script in html output" , Option "i" ["incremental"] @@ -179,7 +193,8 @@ options = , Option "c" ["css"] (ReqArg - (\arg opt -> return opt { optCSS = arg, optStandalone = True }) + (\arg opt -> return opt { optCSS = arg, + optStandalone = True }) "CSS") "Link to CSS style sheet" @@ -187,7 +202,8 @@ options = (ReqArg (\arg opt -> do text <- readFile arg - return opt { optIncludeInHeader = text, optStandalone = True }) + return opt { optIncludeInHeader = text, + optStandalone = True }) "FILENAME") "File to include at end of header (implies -s)" @@ -211,13 +227,15 @@ options = (ReqArg (\arg opt -> do text <- readFile arg - return opt { optCustomHeader = text, optStandalone = True }) + return opt { optCustomHeader = text, + optStandalone = True }) "FILENAME") "File to use for custom header (implies -s)" , Option "T" ["title-prefix"] (ReqArg - (\arg opt -> return opt { optTitlePrefix = arg, optStandalone = True }) + (\arg opt -> return opt { optTitlePrefix = arg, + optStandalone = True }) "STRING") "String to prefix to HTML window title" @@ -225,8 +243,8 @@ options = (ReqArg (\arg opt -> do let header = case (lookup arg writers) of - Just (writer, head) -> head - Nothing -> error ("Unknown reader: " ++ arg) + Just (writer, head) -> head + Nothing -> error ("Unknown reader: " ++ arg) hPutStrLn stdout header exitWith ExitSuccess) "FORMAT") @@ -267,13 +285,14 @@ main = do let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, stateStandalone = standalone } - let csslink = if (css == "") then - "" - else - "\n" + let csslink = if (css == "") + then "" + else "\n" let asciiMathMLLink = if asciiMathML then asciiMathMLScript else "" - let header = (if (customHeader == "DEFAULT") then defaultHeader else customHeader) ++ + let header = (if (customHeader == "DEFAULT") + then defaultHeader + else customHeader) ++ csslink ++ asciiMathMLLink ++ includeHeader let writerOptions = WriterOptions { writerStandalone = standalone, writerHeader = header, diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 08ff3905e..b2655ffa0 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -1,5 +1,15 @@ --- | Definition of 'Pandoc' data structure for format-neutral representation --- of documents. +{- | + Module : Text.Pandoc.Definition + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : unstable + Portability : portable + +Definition of 'Pandoc' data structure for format-neutral representation +of documents. +-} module Text.Pandoc.Definition where data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show) @@ -17,21 +27,24 @@ data Block | Blank -- ^ A blank line | Null -- ^ Nothing | Para [Inline] -- ^ Paragraph - | Key [Inline] Target -- ^ Reference key: name (list of inlines) and 'Target' + | Key [Inline] Target -- ^ Reference key: name (inlines) and 'Target' | CodeBlock String -- ^ Code block (literal) | RawHtml String -- ^ Raw HTML block (literal) | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList [[Block]] -- ^ Ordered list (list of items, each a list of blocks) - | BulletList [[Block]] -- ^ Bullet list (list of items, each a list of blocks) - | Header Int [Inline] -- ^ Header - level (integer) and text (list of inlines) + | OrderedList [[Block]] -- ^ Ordered list (list of items, each + -- a list of blocks) + | BulletList [[Block]] -- ^ Bullet list (list of items, each + -- a list of blocks) + | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) | HorizontalRule -- ^ Horizontal rule - | Note String [Block] -- ^ Footnote or endnote - reference (string), text (list of blocks) + | Note String [Block] -- ^ Footnote or endnote - reference (string), + -- text (list of blocks) deriving (Eq, Read, Show) -- | Target for a link: either a URL or an indirect (labeled) reference. data Target = Src String String -- ^ First string is URL, second is title - | Ref [Inline] -- ^ Label (list of inlines) for an indirect reference + | Ref [Inline] -- ^ Label (list of inlines) for an indirect ref deriving (Show, Eq, Read) -- | Inline elements. @@ -42,9 +55,10 @@ data Inline | Code String -- ^ Inline code (literal) | Space -- ^ Inter-word space | LineBreak -- ^ Hard line break - | TeX String -- ^ LaTeX code (literal) + | TeX String -- ^ LaTeX code (literal) | HtmlInline String -- ^ HTML code (literal) | Link [Inline] Target -- ^ Hyperlink: text (list of inlines) and target - | Image [Inline] Target -- ^ Image: alternative text (list of inlines) and target + | Image [Inline] Target -- ^ Image: alternative text (list of inlines) + -- and target | NoteRef String -- ^ Footnote or endnote reference deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/HtmlEntities.hs b/src/Text/Pandoc/HtmlEntities.hs index bbb438ef5..a03548388 100644 --- a/src/Text/Pandoc/HtmlEntities.hs +++ b/src/Text/Pandoc/HtmlEntities.hs @@ -1,12 +1,22 @@ --- | Functions for encoding unicode characters as HTML entity --- references, and vice versa. +{- | + Module : Text.Pandoc.HtmlEntities + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : unstable + Portability : portable + +Functions for encoding unicode characters as HTML entity references, +and vice versa. +-} module Text.Pandoc.HtmlEntities ( htmlEntityToChar, charToHtmlEntity, decodeEntities, encodeEntities ) where -import Char ( chr, ord ) +import Data.Char ( chr, ord ) import Text.Regex ( mkRegex, matchRegexAll ) import Maybe ( fromMaybe ) @@ -19,13 +29,15 @@ characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;" decodeEntities :: String -> String decodeEntities str = case (matchRegexAll characterEntity str) of - Nothing -> str - Just (before, match, rest, _) -> before ++ replacement ++ (decodeEntities rest) + Nothing -> str + Just (before, match, rest, _) -> before ++ replacement ++ + (decodeEntities rest) where replacement = case (htmlEntityToChar match) of Just ch -> [ch] Nothing -> match --- | Returns a string with characters replaced with entity references where possible. +-- | Returns a string with characters replaced with entity references where +-- possible. encodeEntities :: String -> String encodeEntities = concatMap (\c -> fromMaybe [c] (charToHtmlEntity c)) @@ -44,10 +56,9 @@ htmlEntityToChar entity = charToHtmlEntity :: Char -> Maybe String charToHtmlEntity char = let matches = filter (\(entity, character) -> (character == char)) htmlEntityTable in - if (length matches) == 0 then - Nothing - else - Just (fst (head matches)) + if (length matches) == 0 + then Nothing + else Just (fst (head matches)) htmlEntityTable :: [(String, Char)] htmlEntityTable = [ diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f9a738e94..c157f3b0e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,14 @@ --- | Converts HTML to 'Pandoc' document. +{- | + Module : Text.Pandoc.Readers.HTML + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : unstable + Portability : portable + +Conversion of HTML to 'Pandoc' document. +-} module Text.Pandoc.Readers.HTML ( readHtml, rawHtmlInline, @@ -30,10 +40,11 @@ testString = testStringWith parseHtml -- Constants -- -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite", - "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q", - "s", "samp", "select", "small", "span", "strike", "strong", "sub", - "sup", "textarea", "tt", "u", "var"] +inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", + "br", "cite", "code", "dfn", "em", "font", "i", "img", + "input", "kbd", "label", "q", "s", "samp", "select", + "small", "span", "strike", "strong", "sub", "sup", + "textarea", "tt", "u", "var"] -- -- HTML utility functions @@ -50,9 +61,10 @@ inlinesTilEnd tag = try (do return inlines) -- extract type from a tag: e.g. br from
, < br >,
, etc. -extractTagType tag = case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of - Just [match] -> (map toLower match) - Nothing -> "" +extractTagType tag = + case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of + Just [match] -> (map toLower match) + Nothing -> "" anyHtmlTag = try (do char '<' @@ -90,7 +102,8 @@ htmlTag tag = try (do -- parses a quoted html attribute value quoted quoteChar = do - result <- between (char quoteChar) (char quoteChar) (many (noneOf [quoteChar])) + result <- between (char quoteChar) (char quoteChar) + (many (noneOf [quoteChar])) return (result, [quoteChar]) htmlAttributes = do @@ -116,9 +129,11 @@ htmlRegularAttribute = try (do spaces (content, quoteStr) <- choice [ (quoted '\''), (quoted '"'), - (do{ a <- (many (alphaNum <|> (oneOf "-._:"))); - return (a,"")} ) ] - return (name, content, (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) + (do + a <- many (alphaNum <|> (oneOf "-._:")) + return (a,"")) ] + return (name, content, + (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) htmlEndTag tag = try (do char '<' @@ -135,17 +150,11 @@ isInline tag = (extractTagType tag) `elem` inlineHtmlTags anyHtmlBlockTag = try (do tag <- choice [anyHtmlTag, anyHtmlEndTag] - if isInline tag then - fail "inline tag" - else - return tag) + if isInline tag then fail "inline tag" else return tag) anyHtmlInlineTag = try (do tag <- choice [ anyHtmlTag, anyHtmlEndTag ] - if isInline tag then - return tag - else - fail "not an inline tag") + if isInline tag then return tag else fail "not an inline tag") -- scripts must be treated differently, because they can contain <> etc. htmlScript = try (do @@ -155,13 +164,11 @@ htmlScript = try (do rawHtmlBlock = try (do notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"]) - body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition] + body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, + definition] sp <- (many space) state <- getState - if stateParseRaw state then - return (RawHtml (body ++ sp)) - else - return Null) + if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null) htmlComment = try (do string "