From dc9c6450f3b16592d0ee865feafc17b670e4ad14 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Wed, 20 Dec 2006 06:50:14 +0000 Subject: + Added module data for haddock. + Reformatted code consistently. git-svn-id: https://pandoc.googlecode.com/svn/trunk@252 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Definition.hs | 34 +- src/Text/Pandoc/HtmlEntities.hs | 31 +- src/Text/Pandoc/Readers/HTML.hs | 85 ++--- src/Text/Pandoc/Readers/LaTeX.hs | 118 ++++--- src/Text/Pandoc/Readers/Markdown.hs | 601 +++++++++++++++++---------------- src/Text/Pandoc/Readers/RST.hs | 631 ++++++++++++++++++----------------- src/Text/Pandoc/Shared.hs | 267 +++++++++------ src/Text/Pandoc/UTF8.hs | 3 +- src/Text/Pandoc/Writers/HTML.hs | 311 +++++++++-------- src/Text/Pandoc/Writers/LaTeX.hs | 155 +++++---- src/Text/Pandoc/Writers/Markdown.hs | 148 ++++---- src/Text/Pandoc/Writers/RST.hs | 175 +++++----- src/Text/Pandoc/Writers/RTF.hs | 155 +++++---- src/Text/ParserCombinators/Pandoc.hs | 24 +- 14 files changed, 1512 insertions(+), 1226 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 08ff3905e..b2655ffa0 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -1,5 +1,15 @@ --- | Definition of 'Pandoc' data structure for format-neutral representation --- of documents. +{- | + Module : Text.Pandoc.Definition + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : unstable + Portability : portable + +Definition of 'Pandoc' data structure for format-neutral representation +of documents. +-} module Text.Pandoc.Definition where data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show) @@ -17,21 +27,24 @@ data Block | Blank -- ^ A blank line | Null -- ^ Nothing | Para [Inline] -- ^ Paragraph - | Key [Inline] Target -- ^ Reference key: name (list of inlines) and 'Target' + | Key [Inline] Target -- ^ Reference key: name (inlines) and 'Target' | CodeBlock String -- ^ Code block (literal) | RawHtml String -- ^ Raw HTML block (literal) | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList [[Block]] -- ^ Ordered list (list of items, each a list of blocks) - | BulletList [[Block]] -- ^ Bullet list (list of items, each a list of blocks) - | Header Int [Inline] -- ^ Header - level (integer) and text (list of inlines) + | OrderedList [[Block]] -- ^ Ordered list (list of items, each + -- a list of blocks) + | BulletList [[Block]] -- ^ Bullet list (list of items, each + -- a list of blocks) + | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) | HorizontalRule -- ^ Horizontal rule - | Note String [Block] -- ^ Footnote or endnote - reference (string), text (list of blocks) + | Note String [Block] -- ^ Footnote or endnote - reference (string), + -- text (list of blocks) deriving (Eq, Read, Show) -- | Target for a link: either a URL or an indirect (labeled) reference. data Target = Src String String -- ^ First string is URL, second is title - | Ref [Inline] -- ^ Label (list of inlines) for an indirect reference + | Ref [Inline] -- ^ Label (list of inlines) for an indirect ref deriving (Show, Eq, Read) -- | Inline elements. @@ -42,9 +55,10 @@ data Inline | Code String -- ^ Inline code (literal) | Space -- ^ Inter-word space | LineBreak -- ^ Hard line break - | TeX String -- ^ LaTeX code (literal) + | TeX String -- ^ LaTeX code (literal) | HtmlInline String -- ^ HTML code (literal) | Link [Inline] Target -- ^ Hyperlink: text (list of inlines) and target - | Image [Inline] Target -- ^ Image: alternative text (list of inlines) and target + | Image [Inline] Target -- ^ Image: alternative text (list of inlines) + -- and target | NoteRef String -- ^ Footnote or endnote reference deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/HtmlEntities.hs b/src/Text/Pandoc/HtmlEntities.hs index bbb438ef5..a03548388 100644 --- a/src/Text/Pandoc/HtmlEntities.hs +++ b/src/Text/Pandoc/HtmlEntities.hs @@ -1,12 +1,22 @@ --- | Functions for encoding unicode characters as HTML entity --- references, and vice versa. +{- | + Module : Text.Pandoc.HtmlEntities + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : unstable + Portability : portable + +Functions for encoding unicode characters as HTML entity references, +and vice versa. +-} module Text.Pandoc.HtmlEntities ( htmlEntityToChar, charToHtmlEntity, decodeEntities, encodeEntities ) where -import Char ( chr, ord ) +import Data.Char ( chr, ord ) import Text.Regex ( mkRegex, matchRegexAll ) import Maybe ( fromMaybe ) @@ -19,13 +29,15 @@ characterEntity = mkRegex "&#[0-9]+;|&[A-Za-z0-9]+;" decodeEntities :: String -> String decodeEntities str = case (matchRegexAll characterEntity str) of - Nothing -> str - Just (before, match, rest, _) -> before ++ replacement ++ (decodeEntities rest) + Nothing -> str + Just (before, match, rest, _) -> before ++ replacement ++ + (decodeEntities rest) where replacement = case (htmlEntityToChar match) of Just ch -> [ch] Nothing -> match --- | Returns a string with characters replaced with entity references where possible. +-- | Returns a string with characters replaced with entity references where +-- possible. encodeEntities :: String -> String encodeEntities = concatMap (\c -> fromMaybe [c] (charToHtmlEntity c)) @@ -44,10 +56,9 @@ htmlEntityToChar entity = charToHtmlEntity :: Char -> Maybe String charToHtmlEntity char = let matches = filter (\(entity, character) -> (character == char)) htmlEntityTable in - if (length matches) == 0 then - Nothing - else - Just (fst (head matches)) + if (length matches) == 0 + then Nothing + else Just (fst (head matches)) htmlEntityTable :: [(String, Char)] htmlEntityTable = [ diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f9a738e94..c157f3b0e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,14 @@ --- | Converts HTML to 'Pandoc' document. +{- | + Module : Text.Pandoc.Readers.HTML + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : unstable + Portability : portable + +Conversion of HTML to 'Pandoc' document. +-} module Text.Pandoc.Readers.HTML ( readHtml, rawHtmlInline, @@ -30,10 +40,11 @@ testString = testStringWith parseHtml -- Constants -- -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite", - "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q", - "s", "samp", "select", "small", "span", "strike", "strong", "sub", - "sup", "textarea", "tt", "u", "var"] +inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", + "br", "cite", "code", "dfn", "em", "font", "i", "img", + "input", "kbd", "label", "q", "s", "samp", "select", + "small", "span", "strike", "strong", "sub", "sup", + "textarea", "tt", "u", "var"] -- -- HTML utility functions @@ -50,9 +61,10 @@ inlinesTilEnd tag = try (do return inlines) -- extract type from a tag: e.g. br from
, < br >,
, etc. -extractTagType tag = case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of - Just [match] -> (map toLower match) - Nothing -> "" +extractTagType tag = + case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of + Just [match] -> (map toLower match) + Nothing -> "" anyHtmlTag = try (do char '<' @@ -90,7 +102,8 @@ htmlTag tag = try (do -- parses a quoted html attribute value quoted quoteChar = do - result <- between (char quoteChar) (char quoteChar) (many (noneOf [quoteChar])) + result <- between (char quoteChar) (char quoteChar) + (many (noneOf [quoteChar])) return (result, [quoteChar]) htmlAttributes = do @@ -116,9 +129,11 @@ htmlRegularAttribute = try (do spaces (content, quoteStr) <- choice [ (quoted '\''), (quoted '"'), - (do{ a <- (many (alphaNum <|> (oneOf "-._:"))); - return (a,"")} ) ] - return (name, content, (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) + (do + a <- many (alphaNum <|> (oneOf "-._:")) + return (a,"")) ] + return (name, content, + (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) htmlEndTag tag = try (do char '<' @@ -135,17 +150,11 @@ isInline tag = (extractTagType tag) `elem` inlineHtmlTags anyHtmlBlockTag = try (do tag <- choice [anyHtmlTag, anyHtmlEndTag] - if isInline tag then - fail "inline tag" - else - return tag) + if isInline tag then fail "inline tag" else return tag) anyHtmlInlineTag = try (do tag <- choice [ anyHtmlTag, anyHtmlEndTag ] - if isInline tag then - return tag - else - fail "not an inline tag") + if isInline tag then return tag else fail "not an inline tag") -- scripts must be treated differently, because they can contain <> etc. htmlScript = try (do @@ -155,13 +164,11 @@ htmlScript = try (do rawHtmlBlock = try (do notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"]) - body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition] + body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, + definition] sp <- (many space) state <- getState - if stateParseRaw state then - return (RawHtml (body ++ sp)) - else - return Null) + if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null) htmlComment = try (do string "