From dc9c6450f3b16592d0ee865feafc17b670e4ad14 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Wed, 20 Dec 2006 06:50:14 +0000 Subject: + Added module data for haddock. + Reformatted code consistently. git-svn-id: https://pandoc.googlecode.com/svn/trunk@252 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Readers/HTML.hs | 85 +++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 38 deletions(-) (limited to 'src/Text/Pandoc/Readers/HTML.hs') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f9a738e94..c157f3b0e 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,4 +1,14 @@ --- | Converts HTML to 'Pandoc' document. +{- | + Module : Text.Pandoc.Readers.HTML + Copyright : Copyright (C) 2006 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : unstable + Portability : portable + +Conversion of HTML to 'Pandoc' document. +-} module Text.Pandoc.Readers.HTML ( readHtml, rawHtmlInline, @@ -30,10 +40,11 @@ testString = testStringWith parseHtml -- Constants -- -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "br", "cite", - "code", "dfn", "em", "font", "i", "img", "input", "kbd", "label", "q", - "s", "samp", "select", "small", "span", "strike", "strong", "sub", - "sup", "textarea", "tt", "u", "var"] +inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", + "br", "cite", "code", "dfn", "em", "font", "i", "img", + "input", "kbd", "label", "q", "s", "samp", "select", + "small", "span", "strike", "strong", "sub", "sup", + "textarea", "tt", "u", "var"] -- -- HTML utility functions @@ -50,9 +61,10 @@ inlinesTilEnd tag = try (do return inlines) -- extract type from a tag: e.g. br from
<br>, < br >, </br>
, etc. -extractTagType tag = case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of - Just [match] -> (map toLower match) - Nothing -> "" +extractTagType tag = + case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of + Just [match] -> (map toLower match) + Nothing -> "" anyHtmlTag = try (do char '<' @@ -90,7 +102,8 @@ htmlTag tag = try (do -- parses a quoted html attribute value quoted quoteChar = do - result <- between (char quoteChar) (char quoteChar) (many (noneOf [quoteChar])) + result <- between (char quoteChar) (char quoteChar) + (many (noneOf [quoteChar])) return (result, [quoteChar]) htmlAttributes = do @@ -116,9 +129,11 @@ htmlRegularAttribute = try (do spaces (content, quoteStr) <- choice [ (quoted '\''), (quoted '"'), - (do{ a <- (many (alphaNum <|> (oneOf "-._:"))); - return (a,"")} ) ] - return (name, content, (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) + (do + a <- many (alphaNum <|> (oneOf "-._:")) + return (a,"")) ] + return (name, content, + (" " ++ name ++ "=" ++ quoteStr ++ content ++ quoteStr))) htmlEndTag tag = try (do char '<' @@ -135,17 +150,11 @@ isInline tag = (extractTagType tag) `elem` inlineHtmlTags anyHtmlBlockTag = try (do tag <- choice [anyHtmlTag, anyHtmlEndTag] - if isInline tag then - fail "inline tag" - else - return tag) + if isInline tag then fail "inline tag" else return tag) anyHtmlInlineTag = try (do tag <- choice [ anyHtmlTag, anyHtmlEndTag ] - if isInline tag then - return tag - else - fail "not an inline tag") + if isInline tag then return tag else fail "not an inline tag") -- scripts must be treated differently, because they can contain <> etc. 
htmlScript = try (do @@ -155,13 +164,11 @@ htmlScript = try (do rawHtmlBlock = try (do notFollowedBy' (choice [htmlTag "/body", htmlTag "/html"]) - body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, definition] + body <- choice [htmlScript, anyHtmlBlockTag, htmlComment, xmlDec, + definition] sp <- (many space) state <- getState - if stateParseRaw state then - return (RawHtml (body ++ sp)) - else - return Null) + if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null) htmlComment = try (do string "