-- Post-patch code from the change "HTML reader: Don't interpret contents of
-- <pre> blocks as markdown. Added rawVerbatimBlock parser. Resolves Issue #94."
-- (Text/Pandoc/Readers/HTML.hs, svn trunk@1468).

-- | Parse one raw HTML block.
--
-- The alternatives are tried in order: 'htmlBlockElement', then
-- 'rawVerbatimBlock', then 'anyHtmlBlockTag'. The matched text is wrapped in
-- 'RawHtml' when the @stateParseRaw@ field of the parser state is true;
-- otherwise the text is discarded and 'Null' is returned.
rawHtmlBlock :: GenParser Char ParserState Block
rawHtmlBlock = try $ do
  raw <- choice [htmlBlockElement, rawVerbatimBlock, anyHtmlBlockTag]
  st <- getState
  return $ if stateParseRaw st then RawHtml raw else Null

-- | Parse a block whose contents should be passed through verbatim, not
-- interpreted.
--
-- Reads an opening block tag, and if its tag type is one of 'verbatimTags',
-- consumes every character up to the matching end tag without interpreting
-- it. The result is the opening tag, the untouched contents and the end tag
-- joined together. Any other tag type makes the parser fail; the outer 'try'
-- wraps the whole parser, including the read of the opening tag.
rawVerbatimBlock :: GenParser Char ParserState [Char]
rawVerbatimBlock = try $ do
  open <- anyHtmlBlockTag
  let tagName = extractTagType open
  if tagName `notElem` verbatimTags
     then fail "Not a verbatim block"
     else do
       -- Take characters one at a time, stopping just before the end tag.
       inner <- many (notFollowedBy' (htmlEndTag tagName) >> anyChar)
       close <- htmlEndTag tagName
       return (concat [open, inner, close])
  where
    -- Tag types whose contents are kept as-is.
    verbatimTags = ["pre"]

-- The patch context continues with rawHtmlBlock', whose comment reads:
-- "We don't want to parse </body> or </html> as raw HTML, since these
-- are handled in parseHtml." Its definition is not part of this excerpt.