about summary refs log tree commit diff
path: root/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Text/Pandoc/Readers/HTML.hs')
-rw-r--r-- Text/Pandoc/Readers/HTML.hs 14
1 files changed, 13 insertions, 1 deletions
diff --git a/Text/Pandoc/Readers/HTML.hs b/Text/Pandoc/Readers/HTML.hs
index 10b7ad011..65e512b5e 100644
--- a/Text/Pandoc/Readers/HTML.hs
+++ b/Text/Pandoc/Readers/HTML.hs
@@ -320,10 +320,22 @@ htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definiti
rawHtmlBlock :: GenParser Char ParserState Block
rawHtmlBlock = try $ do
- body <- htmlBlockElement <|> anyHtmlBlockTag
+ body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag  -- verbatim blocks are tried before the generic anyHtmlBlockTag alternative
state <- getState
if stateParseRaw state then return (RawHtml body) else return Null
+-- Parse a block whose contents should be passed through verbatim, not interpreted (currently only "pre"); returns the start tag, contents and end tag concatenated into one string.
+rawVerbatimBlock :: GenParser Char ParserState [Char]
+rawVerbatimBlock = try $ do
+ start <- anyHtmlBlockTag  -- tag text as parsed; re-emitted unchanged in the result
+ let tagtype = extractTagType start  -- presumably the tag's element name; confirm against extractTagType
+ if tagtype `elem` ["pre"]  -- list of tag types treated as verbatim
+ then do
+ contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar)  -- presumably takes characters until the matching end tag is next; confirm notFollowedBy' semantics
+ end <- htmlEndTag tagtype  -- consume the end tag so it is included in the result
+ return $ start ++ contents ++ end
+ else fail "Not a verbatim block"  -- fail inside the enclosing try so the caller can fall through to its next alternative
+
-- We don't want to parse </body> or </html> as raw HTML, since these
-- are handled in parseHtml.
rawHtmlBlock' :: GenParser Char ParserState Block