From a2422504ff5c3ad40255cbefb86f5e48012e9de9 Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Sat, 18 Oct 2008 23:42:23 +0000
Subject: HTML reader: Don't interpret contents of <pre> blocks as markdown.
 Added rawVerbatimBlock parser.  Resolves Issue #94.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1468 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 Text/Pandoc/Readers/HTML.hs | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'Text')
diff --git a/Text/Pandoc/Readers/HTML.hs b/Text/Pandoc/Readers/HTML.hs
index 10b7ad011..65e512b5e 100644
--- a/Text/Pandoc/Readers/HTML.hs
+++ b/Text/Pandoc/Readers/HTML.hs
@@ -320,10 +320,22 @@ htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definiti
 
 rawHtmlBlock :: GenParser Char ParserState Block
 rawHtmlBlock = try $ do
-  body <- htmlBlockElement <|> anyHtmlBlockTag
+  body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag
   state <- getState
   if stateParseRaw state then return (RawHtml body) else return Null
 
+-- Parses a block passed through verbatim (currently only <pre>); returns its raw text, tags included.
+rawVerbatimBlock :: GenParser Char ParserState [Char]
+rawVerbatimBlock = try $ do  -- 'try' lets the whole parser backtrack if any step below fails
+  start <- anyHtmlBlockTag  -- text of the leading block tag
+  let tagtype = extractTagType start
+  if tagtype `elem` ["pre"]  -- tag types whose contents are kept verbatim
+     then do  -- NOTE(review): assumes notFollowedBy' acts like Parsec's notFollowedBy -- confirm
+       contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar)  -- chars before the end tag
+       end <- htmlEndTag tagtype
+       return $ start ++ contents ++ end  -- start tag, contents and end tag joined unchanged
+     else fail "Not a verbatim block"  -- any other tag type is rejected
+
 -- We don't want to parse </body> or </html> as raw HTML, since these
 -- are handled in parseHtml.
 rawHtmlBlock' :: GenParser Char ParserState Block
-- 
cgit v1.2.3