From 6f16d52c1185fea757047374100a9f10be7af3fc Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Mon, 17 Sep 2007 02:49:28 +0000 Subject: Changed parsing of code blocks in HTML reader: + <code> tag is no longer needed.
 <pre> suffices. + all HTML tags in the code block (e.g. for
 syntax highlighting) are skipped, because they are not portable to other
 output formats.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1022 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 src/Text/Pandoc/Readers/HTML.hs | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b6aac2b48..69ce97eff 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -100,7 +100,7 @@ extractTagType ('<':rest) =
   map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
 extractTagType _ = ""
 
--- | Parse any HTML tag (closing or opening) and return text of tag
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
 anyHtmlTag = try $ do
   char '<'
   spaces
@@ -313,15 +313,16 @@ hrule = try  $ do
 -- code blocks
 --
 
+-- Note:  HTML tags in code blocks (e.g. for syntax highlighting) are 
+-- skipped, because they are not portable to output formats other than HTML.
 codeBlock = try $ do
     htmlTag "pre" 
-    spaces
-    htmlTag "code"
-    result <- manyTill anyChar (htmlEndTag "code")
-    spaces
-    htmlEndTag "pre"
+    result <- manyTill 
+              (many1 (satisfy (/= '<')) <|> 
+               ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+              (htmlEndTag "pre")
     return $ CodeBlock $ stripTrailingNewlines $ 
-             decodeCharacterReferences result
+             decodeCharacterReferences $ concat result
 
 --
 -- block quotes
-- 
cgit v1.2.3