about summary refs log tree commit diff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 15
1 files changed, 8 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b6aac2b48..69ce97eff 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -100,7 +100,7 @@ extractTagType ('<':rest) =
map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
extractTagType _ = ""
--- | Parse any HTML tag (closing or opening) and return text of tag
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
anyHtmlTag = try $ do
char '<'
spaces
@@ -313,15 +313,16 @@ hrule = try $ do
-- code blocks
--
+-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
+-- skipped, because they are not portable to output formats other than HTML.
codeBlock = try $ do
htmlTag "pre"
- spaces
- htmlTag "code"
- result <- manyTill anyChar (htmlEndTag "code")
- spaces
- htmlEndTag "pre"
+ result <- manyTill
+ (many1 (satisfy (/= '<')) <|>
+ ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+ (htmlEndTag "pre")
return $ CodeBlock $ stripTrailingNewlines $
- decodeCharacterReferences result
+ decodeCharacterReferences $ concat result
--
-- block quotes