diff options
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 15 |
1 files changed, 8 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b6aac2b48..69ce97eff 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -100,7 +100,7 @@ extractTagType ('<':rest) =
   map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
 extractTagType _ = ""
 
--- | Parse any HTML tag (closing or opening) and return text of tag
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
 anyHtmlTag = try $ do
   char '<'
   spaces
@@ -313,15 +313,16 @@ hrule = try $ do
 -- code blocks
 --
 
+-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
+-- skipped, because they are not portable to output formats other than HTML.
 codeBlock = try $ do
   htmlTag "pre"
-  spaces
-  htmlTag "code"
-  result <- manyTill anyChar (htmlEndTag "code")
-  spaces
-  htmlEndTag "pre"
+  result <- manyTill
+            (many1 (satisfy (/= '<')) <|>
+             ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+            (htmlEndTag "pre")
   return $ CodeBlock $ stripTrailingNewlines $
-           decodeCharacterReferences result
+           decodeCharacterReferences $ concat result
 
 --
 -- block quotes