aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-08-07 22:12:44 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-08-07 22:12:44 -0700
commit94466c006005892dc177a8a0518ccf3c55b4e51b (patch)
tree6d7a82079e533702341b289984f0ca5a2d9cd3c4 /src/Text/Pandoc/Readers
parent3c4079edc88deedb01128663bed8040617f0877c (diff)
downloadpandoc-94466c006005892dc177a8a0518ccf3c55b4e51b.tar.gz
HTML reader: Really ignore DOCTYPE and xml declarations.
This actually does what d71b013841f3c9c8c595591e312a31df16a728cb said it did. Revised epub tests to remove the repeated DOCTYPE and xml tags.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index a8df1394c..42ef11065 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -289,7 +289,7 @@ fixPlains inList bs = if any isParaish bs'
pRawTag :: TagParser String
pRawTag = do
tag <- pAnyTag
- let ignorable x = x `elem` ["html","head","body","DOCTYPE","?xml"]
+ let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
if tagOpen ignorable (const True) tag || tagClose ignorable tag
then return []
else return $ renderTags' [tag]
@@ -728,7 +728,7 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
-}
blockHtmlTags :: [String]
-blockHtmlTags = ["address", "article", "aside",
+blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
"blockquote", "body", "button", "canvas",
"caption", "center", "col", "colgroup", "dd", "dir", "div",
"dl", "dt", "embed", "fieldset", "figcaption", "figure",