about summary refs log tree commit diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-08-07 22:12:44 -0700
committer John MacFarlane <jgm@berkeley.edu> 2014-08-07 22:12:44 -0700
commit 94466c006005892dc177a8a0518ccf3c55b4e51b (patch)
tree 6d7a82079e533702341b289984f0ca5a2d9cd3c4
parent 3c4079edc88deedb01128663bed8040617f0877c (diff)
download pandoc-94466c006005892dc177a8a0518ccf3c55b4e51b.tar.gz
HTML reader: Really ignore DOCTYPE and xml declarations.
This actually does what d71b013841f3c9c8c595591e312a31df16a728cb said it did. Revised epub tests to remove the repeated DOCTYPE and xml tags.
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 4
-rw-r--r-- tests/features.native 18
-rw-r--r-- tests/formatting.native 7
3 files changed, 2 insertions, 27 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index a8df1394c..42ef11065 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -289,7 +289,7 @@ fixPlains inList bs = if any isParaish bs'
pRawTag :: TagParser String
pRawTag = do
tag <- pAnyTag
- let ignorable x = x `elem` ["html","head","body","DOCTYPE","?xml"]
+ let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"]
if tagOpen ignorable (const True) tag || tagClose ignorable tag
then return []
else return $ renderTags' [tag]
@@ -728,7 +728,7 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
-}
blockHtmlTags :: [String]
-blockHtmlTags = ["address", "article", "aside",
+blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
"blockquote", "body", "button", "canvas",
"caption", "center", "col", "colgroup", "dd", "dir", "div",
"dl", "dt", "embed", "fieldset", "figcaption", "figure",
diff --git a/tests/features.native b/tests/features.native
index b84d6781f..0a790e305 100644
--- a/tests/features.native
+++ b/tests/features.native
@@ -1,5 +1,4 @@
[Para [Span ("front.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section>"
,Header 1 ("",[],[]) [Str "Reflowable",Space,Str "EPUB",Space,Str "3",Space,Str "Conformance",Space,Str "Test",Space,Str "Document:",Space,Str "0100"]
,RawBlock (Format "html") "<section>"
@@ -29,11 +28,9 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-xhtml-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,Header 1 ("",[],[]) [Str "Content",Space,Str "Documents:",Space,Str "XHTML"]
,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "static",Space,Str "XHTML",Space,Str "content."]
,Para [Span ("content-xhtml-002.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"iframe-010\" class=\"ctest\">"
,Header 2 ("",[],[]) [Span ("",["nature"],[]) [Str "[REQUIRED]"],Space,Span ("",["test-id"],[]) [Str "iframe-010"],Space,Str "Inline",Space,Str "Frames"]
,Para [Str "Tests",Space,Str "whether",Space,Str "embedding",Space,Str "content",Space,Str "via",Space,Str "an",Space,Code ("",[],[]) "iframe",Space,Str "is",Space,Str "supported."]
@@ -41,7 +38,6 @@
,Para [Str "If",Space,Str "the",Space,Str "preceding",Space,Str "paragraph",Space,Str "reads",Space,Str "\"PASS\",",Space,Str "the",Space,Str "test",Space,Str "passes."]
,RawBlock (Format "html") "</section>"
,Para [Span ("content-images-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section>"
,Header 2 ("content-images-001.xhtml#multimedia",[],[]) [Str "Multimedia"]
,RawBlock (Format "html") "<section>"
@@ -67,7 +63,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-multimedia-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section>"
,RawBlock (Format "html") "<section>"
,Header 3 ("content-multimedia-001.xhtml#audio",[],[]) [Str "Audio"]
@@ -92,7 +87,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-multimedia-002.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section>"
,Header 3 ("content-multimedia-002.xhtml#video",[],[]) [Str "Video"]
,RawBlock (Format "html") "<section id=\"video-010\" class=\"otest\">"
@@ -146,7 +140,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-multimedia-003.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section>"
,Header 3 ("content-multimedia-003.xhtml#trigger",[],[]) [Code ("",[],[]) "epub:trigger"]
,RawBlock (Format "html") "<section id=\"trigger-010\" class=\"ctest\">"
@@ -179,7 +172,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-ns-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section>"
,Header 2 ("content-ns-001.xhtml#ns",[],[]) [Str "Namespace",Space,Str "Support"]
,RawBlock (Format "html") "<section id=\"namespace-010\" class=\"ctest\">"
@@ -191,7 +183,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-mathml-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section>"
,Header 2 ("content-mathml-001.xhtml#mathml",[],[]) [Str "MathML"]
,RawBlock (Format "html") "<section id=\"mathml-010\" class=\"ctest\">"
@@ -259,7 +250,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-svg-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"svg\">"
,Header 2 ("",[],[]) [Str "SVG"]
,RawBlock (Format "html") "<section id=\"svg-shapes\">"
@@ -279,7 +269,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-svg-002.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"svg-text\">"
,Header 3 ("",[],[]) [Str "Text",Space,Str "content"]
,RawBlock (Format "html") "<section id=\"svg-text-styling\">"
@@ -365,7 +354,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-svg-003.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"svg-xhtml-support\">"
,Header 3 ("",[],[]) [Str "XHTML",Space,Str "Support"]
,RawBlock (Format "html") "<section id=\"svg-410\" class=\"ctest\">"
@@ -389,7 +377,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-svg-004.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"svg-image-embed\">"
,Header 3 ("",[],[]) [Str "Image",Space,Str "Embedding"]
,RawBlock (Format "html") "<section id=\"svg-510\" class=\"ctest\">"
@@ -400,11 +387,9 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-fallbacks.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,Header 1 ("",[],[]) [Str "Fallbacks"]
,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "Fallbacks."]
,Para [Span ("content-bindings-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"bindings\">"
,Header 3 ("",[],[]) [Str "Bindings"]
,RawBlock (Format "html") "<section id=\"bindings-010\" class=\"otest\">"
@@ -419,7 +404,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-fallbacks-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"fallbacks\">"
,Header 2 ("",[],[]) [Str "Fallbacks"]
,RawBlock (Format "html") "<section id=\"fallbacks-manifest\">"
@@ -457,7 +441,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("content-switch-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"epub-switch\">"
,Header 3 ("",[],[]) [Code ("",[],[]) "epub:switch"]
,RawBlock (Format "html") "<section id=\"switch-010\" class=\"ctest\">"
@@ -475,7 +458,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("svg-doc-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,Header 1 ("",[],[]) [Str "Content",Space,Str "Documents:",Space,Str "SVG"]
,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "static",Space,Str "SVG",Space,Str "content."]
,Para [Str "Note",Space,Str "that",Space,Str "if",Space,Str "no",Space,Str "SVG",Space,Str "tests",Space,Str "appear",Space,Str "after",Space,Str "this",Space,Str "document,",Space,Str "SVG",Space,Str "is",Space,Str "not",Space,Str "supported",Space,Str "in",Space,Str "the",Space,Str "spine",Space,Str "and",Space,Str "all",Space,Str "tests",Space,Str "in",Space,Str "this",Space,Str "section",Space,Str "should",Space,Str "be",Space,Str "marked",Space,Code ("",[],[]) "Not Supported",Str "."]
diff --git a/tests/formatting.native b/tests/formatting.native
index b3204152e..bdf86fa20 100644
--- a/tests/formatting.native
+++ b/tests/formatting.native
@@ -1,5 +1,4 @@
[Para [Span ("front.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section>"
,Header 1 ("",[],[]) [Str "EPUB",Space,Str "3",Space,Str "Styling",Space,Str "Test",Space,Str "Document:",Space,Str "0101"]
,RawBlock (Format "html") "<section>"
@@ -29,13 +28,11 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("styling-xhtml-001.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"epub-css\">"
,Header 1 ("",[],[]) [Str "EPUB",Space,Str "Style",Space,Str "Sheets"]
,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "styling",Space,Str "and",Space,Str "layout."]
,RawBlock (Format "html") "</section>"
,Para [Span ("styling-xhtml-003.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"style-110\" class=\"ctest\">"
,Header 2 ("",[],[]) [Span ("",["nature"],[]) [Str "[REQUIRED]"],Space,Span ("",["test-id"],[]) [Str "style-110"],Space,Str "Multi-Column",Space,Str "Layouts"]
,Para [Str "Tests",Space,Str "whether",Space,Str "the",Space,Code ("",[],[]) "CSS Multi-Column Layout",Space,Str "properties",Space,Str "are",Space,Str "supported."]
@@ -43,7 +40,6 @@
,Para [Str "If",Space,Str "the",Space,Str "preceding",Space,Str "text",Space,Str "is",Space,Str "rendered",Space,Str "in",Space,Str "three",Space,Str "columns,",Space,Str "the",Space,Str "test",Space,Str "passes."]
,RawBlock (Format "html") "</section>"
,Para [Span ("styling-xhtml-002.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"style-lists\">"
,Header 2 ("",[],[]) [Str "Lists"]
,RawBlock (Format "html") "<section id=\"style-list-style-type\">"
@@ -342,7 +338,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("styling-xhtml-004.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"style-media-rules\">"
,Header 2 ("",[],[]) [Code ("",[],[]) "@media",Space,Str "Rules"]
,RawBlock (Format "html") "<section id=\"style-210\" class=\"ctest\">"
@@ -407,7 +402,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("styling-xhtml-005.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"style-text-xform\">"
,Header 2 ("",[],[]) [Str "The",Space,Code ("",[],[]) "text-transform",Space,Str "property"]
,RawBlock (Format "html") "<section id=\"style-310\" class=\"ctest\">"
@@ -430,7 +424,6 @@
,RawBlock (Format "html") "</section>"
,RawBlock (Format "html") "</section>"
,Para [Span ("styling-xhtml-006.xhtml",[],[]) []]
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"]
,RawBlock (Format "html") "<section id=\"style-ruby\">"
,Header 2 ("",[],[]) [Str "The",Space,Code ("",[],[]) "epub-ruby-position",Space,Str "property"]
,RawBlock (Format "html") "<section id=\"style-410\" class=\"ctest\">"