diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-08-07 22:12:44 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-08-07 22:12:44 -0700 |
commit | 94466c006005892dc177a8a0518ccf3c55b4e51b (patch) | |
tree | 6d7a82079e533702341b289984f0ca5a2d9cd3c4 | |
parent | 3c4079edc88deedb01128663bed8040617f0877c (diff) | |
download | pandoc-94466c006005892dc177a8a0518ccf3c55b4e51b.tar.gz |
HTML reader: Really ignore DOCTYPE and xml declarations.
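This commit actually does what d71b013841f3c9c8c595591e312a31df16a728cb
claimed to do: the ignorable tag name is "!DOCTYPE", not "DOCTYPE", and both
"?xml" and "!DOCTYPE" are now listed as block-level tags.
Revised the EPUB tests to remove the repeated DOCTYPE and XML declarations.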
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 4 |
-rw-r--r-- | tests/features.native | 18 |
-rw-r--r-- | tests/formatting.native | 7 |
3 files changed, 2 insertions, 27 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index a8df1394c..42ef11065 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -289,7 +289,7 @@ fixPlains inList bs = if any isParaish bs' pRawTag :: TagParser String pRawTag = do tag <- pAnyTag - let ignorable x = x `elem` ["html","head","body","DOCTYPE","?xml"] + let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"] if tagOpen ignorable (const True) tag || tagClose ignorable tag then return [] else return $ renderTags' [tag] @@ -728,7 +728,7 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", -} blockHtmlTags :: [String] -blockHtmlTags = ["address", "article", "aside", +blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", "blockquote", "body", "button", "canvas", "caption", "center", "col", "colgroup", "dd", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", diff --git a/tests/features.native b/tests/features.native index b84d6781f..0a790e305 100644 --- a/tests/features.native +++ b/tests/features.native @@ -1,5 +1,4 @@ [Para [Span ("front.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section>" ,Header 1 ("",[],[]) [Str "Reflowable",Space,Str "EPUB",Space,Str "3",Space,Str "Conformance",Space,Str "Test",Space,Str "Document:",Space,Str "0100"] ,RawBlock (Format "html") "<section>" @@ -29,11 +28,9 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-xhtml-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,Header 1 ("",[],[]) [Str "Content",Space,Str "Documents:",Space,Str "XHTML"] ,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "static",Space,Str 
"XHTML",Space,Str "content."] ,Para [Span ("content-xhtml-002.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"iframe-010\" class=\"ctest\">" ,Header 2 ("",[],[]) [Span ("",["nature"],[]) [Str "[REQUIRED]"],Space,Span ("",["test-id"],[]) [Str "iframe-010"],Space,Str "Inline",Space,Str "Frames"] ,Para [Str "Tests",Space,Str "whether",Space,Str "embedding",Space,Str "content",Space,Str "via",Space,Str "an",Space,Code ("",[],[]) "iframe",Space,Str "is",Space,Str "supported."] @@ -41,7 +38,6 @@ ,Para [Str "If",Space,Str "the",Space,Str "preceding",Space,Str "paragraph",Space,Str "reads",Space,Str "\"PASS\",",Space,Str "the",Space,Str "test",Space,Str "passes."] ,RawBlock (Format "html") "</section>" ,Para [Span ("content-images-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section>" ,Header 2 ("content-images-001.xhtml#multimedia",[],[]) [Str "Multimedia"] ,RawBlock (Format "html") "<section>" @@ -67,7 +63,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-multimedia-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section>" ,RawBlock (Format "html") "<section>" ,Header 3 ("content-multimedia-001.xhtml#audio",[],[]) [Str "Audio"] @@ -92,7 +87,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-multimedia-002.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section>" ,Header 3 ("content-multimedia-002.xhtml#video",[],[]) [Str "Video"] 
,RawBlock (Format "html") "<section id=\"video-010\" class=\"otest\">" @@ -146,7 +140,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-multimedia-003.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section>" ,Header 3 ("content-multimedia-003.xhtml#trigger",[],[]) [Code ("",[],[]) "epub:trigger"] ,RawBlock (Format "html") "<section id=\"trigger-010\" class=\"ctest\">" @@ -179,7 +172,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-ns-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section>" ,Header 2 ("content-ns-001.xhtml#ns",[],[]) [Str "Namespace",Space,Str "Support"] ,RawBlock (Format "html") "<section id=\"namespace-010\" class=\"ctest\">" @@ -191,7 +183,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-mathml-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section>" ,Header 2 ("content-mathml-001.xhtml#mathml",[],[]) [Str "MathML"] ,RawBlock (Format "html") "<section id=\"mathml-010\" class=\"ctest\">" @@ -259,7 +250,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-svg-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"svg\">" ,Header 2 ("",[],[]) [Str "SVG"] ,RawBlock (Format "html") "<section id=\"svg-shapes\">" @@ -279,7 +269,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" 
,Para [Span ("content-svg-002.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"svg-text\">" ,Header 3 ("",[],[]) [Str "Text",Space,Str "content"] ,RawBlock (Format "html") "<section id=\"svg-text-styling\">" @@ -365,7 +354,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-svg-003.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"svg-xhtml-support\">" ,Header 3 ("",[],[]) [Str "XHTML",Space,Str "Support"] ,RawBlock (Format "html") "<section id=\"svg-410\" class=\"ctest\">" @@ -389,7 +377,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-svg-004.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"svg-image-embed\">" ,Header 3 ("",[],[]) [Str "Image",Space,Str "Embedding"] ,RawBlock (Format "html") "<section id=\"svg-510\" class=\"ctest\">" @@ -400,11 +387,9 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-fallbacks.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,Header 1 ("",[],[]) [Str "Fallbacks"] ,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "Fallbacks."] ,Para [Span ("content-bindings-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"bindings\">" ,Header 3 ("",[],[]) [Str "Bindings"] ,RawBlock 
(Format "html") "<section id=\"bindings-010\" class=\"otest\">" @@ -419,7 +404,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-fallbacks-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"fallbacks\">" ,Header 2 ("",[],[]) [Str "Fallbacks"] ,RawBlock (Format "html") "<section id=\"fallbacks-manifest\">" @@ -457,7 +441,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("content-switch-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"epub-switch\">" ,Header 3 ("",[],[]) [Code ("",[],[]) "epub:switch"] ,RawBlock (Format "html") "<section id=\"switch-010\" class=\"ctest\">" @@ -475,7 +458,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("svg-doc-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,Header 1 ("",[],[]) [Str "Content",Space,Str "Documents:",Space,Str "SVG"] ,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "static",Space,Str "SVG",Space,Str "content."] ,Para [Str "Note",Space,Str "that",Space,Str "if",Space,Str "no",Space,Str "SVG",Space,Str "tests",Space,Str "appear",Space,Str "after",Space,Str "this",Space,Str "document,",Space,Str "SVG",Space,Str "is",Space,Str "not",Space,Str "supported",Space,Str "in",Space,Str "the",Space,Str "spine",Space,Str "and",Space,Str "all",Space,Str "tests",Space,Str "in",Space,Str "this",Space,Str "section",Space,Str "should",Space,Str "be",Space,Str "marked",Space,Code ("",[],[]) "Not Supported",Str "."] diff --git a/tests/formatting.native 
b/tests/formatting.native index b3204152e..bdf86fa20 100644 --- a/tests/formatting.native +++ b/tests/formatting.native @@ -1,5 +1,4 @@ [Para [Span ("front.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section>" ,Header 1 ("",[],[]) [Str "EPUB",Space,Str "3",Space,Str "Styling",Space,Str "Test",Space,Str "Document:",Space,Str "0101"] ,RawBlock (Format "html") "<section>" @@ -29,13 +28,11 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("styling-xhtml-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"utf-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"epub-css\">" ,Header 1 ("",[],[]) [Str "EPUB",Space,Str "Style",Space,Str "Sheets"] ,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "styling",Space,Str "and",Space,Str "layout."] ,RawBlock (Format "html") "</section>" ,Para [Span ("styling-xhtml-003.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"style-110\" class=\"ctest\">" ,Header 2 ("",[],[]) [Span ("",["nature"],[]) [Str "[REQUIRED]"],Space,Span ("",["test-id"],[]) [Str "style-110"],Space,Str "Multi-Column",Space,Str "Layouts"] ,Para [Str "Tests",Space,Str "whether",Space,Str "the",Space,Code ("",[],[]) "CSS Multi-Column Layout",Space,Str "properties",Space,Str "are",Space,Str "supported."] @@ -43,7 +40,6 @@ ,Para [Str "If",Space,Str "the",Space,Str "preceding",Space,Str "text",Space,Str "is",Space,Str "rendered",Space,Str "in",Space,Str "three",Space,Str "columns,",Space,Str "the",Space,Str "test",Space,Str "passes."] ,RawBlock (Format "html") "</section>" ,Para [Span ("styling-xhtml-002.xhtml",[],[]) []] 
-,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"style-lists\">" ,Header 2 ("",[],[]) [Str "Lists"] ,RawBlock (Format "html") "<section id=\"style-list-style-type\">" @@ -342,7 +338,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("styling-xhtml-004.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"style-media-rules\">" ,Header 2 ("",[],[]) [Code ("",[],[]) "@media",Space,Str "Rules"] ,RawBlock (Format "html") "<section id=\"style-210\" class=\"ctest\">" @@ -407,7 +402,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("styling-xhtml-005.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"style-text-xform\">" ,Header 2 ("",[],[]) [Str "The",Space,Code ("",[],[]) "text-transform",Space,Str "property"] ,RawBlock (Format "html") "<section id=\"style-310\" class=\"ctest\">" @@ -430,7 +424,6 @@ ,RawBlock (Format "html") "</section>" ,RawBlock (Format "html") "</section>" ,Para [Span ("styling-xhtml-006.xhtml",[],[]) []] -,Para [RawInline (Format "html") "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>",Space,RawInline (Format "html") "<!DOCTYPE html>"] ,RawBlock (Format "html") "<section id=\"style-ruby\">" ,Header 2 ("",[],[]) [Str "The",Space,Code ("",[],[]) "epub-ruby-position",Space,Str "property"] ,RawBlock (Format "html") "<section id=\"style-410\" class=\"ctest\">" |