From 94466c006005892dc177a8a0518ccf3c55b4e51b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 7 Aug 2014 22:12:44 -0700 Subject: HTML reader: Really ignore DOCTYPE and xml declarations. This actually does what d71b013841f3c9c8c595591e312a31df16a728cb said it did. Revised epub tests to remove the repeated DOCTYPE and xml tags. --- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- tests/features.native | 18 ------------------ tests/formatting.native | 7 ------- 3 files changed, 2 insertions(+), 27 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index a8df1394c..42ef11065 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -289,7 +289,7 @@ fixPlains inList bs = if any isParaish bs' pRawTag :: TagParser String pRawTag = do tag <- pAnyTag - let ignorable x = x `elem` ["html","head","body","DOCTYPE","?xml"] + let ignorable x = x `elem` ["html","head","body","!DOCTYPE","?xml"] if tagOpen ignorable (const True) tag || tagClose ignorable tag then return [] else return $ renderTags' [tag] @@ -728,7 +728,7 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", -} blockHtmlTags :: [String] -blockHtmlTags = ["address", "article", "aside", +blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside", "blockquote", "body", "button", "canvas", "caption", "center", "col", "colgroup", "dd", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", diff --git a/tests/features.native b/tests/features.native index b84d6781f..0a790e305 100644 --- a/tests/features.native +++ b/tests/features.native @@ -1,5 +1,4 @@ [Para [Span ("front.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 1 ("",[],[]) [Str "Reflowable",Space,Str "EPUB",Space,Str "3",Space,Str "Conformance",Space,Str "Test",Space,Str "Document:",Space,Str "0100"] ,RawBlock (Format "html") "
" @@ -29,11 +28,9 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-xhtml-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,Header 1 ("",[],[]) [Str "Content",Space,Str "Documents:",Space,Str "XHTML"] ,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "static",Space,Str "XHTML",Space,Str "content."] ,Para [Span ("content-xhtml-002.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Span ("",["nature"],[]) [Str "[REQUIRED]"],Space,Span ("",["test-id"],[]) [Str "iframe-010"],Space,Str "Inline",Space,Str "Frames"] ,Para [Str "Tests",Space,Str "whether",Space,Str "embedding",Space,Str "content",Space,Str "via",Space,Str "an",Space,Code ("",[],[]) "iframe",Space,Str "is",Space,Str "supported."] @@ -41,7 +38,6 @@ ,Para [Str "If",Space,Str "the",Space,Str "preceding",Space,Str "paragraph",Space,Str "reads",Space,Str "\"PASS\",",Space,Str "the",Space,Str "test",Space,Str "passes."] ,RawBlock (Format "html") "
" ,Para [Span ("content-images-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("content-images-001.xhtml#multimedia",[],[]) [Str "Multimedia"] ,RawBlock (Format "html") "
" @@ -67,7 +63,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-multimedia-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Header 3 ("content-multimedia-001.xhtml#audio",[],[]) [Str "Audio"] @@ -92,7 +87,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-multimedia-002.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 3 ("content-multimedia-002.xhtml#video",[],[]) [Str "Video"] ,RawBlock (Format "html") "
" @@ -146,7 +140,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-multimedia-003.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 3 ("content-multimedia-003.xhtml#trigger",[],[]) [Code ("",[],[]) "epub:trigger"] ,RawBlock (Format "html") "
" @@ -179,7 +172,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-ns-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("content-ns-001.xhtml#ns",[],[]) [Str "Namespace",Space,Str "Support"] ,RawBlock (Format "html") "
" @@ -191,7 +183,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-mathml-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("content-mathml-001.xhtml#mathml",[],[]) [Str "MathML"] ,RawBlock (Format "html") "
" @@ -259,7 +250,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-svg-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Str "SVG"] ,RawBlock (Format "html") "
" @@ -279,7 +269,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-svg-002.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 3 ("",[],[]) [Str "Text",Space,Str "content"] ,RawBlock (Format "html") "
" @@ -365,7 +354,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-svg-003.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 3 ("",[],[]) [Str "XHTML",Space,Str "Support"] ,RawBlock (Format "html") "
" @@ -389,7 +377,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-svg-004.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 3 ("",[],[]) [Str "Image",Space,Str "Embedding"] ,RawBlock (Format "html") "
" @@ -400,11 +387,9 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-fallbacks.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,Header 1 ("",[],[]) [Str "Fallbacks"] ,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "Fallbacks."] ,Para [Span ("content-bindings-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 3 ("",[],[]) [Str "Bindings"] ,RawBlock (Format "html") "
" @@ -419,7 +404,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-fallbacks-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Str "Fallbacks"] ,RawBlock (Format "html") "
" @@ -457,7 +441,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("content-switch-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 3 ("",[],[]) [Code ("",[],[]) "epub:switch"] ,RawBlock (Format "html") "
" @@ -475,7 +458,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("svg-doc-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,Header 1 ("",[],[]) [Str "Content",Space,Str "Documents:",Space,Str "SVG"] ,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "static",Space,Str "SVG",Space,Str "content."] ,Para [Str "Note",Space,Str "that",Space,Str "if",Space,Str "no",Space,Str "SVG",Space,Str "tests",Space,Str "appear",Space,Str "after",Space,Str "this",Space,Str "document,",Space,Str "SVG",Space,Str "is",Space,Str "not",Space,Str "supported",Space,Str "in",Space,Str "the",Space,Str "spine",Space,Str "and",Space,Str "all",Space,Str "tests",Space,Str "in",Space,Str "this",Space,Str "section",Space,Str "should",Space,Str "be",Space,Str "marked",Space,Code ("",[],[]) "Not Supported",Str "."] diff --git a/tests/formatting.native b/tests/formatting.native index b3204152e..bdf86fa20 100644 --- a/tests/formatting.native +++ b/tests/formatting.native @@ -1,5 +1,4 @@ [Para [Span ("front.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 1 ("",[],[]) [Str "EPUB",Space,Str "3",Space,Str "Styling",Space,Str "Test",Space,Str "Document:",Space,Str "0101"] ,RawBlock (Format "html") "
" @@ -29,13 +28,11 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("styling-xhtml-001.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 1 ("",[],[]) [Str "EPUB",Space,Str "Style",Space,Str "Sheets"] ,Para [Str "This",Space,Str "section",Space,Str "contains",Space,Str "tests",Space,Str "for",Space,Str "styling",Space,Str "and",Space,Str "layout."] ,RawBlock (Format "html") "
" ,Para [Span ("styling-xhtml-003.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Span ("",["nature"],[]) [Str "[REQUIRED]"],Space,Span ("",["test-id"],[]) [Str "style-110"],Space,Str "Multi-Column",Space,Str "Layouts"] ,Para [Str "Tests",Space,Str "whether",Space,Str "the",Space,Code ("",[],[]) "CSS Multi-Column Layout",Space,Str "properties",Space,Str "are",Space,Str "supported."] @@ -43,7 +40,6 @@ ,Para [Str "If",Space,Str "the",Space,Str "preceding",Space,Str "text",Space,Str "is",Space,Str "rendered",Space,Str "in",Space,Str "three",Space,Str "columns,",Space,Str "the",Space,Str "test",Space,Str "passes."] ,RawBlock (Format "html") "
" ,Para [Span ("styling-xhtml-002.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Str "Lists"] ,RawBlock (Format "html") "
" @@ -342,7 +338,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("styling-xhtml-004.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Code ("",[],[]) "@media",Space,Str "Rules"] ,RawBlock (Format "html") "
" @@ -407,7 +402,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("styling-xhtml-005.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Str "The",Space,Code ("",[],[]) "text-transform",Space,Str "property"] ,RawBlock (Format "html") "
" @@ -430,7 +424,6 @@ ,RawBlock (Format "html") "
" ,RawBlock (Format "html") "
" ,Para [Span ("styling-xhtml-006.xhtml",[],[]) []] -,Para [RawInline (Format "html") "",Space,RawInline (Format "html") ""] ,RawBlock (Format "html") "
" ,Header 2 ("",[],[]) [Str "The",Space,Code ("",[],[]) "epub-ruby-position",Space,Str "property"] ,RawBlock (Format "html") "
" -- cgit v1.2.3