about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  src/Text/Pandoc/Readers/HTML.hs  21
-rw-r--r--  tests/html-reader.html  34
-rw-r--r--  tests/html-reader.native  37
3 files changed, 14 insertions, 78 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 82ea560a8..2fd6d88bf 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -75,7 +75,7 @@ parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest)
t ~== TagOpen "body" []) tags
parseBody :: TagParser [Block]
-parseBody = liftM concat $ manyTill block eof
+parseBody = liftM (fixPlains False . concat) $ manyTill block eof
block :: TagParser [Block]
block = choice
@@ -107,7 +107,7 @@ pBulletList = try $ do
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
- return [BulletList $ map fixPlains items]
+ return [BulletList $ map (fixPlains True) items]
pOrderedList :: TagParser [Block]
pOrderedList = try $ do
@@ -138,7 +138,7 @@ pOrderedList = try $ do
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
- return [OrderedList (start, style, DefaultDelim) $ map fixPlains items]
+ return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items]
pDefinitionList :: TagParser [Block]
pDefinitionList = try $ do
@@ -154,16 +154,19 @@ pDefListItem = try $ do
defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
skipMany nonItem
let term = intercalate [LineBreak] terms
- return (term, map fixPlains defs)
+ return (term, map (fixPlains True) defs)
-fixPlains :: [Block] -> [Block]
-fixPlains bs = if any isParaish bs
- then map plainToPara bs
- else bs
+fixPlains :: Bool -> [Block] -> [Block]
+fixPlains inList bs = if any isParaish bs
+ then map plainToPara bs
+ else bs
where isParaish (Para _) = True
isParaish (CodeBlock _ _) = True
isParaish (Header _ _) = True
isParaish (BlockQuote _) = True
+ isParaish (BulletList _) = not inList
+ isParaish (OrderedList _ _) = not inList
+ isParaish (DefinitionList _) = not inList
isParaish _ = False
plainToPara (Plain xs) = Para xs
plainToPara x = x
@@ -231,7 +234,7 @@ pCell celltype = try $ do
pBlockQuote :: TagParser [Block]
pBlockQuote = do
contents <- pInTags "blockquote" block
- return [BlockQuote contents]
+ return [BlockQuote $ fixPlains False contents]
pPlain :: TagParser [Block]
pPlain = do
diff --git a/tests/html-reader.html b/tests/html-reader.html
index a51ee3307..ea10a306c 100644
--- a/tests/html-reader.html
+++ b/tests/html-reader.html
@@ -298,40 +298,6 @@ These should not be escaped: \$ \\ \> \[ \{
<dd>Low-voiced stringed instrument.</dd>
</dl>
<hr />
-<h1>HTML Blocks</h1>
-<p>Simple block on one line:</p>
-foo<p>And nested without indentation:</p>
-foobar<p>Interpreted markdown in a table:</p>
-This is <em>emphasized</em>. And this is <strong>strong</strong><p>Here's a simple block:</p>
-foo<p>This should be a code block, though:</p>
-<pre><code>&lt;div>
- foo
-&lt;/div>
-</code></pre>
-<p>As should this:</p>
-<pre><code>&lt;div>foo&lt;/div>
-</code></pre>
-<p>Now, nested:</p>
-foo<p>This should just be an HTML comment:</p>
-<p>Multiline:</p>
-<p>Code block:</p>
-<pre><code>&lt;!-- Comment -->
-</code></pre>
-<p>Just plain comment, with trailing spaces on the line:</p>
-<p>Code:</p>
-<pre><code>&lt;hr />
-</code></pre>
-<p>Hr's:</p>
-<hr />
-<hr />
-<hr />
-<hr />
-<hr />
-<hr />
-<hr />
-<hr />
-<hr />
-<hr />
<h1>Inline Markup</h1>
<p>This is <em>emphasized</em>, and so <em>is this</em>.</p>
<p>This is <strong>strong</strong>, and so <strong>is this</strong>.</p>
diff --git a/tests/html-reader.native b/tests/html-reader.native
index c7ba26568..a9070adc6 100644
--- a/tests/html-reader.native
+++ b/tests/html-reader.native
@@ -183,39 +183,6 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA
,([Str "Cello",LineBreak,Str "Violoncello"],
[[Plain [Str "Low",Str "-",Str "voiced",Space,Str "stringed",Space,Str "instrument",Str "."]]])]
,HorizontalRule
-,Header 1 [Str "HTML",Space,Str "Blocks"]
-,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"]
-,Plain [Str "foo"]
-,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"]
-,Plain [Str "foobar"]
-,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"]
-,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ".",Space,Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]]
-,Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "simple",Space,Str "block:"]
-,Plain [Str "foo"]
-,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"]
-,CodeBlock ("",[],[]) "<div>\n foo\n</div>"
-,Para [Str "As",Space,Str "should",Space,Str "this:"]
-,CodeBlock ("",[],[]) "<div>foo</div>"
-,Para [Str "Now,",Space,Str "nested:"]
-,Plain [Str "foo"]
-,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"]
-,Para [Str "Multiline:"]
-,Para [Str "Code",Space,Str "block:"]
-,CodeBlock ("",[],[]) "<!-- Comment -->"
-,Para [Str "Just",Space,Str "plain",Space,Str "comment,",Space,Str "with",Space,Str "trailing",Space,Str "spaces",Space,Str "on",Space,Str "the",Space,Str "line:"]
-,Para [Str "Code:"]
-,CodeBlock ("",[],[]) "<hr />"
-,Para [Str "Hr",Str "'",Str "s:"]
-,HorizontalRule
-,HorizontalRule
-,HorizontalRule
-,HorizontalRule
-,HorizontalRule
-,HorizontalRule
-,HorizontalRule
-,HorizontalRule
-,HorizontalRule
-,HorizontalRule
,Header 1 [Str "Inline",Space,Str "Markup"]
,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."]
,Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str "."]
@@ -293,7 +260,7 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA
,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title preceded by a tab"),Str "."]
,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title with \"quotes\" in it")]
,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title with single quotes")]
-,Plain [Str "Email",Space,Str "link",Space,Str "(nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net)"]
+,Para [Str "Email",Space,Str "link",Space,Str "(nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net)"]
,Para [Link [Str "Empty"] ("",""),Str "."]
,Header 2 [Str "Reference"]
,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."]
@@ -319,7 +286,7 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA
[[Plain [Str "In",Space,Str "a",Space,Str "list?"]]
,[Plain [Link [Str "http://example",Str ".",Str "com/"] ("http://example.com/","")]]
,[Plain [Str "It",Space,Str "should",Str "."]]]
-,Plain [Str "An",Space,Str "e",Str "-",Str "mail",Space,Str "address:",Space,Str "nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net"]
+,Para [Str "An",Space,Str "e",Str "-",Str "mail",Space,Str "address:",Space,Str "nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net"]
,BlockQuote
[Para [Str "Blockquoted:",Space,Link [Str "http://example",Str ".",Str "com/"] ("http://example.com/","")]]
,Para [Str "Auto",Str "-",Str "links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) "<http://example.com/>"]