diff options
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 21 | ||||
-rw-r--r-- | tests/html-reader.html | 34 | ||||
-rw-r--r-- | tests/html-reader.native | 37 |
3 files changed, 14 insertions, 78 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 82ea560a8..2fd6d88bf 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -75,7 +75,7 @@ parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest) t ~== TagOpen "body" []) tags parseBody :: TagParser [Block] -parseBody = liftM concat $ manyTill block eof +parseBody = liftM (fixPlains False . concat) $ manyTill block eof block :: TagParser [Block] block = choice @@ -107,7 +107,7 @@ pBulletList = try $ do -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul") - return [BulletList $ map fixPlains items] + return [BulletList $ map (fixPlains True) items] pOrderedList :: TagParser [Block] pOrderedList = try $ do @@ -138,7 +138,7 @@ pOrderedList = try $ do -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol") - return [OrderedList (start, style, DefaultDelim) $ map fixPlains items] + return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items] pDefinitionList :: TagParser [Block] pDefinitionList = try $ do @@ -154,16 +154,19 @@ pDefListItem = try $ do defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) skipMany nonItem let term = intercalate [LineBreak] terms - return (term, map fixPlains defs) + return (term, map (fixPlains True) defs) -fixPlains :: [Block] -> [Block] -fixPlains bs = if any isParaish bs - then map plainToPara bs - else bs +fixPlains :: Bool -> [Block] -> [Block] +fixPlains inList bs = if any isParaish bs + then map plainToPara bs + else bs where isParaish (Para _) = True isParaish (CodeBlock _ _) = True isParaish (Header _ _) = True isParaish (BlockQuote _) = True + isParaish (BulletList _) = not inList + isParaish (OrderedList _ _) = not inList + isParaish (DefinitionList _) = not inList isParaish _ = False plainToPara (Plain xs) = Para xs plainToPara x = x @@ -231,7 +234,7 @@ pCell celltype = try $ do pBlockQuote :: TagParser [Block] pBlockQuote = do contents <- pInTags "blockquote" block - return [BlockQuote contents] + return [BlockQuote $ fixPlains False contents] pPlain :: TagParser [Block] pPlain = do diff --git a/tests/html-reader.html b/tests/html-reader.html index a51ee3307..ea10a306c 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -298,40 +298,6 @@ These should not be escaped: \$ \\ \> \[ \{ <dd>Low-voiced stringed instrument.</dd> </dl> <hr /> -<h1>HTML Blocks</h1> -<p>Simple block on one line:</p> -foo<p>And nested without indentation:</p> -foobar<p>Interpreted markdown in a table:</p> -This is <em>emphasized</em>. And this is <strong>strong</strong><p>Here's a simple block:</p> -foo<p>This should be a code block, though:</p> -<pre><code><div> - foo -</div> -</code></pre> -<p>As should this:</p> -<pre><code><div>foo</div> -</code></pre> -<p>Now, nested:</p> -foo<p>This should just be an HTML comment:</p> -<p>Multiline:</p> -<p>Code block:</p> -<pre><code><!-- Comment --> -</code></pre> -<p>Just plain comment, with trailing spaces on the line:</p> -<p>Code:</p> -<pre><code><hr /> -</code></pre> -<p>Hr's:</p> -<hr /> -<hr /> -<hr /> -<hr /> -<hr /> -<hr /> -<hr /> -<hr /> -<hr /> -<hr /> <h1>Inline Markup</h1> <p>This is <em>emphasized</em>, and so <em>is this</em>.</p> <p>This is <strong>strong</strong>, and so <strong>is this</strong>.</p> diff --git a/tests/html-reader.native b/tests/html-reader.native index c7ba26568..a9070adc6 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -183,39 +183,6 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA ,([Str "Cello",LineBreak,Str "Violoncello"], [[Plain [Str "Low",Str "-",Str "voiced",Space,Str "stringed",Space,Str "instrument",Str "."]]])] ,HorizontalRule -,Header 1 [Str "HTML",Space,Str "Blocks"] -,Para [Str "Simple",Space,Str "block",Space,Str "on",Space,Str "one",Space,Str "line:"] -,Plain [Str "foo"] -,Para [Str "And",Space,Str "nested",Space,Str "without",Space,Str "indentation:"] -,Plain [Str "foobar"] -,Para [Str "Interpreted",Space,Str "markdown",Space,Str "in",Space,Str "a",Space,Str "table:"] -,Plain [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ".",Space,Str "And",Space,Str "this",Space,Str "is",Space,Strong [Str "strong"]] -,Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "simple",Space,Str "block:"] -,Plain [Str "foo"] -,Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "code",Space,Str "block,",Space,Str "though:"] -,CodeBlock ("",[],[]) "<div>\n foo\n</div>" -,Para [Str "As",Space,Str "should",Space,Str "this:"] -,CodeBlock ("",[],[]) "<div>foo</div>" -,Para [Str "Now,",Space,Str "nested:"] -,Plain [Str "foo"] -,Para [Str "This",Space,Str "should",Space,Str "just",Space,Str "be",Space,Str "an",Space,Str "HTML",Space,Str "comment:"] -,Para [Str "Multiline:"] -,Para [Str "Code",Space,Str "block:"] -,CodeBlock ("",[],[]) "<!-- Comment -->" -,Para [Str "Just",Space,Str "plain",Space,Str "comment,",Space,Str "with",Space,Str "trailing",Space,Str "spaces",Space,Str "on",Space,Str "the",Space,Str "line:"] -,Para [Str "Code:"] -,CodeBlock ("",[],[]) "<hr />" -,Para [Str "Hr",Str "'",Str "s:"] -,HorizontalRule -,HorizontalRule -,HorizontalRule -,HorizontalRule -,HorizontalRule -,HorizontalRule -,HorizontalRule -,HorizontalRule -,HorizontalRule -,HorizontalRule ,Header 1 [Str "Inline",Space,Str "Markup"] ,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] ,Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str "."] @@ -293,7 +260,7 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title preceded by a tab"),Str "."] ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title with \"quotes\" in it")] ,Para [Link [Str "URL",Space,Str "and",Space,Str "title"] ("/url/","title with single quotes")] -,Plain [Str "Email",Space,Str "link",Space,Str "(nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net)"] +,Para [Str "Email",Space,Str "link",Space,Str "(nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net)"] ,Para [Link [Str "Empty"] ("",""),Str "."] ,Header 2 [Str "Reference"] ,Para [Str "Foo",Space,Link [Str "bar"] ("/url/",""),Str "."] @@ -319,7 +286,7 @@ Pandoc (Meta {docTitle = [Str "Pandoc",Space,Str "Test",Space,Str "Suite"], docA [[Plain [Str "In",Space,Str "a",Space,Str "list?"]] ,[Plain [Link [Str "http://example",Str ".",Str "com/"] ("http://example.com/","")]] ,[Plain [Str "It",Space,Str "should",Str "."]]] -,Plain [Str "An",Space,Str "e",Str "-",Str "mail",Space,Str "address:",Space,Str "nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net"] +,Para [Str "An",Space,Str "e",Str "-",Str "mail",Space,Str "address:",Space,Str "nobody",Space,Str "[at]",Space,Str "nowhere",Str ".",Str "net"] ,BlockQuote [Para [Str "Blockquoted:",Space,Link [Str "http://example",Str ".",Str "com/"] ("http://example.com/","")]] ,Para [Str "Auto",Str "-",Str "links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) "<http://example.com/>"] |