diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-01-25 10:46:47 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-01-25 10:46:47 -0800 |
commit | 33d1c8cc0154266a2a54e9050408422c7884acdf (patch) | |
tree | e27c58d1c23c846a7fa7fc6e2cd48b03984275cd | |
parent | d90dc6b8b569805d2ffb36b6ad56da064343f13c (diff) | |
parent | b40d33b174d11c5f5b9b3011a3a3b6da42d5be20 (diff) | |
download | pandoc-33d1c8cc0154266a2a54e9050408422c7884acdf.tar.gz |
Merge pull request #1885 from mb21/html-reader-tables
fixes HTML Reader: tables
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 33 | ||||
-rw-r--r-- | tests/html-reader.html | 247 | ||||
-rw-r--r-- | tests/html-reader.native | 129 |
3 files changed, 397 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 2a23f2a62..02ff07e73 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -374,12 +374,20 @@ pTable = try $ do caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank -- TODO actually read these and take width information from them widths' <- pColgroup <|> many pCol - head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") - skipMany pBlank - rows <- pOptInTag "tbody" - $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") - skipMany pBlank + let pTh = option [] $ pInTags "tr" (pCell "th") + pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th") + pTBody = do pOptInTag "tbody" $ many1 pTr + head'' <- pOptInTag "thead" pTh + head' <- pOptInTag "tbody" $ do + if null head'' + then pTh + else return head'' + rowsLs <- many pTBody + rows' <- pOptInTag "tfoot" $ many pTr TagClose _ <- pSatisfy (~== TagClose "table") + let rows = (concat rowsLs) ++ rows' + -- fail on empty table + guard $ not $ null head' && null rows let isSinglePlain x = case B.toList x of [Plain _] -> True _ -> False @@ -624,14 +632,17 @@ pInTags tagtype parser = try $ do pSatisfy (~== TagOpen tagtype []) mconcat <$> manyTill parser (pCloses tagtype <|> eof) -pOptInTag :: String -> TagParser a - -> TagParser a -pOptInTag tagtype parser = try $ do - open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True) +-- parses p, preceeded by an optional opening tag +-- and followed by an optional closing tags +pOptInTag :: String -> TagParser a -> TagParser a +pOptInTag tagtype p = try $ do + skipMany pBlank + optional $ pSatisfy (~== TagOpen tagtype []) + skipMany pBlank + x <- p skipMany pBlank - x <- parser + optional $ pSatisfy (~== TagClose tagtype) skipMany pBlank - when open $ pCloses tagtype return x pCloses :: String -> TagParser () diff --git a/tests/html-reader.html b/tests/html-reader.html index e9ba2a68b..749925b2a 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -433,6 +433,7 @@ An e-mail address: nobody [at] nowhere.net<blockquote> <p>text<em> Leading spaces</em></p> <p><em>Trailing spaces </em>text</p> <h1>Tables</h1> +<h2>Tables with Headers</h2> <table> <tr> <th>X</th> @@ -450,5 +451,251 @@ An e-mail address: nobody [at] nowhere.net<blockquote> <td>6</td> </tr> </table> +<hr /> +<table> + <thead> + <tr> + <th>X</th> + <th>Y</th> + <th>Z</th> + </tr> + </thead> + <tbody> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> + </tbody> +</table> +<hr /> +<table> + <thead> + <tr> + <th>X</th> + <th>Y</th> + <th>Z</th> + </tr> + </thead> + <tbody> + <tr> + <th>1</th> + <td>2</td> + <td>3</td> + </tr> + <tr> + <th>4</th> + <td>5</td> + <td>6</td> + </tr> + </tbody> +</table> +<hr /> +<table> + <thead> + <tr> + <th>X</th> + <th>Y</th> + <th>Z</th> + </tr> + </thead> + <tbody> + <tr> + <th>1</th> + <td>2</td> + <td>3</td> + </tr> + </tbody> + <tfoot> + <tr> + <th>4</th> + <td>5</td> + <td>6</td> + </tr> + </tfoot> +</table> +<hr /> +<table> + <tr> + <th>X</th> + <th>Y</th> + <th>Z</th> + </tr> + <tr> + <th>1</th> + <th>2</th> + <th>3</th> + </tr> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> +</table> +<hr /> +<table> + <tbody> + <tr> + <th>X</th> + <th>Y</th> + <th>Z</th> + </tr> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> + </tbody> +</table> +<hr /> +<table> + <thead> + </thead> + <tbody> + <tr> + <th>X</th> + <th>Y</th> + <th>Z</th> + </tr> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> + </tbody> +</table> +<hr /> +<table> + <thead> + <tr> + <th>X</th> + <th>Y</th> + <th>Z</th> + </tr> + </thead> + <tbody> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + </tbody> + <tbody> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> + </tbody> +</table> +<hr /> +<table> + <thead> + <tr> + <th>X</th> + <th>Y</th> + <th>Z</th> + </tr> + </thead> + <tbody> + <tr> + <td>1</td> + <td><p>2</p></td> + <td>3</td> + </tr> + </tbody> + <tbody> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> + </tbody> +</table> +<h2>Tables without Headers</h2> +<table> + <tbody> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> + </tbody> +</table> +<hr /> +<table> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> +</table> +<hr /> +<table> + <thead> + </thead> + <tbody> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> + </tbody> +</table> +<hr /> +<table> + <tbody> + <tr> + <td>1</td> + <td>2</td> + <td>3</td> + </tr> + </tbody> + <tfoot> + <tr> + <td>4</td> + <td>5</td> + <td>6</td> + </tr> + </tfoot> +</table> +<h2>Empty Tables</h2> +<p>This section should be empty.</p> +<table> + <tbody> + </tbody> +</table> +<table> +</table> </body> </html> diff --git a/tests/html-reader.native b/tests/html-reader.native index aef6e40fc..b2d660fda 100644 --- a/tests/html-reader.native +++ b/tests/html-reader.native @@ -311,6 +311,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]] ,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"] ,Header 1 ("",[],[]) [Str "Tables"] +,Header 2 ("",[],[]) [Str "Tables",Space,Str "with",Space,Str "Headers"] ,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [[Plain [Str "X"]] ,[Plain [Str "Y"]] @@ -320,4 +321,130 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl ,[Plain [Str "3"]]] ,[[Plain [Str "4"]] ,[Plain [Str "5"]] - ,[Plain [Str "6"]]]]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.3333333333333333,0.3333333333333333,0.3333333333333333] + [[Plain [Str "X"]] + ,[Plain [Str "Y"]] + ,[Plain [Str "Z"]]] + [[[Plain [Str "1"]] + ,[Para [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,Header 2 ("",[],[]) [Str "Tables",Space,Str "without",Space,Str "Headers"] +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,HorizontalRule +,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [] + [[[Plain [Str "1"]] + ,[Plain [Str "2"]] + ,[Plain [Str "3"]]] + ,[[Plain [Str "4"]] + ,[Plain [Str "5"]] + ,[Plain [Str "6"]]]] +,Header 2 ("",[],[]) [Str "Empty",Space,Str "Tables"] +,Para [Str "This",Space,Str "section",Space,Str "should",Space,Str "be",Space,Str "empty."]] |