aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-01-25 10:46:47 -0800
committer John MacFarlane <jgm@berkeley.edu> 2015-01-25 10:46:47 -0800
commit 33d1c8cc0154266a2a54e9050408422c7884acdf (patch)
tree e27c58d1c23c846a7fa7fc6e2cd48b03984275cd
parent d90dc6b8b569805d2ffb36b6ad56da064343f13c (diff)
parent b40d33b174d11c5f5b9b3011a3a3b6da42d5be20 (diff)
download pandoc-33d1c8cc0154266a2a54e9050408422c7884acdf.tar.gz
Merge pull request #1885 from mb21/html-reader-tables
fixes HTML Reader: tables
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 33
-rw-r--r-- tests/html-reader.html 247
-rw-r--r-- tests/html-reader.native 129
3 files changed, 397 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 2a23f2a62..02ff07e73 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -374,12 +374,20 @@ pTable = try $ do
caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
-- TODO actually read these and take width information from them
widths' <- pColgroup <|> many pCol
- head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
- skipMany pBlank
- rows <- pOptInTag "tbody"
- $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
- skipMany pBlank
+ let pTh = option [] $ pInTags "tr" (pCell "th")
+ pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th")
+ pTBody = do pOptInTag "tbody" $ many1 pTr
+ head'' <- pOptInTag "thead" pTh
+ head' <- pOptInTag "tbody" $ do
+ if null head''
+ then pTh
+ else return head''
+ rowsLs <- many pTBody
+ rows' <- pOptInTag "tfoot" $ many pTr
TagClose _ <- pSatisfy (~== TagClose "table")
+ let rows = (concat rowsLs) ++ rows'
+ -- fail on empty table
+ guard $ not $ null head' && null rows
let isSinglePlain x = case B.toList x of
[Plain _] -> True
_ -> False
@@ -624,14 +632,17 @@ pInTags tagtype parser = try $ do
pSatisfy (~== TagOpen tagtype [])
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
-pOptInTag :: String -> TagParser a
- -> TagParser a
-pOptInTag tagtype parser = try $ do
- open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True)
+-- parses p, preceded by an optional opening tag
+-- and followed by an optional closing tag
+pOptInTag :: String -> TagParser a -> TagParser a
+pOptInTag tagtype p = try $ do
+ skipMany pBlank
+ optional $ pSatisfy (~== TagOpen tagtype [])
+ skipMany pBlank
+ x <- p
skipMany pBlank
- x <- parser
+ optional $ pSatisfy (~== TagClose tagtype)
skipMany pBlank
- when open $ pCloses tagtype
return x
pCloses :: String -> TagParser ()
diff --git a/tests/html-reader.html b/tests/html-reader.html
index e9ba2a68b..749925b2a 100644
--- a/tests/html-reader.html
+++ b/tests/html-reader.html
@@ -433,6 +433,7 @@ An e-mail address: nobody [at] nowhere.net<blockquote>
<p>text<em> Leading spaces</em></p>
<p><em>Trailing spaces </em>text</p>
<h1>Tables</h1>
+<h2>Tables with Headers</h2>
<table>
<tr>
<th>X</th>
@@ -450,5 +451,251 @@ An e-mail address: nobody [at] nowhere.net<blockquote>
<td>6</td>
</tr>
</table>
+<hr />
+<table>
+ <thead>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tbody>
+</table>
+<hr />
+<table>
+ <thead>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <th>1</th>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <th>4</th>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tbody>
+</table>
+<hr />
+<table>
+ <thead>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <th>1</th>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ </tbody>
+ <tfoot>
+ <tr>
+ <th>4</th>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tfoot>
+</table>
+<hr />
+<table>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ <tr>
+ <th>1</th>
+ <th>2</th>
+ <th>3</th>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+</table>
+<hr />
+<table>
+ <tbody>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tbody>
+</table>
+<hr />
+<table>
+ <thead>
+ </thead>
+ <tbody>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tbody>
+</table>
+<hr />
+<table>
+ <thead>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tbody>
+</table>
+<hr />
+<table>
+ <thead>
+ <tr>
+ <th>X</th>
+ <th>Y</th>
+ <th>Z</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>1</td>
+ <td><p>2</p></td>
+ <td>3</td>
+ </tr>
+ </tbody>
+ <tbody>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tbody>
+</table>
+<h2>Tables without Headers</h2>
+<table>
+ <tbody>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tbody>
+</table>
+<hr />
+<table>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+</table>
+<hr />
+<table>
+ <thead>
+ </thead>
+ <tbody>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tbody>
+</table>
+<hr />
+<table>
+ <tbody>
+ <tr>
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ </tr>
+ </tbody>
+ <tfoot>
+ <tr>
+ <td>4</td>
+ <td>5</td>
+ <td>6</td>
+ </tr>
+ </tfoot>
+</table>
+<h2>Empty Tables</h2>
+<p>This section should be empty.</p>
+<table>
+ <tbody>
+ </tbody>
+</table>
+<table>
+</table>
</body>
</html>
diff --git a/tests/html-reader.native b/tests/html-reader.native
index aef6e40fc..b2d660fda 100644
--- a/tests/html-reader.native
+++ b/tests/html-reader.native
@@ -311,6 +311,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]]
,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"]
,Header 1 ("",[],[]) [Str "Tables"]
+,Header 2 ("",[],[]) [Str "Tables",Space,Str "with",Space,Str "Headers"]
,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "X"]]
,[Plain [Str "Y"]]
@@ -320,4 +321,130 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,[Plain [Str "3"]]]
,[[Plain [Str "4"]]
,[Plain [Str "5"]]
- ,[Plain [Str "6"]]]]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.3333333333333333,0.3333333333333333,0.3333333333333333]
+ [[Plain [Str "X"]]
+ ,[Plain [Str "Y"]]
+ ,[Plain [Str "Z"]]]
+ [[[Plain [Str "1"]]
+ ,[Para [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,Header 2 ("",[],[]) [Str "Tables",Space,Str "without",Space,Str "Headers"]
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ []
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ []
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ []
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,HorizontalRule
+,Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ []
+ [[[Plain [Str "1"]]
+ ,[Plain [Str "2"]]
+ ,[Plain [Str "3"]]]
+ ,[[Plain [Str "4"]]
+ ,[Plain [Str "5"]]
+ ,[Plain [Str "6"]]]]
+,Header 2 ("",[],[]) [Str "Empty",Space,Str "Tables"]
+,Para [Str "This",Space,Str "section",Space,Str "should",Space,Str "be",Space,Str "empty."]]