aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Text/Pandoc/Readers/HTML/Table.hs102
-rw-r--r--test/command/1881.md6
-rw-r--r--test/html-reader.native41
3 files changed, 84 insertions, 65 deletions
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index e40d90221..5a783988f 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -1,6 +1,6 @@
-{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.HTML.Table
Copyright : © 2006-2020 John MacFarlane,
@@ -60,13 +60,21 @@ pColgroup = try $ do
skipMany pBlank
manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
+data CellType
+ = HeaderCell
+ | BodyCell
+ deriving Eq
+
pCell :: PandocMonad m
=> TagParser m Blocks
- -> Text
- -> TagParser m [Cell]
+ -> CellType
+ -> TagParser m (CellType, Cell)
pCell block celltype = try $ do
+ let celltype' = case celltype of
+ HeaderCell -> "th"
+ BodyCell -> "td"
skipMany pBlank
- TagOpen _ attribs <- lookAhead $ pSatisfy (matchTagOpen celltype [])
+ TagOpen _ attribs <- lookAhead $ pSatisfy (matchTagOpen celltype' [])
let cssAttribs = maybe [] cssAttributes $ lookup "style" attribs
let align = case lookup "align" attribs <|>
lookup "text-align" cssAttribs of
@@ -78,7 +86,7 @@ pCell block celltype = try $ do
safeRead =<< lookup "rowspan" attribs
let colspan = ColSpan . fromMaybe 1 $
safeRead =<< lookup "colspan" attribs
- res <- pInTags celltype block
+ res <- pInTags celltype' block
skipMany pBlank
let handledAttribs = ["align", "colspan", "rowspan", "text-align"]
attribs' = foldr go [] attribs
@@ -89,48 +97,55 @@ pCell block celltype = try $ do
-- drop attrib if it's already handled
_ | k `elem` handledAttribs -> acc
_ -> kv : acc
- return [B.cellWith (toAttr attribs') align rowspan colspan res]
+ return (celltype, B.cellWith (toAttr attribs') align rowspan colspan res)
-- | Create a style attribute string from a list of CSS attributes
toStyleString :: [(Text, Text)] -> Text
toStyleString = T.intercalate "; " . map (\(k, v) -> k <> ": " <> v)
-data RowType
- = HeaderCells
- | AllCells
-
--- | Parses a table row
+-- | Parses a normal table row; returns the row together with the number
+-- of header cells at the beginning of the row.
pRow :: PandocMonad m
=> TagParser m Blocks
- -> RowType
- -> TagParser m [B.Row]
-pRow block rowType = try $ do
+ -> TagParser m (RowHeadColumns, B.Row)
+pRow block = try $ do
skipMany pBlank
- case rowType of
- HeaderCells -> do
- maybeCells <- optionMaybe (pInTags "tr" (pCell block "th"))
- return $ case maybeCells of
- Nothing -> []
- Just cells -> [Row nullAttr cells]
- AllCells -> do
- cells <- pInTags "tr" (pCell block "td" <|> pCell block "th")
- return [Row nullAttr cells]
-
--- | Parses a table head
+ TagOpen _ attribs <- pSatisfy (matchTagOpen "tr" []) <* skipMany pBlank
+ cells <- many (pCell block BodyCell <|> pCell block HeaderCell)
+ TagClose _ <- pSatisfy (matchTagClose "tr")
+ return ( RowHeadColumns $ length (takeWhile ((== HeaderCell) . fst) cells)
+ , Row (toAttr attribs) $ map snd cells
+ )
+
+-- | Parses a header row, i.e., a row which containing nothing but
+-- @<th>@ elements.
+pHeaderRow :: PandocMonad m
+ => TagParser m Blocks
+ -> TagParser m B.Row
+pHeaderRow block = try $ do
+ skipMany pBlank
+ let pThs = map snd <$> many (pCell block HeaderCell)
+ let mkRow (attribs, cells) = Row (toAttr attribs) cells
+ mkRow <$> pInTagWithAttribs TagsRequired "tr" pThs
+
+-- | Parses a table head. If there is no @thead@ element, this looks for
+-- a row of @<th>@-only elements as the first line of the table.
pTableHead :: PandocMonad m
=> TagParser m Blocks
-> TagParser m TableHead
pTableHead block = try $ do
skipMany pBlank
- (attribs, rows) <- pInTagWithAttribs ClosingTagOptional "thead"
- (option [] $ pRow block AllCells)
- <|> pInTagWithAttribs TagsOmittable "thead"
- (pRow block HeaderCells)
- let cells = concatMap (\(Row _ cs) -> cs) rows
- if null cells
- then TableHead nullAttr <$>
- pInTag TagsOmittable "tbody" (pRow block HeaderCells)
- else return $ TableHead (toAttr attribs) [Row nullAttr cells]
+ let pRows = many (pRow block)
+ let pThead = pInTagWithAttribs ClosingTagOptional "thead" pRows
+ optionMaybe pThead >>= \case
+ Just (attribs, rows) ->
+ return $ TableHead (toAttr attribs) $ map snd rows
+ Nothing -> mkTableHead <$> optionMaybe (pHeaderRow block)
+ where
+ mkTableHead = TableHead nullAttr . \case
+ -- Use row as header only if it's non-empty
+ Just row@(Row _ (_:_)) -> [row]
+ _ -> []
-- | Parses a table foot
pTableFoot :: PandocMonad m
@@ -139,7 +154,7 @@ pTableFoot :: PandocMonad m
pTableFoot block = try $ do
skipMany pBlank
TagOpen _ attribs <- pSatisfy (matchTagOpen "tfoot" []) <* skipMany pBlank
- rows <- mconcat <$> many (pRow block AllCells <* skipMany pBlank)
+ rows <- many (fmap snd $ pRow block <* skipMany pBlank)
optional $ pSatisfy (matchTagClose "tfoot")
return $ TableFoot (toAttr attribs) rows
@@ -147,12 +162,17 @@ pTableFoot block = try $ do
pTableBody :: PandocMonad m
=> TagParser m Blocks
-> TagParser m TableBody
-pTableBody block = do
+pTableBody block = try $ do
skipMany pBlank
- (attribs, rows) <- pInTagWithAttribs TagsOmittable "tbody"
- (mconcat <$> many1 (pRow block AllCells))
- return $ TableBody (toAttr attribs) 0 [] rows
-
+ attribs <- option [] $ getAttribs <$> pSatisfy (matchTagOpen "tbody" [])
+ <* skipMany pBlank
+ bodyheads <- many (pHeaderRow block)
+ (rowheads, rows) <- unzip <$> many1 (pRow block <* skipMany pBlank)
+ optional $ pSatisfy (matchTagClose "tbody")
+ return $ TableBody (toAttr attribs) (foldr max 0 rowheads) bodyheads rows
+ where
+ getAttribs (TagOpen _ attribs) = attribs
+ getAttribs _ = []
-- | Parses a simple HTML table
pTable :: PandocMonad m
diff --git a/test/command/1881.md b/test/command/1881.md
index fe8cd52a0..f91c50d68 100644
--- a/test/command/1881.md
+++ b/test/command/1881.md
@@ -27,7 +27,7 @@
,(AlignCenter,ColWidthDefault)
,(AlignDefault,ColWidthDefault)]
(TableHead ("",[],[])
- [Row ("",[],[])
+ [Row ("",["header"],[])
[Cell ("",[],[]) AlignRight (RowSpan 1) (ColSpan 1)
[Plain [Str "Right"]]
,Cell ("",[],[]) AlignLeft (RowSpan 1) (ColSpan 1)
@@ -38,7 +38,7 @@
[Plain [Str "Default"]]]])
[(TableBody ("",[],[]) (RowHeadColumns 0)
[]
- [Row ("",[],[])
+ [Row ("",["odd"],[])
[Cell ("",[],[]) AlignRight (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]
,Cell ("",[],[]) AlignLeft (RowSpan 1) (ColSpan 1)
@@ -72,7 +72,7 @@
[])
[(TableBody ("",[],[]) (RowHeadColumns 0)
[]
- [Row ("",[],[])
+ [Row ("",["odd"],[])
[Cell ("",[],[]) AlignRight (RowSpan 1) (ColSpan 1)
[Plain [Str "12"]]
,Cell ("",[],[]) AlignLeft (RowSpan 1) (ColSpan 1)
diff --git a/test/html-reader.native b/test/html-reader.native
index 1b5e4f813..ea74c25e1 100644
--- a/test/html-reader.native
+++ b/test/html-reader.native
@@ -408,7 +408,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
[Plain [Str "Y"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "Z"]]]])
- [(TableBody ("",[],[]) (RowHeadColumns 0)
+ [(TableBody ("",[],[]) (RowHeadColumns 1)
[]
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -440,7 +440,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
[Plain [Str "Y"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "Z"]]]])
- [(TableBody ("",[],[]) (RowHeadColumns 0)
+ [(TableBody ("",[],[]) (RowHeadColumns 1)
[]
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -472,15 +472,14 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "Z"]]]])
[(TableBody ("",[],[]) (RowHeadColumns 0)
- []
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "1"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "2"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
- [Plain [Str "3"]]]
- ,Row ("",[],[])
+ [Plain [Str "3"]]]]
+ [Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "4"]]
,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
@@ -496,15 +495,15 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,(AlignDefault,ColWidthDefault)
,(AlignDefault,ColWidthDefault)]
(TableHead ("",[],[])
- [Row ("",[],[])
- [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
- [Plain [Str "X"]]
- ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
- [Plain [Str "Y"]]
- ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
- [Plain [Str "Z"]]]])
+ [])
[(TableBody ("",[],[]) (RowHeadColumns 0)
- []
+ [Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+ [Plain [Str "X"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+ [Plain [Str "Y"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+ [Plain [Str "Z"]]]]
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "1"]]
@@ -528,15 +527,15 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,(AlignDefault,ColWidthDefault)
,(AlignDefault,ColWidthDefault)]
(TableHead ("",[],[])
- [Row ("",[],[])
- [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
- [Plain [Str "X"]]
- ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
- [Plain [Str "Y"]]
- ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
- [Plain [Str "Z"]]]])
+ [])
[(TableBody ("",[],[]) (RowHeadColumns 0)
- []
+ [Row ("",[],[])
+ [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+ [Plain [Str "X"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+ [Plain [Str "Y"]]
+ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
+ [Plain [Str "Z"]]]]
[Row ("",[],[])
[Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1)
[Plain [Str "1"]]