1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.HTML.Table
Copyright : © 2006-2020 John MacFarlane,
2020 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <albert@zeitkraut.de>
Stability : alpha
Portability : portable
HTML table parser.
-}
module Text.Pandoc.Readers.HTML.Table (pTable') where
import Control.Monad (guard)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks)
import Text.Pandoc.Definition
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Parsing
( (<|>), eof, many, many1, manyTill, option, optional, skipMany, try)
import Text.Pandoc.Readers.HTML.Parsing
import Text.Pandoc.Readers.HTML.Types (TagParser)
import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
-- | Parses a @<col>@ element, returning the column's width. Defaults to
-- @'ColWidthDefault'@ if the width is not set or cannot be determined.
pCol :: PandocMonad m => TagParser m ColWidth
pCol = try $ do
TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
let attribs = toStringAttr attribs'
skipMany pBlank
optional $ pSatisfy (matchTagClose "col")
skipMany pBlank
let width = case lookup "width" attribs of
Nothing -> case lookup "style" attribs of
Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
fromMaybe 0.0 $ safeRead (T.filter
(`notElem` (" \t\r\n%'\";" :: [Char])) xs)
_ -> 0.0
Just (T.unsnoc -> Just (xs, '%')) ->
fromMaybe 0.0 $ safeRead xs
_ -> 0.0
if width > 0.0
then return $ ColWidth $ width / 100.0
else return ColWidthDefault
pColgroup :: PandocMonad m => TagParser m [ColWidth]
pColgroup = try $ do
pSatisfy (matchTagOpen "colgroup" [])
skipMany pBlank
manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
-- | Parses a simple HTML table
pTable' :: PandocMonad m
=> TagParser m Blocks -- ^ Caption parser
-> (Text -> TagParser m [Cell]) -- ^ Table cell parser
-> TagParser m Blocks
pTable' block pCell = try $ do
TagOpen _ attribs' <- pSatisfy (matchTagOpen "table" [])
let attribs = toAttr attribs'
skipMany pBlank
caption <- option mempty $ pInTags "caption" block <* skipMany pBlank
widths' <- (mconcat <$> many1 pColgroup) <|> many pCol
let pTh = option [] $ pInTags "tr" (pCell "th")
pTr = try $ skipMany pBlank >>
pInTags "tr" (pCell "td" <|> pCell "th")
pTBody = pInTag True "tbody" $ many1 pTr
head'' <- pInTag False "thead" (option [] pTr) <|> pInTag True "thead" pTh
head' <- pInTag True "tbody"
(if null head'' then pTh else return head'')
topfoot <- option [] $ pInTag False "tfoot" $ many pTr
rowsLs <- many pTBody
bottomfoot <- option [] $ pInTag False "tfoot" $ many pTr
TagClose _ <- pSatisfy (matchTagClose "table")
let rows = concat rowsLs <> topfoot <> bottomfoot
rows''' = map (map cellContents) rows
-- fail on empty table
guard $ not $ null head' && null rows'''
let isSimple = onlySimpleTableCells $
map cellContents head' : rows'''
let cols = if null head'
then maximum (map length rows''')
else length head'
let aligns = case rows of
(cs:_) -> take cols $
concatMap cellAligns cs ++ repeat AlignDefault
_ -> replicate cols AlignDefault
let widths = if null widths'
then if isSimple
then replicate cols ColWidthDefault
else replicate cols (ColWidth (1.0 / fromIntegral cols))
else widths'
let toRow = Row nullAttr
toHeaderRow l = [toRow l | not (null l)]
return $ B.tableWith attribs
(B.simpleCaption caption)
(zip aligns widths)
(TableHead nullAttr $ toHeaderRow head')
[TableBody nullAttr 0 [] $ map toRow rows]
(TableFoot nullAttr [])
cellContents :: Cell -> [Block]
cellContents (Cell _ _ _ _ bs) = bs
cellAligns :: Cell -> [Alignment]
cellAligns (Cell _ align _ (ColSpan cs) _) = replicate cs align
|