aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/HTML/Table.hs
blob: e6d0a9097c27d69f6022bf319d4992394543574b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
{- |
   Module      : Text.Pandoc.Readers.HTML.Table
   Copyright   : © 2006-2020 John MacFarlane,
                   2020 Albert Krewinkel
   License     : GNU GPL, version 2 or above

   Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
   Stability   : alpha
   Portability : portable

HTML table parser.
-}
module Text.Pandoc.Readers.HTML.Table (pTable') where

import Control.Monad (guard)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks)
import Text.Pandoc.Definition
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Parsing
  ( (<|>), eof, many, many1, manyTill, option, optional, skipMany, try)
import Text.Pandoc.Readers.HTML.Parsing
import Text.Pandoc.Readers.HTML.Types (TagParser)
import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B

-- | Parses a @<col>@ element, returning the column's width. Defaults to
-- @'ColWidthDefault'@ if the width is not set or cannot be determined.
pCol :: PandocMonad m => TagParser m ColWidth
pCol = try $ do
  TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
  let attribs = toStringAttr attribs'
  skipMany pBlank
  optional $ pSatisfy (matchTagClose "col")
  skipMany pBlank
  let width = case lookup "width" attribs of
                Nothing -> case lookup "style" attribs of
                  Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
                    fromMaybe 0.0 $ safeRead (T.filter
                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
                  _ -> 0.0
                Just (T.unsnoc -> Just (xs, '%')) ->
                  fromMaybe 0.0 $ safeRead xs
                _ -> 0.0
  if width > 0.0
    then return $ ColWidth $ width / 100.0
    else return ColWidthDefault

pColgroup :: PandocMonad m => TagParser m [ColWidth]
pColgroup = try $ do
  pSatisfy (matchTagOpen "colgroup" [])
  skipMany pBlank
  manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank

-- | Parses a simple HTML table
pTable' :: PandocMonad m
        => TagParser m Blocks           -- ^ Caption parser
        -> (Text -> TagParser m [Cell]) -- ^ Table cell parser
        -> TagParser m Blocks
pTable' block pCell = try $ do
  TagOpen _ attribs' <- pSatisfy (matchTagOpen "table" [])
  let attribs = toAttr attribs'
  skipMany pBlank
  caption <- option mempty $ pInTags "caption" block <* skipMany pBlank
  widths' <- (mconcat <$> many1 pColgroup) <|> many pCol
  let pTh = option [] $ pInTags "tr" (pCell "th")
      pTr = try $ skipMany pBlank
               *> pInTags "tr" (pCell "td" <|> pCell "th")
      pTBody = pInTag TagsOmittable "tbody" $ many1 pTr
  head'' <- pInTag ClosingTagOptional "thead" (option [] pTr)
        <|> pInTag TagsOmittable "thead" pTh
  head'  <- pInTag TagsOmittable "tbody"
               (if null head'' then pTh else return head'')
  topfoot <- option [] $ pInTag TagsRequired "tfoot" $ many pTr
  rowsLs <- many pTBody
  bottomfoot <- option [] $ pInTag ClosingTagOptional "tfoot" $ many pTr
  TagClose _ <- pSatisfy (matchTagClose "table")
  let rows = concat rowsLs <> topfoot <> bottomfoot
      rows''' = map (map cellContents) rows
  -- fail on empty table
  guard $ not $ null head' && null rows'''
  let isSimple = onlySimpleTableCells $
                 map cellContents head' : rows'''
  let cols = if null head'
                then maximum (map length rows''')
                else length head'
  let aligns = case rows of
                 (cs:_) -> take cols $
                           concatMap cellAligns cs ++ repeat AlignDefault
                 _      -> replicate cols AlignDefault
  let widths = if null widths'
                  then if isSimple
                       then replicate cols ColWidthDefault
                       else replicate cols (ColWidth (1.0 / fromIntegral cols))
                  else widths'
  let toRow = Row nullAttr
      toHeaderRow l = [toRow l | not (null l)]
  return $ B.tableWith attribs
                   (B.simpleCaption caption)
                   (zip aligns widths)
                   (TableHead nullAttr $ toHeaderRow head')
                   [TableBody nullAttr 0 [] $ map toRow rows]
                   (TableFoot nullAttr [])

cellContents :: Cell -> [Block]
cellContents (Cell _ _ _ _ bs) = bs

cellAligns :: Cell -> [Alignment]
cellAligns (Cell _ align _ (ColSpan cs) _) = replicate cs align