aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/Tables.hs
blob: 757ac41c6925dad6c962e28deec37623cd07d37d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TupleSections #-}

{- |
   Module      : Text.Pandoc.Writers.Tables
   Copyright   : Copyright 2020 Christian Despres
   License     : GNU GPL, version 2 or above

   Maintainer  : Christian Despres <christian.j.j.despres@gmail.com>
   Stability   : alpha
   Portability : portable

Definitions and helper functions for an intermediate 'AnnTable' type,
which annotates the existing 'Table' types with additional inferred
information. For use in writers that need to know the details of
columns that cells span, row numbers, and the cells that are in the
row head.
-}

module Text.Pandoc.Writers.Tables
  ( toAnnTable
  , fromAnnTable
  , AnnTable(..)
  , AnnTableHead(..)
  , AnnTableBody(..)
  , AnnTableFoot(..)
  , AnnHeaderRow(..)
  , AnnBodyRow(..)
  , RowNumber(..)
  , AnnRowHead
  , AnnRowBody
  , AnnCell(..)
  , ColNumber(..)
  )
where

import           Control.Monad.RWS.Strict
                                         hiding ( (<>) )
import           Data.Generics                  ( Data
                                                , Typeable
                                                )
import           Data.List.NonEmpty             ( NonEmpty(..) )
import           GHC.Generics                   ( Generic )
import           Text.Pandoc.Builder

-- | An annotated table type, corresponding to the 'Table' constructor
-- and the HTML @\<table\>@ element. It records the data of the
-- columns that cells span, the cells in the row head, the row numbers
-- of rows, and the column numbers of cells, in addition to the data
-- in a 'Table'. The type itself does not enforce any guarantees about
-- the consistency of this data. Use 'toAnnTable' to produce an
-- 'AnnTable' from a pandoc 'Table'.
data AnnTable = AnnTable Attr Caption [ColSpec] AnnTableHead [AnnTableBody] AnnTableFoot
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic)

-- | An annotated table head, corresponding to 'TableHead' and the
-- HTML @\<thead\>@ element.
data AnnTableHead = AnnTableHead Attr [AnnHeaderRow]
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic)

-- | An annotated table body, with an intermediate head and body,
-- corresponding to 'TableBody' and the HTML @\<tbody\>@ element.
data AnnTableBody = AnnTableBody Attr RowHeadColumns [AnnHeaderRow] [AnnBodyRow]
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic)

-- | An annotated table foot, corresponding to 'TableFoot' and the
-- HTML @\<tfoot\>@ element.
data AnnTableFoot = AnnTableFoot Attr [AnnHeaderRow]
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic)

-- | An annotated header row, corresponding to 'Row' and the HTML
-- @\<tr\>@ element, and also recording the row number of the row. All
-- the cells in an 'AnnHeaderRow' are header (@\<th\>@) cells.
data AnnHeaderRow = AnnHeaderRow Attr RowNumber [AnnCell]
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic)

-- | An annotated body row, corresponding to 'Row' and the HTML
-- @\<tr\>@ element, and also recording its row number and separating
-- the row head cells from the row body cells.
data AnnBodyRow = AnnBodyRow Attr RowNumber AnnRowHead AnnRowBody
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic)

-- | The row number of a row. Note that rows are numbered continuously
-- from zero from the start of the table, so the first row in a table
-- body, for instance, may have a large 'RowNumber'.
newtype RowNumber = RowNumber Int
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic, Num, Enum)

-- | The head of a body row; the portion of the row lying in the stub
-- of the 'TableBody'. Its cells correspond to HTML @\<th\>@ cells.
type AnnRowHead = [AnnCell]

-- | The body of a body row; the portion of the row lying after the
-- stub of the 'TableBody'. Its cells correspond to HTML @\<td\>@
-- cells.
type AnnRowBody = [AnnCell]

-- | An annotated table cell, wrapping a 'Cell' with its 'ColNumber'
-- and the 'ColSpec' data for the columns that the cell spans.
data AnnCell = AnnCell (NonEmpty ColSpec) ColNumber Cell
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic)

-- | The column number of a cell, meaning the column number of the
-- first column that the cell spans, if the table were laid on a
-- grid. Columns are numbered starting from zero.
newtype ColNumber = ColNumber Int
  deriving (Eq, Ord, Read, Show, Typeable, Data, Generic, Num, Enum)

-- | Convert a 'Table' to an 'AnnTable'. This function also performs
-- the same normalization that the 'table' builder does (fixing
-- overlapping cells, cells that protrude out of their table section,
-- and so on). If the input table happens to satisfy the conditions
-- that 'table' guarantees, then the resulting 'AnnTable' will be
-- identical, save for the addition of the inferred table information.
toAnnTable
  :: Attr
  -> Caption
  -> [ColSpec]
  -> TableHead
  -> [TableBody]
  -> TableFoot
  -> AnnTable
toAnnTable attr cap cs th tbs tf = AnnTable attr cap cs th' tbs' tf'
 where
  (th', tbs', tf') = fst $ evalRWS (annotateTable th tbs tf) (cs, length cs) 0

-- | Internal monad for annotating a table, passing in the 'ColSpec'
-- data for the table, the grid width, and the current 'RowNumber' to
-- be referenced or updated.
type AnnM a = RWS ([ColSpec], Int) () RowNumber a

incRowNumber :: AnnM RowNumber
incRowNumber = do
  rn <- get
  put $ rn + 1
  return rn

annotateTable
  :: TableHead
  -> [TableBody]
  -> TableFoot
  -> AnnM (AnnTableHead, [AnnTableBody], AnnTableFoot)
annotateTable th tbs tf = do
  th'  <- annotateTableHead th
  tbs' <- traverse annotateTableBody tbs
  tf'  <- annotateTableFoot tf
  return (th', tbs', tf')

annotateTableHead :: TableHead -> AnnM AnnTableHead
annotateTableHead (TableHead attr rows) =
  AnnTableHead attr <$> annotateHeaderSection rows

annotateTableBody :: TableBody -> AnnM AnnTableBody
annotateTableBody (TableBody attr rhc th tb) = do
  twidth <- asks snd
  let rhc' = max 0 $ min (RowHeadColumns twidth) rhc
  th' <- annotateHeaderSection th
  tb' <- annotateBodySection rhc' tb
  return $ AnnTableBody attr rhc' th' tb'

annotateTableFoot :: TableFoot -> AnnM AnnTableFoot
annotateTableFoot (TableFoot attr rows) =
  AnnTableFoot attr <$> annotateHeaderSection rows

annotateHeaderSection :: [Row] -> AnnM [AnnHeaderRow]
annotateHeaderSection rows = do
  colspec <- asks fst
  let hangcolspec = (1, ) <$> colspec
  annotateHeaderSection' hangcolspec id $ clipRows rows
 where
  annotateHeaderSection' oldHang acc (Row attr cells : rs) = do
    let (_, newHang, cells', _) =
          annotateRowSection 0 oldHang $ cells <> repeat emptyCell
    n <- incRowNumber
    let annRow = AnnHeaderRow attr n cells'
    annotateHeaderSection' newHang (acc . (annRow :)) rs
  annotateHeaderSection' _ acc [] = return $ acc []

annotateBodySection :: RowHeadColumns -> [Row] -> AnnM [AnnBodyRow]
annotateBodySection (RowHeadColumns rhc) rows = do
  colspec <- asks fst
  let colspec'             = (1, ) <$> colspec
  let (stubspec, bodyspec) = splitAt rhc colspec'
  normalizeBodySection' stubspec bodyspec id $ clipRows rows
 where
  normalizeBodySection' headHang bodyHang acc (Row attr cells : rs) = do
    let (colnum, headHang', rowStub, cells') =
          annotateRowSection 0 headHang $ cells <> repeat emptyCell
    let (_, bodyHang', rowBody, _) = annotateRowSection colnum bodyHang cells'
    n <- incRowNumber
    let annRow = AnnBodyRow attr n rowStub rowBody
    normalizeBodySection' headHang' bodyHang' (acc . (annRow :)) rs
  normalizeBodySection' _ _ acc [] = return $ acc []

-- | Lay out a section of a 'Table' row on a grid row, annotating the
-- cells with the 'ColSpec' data for the columns that they
-- span. Performs the same normalization as 'placeRowSection'.
annotateRowSection
  :: ColNumber -- ^ The current column number
  -> [(RowSpan, ColSpec)] -- ^ The overhang of the previous grid row,
                          -- with column data
  -> [Cell] -- ^ The cells to annotate
  -> (ColNumber, [(RowSpan, ColSpec)], [AnnCell], [Cell]) -- ^ The new
                                                          -- column
                                                          -- number,
                                                          -- overhang,
                                                          -- annotated
                                                          -- cells,
                                                          -- and
                                                          -- remaining
                                                          -- cells
annotateRowSection !colnum oldHang cells
  -- If the grid has overhang at our position, try to re-lay in
  -- the next position.
  | (o, colspec) : os <- oldHang
  , o > 1
  = let (colnum', newHang, newCell, cells') =
            annotateRowSection (colnum + 1) os cells
    in  (colnum', (o - 1, colspec) : newHang, newCell, cells')
  -- Otherwise if there is any available width, place the cell and
  -- continue.
  | c : cells' <- cells
  , (h, w) <- getDim c
  , w' <- max 1 w
  , (w'', cellHang@(chStart : chRest), oldHang') <- splitCellHang h w' oldHang
  = let c'      = setW w'' c
        annCell = AnnCell (snd <$> chStart :| chRest) colnum c'
        colnum' = colnum + ColNumber (getColSpan w'')
        (colnum'', newHang, newCells, remainCells) =
            annotateRowSection colnum' oldHang' cells'
    in  (colnum'', cellHang <> newHang, annCell : newCells, remainCells)
  -- Otherwise there is no room in the section
  | otherwise
  = (colnum, [], [], cells)
 where
  getColSpan (ColSpan x) = x
  getDim (Cell _ _ h w _) = (h, w)
  setW w (Cell a b h _ c) = Cell a b h w c

-- | In @'splitCellHang' rs cs coldata@, with @rs@ the height of a
-- cell that lies at the beginning of @coldata@, and @cs@ its width
-- (which is not assumed to fit in the available space), return the
-- actual width of the cell (what will fit in the available space),
-- the data for the columns that the cell spans (including updating
-- the overhang to equal @rs@), and the remaining column data.
splitCellHang
  :: RowSpan
  -> ColSpan
  -> [(RowSpan, ColSpec)]
  -> (ColSpan, [(RowSpan, ColSpec)], [(RowSpan, ColSpec)])
splitCellHang h n = go 0
 where
  go acc ((1, spec) : ls) | acc < n =
    let (acc', hang, ls') = go (acc + 1) ls in (acc', (h, spec) : hang, ls')
  go acc l = (acc, [], l)

-- | Convert an 'AnnTable' to a 'Table'. This is the inverse of
-- 'toAnnTable' on well-formed tables (i.e. tables satisfying the
-- guarantees of 'table').
fromAnnTable
  :: AnnTable -> (Attr, Caption, [ColSpec], TableHead, [TableBody], TableFoot)
fromAnnTable (AnnTable attr cap cs th tbs tf) = (attr, cap, cs, th', tbs', tf')
 where
  th'  = fromAnnTableHead th
  tbs' = map fromAnnTableBody tbs
  tf'  = fromAnnTableFoot tf

fromAnnTableHead :: AnnTableHead -> TableHead
fromAnnTableHead (AnnTableHead attr rows) =
  TableHead attr $ fromAnnHeaderRow <$> rows

fromAnnTableBody :: AnnTableBody -> TableBody
fromAnnTableBody (AnnTableBody attr rhc th tb) =
  TableBody attr rhc (fromAnnHeaderRow <$> th) (fromAnnBodyRow <$> tb)

fromAnnTableFoot :: AnnTableFoot -> TableFoot
fromAnnTableFoot (AnnTableFoot attr rows) =
  TableFoot attr $ fromAnnHeaderRow <$> rows

fromAnnHeaderRow :: AnnHeaderRow -> Row
fromAnnHeaderRow (AnnHeaderRow attr _ cells) = Row attr $ fromAnnCell <$> cells

fromAnnBodyRow :: AnnBodyRow -> Row
fromAnnBodyRow (AnnBodyRow attr _ rh rb) =
  Row attr ((fromAnnCell <$> rh) <> (fromAnnCell <$> rb))

fromAnnCell :: AnnCell -> Cell
fromAnnCell (AnnCell _ _ c) = c