diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Writers/Tables.hs | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Writers/Tables.hs b/src/Text/Pandoc/Writers/Tables.hs new file mode 100644 index 000000000..757ac41c6 --- /dev/null +++ b/src/Text/Pandoc/Writers/Tables.hs @@ -0,0 +1,291 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE TupleSections #-} + +{- | + Module : Text.Pandoc.Writers.Tables + Copyright : Copyright 2020 Christian Despres + License : GNU GPL, version 2 or above + + Maintainer : Christian Despres <christian.j.j.despres@gmail.com> + Stability : alpha + Portability : portable + +Definitions and helper functions for an intermediate 'AnnTable' type, +which annotates the existing 'Table' types with additional inferred +information. For use in writers that need to know the details of +columns that cells span, row numbers, and the cells that are in the +row head. +-} + +module Text.Pandoc.Writers.Tables + ( toAnnTable + , fromAnnTable + , AnnTable(..) + , AnnTableHead(..) + , AnnTableBody(..) + , AnnTableFoot(..) + , AnnHeaderRow(..) + , AnnBodyRow(..) + , RowNumber(..) + , AnnRowHead + , AnnRowBody + , AnnCell(..) + , ColNumber(..) + ) +where + +import Control.Monad.RWS.Strict + hiding ( (<>) ) +import Data.Generics ( Data + , Typeable + ) +import Data.List.NonEmpty ( NonEmpty(..) ) +import GHC.Generics ( Generic ) +import Text.Pandoc.Builder + +-- | An annotated table type, corresponding to the 'Table' constructor +-- and the HTML @\<table\>@ element. It records the data of the +-- columns that cells span, the cells in the row head, the row numbers +-- of rows, and the column numbers of cells, in addition to the data +-- in a 'Table'. The type itself does not enforce any guarantees about +-- the consistency of this data. Use 'toAnnTable' to produce an +-- 'AnnTable' from a pandoc 'Table'. +data AnnTable = AnnTable Attr Caption [ColSpec] AnnTableHead [AnnTableBody] AnnTableFoot + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated table head, corresponding to 'TableHead' and the +-- HTML @\<thead\>@ element. +data AnnTableHead = AnnTableHead Attr [AnnHeaderRow] + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated table body, with an intermediate head and body, +-- corresponding to 'TableBody' and the HTML @\<tbody\>@ element. +data AnnTableBody = AnnTableBody Attr RowHeadColumns [AnnHeaderRow] [AnnBodyRow] + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated table foot, corresponding to 'TableFoot' and the +-- HTML @\<tfoot\>@ element. +data AnnTableFoot = AnnTableFoot Attr [AnnHeaderRow] + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated header row, corresponding to 'Row' and the HTML +-- @\<tr\>@ element, and also recording the row number of the row. All +-- the cells in an 'AnnHeaderRow' are header (@\<th\>@) cells. +data AnnHeaderRow = AnnHeaderRow Attr RowNumber [AnnCell] + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated body row, corresponding to 'Row' and the HTML +-- @\<tr\>@ element, and also recording its row number and separating +-- the row head cells from the row body cells. +data AnnBodyRow = AnnBodyRow Attr RowNumber AnnRowHead AnnRowBody + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | The row number of a row. Note that rows are numbered continuously +-- from zero from the start of the table, so the first row in a table +-- body, for instance, may have a large 'RowNumber'. +newtype RowNumber = RowNumber Int + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic, Num, Enum) + +-- | The head of a body row; the portion of the row lying in the stub +-- of the 'TableBody'. Its cells correspond to HTML @\<th\>@ cells. +type AnnRowHead = [AnnCell] + +-- | The body of a body row; the portion of the row lying after the +-- stub of the 'TableBody'. Its cells correspond to HTML @\<td\>@ +-- cells. +type AnnRowBody = [AnnCell] + +-- | An annotated table cell, wrapping a 'Cell' with its 'ColNumber' +-- and the 'ColSpec' data for the columns that the cell spans. +data AnnCell = AnnCell (NonEmpty ColSpec) ColNumber Cell + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | The column number of a cell, meaning the column number of the +-- first column that the cell spans, if the table were laid on a +-- grid. Columns are numbered starting from zero. +newtype ColNumber = ColNumber Int + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic, Num, Enum) + +-- | Convert a 'Table' to an 'AnnTable'. This function also performs +-- the same normalization that the 'table' builder does (fixing +-- overlapping cells, cells that protrude out of their table section, +-- and so on). If the input table happens to satisfy the conditions +-- that 'table' guarantees, then the resulting 'AnnTable' will be +-- identical, save for the addition of the inferred table information. +toAnnTable + :: Attr + -> Caption + -> [ColSpec] + -> TableHead + -> [TableBody] + -> TableFoot + -> AnnTable +toAnnTable attr cap cs th tbs tf = AnnTable attr cap cs th' tbs' tf' + where + (th', tbs', tf') = fst $ evalRWS (annotateTable th tbs tf) (cs, length cs) 0 + +-- | Internal monad for annotating a table, passing in the 'ColSpec' +-- data for the table, the grid width, and the current 'RowNumber' to +-- be referenced or updated. +type AnnM a = RWS ([ColSpec], Int) () RowNumber a + +incRowNumber :: AnnM RowNumber +incRowNumber = do + rn <- get + put $ rn + 1 + return rn + +annotateTable + :: TableHead + -> [TableBody] + -> TableFoot + -> AnnM (AnnTableHead, [AnnTableBody], AnnTableFoot) +annotateTable th tbs tf = do + th' <- annotateTableHead th + tbs' <- traverse annotateTableBody tbs + tf' <- annotateTableFoot tf + return (th', tbs', tf') + +annotateTableHead :: TableHead -> AnnM AnnTableHead +annotateTableHead (TableHead attr rows) = + AnnTableHead attr <$> annotateHeaderSection rows + +annotateTableBody :: TableBody -> AnnM AnnTableBody +annotateTableBody (TableBody attr rhc th tb) = do + twidth <- asks snd + let rhc' = max 0 $ min (RowHeadColumns twidth) rhc + th' <- annotateHeaderSection th + tb' <- annotateBodySection rhc' tb + return $ AnnTableBody attr rhc' th' tb' + +annotateTableFoot :: TableFoot -> AnnM AnnTableFoot +annotateTableFoot (TableFoot attr rows) = + AnnTableFoot attr <$> annotateHeaderSection rows + +annotateHeaderSection :: [Row] -> AnnM [AnnHeaderRow] +annotateHeaderSection rows = do + colspec <- asks fst + let hangcolspec = (1, ) <$> colspec + annotateHeaderSection' hangcolspec id $ clipRows rows + where + annotateHeaderSection' oldHang acc (Row attr cells : rs) = do + let (_, newHang, cells', _) = + annotateRowSection 0 oldHang $ cells <> repeat emptyCell + n <- incRowNumber + let annRow = AnnHeaderRow attr n cells' + annotateHeaderSection' newHang (acc . (annRow :)) rs + annotateHeaderSection' _ acc [] = return $ acc [] + +annotateBodySection :: RowHeadColumns -> [Row] -> AnnM [AnnBodyRow] +annotateBodySection (RowHeadColumns rhc) rows = do + colspec <- asks fst + let colspec' = (1, ) <$> colspec + let (stubspec, bodyspec) = splitAt rhc colspec' + normalizeBodySection' stubspec bodyspec id $ clipRows rows + where + normalizeBodySection' headHang bodyHang acc (Row attr cells : rs) = do + let (colnum, headHang', rowStub, cells') = + annotateRowSection 0 headHang $ cells <> repeat emptyCell + let (_, bodyHang', rowBody, _) = annotateRowSection colnum bodyHang cells' + n <- incRowNumber + let annRow = AnnBodyRow attr n rowStub rowBody + normalizeBodySection' headHang' bodyHang' (acc . (annRow :)) rs + normalizeBodySection' _ _ acc [] = return $ acc [] + +-- | Lay out a section of a 'Table' row on a grid row, annotating the +-- cells with the 'ColSpec' data for the columns that they +-- span. Performs the same normalization as 'placeRowSection'. +annotateRowSection + :: ColNumber -- ^ The current column number + -> [(RowSpan, ColSpec)] -- ^ The overhang of the previous grid row, + -- with column data + -> [Cell] -- ^ The cells to annotate + -> (ColNumber, [(RowSpan, ColSpec)], [AnnCell], [Cell]) -- ^ The new + -- column + -- number, + -- overhang, + -- annotated + -- cells, + -- and + -- remaining + -- cells +annotateRowSection !colnum oldHang cells + -- If the grid has overhang at our position, try to re-lay in + -- the next position. + | (o, colspec) : os <- oldHang + , o > 1 + = let (colnum', newHang, newCell, cells') = + annotateRowSection (colnum + 1) os cells + in (colnum', (o - 1, colspec) : newHang, newCell, cells') + -- Otherwise if there is any available width, place the cell and + -- continue. + | c : cells' <- cells + , (h, w) <- getDim c + , w' <- max 1 w + , (w'', cellHang@(chStart : chRest), oldHang') <- splitCellHang h w' oldHang + = let c' = setW w'' c + annCell = AnnCell (snd <$> chStart :| chRest) colnum c' + colnum' = colnum + ColNumber (getColSpan w'') + (colnum'', newHang, newCells, remainCells) = + annotateRowSection colnum' oldHang' cells' + in (colnum'', cellHang <> newHang, annCell : newCells, remainCells) + -- Otherwise there is no room in the section + | otherwise + = (colnum, [], [], cells) + where + getColSpan (ColSpan x) = x + getDim (Cell _ _ h w _) = (h, w) + setW w (Cell a b h _ c) = Cell a b h w c + +-- | In @'splitCellHang' rs cs coldata@, with @rs@ the height of a +-- cell that lies at the beginning of @coldata@, and @cs@ its width +-- (which is not assumed to fit in the available space), return the +-- actual width of the cell (what will fit in the available space), +-- the data for the columns that the cell spans (including updating +-- the overhang to equal @rs@), and the remaining column data. +splitCellHang + :: RowSpan + -> ColSpan + -> [(RowSpan, ColSpec)] + -> (ColSpan, [(RowSpan, ColSpec)], [(RowSpan, ColSpec)]) +splitCellHang h n = go 0 + where + go acc ((1, spec) : ls) | acc < n = + let (acc', hang, ls') = go (acc + 1) ls in (acc', (h, spec) : hang, ls') + go acc l = (acc, [], l) + +-- | Convert an 'AnnTable' to a 'Table'. This is the inverse of +-- 'toAnnTable' on well-formed tables (i.e. tables satisfying the +-- guarantees of 'table'). +fromAnnTable + :: AnnTable -> (Attr, Caption, [ColSpec], TableHead, [TableBody], TableFoot) +fromAnnTable (AnnTable attr cap cs th tbs tf) = (attr, cap, cs, th', tbs', tf') + where + th' = fromAnnTableHead th + tbs' = map fromAnnTableBody tbs + tf' = fromAnnTableFoot tf + +fromAnnTableHead :: AnnTableHead -> TableHead +fromAnnTableHead (AnnTableHead attr rows) = + TableHead attr $ fromAnnHeaderRow <$> rows + +fromAnnTableBody :: AnnTableBody -> TableBody +fromAnnTableBody (AnnTableBody attr rhc th tb) = + TableBody attr rhc (fromAnnHeaderRow <$> th) (fromAnnBodyRow <$> tb) + +fromAnnTableFoot :: AnnTableFoot -> TableFoot +fromAnnTableFoot (AnnTableFoot attr rows) = + TableFoot attr $ fromAnnHeaderRow <$> rows + +fromAnnHeaderRow :: AnnHeaderRow -> Row +fromAnnHeaderRow (AnnHeaderRow attr _ cells) = Row attr $ fromAnnCell <$> cells + +fromAnnBodyRow :: AnnBodyRow -> Row +fromAnnBodyRow (AnnBodyRow attr _ rh rb) = + Row attr ((fromAnnCell <$> rh) <> (fromAnnCell <$> rb)) + +fromAnnCell :: AnnCell -> Cell +fromAnnCell (AnnCell _ _ c) = c |