diff options
author | Christian Despres <50160106+despresc@users.noreply.github.com> | 2020-09-05 17:36:51 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-05 14:36:51 -0700 |
commit | 10c6c411f9bd582ce9bafe159728f322a54395cd (patch) | |
tree | 710e87c33e06824eeb171a5ddc5c9eb174c9f43e /src | |
parent | 3935c9c5c4b070d43b2d7d946fd28dfdaf87e080 (diff) | |
download | pandoc-10c6c411f9bd582ce9bafe159728f322a54395cd.tar.gz |
Add Writers.Tables helper functions and types, add tests for those (#6655)
Add Writers.Tables helper functions and types, add tests for those
The Writers.Tables module contains an AnnTable type that is a pandoc
Table with added inferred information that should be enough for
writers (in particular the HTML writer) to operate on without having
to lay out the table themselves.
The toAnnTable and fromAnnTable functions in that module convert
between AnnTable and Table. In addition to producing an AnnTable with
coherent and well-formed annotations, the toAnnTable function also
normalizes its input Table like the table builder does.
Various tests ensure that toAnnTable normalizes tables exactly like
the table builder, and that its annotations are coherent.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Writers/Tables.hs | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Writers/Tables.hs b/src/Text/Pandoc/Writers/Tables.hs new file mode 100644 index 000000000..757ac41c6 --- /dev/null +++ b/src/Text/Pandoc/Writers/Tables.hs @@ -0,0 +1,291 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE TupleSections #-} + +{- | + Module : Text.Pandoc.Writers.Tables + Copyright : Copyright 2020 Christian Despres + License : GNU GPL, version 2 or above + + Maintainer : Christian Despres <christian.j.j.despres@gmail.com> + Stability : alpha + Portability : portable + +Definitions and helper functions for an intermediate 'AnnTable' type, +which annotates the existing 'Table' types with additional inferred +information. For use in writers that need to know the details of +columns that cells span, row numbers, and the cells that are in the +row head. +-} + +module Text.Pandoc.Writers.Tables + ( toAnnTable + , fromAnnTable + , AnnTable(..) + , AnnTableHead(..) + , AnnTableBody(..) + , AnnTableFoot(..) + , AnnHeaderRow(..) + , AnnBodyRow(..) + , RowNumber(..) + , AnnRowHead + , AnnRowBody + , AnnCell(..) + , ColNumber(..) + ) +where + +import Control.Monad.RWS.Strict + hiding ( (<>) ) +import Data.Generics ( Data + , Typeable + ) +import Data.List.NonEmpty ( NonEmpty(..) ) +import GHC.Generics ( Generic ) +import Text.Pandoc.Builder + +-- | An annotated table type, corresponding to the 'Table' constructor +-- and the HTML @\<table\>@ element. It records the data of the +-- columns that cells span, the cells in the row head, the row numbers +-- of rows, and the column numbers of cells, in addition to the data +-- in a 'Table'. The type itself does not enforce any guarantees about +-- the consistency of this data. Use 'toAnnTable' to produce an +-- 'AnnTable' from a pandoc 'Table'. +data AnnTable = AnnTable Attr Caption [ColSpec] AnnTableHead [AnnTableBody] AnnTableFoot + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated table head, corresponding to 'TableHead' and the +-- HTML @\<thead\>@ element. +data AnnTableHead = AnnTableHead Attr [AnnHeaderRow] + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated table body, with an intermediate head and body, +-- corresponding to 'TableBody' and the HTML @\<tbody\>@ element. +data AnnTableBody = AnnTableBody Attr RowHeadColumns [AnnHeaderRow] [AnnBodyRow] + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated table foot, corresponding to 'TableFoot' and the +-- HTML @\<tfoot\>@ element. +data AnnTableFoot = AnnTableFoot Attr [AnnHeaderRow] + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated header row, corresponding to 'Row' and the HTML +-- @\<tr\>@ element, and also recording the row number of the row. All +-- the cells in an 'AnnHeaderRow' are header (@\<th\>@) cells. +data AnnHeaderRow = AnnHeaderRow Attr RowNumber [AnnCell] + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | An annotated body row, corresponding to 'Row' and the HTML +-- @\<tr\>@ element, and also recording its row number and separating +-- the row head cells from the row body cells. +data AnnBodyRow = AnnBodyRow Attr RowNumber AnnRowHead AnnRowBody + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | The row number of a row. Note that rows are numbered continuously +-- from zero from the start of the table, so the first row in a table +-- body, for instance, may have a large 'RowNumber'. +newtype RowNumber = RowNumber Int + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic, Num, Enum) + +-- | The head of a body row; the portion of the row lying in the stub +-- of the 'TableBody'. Its cells correspond to HTML @\<th\>@ cells. +type AnnRowHead = [AnnCell] + +-- | The body of a body row; the portion of the row lying after the +-- stub of the 'TableBody'. Its cells correspond to HTML @\<td\>@ +-- cells. +type AnnRowBody = [AnnCell] + +-- | An annotated table cell, wrapping a 'Cell' with its 'ColNumber' +-- and the 'ColSpec' data for the columns that the cell spans. +data AnnCell = AnnCell (NonEmpty ColSpec) ColNumber Cell + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic) + +-- | The column number of a cell, meaning the column number of the +-- first column that the cell spans, if the table were laid on a +-- grid. Columns are numbered starting from zero. +newtype ColNumber = ColNumber Int + deriving (Eq, Ord, Read, Show, Typeable, Data, Generic, Num, Enum) + +-- | Convert a 'Table' to an 'AnnTable'. This function also performs +-- the same normalization that the 'table' builder does (fixing +-- overlapping cells, cells that protrude out of their table section, +-- and so on). If the input table happens to satisfy the conditions +-- that 'table' guarantees, then the resulting 'AnnTable' will be +-- identical, save for the addition of the inferred table information. +toAnnTable + :: Attr + -> Caption + -> [ColSpec] + -> TableHead + -> [TableBody] + -> TableFoot + -> AnnTable +toAnnTable attr cap cs th tbs tf = AnnTable attr cap cs th' tbs' tf' + where + (th', tbs', tf') = fst $ evalRWS (annotateTable th tbs tf) (cs, length cs) 0 + +-- | Internal monad for annotating a table, passing in the 'ColSpec' +-- data for the table, the grid width, and the current 'RowNumber' to +-- be referenced or updated. +type AnnM a = RWS ([ColSpec], Int) () RowNumber a + +incRowNumber :: AnnM RowNumber +incRowNumber = do + rn <- get + put $ rn + 1 + return rn + +annotateTable + :: TableHead + -> [TableBody] + -> TableFoot + -> AnnM (AnnTableHead, [AnnTableBody], AnnTableFoot) +annotateTable th tbs tf = do + th' <- annotateTableHead th + tbs' <- traverse annotateTableBody tbs + tf' <- annotateTableFoot tf + return (th', tbs', tf') + +annotateTableHead :: TableHead -> AnnM AnnTableHead +annotateTableHead (TableHead attr rows) = + AnnTableHead attr <$> annotateHeaderSection rows + +annotateTableBody :: TableBody -> AnnM AnnTableBody +annotateTableBody (TableBody attr rhc th tb) = do + twidth <- asks snd + let rhc' = max 0 $ min (RowHeadColumns twidth) rhc + th' <- annotateHeaderSection th + tb' <- annotateBodySection rhc' tb + return $ AnnTableBody attr rhc' th' tb' + +annotateTableFoot :: TableFoot -> AnnM AnnTableFoot +annotateTableFoot (TableFoot attr rows) = + AnnTableFoot attr <$> annotateHeaderSection rows + +annotateHeaderSection :: [Row] -> AnnM [AnnHeaderRow] +annotateHeaderSection rows = do + colspec <- asks fst + let hangcolspec = (1, ) <$> colspec + annotateHeaderSection' hangcolspec id $ clipRows rows + where + annotateHeaderSection' oldHang acc (Row attr cells : rs) = do + let (_, newHang, cells', _) = + annotateRowSection 0 oldHang $ cells <> repeat emptyCell + n <- incRowNumber + let annRow = AnnHeaderRow attr n cells' + annotateHeaderSection' newHang (acc . (annRow :)) rs + annotateHeaderSection' _ acc [] = return $ acc [] + +annotateBodySection :: RowHeadColumns -> [Row] -> AnnM [AnnBodyRow] +annotateBodySection (RowHeadColumns rhc) rows = do + colspec <- asks fst + let colspec' = (1, ) <$> colspec + let (stubspec, bodyspec) = splitAt rhc colspec' + normalizeBodySection' stubspec bodyspec id $ clipRows rows + where + normalizeBodySection' headHang bodyHang acc (Row attr cells : rs) = do + let (colnum, headHang', rowStub, cells') = + annotateRowSection 0 headHang $ cells <> repeat emptyCell + let (_, bodyHang', rowBody, _) = annotateRowSection colnum bodyHang cells' + n <- incRowNumber + let annRow = AnnBodyRow attr n rowStub rowBody + normalizeBodySection' headHang' bodyHang' (acc . (annRow :)) rs + normalizeBodySection' _ _ acc [] = return $ acc [] + +-- | Lay out a section of a 'Table' row on a grid row, annotating the +-- cells with the 'ColSpec' data for the columns that they +-- span. Performs the same normalization as 'placeRowSection'. +annotateRowSection + :: ColNumber -- ^ The current column number + -> [(RowSpan, ColSpec)] -- ^ The overhang of the previous grid row, + -- with column data + -> [Cell] -- ^ The cells to annotate + -> (ColNumber, [(RowSpan, ColSpec)], [AnnCell], [Cell]) -- ^ The new + -- column + -- number, + -- overhang, + -- annotated + -- cells, + -- and + -- remaining + -- cells +annotateRowSection !colnum oldHang cells + -- If the grid has overhang at our position, try to re-lay in + -- the next position. + | (o, colspec) : os <- oldHang + , o > 1 + = let (colnum', newHang, newCell, cells') = + annotateRowSection (colnum + 1) os cells + in (colnum', (o - 1, colspec) : newHang, newCell, cells') + -- Otherwise if there is any available width, place the cell and + -- continue. + | c : cells' <- cells + , (h, w) <- getDim c + , w' <- max 1 w + , (w'', cellHang@(chStart : chRest), oldHang') <- splitCellHang h w' oldHang + = let c' = setW w'' c + annCell = AnnCell (snd <$> chStart :| chRest) colnum c' + colnum' = colnum + ColNumber (getColSpan w'') + (colnum'', newHang, newCells, remainCells) = + annotateRowSection colnum' oldHang' cells' + in (colnum'', cellHang <> newHang, annCell : newCells, remainCells) + -- Otherwise there is no room in the section + | otherwise + = (colnum, [], [], cells) + where + getColSpan (ColSpan x) = x + getDim (Cell _ _ h w _) = (h, w) + setW w (Cell a b h _ c) = Cell a b h w c + +-- | In @'splitCellHang' rs cs coldata@, with @rs@ the height of a +-- cell that lies at the beginning of @coldata@, and @cs@ its width +-- (which is not assumed to fit in the available space), return the +-- actual width of the cell (what will fit in the available space), +-- the data for the columns that the cell spans (including updating +-- the overhang to equal @rs@), and the remaining column data. +splitCellHang + :: RowSpan + -> ColSpan + -> [(RowSpan, ColSpec)] + -> (ColSpan, [(RowSpan, ColSpec)], [(RowSpan, ColSpec)]) +splitCellHang h n = go 0 + where + go acc ((1, spec) : ls) | acc < n = + let (acc', hang, ls') = go (acc + 1) ls in (acc', (h, spec) : hang, ls') + go acc l = (acc, [], l) + +-- | Convert an 'AnnTable' to a 'Table'. This is the inverse of +-- 'toAnnTable' on well-formed tables (i.e. tables satisfying the +-- guarantees of 'table'). +fromAnnTable + :: AnnTable -> (Attr, Caption, [ColSpec], TableHead, [TableBody], TableFoot) +fromAnnTable (AnnTable attr cap cs th tbs tf) = (attr, cap, cs, th', tbs', tf') + where + th' = fromAnnTableHead th + tbs' = map fromAnnTableBody tbs + tf' = fromAnnTableFoot tf + +fromAnnTableHead :: AnnTableHead -> TableHead +fromAnnTableHead (AnnTableHead attr rows) = + TableHead attr $ fromAnnHeaderRow <$> rows + +fromAnnTableBody :: AnnTableBody -> TableBody +fromAnnTableBody (AnnTableBody attr rhc th tb) = + TableBody attr rhc (fromAnnHeaderRow <$> th) (fromAnnBodyRow <$> tb) + +fromAnnTableFoot :: AnnTableFoot -> TableFoot +fromAnnTableFoot (AnnTableFoot attr rows) = + TableFoot attr $ fromAnnHeaderRow <$> rows + +fromAnnHeaderRow :: AnnHeaderRow -> Row +fromAnnHeaderRow (AnnHeaderRow attr _ cells) = Row attr $ fromAnnCell <$> cells + +fromAnnBodyRow :: AnnBodyRow -> Row +fromAnnBodyRow (AnnBodyRow attr _ rh rb) = + Row attr ((fromAnnCell <$> rh) <> (fromAnnCell <$> rb)) + +fromAnnCell :: AnnCell -> Cell +fromAnnCell (AnnCell _ _ c) = c |