aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/JATS/Table.hs
blob: 70569bdcd0eed9b9c281538fcfafd6a4512de68c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{- |
   Module      : Text.Pandoc.Writers.JATS.Table
   Copyright   : © 2020-2021 Albert Krewinkel
   License     : GNU GPL, version 2 or above

   Maintainer  : Albert Krewinkel <tarleb@zeitkraut.de>
   Stability   : alpha
   Portability : portable

Conversion of 'Pandoc' tables to JATS XML.
-}
module Text.Pandoc.Writers.JATS.Table
  ( tableToJATS
  ) where
import Control.Monad.Reader (asks)
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text (Text)
import Text.DocLayout (Doc, empty, vcat, ($$))
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions)
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Writers.JATS.Types
import Text.Pandoc.XML (escapeNCName, inTags, inTagsIndented, selfClosingTag)
import qualified Data.Text as T
import qualified Text.Pandoc.Writers.AnnotatedTable as Ann

tableToJATS :: PandocMonad m
            => WriterOptions
            -> Ann.Table
            -> JATS m (Doc Text)
tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do
  let (Caption _maybeShortCaption captionBlocks) = caption
  -- Only paragraphs are allowed in captions, all other blocks must be
  -- wrapped in @<p>@ elements.
  let needsWrapping = \case
        Plain{} -> False
        Para{}  -> False
        _       -> True
  tbl <- captionlessTable opts attr colspecs thead tbodies tfoot
  captionDoc <- if null captionBlocks
                then return empty
                else do
                  blockToJATS <- asks jatsBlockWriter
                  inTagsIndented "caption" <$>
                    blockToJATS needsWrapping opts captionBlocks
  return $ inTags True "table-wrap" [] $ captionDoc $$ tbl

captionlessTable :: PandocMonad m
                 => WriterOptions
                 -> Attr
                 -> [ColSpec]
                 -> Ann.TableHead
                 -> [Ann.TableBody]
                 -> Ann.TableFoot
                 -> JATS m (Doc Text)
captionlessTable opts attr colspecs thead tbodies tfoot = do
  head' <- tableHeadToJats opts thead
  bodies <- mapM (tableBodyToJats opts) tbodies
  foot' <- tableFootToJats opts tfoot
  let validAttribs = [ "border", "cellpadding", "cellspacing", "content-type"
                     , "frame", "rules", "specific-use", "style", "summary"
                     , "width"
                     ]
  let attribs = toAttribs attr validAttribs
  return $ inTags True "table" attribs $ vcat
    [ colSpecListToJATS colspecs
    , head'
    , foot'
    , vcat bodies
    ]

validTablePartAttribs :: [Text]
validTablePartAttribs =
  [ "align", "char", "charoff", "content-type", "style", "valign" ]

tableBodyToJats :: PandocMonad m
                => WriterOptions
                -> Ann.TableBody
                -> JATS m (Doc Text)
tableBodyToJats opts (Ann.TableBody attr _rowHeadCols inthead rows) = do
  let attribs = toAttribs attr validTablePartAttribs
  intermediateHead <- if null inthead
                      then return mempty
                      else headerRowsToJats opts Thead inthead
  bodyRows <- bodyRowsToJats opts rows
  return $ inTags True "tbody" attribs $ intermediateHead $$ bodyRows

tableHeadToJats :: PandocMonad m
                => WriterOptions
                -> Ann.TableHead
                -> JATS m (Doc Text)
tableHeadToJats opts (Ann.TableHead attr rows) =
  tablePartToJats opts Thead attr rows

tableFootToJats :: PandocMonad m
                => WriterOptions
                -> Ann.TableFoot
                -> JATS m (Doc Text)
tableFootToJats opts (Ann.TableFoot attr rows) =
  tablePartToJats opts Tfoot attr rows

tablePartToJats :: PandocMonad m
                => WriterOptions
                -> TablePart
                -> Attr
                -> [Ann.HeaderRow]
                -> JATS m (Doc Text)
tablePartToJats opts tblpart attr rows =
  if null rows || all isEmptyRow rows
  then return mempty
  else do
    let tag' = case tblpart of
                 Thead -> "thead"
                 Tfoot -> "tfoot"
                 Tbody -> "tbody" -- this would be unexpected
    let attribs = toAttribs attr validTablePartAttribs
    inTags True tag' attribs <$> headerRowsToJats opts tblpart rows
  where
    isEmptyRow (Ann.HeaderRow _attr _rownum cells) = all isEmptyCell cells
    isEmptyCell (Ann.Cell _colspecs _colnum cell) =
      cell == Cell nullAttr AlignDefault (RowSpan 1) (ColSpan 1) []

-- | The part of a table; header, footer, or body.
data TablePart = Thead | Tfoot | Tbody
  deriving (Eq)

data CellType = HeaderCell | BodyCell

data TableRow = TableRow TablePart Attr Ann.RowNumber Ann.RowHead Ann.RowBody

headerRowsToJats :: PandocMonad m
                 => WriterOptions
                 -> TablePart
                 -> [Ann.HeaderRow]
                 -> JATS m (Doc Text)
headerRowsToJats opts tablepart =
  rowListToJats opts . map toTableRow
  where
    toTableRow (Ann.HeaderRow attr rownum rowbody) =
      TableRow tablepart attr rownum [] rowbody

bodyRowsToJats :: PandocMonad m
               => WriterOptions
               -> [Ann.BodyRow]
               -> JATS m (Doc Text)
bodyRowsToJats opts =
  rowListToJats opts . zipWith toTableRow [1..]
  where
    toTableRow rownum (Ann.BodyRow attr _rownum rowhead rowbody) =
      TableRow Tbody attr rownum rowhead rowbody

rowListToJats :: PandocMonad m
              => WriterOptions
              -> [TableRow]
              -> JATS m (Doc Text)
rowListToJats opts = fmap vcat . mapM (tableRowToJats opts)

colSpecListToJATS :: [ColSpec] -> Doc Text
colSpecListToJATS colspecs =
  let hasDefaultWidth (_, ColWidthDefault) = True
      hasDefaultWidth _                    = False

      percent w = tshow (round (100*w) :: Integer) <> "%"

      col :: ColWidth -> Doc Text
      col = selfClosingTag "col" . \case
        ColWidthDefault -> mempty
        ColWidth w -> [("width", percent w)]

   in if all hasDefaultWidth colspecs
      then mempty
      else inTags True "colgroup" [] $ vcat $ map (col . snd) colspecs

tableRowToJats :: PandocMonad m
               => WriterOptions
               -> TableRow
               -> JATS m (Doc Text)
tableRowToJats opts (TableRow tblpart attr _rownum rowhead rowbody) = do
  let validAttribs = [ "align", "char", "charoff", "content-type"
                     , "style", "valign"
                     ]
  let attr' = toAttribs attr validAttribs
  let celltype = case tblpart of
                   Thead -> HeaderCell
                   _     -> BodyCell
  headcells <- mapM (cellToJats opts HeaderCell) rowhead
  bodycells <- mapM (cellToJats opts celltype) rowbody
  return $ inTags True "tr" attr' $ mconcat
    [ vcat headcells
    , vcat bodycells
    ]

alignmentAttrib :: Alignment -> Maybe (Text, Text)
alignmentAttrib = fmap ("align",) . \case
  AlignLeft    -> Just "left"
  AlignRight   -> Just "right"
  AlignCenter  -> Just "center"
  AlignDefault -> Nothing

colspanAttrib :: ColSpan -> Maybe (Text, Text)
colspanAttrib = \case
  ColSpan 1 -> Nothing
  ColSpan n -> Just ("colspan", tshow n)

rowspanAttrib :: RowSpan -> Maybe (Text, Text)
rowspanAttrib = \case
  RowSpan 1 -> Nothing
  RowSpan n -> Just ("rowspan", tshow n)

cellToJats :: PandocMonad m
           => WriterOptions
           -> CellType
           -> Ann.Cell
           -> JATS m (Doc Text)
cellToJats opts celltype (Ann.Cell (colspec :| _) _colNum cell) =
  let align = fst colspec
  in tableCellToJats opts celltype align cell

toAttribs :: Attr -> [Text] -> [(Text, Text)]
toAttribs (ident, _classes, kvs) knownAttribs =
  (if T.null ident then id else (("id", escapeNCName ident) :)) $
  filter ((`elem` knownAttribs) . fst) kvs

tableCellToJats :: PandocMonad m
                => WriterOptions
                -> CellType
                -> Alignment
                -> Cell
                -> JATS m (Doc Text)
tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
  blockToJats   <- asks jatsBlockWriter
  inlinesToJats <- asks jatsInlinesWriter
  let cellContents = \case
        [Plain inlines] -> inlinesToJats opts inlines
        blocks          -> blockToJats needsWrapInCell opts blocks
  let tag' = case ctype of
        BodyCell   -> "td"
        HeaderCell -> "th"
  let align' = case align of
        AlignDefault -> colAlign
        _            -> align
  let maybeCons = maybe id (:)
  let validAttribs = [ "abbr", "align", "axis", "char", "charoff"
                     , "content-type", "headers", "scope", "style", "valign"
                     ]
  let attribs = maybeCons (alignmentAttrib align')
              . maybeCons (rowspanAttrib rowspan)
              . maybeCons (colspanAttrib colspan)
              $ toAttribs attr validAttribs
  inTags False tag' attribs <$> cellContents item

-- | Whether the JATS produced from this block should be wrapped in a
-- @<p>@ element when put directly below a @<td>@ element.
needsWrapInCell :: Block -> Bool
needsWrapInCell = \case
  Plain{}          -> False  -- should be unwrapped anyway
  Para{}           -> False
  BulletList{}     -> False
  OrderedList{}    -> False
  DefinitionList{} -> False
  HorizontalRule   -> False
  CodeBlock{}      -> False
  RawBlock{}       -> False  -- responsibility of the user
  _                -> True