1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
|
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{- |
Module : Text.Pandoc.Writers.JATS.Table
Copyright : © 2020-2021 Albert Krewinkel
License : GNU GPL, version 2 or above
Maintainer : Albert Krewinkel <tarleb@zeitkraut.de>
Stability : alpha
Portability : portable
Conversion of 'Pandoc' tables to JATS XML.
-}
module Text.Pandoc.Writers.JATS.Table
( tableToJATS
) where
import Control.Monad.Reader (asks)
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text (Text)
import Text.DocLayout (Doc, empty, vcat, ($$))
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions)
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Writers.JATS.Types
import Text.Pandoc.XML (escapeNCName, inTags, inTagsIndented, selfClosingTag)
import qualified Data.Text as T
import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
tableToJATS :: PandocMonad m
=> WriterOptions
-> Ann.Table
-> JATS m (Doc Text)
tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do
let (Caption _maybeShortCaption captionBlocks) = caption
-- Only paragraphs are allowed in captions, all other blocks must be
-- wrapped in @<p>@ elements.
let needsWrapping = \case
Plain{} -> False
Para{} -> False
_ -> True
tbl <- captionlessTable opts attr colspecs thead tbodies tfoot
captionDoc <- if null captionBlocks
then return empty
else do
blockToJATS <- asks jatsBlockWriter
inTagsIndented "caption" <$>
blockToJATS needsWrapping opts captionBlocks
return $ inTags True "table-wrap" [] $ captionDoc $$ tbl
captionlessTable :: PandocMonad m
=> WriterOptions
-> Attr
-> [ColSpec]
-> Ann.TableHead
-> [Ann.TableBody]
-> Ann.TableFoot
-> JATS m (Doc Text)
captionlessTable opts attr colspecs thead tbodies tfoot = do
head' <- tableHeadToJats opts thead
bodies <- mapM (tableBodyToJats opts) tbodies
foot' <- tableFootToJats opts tfoot
let validAttribs = [ "border", "cellpadding", "cellspacing", "content-type"
, "frame", "rules", "specific-use", "style", "summary"
, "width"
]
let attribs = toAttribs attr validAttribs
return $ inTags True "table" attribs $ vcat
[ colSpecListToJATS colspecs
, head'
, foot'
, vcat bodies
]
validTablePartAttribs :: [Text]
validTablePartAttribs =
[ "align", "char", "charoff", "content-type", "style", "valign" ]
tableBodyToJats :: PandocMonad m
=> WriterOptions
-> Ann.TableBody
-> JATS m (Doc Text)
tableBodyToJats opts (Ann.TableBody attr _rowHeadCols inthead rows) = do
let attribs = toAttribs attr validTablePartAttribs
intermediateHead <- if null inthead
then return mempty
else headerRowsToJats opts Thead inthead
bodyRows <- bodyRowsToJats opts rows
return $ inTags True "tbody" attribs $ intermediateHead $$ bodyRows
tableHeadToJats :: PandocMonad m
=> WriterOptions
-> Ann.TableHead
-> JATS m (Doc Text)
tableHeadToJats opts (Ann.TableHead attr rows) =
tablePartToJats opts Thead attr rows
tableFootToJats :: PandocMonad m
=> WriterOptions
-> Ann.TableFoot
-> JATS m (Doc Text)
tableFootToJats opts (Ann.TableFoot attr rows) =
tablePartToJats opts Tfoot attr rows
tablePartToJats :: PandocMonad m
=> WriterOptions
-> TablePart
-> Attr
-> [Ann.HeaderRow]
-> JATS m (Doc Text)
tablePartToJats opts tblpart attr rows =
if null rows || all isEmptyRow rows
then return mempty
else do
let tag' = case tblpart of
Thead -> "thead"
Tfoot -> "tfoot"
Tbody -> "tbody" -- this would be unexpected
let attribs = toAttribs attr validTablePartAttribs
inTags True tag' attribs <$> headerRowsToJats opts tblpart rows
where
isEmptyRow (Ann.HeaderRow _attr _rownum cells) = all isEmptyCell cells
isEmptyCell (Ann.Cell _colspecs _colnum cell) =
cell == Cell nullAttr AlignDefault (RowSpan 1) (ColSpan 1) []
-- | The part of a table; header, footer, or body.
data TablePart = Thead | Tfoot | Tbody
deriving (Eq)
data CellType = HeaderCell | BodyCell
data TableRow = TableRow TablePart Attr Ann.RowNumber Ann.RowHead Ann.RowBody
headerRowsToJats :: PandocMonad m
=> WriterOptions
-> TablePart
-> [Ann.HeaderRow]
-> JATS m (Doc Text)
headerRowsToJats opts tablepart =
rowListToJats opts . map toTableRow
where
toTableRow (Ann.HeaderRow attr rownum rowbody) =
TableRow tablepart attr rownum [] rowbody
bodyRowsToJats :: PandocMonad m
=> WriterOptions
-> [Ann.BodyRow]
-> JATS m (Doc Text)
bodyRowsToJats opts =
rowListToJats opts . zipWith toTableRow [1..]
where
toTableRow rownum (Ann.BodyRow attr _rownum rowhead rowbody) =
TableRow Tbody attr rownum rowhead rowbody
rowListToJats :: PandocMonad m
=> WriterOptions
-> [TableRow]
-> JATS m (Doc Text)
rowListToJats opts = fmap vcat . mapM (tableRowToJats opts)
colSpecListToJATS :: [ColSpec] -> Doc Text
colSpecListToJATS colspecs =
let hasDefaultWidth (_, ColWidthDefault) = True
hasDefaultWidth _ = False
percent w = tshow (round (100*w) :: Integer) <> "%"
col :: ColWidth -> Doc Text
col = selfClosingTag "col" . \case
ColWidthDefault -> mempty
ColWidth w -> [("width", percent w)]
in if all hasDefaultWidth colspecs
then mempty
else inTags True "colgroup" [] $ vcat $ map (col . snd) colspecs
tableRowToJats :: PandocMonad m
=> WriterOptions
-> TableRow
-> JATS m (Doc Text)
tableRowToJats opts (TableRow tblpart attr _rownum rowhead rowbody) = do
let validAttribs = [ "align", "char", "charoff", "content-type"
, "style", "valign"
]
let attr' = toAttribs attr validAttribs
let celltype = case tblpart of
Thead -> HeaderCell
_ -> BodyCell
headcells <- mapM (cellToJats opts HeaderCell) rowhead
bodycells <- mapM (cellToJats opts celltype) rowbody
return $ inTags True "tr" attr' $ mconcat
[ vcat headcells
, vcat bodycells
]
alignmentAttrib :: Alignment -> Maybe (Text, Text)
alignmentAttrib = fmap ("align",) . \case
AlignLeft -> Just "left"
AlignRight -> Just "right"
AlignCenter -> Just "center"
AlignDefault -> Nothing
colspanAttrib :: ColSpan -> Maybe (Text, Text)
colspanAttrib = \case
ColSpan 1 -> Nothing
ColSpan n -> Just ("colspan", tshow n)
rowspanAttrib :: RowSpan -> Maybe (Text, Text)
rowspanAttrib = \case
RowSpan 1 -> Nothing
RowSpan n -> Just ("rowspan", tshow n)
cellToJats :: PandocMonad m
=> WriterOptions
-> CellType
-> Ann.Cell
-> JATS m (Doc Text)
cellToJats opts celltype (Ann.Cell (colspec :| _) _colNum cell) =
let align = fst colspec
in tableCellToJats opts celltype align cell
toAttribs :: Attr -> [Text] -> [(Text, Text)]
toAttribs (ident, _classes, kvs) knownAttribs =
(if T.null ident then id else (("id", escapeNCName ident) :)) $
filter ((`elem` knownAttribs) . fst) kvs
tableCellToJats :: PandocMonad m
=> WriterOptions
-> CellType
-> Alignment
-> Cell
-> JATS m (Doc Text)
tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do
blockToJats <- asks jatsBlockWriter
inlinesToJats <- asks jatsInlinesWriter
let cellContents = \case
[Plain inlines] -> inlinesToJats opts inlines
blocks -> blockToJats needsWrapInCell opts blocks
let tag' = case ctype of
BodyCell -> "td"
HeaderCell -> "th"
let align' = case align of
AlignDefault -> colAlign
_ -> align
let maybeCons = maybe id (:)
let validAttribs = [ "abbr", "align", "axis", "char", "charoff"
, "content-type", "headers", "scope", "style", "valign"
]
let attribs = maybeCons (alignmentAttrib align')
. maybeCons (rowspanAttrib rowspan)
. maybeCons (colspanAttrib colspan)
$ toAttribs attr validAttribs
inTags False tag' attribs <$> cellContents item
-- | Whether the JATS produced from this block should be wrapped in a
-- @<p>@ element when put directly below a @<td>@ element.
needsWrapInCell :: Block -> Bool
needsWrapInCell = \case
Plain{} -> False -- should be unwrapped anyway
Para{} -> False
BulletList{} -> False
OrderedList{} -> False
DefinitionList{} -> False
HorizontalRule -> False
CodeBlock{} -> False
RawBlock{} -> False -- responsibility of the user
_ -> True
|