1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
|
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Readers.LaTeX.Table
( tableEnvironments )
where
import Data.Functor (($>))
import Text.Pandoc.Class
import Text.Pandoc.Readers.LaTeX.Parsing
import Text.Pandoc.Readers.LaTeX.Types
import Text.Pandoc.Builder as B
import qualified Data.Map as M
import Data.Text (Text)
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Control.Applicative ((<|>), optional, many)
import Control.Monad (when, void)
import Text.Pandoc.Shared (safeRead, trim)
import Text.Pandoc.Logging (LogMessage(SkippedContent))
import Text.Pandoc.Walk (walkM)
import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
optional, space, spaces, withRaw, (<|>))
tableEnvironments :: PandocMonad m
=> LP m Blocks
-> LP m Inlines
-> M.Map Text (LP m Blocks)
tableEnvironments blocks inline =
M.fromList
[ ("longtable", env "longtable" $
resetCaption *>
simpTable blocks inline "longtable" False >>= addTableCaption)
, ("table", env "table" $
skipopts *> resetCaption *> blocks >>= addTableCaption)
, ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
, ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
, ("tabular", env "tabular" $ simpTable blocks inline "tabular" False)
]
hline :: PandocMonad m => LP m ()
hline = try $ do
spaces
controlSeq "hline" <|>
(controlSeq "cline" <* braced) <|>
-- booktabs rules:
controlSeq "toprule" <|>
controlSeq "bottomrule" <|>
controlSeq "midrule" <|>
controlSeq "endhead" <|>
controlSeq "endfirsthead"
spaces
optional rawopt
return ()
lbreak :: PandocMonad m => LP m Tok
lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
<* skipopts <* spaces
amp :: PandocMonad m => LP m Tok
amp = symbol '&'
-- Split a Word into individual Symbols (for parseAligns)
splitWordTok :: PandocMonad m => LP m ()
splitWordTok = do
inp <- getInput
case inp of
(Tok spos Word t : rest) ->
setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
_ -> return ()
parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
parseAligns = try $ do
let maybeBar = skipMany
(try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
let cAlign = AlignCenter <$ symbol 'c'
let lAlign = AlignLeft <$ symbol 'l'
let rAlign = AlignRight <$ symbol 'r'
let parAlign = AlignLeft <$ symbol 'p'
-- aligns from tabularx
let xAlign = AlignLeft <$ symbol 'X'
let mAlign = AlignLeft <$ symbol 'm'
let bAlign = AlignLeft <$ symbol 'b'
let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
<|> xAlign <|> mAlign <|> bAlign )
let alignPrefix = symbol '>' >> braced
let alignSuffix = symbol '<' >> braced
let colWidth = try $ do
symbol '{'
ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
spaces
symbol '}'
return $ safeRead ds
let alignSpec = do
pref <- option [] alignPrefix
spaces
al <- alignChar
width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
pos <- getPosition
report $ SkippedContent s pos
return Nothing)
spaces
suff <- option [] alignSuffix
return (al, width, (pref, suff))
let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
symbol '*'
spaces
ds <- trim . untokenize <$> braced
spaces
spec <- braced
case safeRead ds of
Just n ->
getInput >>= setInput . (mconcat (replicate n spec) ++)
Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
bgroup
spaces
maybeBar
aligns' <- many $ try $ spaces >> optional starAlign >>
(alignSpec <* maybeBar)
spaces
egroup
spaces
return $ map toSpec aligns'
where
toColWidth (Just w) | w > 0 = ColWidth w
toColWidth _ = ColWidthDefault
toSpec (x, y, z) = (x, toColWidth y, z)
-- N.B. this parser returns a Row that may have erroneous empty cells
-- in it. See the note above fixTableHead for details.
parseTableRow :: PandocMonad m
=> LP m Blocks -- ^ block parser
-> LP m Inlines -- ^ inline parser
-> Text -- ^ table environment name
-> [([Tok], [Tok])] -- ^ pref/suffixes
-> LP m Row
parseTableRow blocks inline envname prefsufs = do
notFollowedBy (spaces *> end_ envname)
-- contexts that can contain & that is not colsep:
let canContainAmp (Tok _ (CtrlSeq "begin") _) = True
canContainAmp (Tok _ (CtrlSeq "verb") _) = True
canContainAmp (Tok _ (CtrlSeq "Verb") _) = True
canContainAmp _ = False
-- add prefixes and suffixes in token stream:
let celltoks (pref, suff) = do
prefpos <- getPosition
contents <- mconcat <$>
many ( snd <$> withRaw
((lookAhead (controlSeq "parbox") >>
void blocks) -- #5711
<|>
(lookAhead (satisfyTok canContainAmp) >> void inline)
<|>
(lookAhead (symbol '$') >> void inline))
<|>
(do notFollowedBy
(() <$ amp <|> () <$ lbreak <|> end_ envname)
count 1 anyTok) )
suffpos <- getPosition
option [] (count 1 amp)
return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
rawcells <- mapM celltoks prefsufs
cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
spaces
return $ Row nullAttr cells
parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
parseTableCell blocks = do
spaces
updateState $ \st -> st{ sInTableCell = True }
cell' <- multicolumnCell blocks
<|> multirowCell blocks
<|> parseSimpleCell
<|> parseEmptyCell
updateState $ \st -> st{ sInTableCell = False }
spaces
return cell'
where
-- The parsing of empty cells is important in LaTeX, especially when dealing
-- with multirow/multicolumn. See #6603.
parseEmptyCell = spaces $> emptyCell
parseSimpleCell = simpleCell <$> (plainify <$> blocks)
cellAlignment :: PandocMonad m => LP m Alignment
cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
where
alignment = do
c <- untoken <$> singleChar
return $ case c of
"l" -> AlignLeft
"r" -> AlignRight
"c" -> AlignCenter
"*" -> AlignDefault
_ -> AlignDefault
plainify :: Blocks -> Blocks
plainify bs = case toList bs of
[Para ils] -> plain (fromList ils)
_ -> bs
multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
multirowCell blocks = controlSeq "multirow" >> do
-- Full prototype for \multirow macro is:
-- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
-- However, everything except `nrows` and `text` make
-- sense in the context of the Pandoc AST
_ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position
nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
_ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related
_ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width
_ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning
content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
multicolumnCell blocks = controlSeq "multicolumn" >> do
span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
alignment <- symbol '{' *> cellAlignment <* symbol '}'
let singleCell = do
content <- plainify <$> blocks
return $ cell alignment (RowSpan 1) (ColSpan span') content
-- Two possible contents: either a \multirow cell, or content.
-- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
-- Note that a \multirow cell can be nested in a \multicolumn,
-- but not the other way around. See #6603
let nestedCell = do
(Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
return $ cell
alignment
(RowSpan rs)
(ColSpan span')
(fromList bs)
symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
-- LaTeX tables are stored with empty cells underneath multirow cells
-- denoting the grid spaces taken up by them. More specifically, if a
-- cell spans m rows, then it will overwrite all the cells in the
-- columns it spans for (m-1) rows underneath it, requiring padding
-- cells in these places. These padding cells need to be removed for
-- proper table reading. See #6603.
--
-- These fixTable functions do not otherwise fix up malformed
-- input tables: that is left to the table builder.
fixTableHead :: TableHead -> TableHead
fixTableHead (TableHead attr rows) = TableHead attr rows'
where
rows' = fixTableRows rows
fixTableBody :: TableBody -> TableBody
fixTableBody (TableBody attr rhc th tb)
= TableBody attr rhc th' tb'
where
th' = fixTableRows th
tb' = fixTableRows tb
fixTableRows :: [Row] -> [Row]
fixTableRows = fixTableRows' $ repeat Nothing
where
fixTableRows' oldHang (Row attr cells : rs)
= let (newHang, cells') = fixTableRow oldHang cells
rs' = fixTableRows' newHang rs
in Row attr cells' : rs'
fixTableRows' _ [] = []
-- The overhang is represented as Just (relative cell dimensions) or
-- Nothing for an empty grid space.
fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
fixTableRow oldHang cells
-- If there's overhang, drop cells until their total width meets the
-- width of the occupied grid spaces (or we run out)
| (n, prefHang, restHang) <- splitHang oldHang
, n > 0
= let cells' = dropToWidth getCellW n cells
(restHang', cells'') = fixTableRow restHang cells'
in (prefHang restHang', cells'')
-- Otherwise record the overhang of a pending cell and fix the rest
-- of the row
| c@(Cell _ _ h w _):cells' <- cells
= let h' = max 1 h
w' = max 1 w
oldHang' = dropToWidth getHangW w' oldHang
(newHang, cells'') = fixTableRow oldHang' cells'
in (toHang w' h' <> newHang, c : cells'')
| otherwise
= (oldHang, [])
where
getCellW (Cell _ _ _ w _) = w
getHangW = maybe 1 fst
getCS (ColSpan n) = n
toHang c r
| r > 1 = [Just (c, r)]
| otherwise = replicate (getCS c) Nothing
-- Take the prefix of the overhang list representing filled grid
-- spaces. Also return the remainder and the length of this prefix.
splitHang = splitHang' 0 id
splitHang' !n l (Just (c, r):xs)
= splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
splitHang' n l xs = (n, l, xs)
-- Drop list items until the total width of the dropped items
-- exceeds the passed width.
dropToWidth _ n l | n < 1 = l
dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
dropToWidth _ _ [] = []
simpTable :: PandocMonad m
=> LP m Blocks
-> LP m Inlines
-> Text
-> Bool
-> LP m Blocks
simpTable blocks inline envname hasWidthParameter = try $ do
when hasWidthParameter $ () <$ tokWith inline
skipopts
colspecs <- parseAligns
let (aligns, widths, prefsufs) = unzip3 colspecs
optional $ controlSeq "caption" *> setCaption inline
spaces
optional label
spaces
optional lbreak
spaces
skipMany hline
spaces
header' <- option [] . try . fmap (:[]) $
parseTableRow blocks inline envname prefsufs <*
lbreak <* many1 hline
spaces
rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
(lbreak <* optional (skipMany hline))
spaces
optional $ controlSeq "caption" *> setCaption inline
spaces
optional label
spaces
optional lbreak
spaces
lookAhead $ controlSeq "end" -- make sure we're at end
let th = fixTableHead $ TableHead nullAttr header'
let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
let tf = TableFoot nullAttr []
return $ table emptyCaption (zip aligns widths) th tbs tf
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
addTableCaption = walkM go
where go (Table attr c spec th tb tf) = do
st <- getState
let mblabel = sLastLabel st
capt <- case (sCaption st, mblabel) of
(Just ils, Nothing) -> return $ caption Nothing (plain ils)
(Just ils, Just lab) -> do
num <- getNextNumber sLastTableNum
setState
st{ sLastTableNum = num
, sLabels = M.insert lab
[Str (renderDottedNum num)]
(sLabels st) }
return $ caption Nothing (plain ils) -- add number??
(Nothing, _) -> return c
let attr' = case (attr, mblabel) of
((_,classes,kvs), Just ident) ->
(ident,classes,kvs)
_ -> attr
return $ addAttrDiv attr'
$ maybe id removeLabel mblabel
$ Table nullAttr capt spec th tb tf
go x = return x
-- TODO: For now we add a Div to contain table attributes, since
-- most writers don't do anything yet with attributes on Table.
-- This can be removed when that changes.
addAttrDiv :: Attr -> Block -> Block
addAttrDiv ("",[],[]) b = b
addAttrDiv attr b = Div attr [b]
|