aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-27 21:40:56 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-27 21:40:56 -0800
commit08231f5cdd16e31d38d9d6bf59bc5ca12638b438 (patch)
tree5b67f617d33a5aaad2398eac2d944659a7a0edee
parent925815bb33b462e1a4c19a8e2c617d403dec0ce7 (diff)
downloadpandoc-08231f5cdd16e31d38d9d6bf59bc5ca12638b438.tar.gz
Factor out T.P.Readers.LaTeX.Table.
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs368
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs33
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Table.hs373
4 files changed, 412 insertions, 363 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 68edb2b64..9149c4f8f 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -631,6 +631,7 @@ library
Text.Pandoc.Readers.LaTeX.Lang,
Text.Pandoc.Readers.LaTeX.SIunitx,
Text.Pandoc.Readers.LaTeX.Accent,
+ Text.Pandoc.Readers.LaTeX.Table,
Text.Pandoc.Readers.Odt.Base,
Text.Pandoc.Readers.Odt.Namespaces,
Text.Pandoc.Readers.Odt.StyleReader,
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 51c031f78..831c5df05 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,4 +1,3 @@
-{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
@@ -32,7 +31,6 @@ import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isDigit, isLetter, toUpper, chr)
import Data.Default
-import Data.Functor (($>))
import Data.List (intercalate)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, maybeToList)
@@ -58,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
ArgSpec (..), Tok (..), TokType (..))
import Text.Pandoc.Readers.LaTeX.Parsing
import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
+import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments)
import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
babelLangToBCP47)
import Text.Pandoc.Readers.LaTeX.SIunitx
@@ -551,12 +550,8 @@ inlineCommand' = try $ do
<|> ignore rawcommand
lookupListDefault raw names inlineCommands
-
tok :: PandocMonad m => LP m Inlines
-tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
- where singleChar' = do
- Tok _ _ t <- singleChar
- return $ str t
+tok = tokWith inline
opt :: PandocMonad m => LP m Inlines
opt = do
@@ -1118,12 +1113,6 @@ treatAsInline = Set.fromList
, "pagebreak"
]
-label :: PandocMonad m => LP m ()
-label = do
- controlSeq "label"
- t <- braced
- updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
-
dolabel :: PandocMonad m => LP m Inlines
dolabel = do
v <- braced
@@ -1421,13 +1410,6 @@ bracketedNum = do
Just i -> return i
_ -> return 0
-setCaption :: PandocMonad m => LP m ()
-setCaption = try $ do
- skipopts
- ils <- tok
- optional $ try $ spaces *> label
- updateState $ \st -> st{ sCaption = Just ils }
-
looseItem :: PandocMonad m => LP m Blocks
looseItem = do
inListItem <- sInListItem <$> getState
@@ -1441,10 +1423,6 @@ epigraph = do
p2 <- grouped block
return $ divWith ("", ["epigraph"], []) (p1 <> p2)
-resetCaption :: PandocMonad m => LP m ()
-resetCaption = updateState $ \st -> st{ sCaption = Nothing
- , sLastLabel = Nothing }
-
section :: PandocMonad m => Attr -> Int -> LP m Blocks
section (ident, classes, kvs) lvl = do
skipopts
@@ -1585,7 +1563,7 @@ blockCommands = M.fromList
, ("item", looseItem)
, ("documentclass", skipopts *> braced *> preamble)
, ("centerline", para . trimInlines <$> (skipopts *> tok))
- , ("caption", mempty <$ setCaption)
+ , ("caption", mempty <$ setCaption inline)
, ("bibliography", mempty <$ (skipopts *> braced >>=
addMeta "bibliography" . splitBibs . untokenize))
, ("addbibresource", mempty <$ (skipopts *> braced >>=
@@ -1623,7 +1601,8 @@ blockCommands = M.fromList
environments :: PandocMonad m => M.Map Text (LP m Blocks)
-environments = M.fromList
+environments = M.union (tableEnvironments blocks inline) $
+ M.fromList
[ ("document", env "document" blocks <* skipMany anyTok)
, ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract"))
, ("sloppypar", env "sloppypar" blocks)
@@ -1633,13 +1612,6 @@ environments = M.fromList
, ("figure", env "figure" $ skipopts *> figure)
, ("subfigure", env "subfigure" $ skipopts *> tok *> figure)
, ("center", divWith ("", ["center"], []) <$> env "center" blocks)
- , ("longtable", env "longtable" $
- resetCaption *> simpTable "longtable" False >>= addTableCaption)
- , ("table", env "table" $
- skipopts *> resetCaption *> blocks >>= addTableCaption)
- , ("tabular*", env "tabular*" $ simpTable "tabular*" True)
- , ("tabularx", env "tabularx" $ simpTable "tabularx" True)
- , ("tabular", env "tabular" $ simpTable "tabular" False)
, ("quote", blockQuote <$> env "quote" blocks)
, ("quotation", blockQuote <$> env "quotation" blocks)
, ("verse", blockQuote <$> env "verse" blocks)
@@ -1805,9 +1777,6 @@ italicize (Para ils) = Para [Emph ils]
italicize (Plain ils) = Plain [Emph ils]
italicize x = x
-env :: PandocMonad m => Text -> LP m a -> LP m a
-env name p = p <* end_ name
-
rawEnv :: PandocMonad m => Text -> LP m Blocks
rawEnv name = do
exts <- getOption readerExtensions
@@ -2045,333 +2014,6 @@ orderedList' = try $ do
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
--- tables
-
-hline :: PandocMonad m => LP m ()
-hline = try $ do
- spaces
- controlSeq "hline" <|>
- -- booktabs rules:
- controlSeq "toprule" <|>
- controlSeq "bottomrule" <|>
- controlSeq "midrule" <|>
- controlSeq "endhead" <|>
- controlSeq "endfirsthead"
- spaces
- optional opt
- return ()
-
-lbreak :: PandocMonad m => LP m Tok
-lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
- <* skipopts <* spaces
-
-amp :: PandocMonad m => LP m Tok
-amp = symbol '&'
-
--- Split a Word into individual Symbols (for parseAligns)
-splitWordTok :: PandocMonad m => LP m ()
-splitWordTok = do
- inp <- getInput
- case inp of
- (Tok spos Word t : rest) ->
- setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
- _ -> return ()
-
-parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
-parseAligns = try $ do
- let maybeBar = skipMany
- (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
- let cAlign = AlignCenter <$ symbol 'c'
- let lAlign = AlignLeft <$ symbol 'l'
- let rAlign = AlignRight <$ symbol 'r'
- let parAlign = AlignLeft <$ symbol 'p'
- -- aligns from tabularx
- let xAlign = AlignLeft <$ symbol 'X'
- let mAlign = AlignLeft <$ symbol 'm'
- let bAlign = AlignLeft <$ symbol 'b'
- let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
- <|> xAlign <|> mAlign <|> bAlign )
- let alignPrefix = symbol '>' >> braced
- let alignSuffix = symbol '<' >> braced
- let colWidth = try $ do
- symbol '{'
- ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
- spaces
- symbol '}'
- return $ safeRead ds
- let alignSpec = do
- pref <- option [] alignPrefix
- spaces
- al <- alignChar
- width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
- pos <- getPosition
- report $ SkippedContent s pos
- return Nothing)
- spaces
- suff <- option [] alignSuffix
- return (al, width, (pref, suff))
- let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
- symbol '*'
- spaces
- ds <- trim . untokenize <$> braced
- spaces
- spec <- braced
- case safeRead ds of
- Just n ->
- getInput >>= setInput . (mconcat (replicate n spec) ++)
- Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
- bgroup
- spaces
- maybeBar
- aligns' <- many $ try $ spaces >> optional starAlign >>
- (alignSpec <* maybeBar)
- spaces
- egroup
- spaces
- return $ map toSpec aligns'
- where
- toColWidth (Just w) | w > 0 = ColWidth w
- toColWidth _ = ColWidthDefault
- toSpec (x, y, z) = (x, toColWidth y, z)
-
--- N.B. this parser returns a Row that may have erroneous empty cells
--- in it. See the note above fixTableHead for details.
-parseTableRow :: PandocMonad m
- => Text -- ^ table environment name
- -> [([Tok], [Tok])] -- ^ pref/suffixes
- -> LP m Row
-parseTableRow envname prefsufs = do
- notFollowedBy (spaces *> end_ envname)
- -- add prefixes and suffixes in token stream:
- let celltoks (pref, suff) = do
- prefpos <- getPosition
- contents <- mconcat <$>
- many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711
- <|>
- snd <$> withRaw (inlineEnvironment <|> dollarsMath)
- <|>
- (do notFollowedBy
- (() <$ amp <|> () <$ lbreak <|> end_ envname)
- count 1 anyTok) )
-
- suffpos <- getPosition
- option [] (count 1 amp)
- return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
- rawcells <- mapM celltoks prefsufs
- cells <- mapM (parseFromToks parseTableCell) rawcells
- spaces
- return $ Row nullAttr cells
-
-parseTableCell :: PandocMonad m => LP m Cell
-parseTableCell = do
- spaces
- updateState $ \st -> st{ sInTableCell = True }
- cell' <- multicolumnCell
- <|> multirowCell
- <|> parseSimpleCell
- <|> parseEmptyCell
- updateState $ \st -> st{ sInTableCell = False }
- spaces
- return cell'
- where
- -- The parsing of empty cells is important in LaTeX, especially when dealing
- -- with multirow/multicolumn. See #6603.
- parseEmptyCell = spaces $> emptyCell
-
-cellAlignment :: PandocMonad m => LP m Alignment
-cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
- where
- alignment = do
- c <- untoken <$> singleChar
- return $ case c of
- "l" -> AlignLeft
- "r" -> AlignRight
- "c" -> AlignCenter
- "*" -> AlignDefault
- _ -> AlignDefault
-
-plainify :: Blocks -> Blocks
-plainify bs = case toList bs of
- [Para ils] -> plain (fromList ils)
- _ -> bs
-
-multirowCell :: PandocMonad m => LP m Cell
-multirowCell = controlSeq "multirow" >> do
- -- Full prototype for \multirow macro is:
- -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
- -- However, everything except `nrows` and `text` make
- -- sense in the context of the Pandoc AST
- _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position
- nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
- _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related
- _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width
- _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning
- content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
- return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
-
-multicolumnCell :: PandocMonad m => LP m Cell
-multicolumnCell = controlSeq "multicolumn" >> do
- span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
- alignment <- symbol '{' *> cellAlignment <* symbol '}'
-
- let singleCell = do
- content <- plainify <$> blocks
- return $ cell alignment (RowSpan 1) (ColSpan span') content
-
- -- Two possible contents: either a \multirow cell, or content.
- -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
- -- Note that a \multirow cell can be nested in a \multicolumn,
- -- but not the other way around. See #6603
- let nestedCell = do
- (Cell _ _ (RowSpan rs) _ bs) <- multirowCell
- return $ cell
- alignment
- (RowSpan rs)
- (ColSpan span')
- (fromList bs)
-
- symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
-
--- Parse a simple cell, i.e. not multirow/multicol
-parseSimpleCell :: PandocMonad m => LP m Cell
-parseSimpleCell = simpleCell <$> (plainify <$> blocks)
-
--- LaTeX tables are stored with empty cells underneath multirow cells
--- denoting the grid spaces taken up by them. More specifically, if a
--- cell spans m rows, then it will overwrite all the cells in the
--- columns it spans for (m-1) rows underneath it, requiring padding
--- cells in these places. These padding cells need to be removed for
--- proper table reading. See #6603.
---
--- These fixTable functions do not otherwise fix up malformed
--- input tables: that is left to the table builder.
-fixTableHead :: TableHead -> TableHead
-fixTableHead (TableHead attr rows) = TableHead attr rows'
- where
- rows' = fixTableRows rows
-
-fixTableBody :: TableBody -> TableBody
-fixTableBody (TableBody attr rhc th tb)
- = TableBody attr rhc th' tb'
- where
- th' = fixTableRows th
- tb' = fixTableRows tb
-
-fixTableRows :: [Row] -> [Row]
-fixTableRows = fixTableRows' $ repeat Nothing
- where
- fixTableRows' oldHang (Row attr cells : rs)
- = let (newHang, cells') = fixTableRow oldHang cells
- rs' = fixTableRows' newHang rs
- in Row attr cells' : rs'
- fixTableRows' _ [] = []
-
--- The overhang is represented as Just (relative cell dimensions) or
--- Nothing for an empty grid space.
-fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
-fixTableRow oldHang cells
- -- If there's overhang, drop cells until their total width meets the
- -- width of the occupied grid spaces (or we run out)
- | (n, prefHang, restHang) <- splitHang oldHang
- , n > 0
- = let cells' = dropToWidth getCellW n cells
- (restHang', cells'') = fixTableRow restHang cells'
- in (prefHang restHang', cells'')
- -- Otherwise record the overhang of a pending cell and fix the rest
- -- of the row
- | c@(Cell _ _ h w _):cells' <- cells
- = let h' = max 1 h
- w' = max 1 w
- oldHang' = dropToWidth getHangW w' oldHang
- (newHang, cells'') = fixTableRow oldHang' cells'
- in (toHang w' h' <> newHang, c : cells'')
- | otherwise
- = (oldHang, [])
- where
- getCellW (Cell _ _ _ w _) = w
- getHangW = maybe 1 fst
- getCS (ColSpan n) = n
-
- toHang c r
- | r > 1 = [Just (c, r)]
- | otherwise = replicate (getCS c) Nothing
-
- -- Take the prefix of the overhang list representing filled grid
- -- spaces. Also return the remainder and the length of this prefix.
- splitHang = splitHang' 0 id
-
- splitHang' !n l (Just (c, r):xs)
- = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
- splitHang' n l xs = (n, l, xs)
-
- -- Drop list items until the total width of the dropped items
- -- exceeds the passed width.
- dropToWidth _ n l | n < 1 = l
- dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
- dropToWidth _ _ [] = []
-
-simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
-simpTable envname hasWidthParameter = try $ do
- when hasWidthParameter $ () <$ (spaces >> tok)
- skipopts
- colspecs <- parseAligns
- let (aligns, widths, prefsufs) = unzip3 colspecs
- optional $ controlSeq "caption" *> setCaption
- spaces
- optional label
- spaces
- optional lbreak
- spaces
- skipMany hline
- spaces
- header' <- option [] . try . fmap (:[]) $
- parseTableRow envname prefsufs <* lbreak <* many1 hline
- spaces
- rows <- sepEndBy (parseTableRow envname prefsufs)
- (lbreak <* optional (skipMany hline))
- spaces
- optional $ controlSeq "caption" *> setCaption
- spaces
- optional label
- spaces
- optional lbreak
- spaces
- lookAhead $ controlSeq "end" -- make sure we're at end
- let th = fixTableHead $ TableHead nullAttr header'
- let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
- let tf = TableFoot nullAttr []
- return $ table emptyCaption (zip aligns widths) th tbs tf
-
-addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
-addTableCaption = walkM go
- where go (Table attr c spec th tb tf) = do
- st <- getState
- let mblabel = sLastLabel st
- capt <- case (sCaption st, mblabel) of
- (Just ils, Nothing) -> return $ caption Nothing (plain ils)
- (Just ils, Just lab) -> do
- num <- getNextNumber sLastTableNum
- setState
- st{ sLastTableNum = num
- , sLabels = M.insert lab
- [Str (renderDottedNum num)]
- (sLabels st) }
- return $ caption Nothing (plain ils) -- add number??
- (Nothing, _) -> return c
- let attr' = case (attr, mblabel) of
- ((_,classes,kvs), Just ident) ->
- (ident,classes,kvs)
- _ -> attr
- return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
- go x = return x
-
--- TODO: For now we add a Div to contain table attributes, since
--- most writers don't do anything yet with attributes on Table.
--- This can be removed when that changes.
-addAttrDiv :: Attr -> Block -> Block
-addAttrDiv ("",[],[]) b = b
-addAttrDiv attr b = Div attr [b]
-
block :: PandocMonad m => LP m Blocks
block = do
res <- (mempty <$ spaces1)
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index c2e10570d..4a9fa03ad 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -54,6 +54,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, comment
, anyTok
, singleChar
+ , tokWith
, specialChars
, endline
, blankline
@@ -80,6 +81,10 @@ module Text.Pandoc.Readers.LaTeX.Parsing
, rawopt
, overlaySpecification
, getNextNumber
+ , label
+ , setCaption
+ , resetCaption
+ , env
) where
import Control.Applicative (many, (<|>))
@@ -914,3 +919,31 @@ getNextNumber getCurrentNum = do
Just n -> [n, 1]
Nothing -> [1]
+label :: PandocMonad m => LP m ()
+label = do
+ controlSeq "label"
+ t <- braced
+ updateState $ \st -> st{ sLastLabel = Just $ untokenize t }
+
+setCaption :: PandocMonad m => LP m Inlines -> LP m ()
+setCaption inline = try $ do
+ skipopts
+ ils <- tokWith inline
+ optional $ try $ spaces *> label
+ updateState $ \st -> st{ sCaption = Just ils }
+
+resetCaption :: PandocMonad m => LP m ()
+resetCaption = updateState $ \st -> st{ sCaption = Nothing
+ , sLastLabel = Nothing }
+
+env :: PandocMonad m => Text -> LP m a -> LP m a
+env name p = p <* end_ name
+
+tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines
+tokWith inlineParser = try $ spaces >>
+ grouped inlineParser
+ <|> (lookAhead anyControlSeq >> inlineParser)
+ <|> singleChar'
+ where singleChar' = do
+ Tok _ _ t <- singleChar
+ return $ str t
diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs
new file mode 100644
index 000000000..2ea9caf58
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs
@@ -0,0 +1,373 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Table
+ ( tableEnvironments )
+where
+
+import Data.Functor (($>))
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Types
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import qualified Data.Text as T
+import Control.Applicative ((<|>), optional, many)
+import Control.Monad (when, void)
+import Text.Pandoc.Shared (safeRead, trim)
+import Text.Pandoc.Logging (LogMessage(SkippedContent))
+import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
+ optional, space, spaces, withRaw, (<|>))
+
+tableEnvironments :: PandocMonad m
+ => LP m Blocks
+ -> LP m Inlines
+ -> M.Map Text (LP m Blocks)
+tableEnvironments blocks inline =
+ M.fromList
+ [ ("longtable", env "longtable" $
+ resetCaption *>
+ simpTable blocks inline "longtable" False >>= addTableCaption)
+ , ("table", env "table" $
+ skipopts *> resetCaption *> blocks >>= addTableCaption)
+ , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True)
+ , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True)
+ , ("tabular", env "tabular" $ simpTable blocks inline "tabular" False)
+ ]
+
+hline :: PandocMonad m => LP m ()
+hline = try $ do
+ spaces
+ controlSeq "hline" <|>
+ -- booktabs rules:
+ controlSeq "toprule" <|>
+ controlSeq "bottomrule" <|>
+ controlSeq "midrule" <|>
+ controlSeq "endhead" <|>
+ controlSeq "endfirsthead"
+ spaces
+ optional rawopt
+ return ()
+
+lbreak :: PandocMonad m => LP m Tok
+lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline")
+ <* skipopts <* spaces
+
+amp :: PandocMonad m => LP m Tok
+amp = symbol '&'
+
+-- Split a Word into individual Symbols (for parseAligns)
+splitWordTok :: PandocMonad m => LP m ()
+splitWordTok = do
+ inp <- getInput
+ case inp of
+ (Tok spos Word t : rest) ->
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
+ _ -> return ()
+
+parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))]
+parseAligns = try $ do
+ let maybeBar = skipMany
+ (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced)))
+ let cAlign = AlignCenter <$ symbol 'c'
+ let lAlign = AlignLeft <$ symbol 'l'
+ let rAlign = AlignRight <$ symbol 'r'
+ let parAlign = AlignLeft <$ symbol 'p'
+ -- aligns from tabularx
+ let xAlign = AlignLeft <$ symbol 'X'
+ let mAlign = AlignLeft <$ symbol 'm'
+ let bAlign = AlignLeft <$ symbol 'b'
+ let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign
+ <|> xAlign <|> mAlign <|> bAlign )
+ let alignPrefix = symbol '>' >> braced
+ let alignSuffix = symbol '<' >> braced
+ let colWidth = try $ do
+ symbol '{'
+ ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
+ spaces
+ symbol '}'
+ return $ safeRead ds
+ let alignSpec = do
+ pref <- option [] alignPrefix
+ spaces
+ al <- alignChar
+ width <- colWidth <|> option Nothing (do s <- untokenize <$> braced
+ pos <- getPosition
+ report $ SkippedContent s pos
+ return Nothing)
+ spaces
+ suff <- option [] alignSuffix
+ return (al, width, (pref, suff))
+ let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
+ symbol '*'
+ spaces
+ ds <- trim . untokenize <$> braced
+ spaces
+ spec <- braced
+ case safeRead ds of
+ Just n ->
+ getInput >>= setInput . (mconcat (replicate n spec) ++)
+ Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
+ bgroup
+ spaces
+ maybeBar
+ aligns' <- many $ try $ spaces >> optional starAlign >>
+ (alignSpec <* maybeBar)
+ spaces
+ egroup
+ spaces
+ return $ map toSpec aligns'
+ where
+ toColWidth (Just w) | w > 0 = ColWidth w
+ toColWidth _ = ColWidthDefault
+ toSpec (x, y, z) = (x, toColWidth y, z)
+
+-- N.B. this parser returns a Row that may have erroneous empty cells
+-- in it. See the note above fixTableHead for details.
+parseTableRow :: PandocMonad m
+ => LP m Blocks -- ^ block parser
+ -> LP m Inlines -- ^ inline parser
+ -> Text -- ^ table environment name
+ -> [([Tok], [Tok])] -- ^ pref/suffixes
+ -> LP m Row
+parseTableRow blocks inline envname prefsufs = do
+ notFollowedBy (spaces *> end_ envname)
+ -- add prefixes and suffixes in token stream:
+ let celltoks (pref, suff) = do
+ prefpos <- getPosition
+ contents <- mconcat <$>
+ many ( snd <$> withRaw
+ ((lookAhead (controlSeq "parbox") >>
+ void blocks) -- #5711
+ <|>
+ (lookAhead (controlSeq "begin") >> void inline)
+ <|>
+ (lookAhead (symbol '$') >> void inline))
+ <|>
+ (do notFollowedBy
+ (() <$ amp <|> () <$ lbreak <|> end_ envname)
+ count 1 anyTok) )
+
+ suffpos <- getPosition
+ option [] (count 1 amp)
+ return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff
+ rawcells <- mapM celltoks prefsufs
+ cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells
+ spaces
+ return $ Row nullAttr cells
+
+parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell
+parseTableCell blocks = do
+ spaces
+ updateState $ \st -> st{ sInTableCell = True }
+ cell' <- multicolumnCell blocks
+ <|> multirowCell blocks
+ <|> parseSimpleCell
+ <|> parseEmptyCell
+ updateState $ \st -> st{ sInTableCell = False }
+ spaces
+ return cell'
+ where
+ -- The parsing of empty cells is important in LaTeX, especially when dealing
+ -- with multirow/multicolumn. See #6603.
+ parseEmptyCell = spaces $> emptyCell
+ parseSimpleCell = simpleCell <$> (plainify <$> blocks)
+
+
+cellAlignment :: PandocMonad m => LP m Alignment
+cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|')
+ where
+ alignment = do
+ c <- untoken <$> singleChar
+ return $ case c of
+ "l" -> AlignLeft
+ "r" -> AlignRight
+ "c" -> AlignCenter
+ "*" -> AlignDefault
+ _ -> AlignDefault
+
+plainify :: Blocks -> Blocks
+plainify bs = case toList bs of
+ [Para ils] -> plain (fromList ils)
+ _ -> bs
+
+multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multirowCell blocks = controlSeq "multirow" >> do
+ -- Full prototype for \multirow macro is:
+ -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text}
+ -- However, everything except `nrows` and `text` make
+ -- sense in the context of the Pandoc AST
+ _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position
+ nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+ _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related
+ _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width
+ _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning
+ content <- symbol '{' *> (plainify <$> blocks) <* symbol '}'
+ return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content
+
+multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell
+multicolumnCell blocks = controlSeq "multicolumn" >> do
+ span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced
+ alignment <- symbol '{' *> cellAlignment <* symbol '}'
+
+ let singleCell = do
+ content <- plainify <$> blocks
+ return $ cell alignment (RowSpan 1) (ColSpan span') content
+
+ -- Two possible contents: either a \multirow cell, or content.
+ -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}}
+ -- Note that a \multirow cell can be nested in a \multicolumn,
+ -- but not the other way around. See #6603
+ let nestedCell = do
+ (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks
+ return $ cell
+ alignment
+ (RowSpan rs)
+ (ColSpan span')
+ (fromList bs)
+
+ symbol '{' *> (nestedCell <|> singleCell) <* symbol '}'
+
+-- LaTeX tables are stored with empty cells underneath multirow cells
+-- denoting the grid spaces taken up by them. More specifically, if a
+-- cell spans m rows, then it will overwrite all the cells in the
+-- columns it spans for (m-1) rows underneath it, requiring padding
+-- cells in these places. These padding cells need to be removed for
+-- proper table reading. See #6603.
+--
+-- These fixTable functions do not otherwise fix up malformed
+-- input tables: that is left to the table builder.
+fixTableHead :: TableHead -> TableHead
+fixTableHead (TableHead attr rows) = TableHead attr rows'
+ where
+ rows' = fixTableRows rows
+
+fixTableBody :: TableBody -> TableBody
+fixTableBody (TableBody attr rhc th tb)
+ = TableBody attr rhc th' tb'
+ where
+ th' = fixTableRows th
+ tb' = fixTableRows tb
+
+fixTableRows :: [Row] -> [Row]
+fixTableRows = fixTableRows' $ repeat Nothing
+ where
+ fixTableRows' oldHang (Row attr cells : rs)
+ = let (newHang, cells') = fixTableRow oldHang cells
+ rs' = fixTableRows' newHang rs
+ in Row attr cells' : rs'
+ fixTableRows' _ [] = []
+
+-- The overhang is represented as Just (relative cell dimensions) or
+-- Nothing for an empty grid space.
+fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
+fixTableRow oldHang cells
+ -- If there's overhang, drop cells until their total width meets the
+ -- width of the occupied grid spaces (or we run out)
+ | (n, prefHang, restHang) <- splitHang oldHang
+ , n > 0
+ = let cells' = dropToWidth getCellW n cells
+ (restHang', cells'') = fixTableRow restHang cells'
+ in (prefHang restHang', cells'')
+ -- Otherwise record the overhang of a pending cell and fix the rest
+ -- of the row
+ | c@(Cell _ _ h w _):cells' <- cells
+ = let h' = max 1 h
+ w' = max 1 w
+ oldHang' = dropToWidth getHangW w' oldHang
+ (newHang, cells'') = fixTableRow oldHang' cells'
+ in (toHang w' h' <> newHang, c : cells'')
+ | otherwise
+ = (oldHang, [])
+ where
+ getCellW (Cell _ _ _ w _) = w
+ getHangW = maybe 1 fst
+ getCS (ColSpan n) = n
+
+ toHang c r
+ | r > 1 = [Just (c, r)]
+ | otherwise = replicate (getCS c) Nothing
+
+ -- Take the prefix of the overhang list representing filled grid
+ -- spaces. Also return the remainder and the length of this prefix.
+ splitHang = splitHang' 0 id
+
+ splitHang' !n l (Just (c, r):xs)
+ = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
+ splitHang' n l xs = (n, l, xs)
+
+ -- Drop list items until the total width of the dropped items
+ -- exceeds the passed width.
+ dropToWidth _ n l | n < 1 = l
+ dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
+ dropToWidth _ _ [] = []
+
+simpTable :: PandocMonad m
+ => LP m Blocks
+ -> LP m Inlines
+ -> Text
+ -> Bool
+ -> LP m Blocks
+simpTable blocks inline envname hasWidthParameter = try $ do
+ when hasWidthParameter $ () <$ tokWith inline
+ skipopts
+ colspecs <- parseAligns
+ let (aligns, widths, prefsufs) = unzip3 colspecs
+ optional $ controlSeq "caption" *> setCaption inline
+ spaces
+ optional label
+ spaces
+ optional lbreak
+ spaces
+ skipMany hline
+ spaces
+ header' <- option [] . try . fmap (:[]) $
+ parseTableRow blocks inline envname prefsufs <*
+ lbreak <* many1 hline
+ spaces
+ rows <- sepEndBy (parseTableRow blocks inline envname prefsufs)
+ (lbreak <* optional (skipMany hline))
+ spaces
+ optional $ controlSeq "caption" *> setCaption inline
+ spaces
+ optional label
+ spaces
+ optional lbreak
+ spaces
+ lookAhead $ controlSeq "end" -- make sure we're at end
+ let th = fixTableHead $ TableHead nullAttr header'
+ let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
+ let tf = TableFoot nullAttr []
+ return $ table emptyCaption (zip aligns widths) th tbs tf
+
+addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
+addTableCaption = walkM go
+ where go (Table attr c spec th tb tf) = do
+ st <- getState
+ let mblabel = sLastLabel st
+ capt <- case (sCaption st, mblabel) of
+ (Just ils, Nothing) -> return $ caption Nothing (plain ils)
+ (Just ils, Just lab) -> do
+ num <- getNextNumber sLastTableNum
+ setState
+ st{ sLastTableNum = num
+ , sLabels = M.insert lab
+ [Str (renderDottedNum num)]
+ (sLabels st) }
+ return $ caption Nothing (plain ils) -- add number??
+ (Nothing, _) -> return c
+ let attr' = case (attr, mblabel) of
+ ((_,classes,kvs), Just ident) ->
+ (ident,classes,kvs)
+ _ -> attr
+ return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf
+ go x = return x
+
+-- TODO: For now we add a Div to contain table attributes, since
+-- most writers don't do anything yet with attributes on Table.
+-- This can be removed when that changes.
+addAttrDiv :: Attr -> Block -> Block
+addAttrDiv ("",[],[]) b = b
+addAttrDiv attr b = Div attr [b]