From 8c3b5dd3ae10dade9f8a52fcba456f1cd8d085c9 Mon Sep 17 00:00:00 2001 From: "Laurent P. René de Cotret" Date: Thu, 23 Jul 2020 14:23:21 -0400 Subject: Col-span and row-span in LaTeX reader (#6470) Add multirow and multicolumn support in LaTex reader. Partially addresses #6311. --- src/Text/Pandoc/Readers/LaTeX.hs | 76 ++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 19 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index e571da5ad..1c6954279 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -2338,10 +2338,9 @@ parseAligns = try $ do parseTableRow :: PandocMonad m => Text -- ^ table environment name -> [([Tok], [Tok])] -- ^ pref/suffixes - -> LP m [Blocks] + -> LP m Row parseTableRow envname prefsufs = do notFollowedBy (spaces *> end_ envname) - let cols = length prefsufs -- add prefixes and suffixes in token stream: let celltoks (pref, suff) = do prefpos <- getPosition @@ -2360,21 +2359,62 @@ parseTableRow envname prefsufs = do cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells setInput oldInput spaces - let numcells = length cells - guard $ numcells <= cols && numcells >= 1 - guard $ cells /= [mempty] - -- note: a & b in a three-column table leaves an empty 3rd cell: - return $ cells ++ replicate (cols - numcells) mempty + return $ Row nullAttr cells -parseTableCell :: PandocMonad m => LP m Blocks +parseTableCell :: PandocMonad m => LP m Cell parseTableCell = do - let plainify bs = case toList bs of - [Para ils] -> plain (fromList ils) - _ -> bs updateState $ \st -> st{ sInTableCell = True } - cells <- plainify <$> blocks + cell' <- parseMultiCell <|> parseSimpleCell updateState $ \st -> st{ sInTableCell = False } - return cells + return cell' + +cellAlignment :: PandocMonad m => LP m Alignment +cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') + where + alignment = do + c <- untoken <$> singleChar + return $ case c of + "l" -> AlignLeft + "r" -> AlignRight + "c" -> AlignCenter + "*" -> AlignDefault + _ -> AlignDefault + +plainify :: Blocks -> Blocks +plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + +parseMultiCell :: PandocMonad m => LP m Cell +parseMultiCell = (controlSeq "multirow" >> parseMultirowCell) + <|> (controlSeq "multicolumn" >> parseMulticolCell) + where + parseMultirowCell = parseMultiXCell RowSpan (const $ ColSpan 1) + parseMulticolCell = parseMultiXCell (const $ RowSpan 1) ColSpan + + parseMultiXCell rowspanf colspanf = do + span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced + alignment <- symbol '{' *> cellAlignment <* symbol '}' + + -- Two possible contents: either a nested \multirow/\multicol, or content. + -- E.g. \multirow{1}{c}{\multicol{1}{c}{content}} + let singleCell = do + content <- plainify <$> blocks + return $ cell alignment (rowspanf span') (colspanf span') content + + let nestedCell = do + (Cell _ _ (RowSpan rs) (ColSpan cs) bs) <- parseMultiCell + return $ cell + alignment + (RowSpan $ max span' rs) + (ColSpan $ max span' cs) + (fromList bs) + + symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' + +-- Parse a simple cell, i.e. not multirow/multicol +parseSimpleCell :: PandocMonad m => LP m Cell +parseSimpleCell = simpleCell <$> (plainify <$> blocks) simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks simpTable envname hasWidthParameter = try $ do @@ -2390,8 +2430,8 @@ simpTable envname hasWidthParameter = try $ do spaces skipMany hline spaces - header' <- option [] $ try (parseTableRow envname prefsufs <* - lbreak <* many1 hline) + header' <- option [] . try . fmap (:[]) $ + parseTableRow envname prefsufs <* lbreak <* many1 hline spaces rows <- sepEndBy (parseTableRow envname prefsufs) (lbreak <* optional (skipMany hline)) @@ -2403,12 +2443,10 @@ simpTable envname hasWidthParameter = try $ do optional lbreak spaces lookAhead $ controlSeq "end" -- make sure we're at end - let toRow = Row nullAttr . map simpleCell - toHeaderRow l = if null l then [] else [toRow l] return $ table emptyCaption (zip aligns widths) - (TableHead nullAttr $ toHeaderRow header') - [TableBody nullAttr 0 [] $ map toRow rows] + (TableHead nullAttr $ header') + [TableBody nullAttr 0 [] rows] (TableFoot nullAttr []) addTableCaption :: PandocMonad m => Blocks -> LP m Blocks -- cgit v1.2.3