diff options
author | Laurent P. René de Cotret <LaurentRDC@users.noreply.github.com> | 2020-08-15 14:40:10 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-08-15 11:40:10 -0700 |
commit | 482a2e50798481f484267bdcfb7b305ea7bd7971 (patch) | |
tree | 1e211ac845ef7e5e576785615f83319c641db32b /src | |
parent | 3766e03c7d2e4fb0378bb9de1420fa4f6c8107a2 (diff) | |
download | pandoc-482a2e50798481f484267bdcfb7b305ea7bd7971.tar.gz |
[LaTeX Reader] Fixing issues with \multirow and \multicolumn table cells (#6608)
* Added test to replicate (#6596)
* Table cell reader not consuming spaces correctly (#6596)
* Prevented wrong nesting of \multicolumn and \multirow table cells (#6603)
* Parse empty table cells (#6603)
* Support full prototype for multirow macro (#6603)
Closes #6603
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 76 |
1 files changed, 48 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 936dd6740..85e30f538 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -2383,16 +2383,26 @@ parseTableRow envname prefsufs = do cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells setInput oldInput spaces - return $ Row nullAttr cells + -- Because of table normalization performed by Text.Pandoc.Builder.table, + -- we need to remove empty cells + return $ Row nullAttr $ filter (\c -> c /= emptyCell) cells parseTableCell :: PandocMonad m => LP m Cell parseTableCell = do spaces updateState $ \st -> st{ sInTableCell = True } - cell' <- parseMultiCell <|> parseSimpleCell + cell' <- ( multicolumnCell + <|> multirowCell + <|> parseSimpleCell + <|> parseEmptyCell + ) updateState $ \st -> st{ sInTableCell = False } spaces return cell' + where + -- The parsing of empty cells is important in LaTeX, especially when dealing + -- with multirow/multicolumn. See #6603. + parseEmptyCell = optional spaces >> return emptyCell <* optional spaces cellAlignment :: PandocMonad m => LP m Alignment cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') @@ -2411,32 +2421,42 @@ plainify bs = case toList bs of [Para ils] -> plain (fromList ils) _ -> bs -parseMultiCell :: PandocMonad m => LP m Cell -parseMultiCell = (controlSeq "multirow" >> parseMultirowCell) - <|> (controlSeq "multicolumn" >> parseMulticolCell) - where - parseMultirowCell = parseMultiXCell RowSpan (const $ ColSpan 1) - parseMulticolCell = parseMultiXCell (const $ RowSpan 1) ColSpan - - parseMultiXCell rowspanf colspanf = do - span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced - alignment <- symbol '{' *> cellAlignment <* symbol '}' - - -- Two possible contents: either a nested \multirow/\multicol, or content. - -- E.g. 
\multirow{1}{c}{\multicol{1}{c}{content}} - let singleCell = do - content <- plainify <$> blocks - return $ cell alignment (rowspanf span') (colspanf span') content - - let nestedCell = do - (Cell _ _ (RowSpan rs) (ColSpan cs) bs) <- parseMultiCell - return $ cell - alignment - (RowSpan $ max span' rs) - (ColSpan $ max span' cs) - (fromList bs) - - symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' +multirowCell :: PandocMonad m => LP m Cell +multirowCell = controlSeq "multirow" >> do + -- Full prototype for \multirow macro is: + -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text} + -- However, nothing except `nrows` and `text` makes + -- sense in the context of the Pandoc AST + _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position + nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced + _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related + _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width + _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning + content <- symbol '{' *> (plainify <$> blocks) <* symbol '}' + return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content + +multicolumnCell :: PandocMonad m => LP m Cell +multicolumnCell = controlSeq "multicolumn" >> do + span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced + alignment <- symbol '{' *> cellAlignment <* symbol '}' + + let singleCell = do + content <- plainify <$> blocks + return $ cell alignment (RowSpan 1) (ColSpan span') content + + -- Two possible contents: either a \multirow cell, or content. + -- E.g. \multicolumn{1}{c}{\multirow{2}{1em}{content}} + -- Note that a \multirow cell can be nested in a \multicolumn, + -- but not the other way around. 
See #6603 + let nestedCell = do + (Cell _ _ (RowSpan rs) _ bs) <- multirowCell + return $ cell + alignment + (RowSpan $ rs) + (ColSpan $ span') + (fromList bs) + + symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' -- Parse a simple cell, i.e. not multirow/multicol parseSimpleCell :: PandocMonad m => LP m Cell |