diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-02-27 21:40:56 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-02-27 21:40:56 -0800 |
commit | 08231f5cdd16e31d38d9d6bf59bc5ca12638b438 (patch) | |
tree | 5b67f617d33a5aaad2398eac2d944659a7a0edee | |
parent | 925815bb33b462e1a4c19a8e2c617d403dec0ce7 (diff) | |
download | pandoc-08231f5cdd16e31d38d9d6bf59bc5ca12638b438.tar.gz |
Factor out T.P.Readers.LaTeX.Table.
-rw-r--r-- | pandoc.cabal | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 368 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 33 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Table.hs | 373 |
4 files changed, 412 insertions, 363 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index 68edb2b64..9149c4f8f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -631,6 +631,7 @@ library Text.Pandoc.Readers.LaTeX.Lang, Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.LaTeX.Accent, + Text.Pandoc.Readers.LaTeX.Table, Text.Pandoc.Readers.Odt.Base, Text.Pandoc.Readers.Odt.Namespaces, Text.Pandoc.Readers.Odt.StyleReader, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 51c031f78..831c5df05 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -32,7 +31,6 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isLetter, toUpper, chr) import Data.Default -import Data.Functor (($>)) import Data.List (intercalate) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) @@ -58,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) +import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47) import Text.Pandoc.Readers.LaTeX.SIunitx @@ -551,12 +550,8 @@ inlineCommand' = try $ do <|> ignore rawcommand lookupListDefault raw names inlineCommands - tok :: PandocMonad m => LP m Inlines -tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' - where singleChar' = do - Tok _ _ t <- singleChar - return $ str t +tok = tokWith inline opt :: PandocMonad m => LP m Inlines opt = do @@ -1118,12 +1113,6 @@ treatAsInline = Set.fromList , "pagebreak" ] -label :: PandocMonad m => LP m () -label = do - controlSeq "label" - t <- braced - updateState $ \st -> st{ sLastLabel = Just $ untokenize t } - dolabel :: PandocMonad m => LP m Inlines dolabel = do v <- braced @@ -1421,13 +1410,6 @@ bracketedNum = do Just i -> return i _ -> return 0 -setCaption :: PandocMonad m => LP m () -setCaption = try $ do - skipopts - ils <- tok - optional $ try $ spaces *> label - updateState $ \st -> st{ sCaption = Just ils } - looseItem :: PandocMonad m => LP m Blocks looseItem = do inListItem <- sInListItem <$> getState @@ -1441,10 +1423,6 @@ epigraph = do p2 <- grouped block return $ divWith ("", ["epigraph"], []) (p1 <> p2) -resetCaption :: PandocMonad m => LP m () -resetCaption = updateState $ \st -> st{ sCaption = Nothing - , sLastLabel = Nothing } - section :: PandocMonad m => Attr -> Int -> LP m Blocks section (ident, classes, kvs) lvl = do skipopts @@ -1585,7 +1563,7 @@ blockCommands = M.fromList , ("item", looseItem) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", para . trimInlines <$> (skipopts *> tok)) - , ("caption", mempty <$ setCaption) + , ("caption", mempty <$ setCaption inline) , ("bibliography", mempty <$ (skipopts *> braced >>= addMeta "bibliography" . splitBibs . untokenize)) , ("addbibresource", mempty <$ (skipopts *> braced >>= @@ -1623,7 +1601,8 @@ blockCommands = M.fromList environments :: PandocMonad m => M.Map Text (LP m Blocks) -environments = M.fromList +environments = M.union (tableEnvironments blocks inline) $ + M.fromList [ ("document", env "document" blocks <* skipMany anyTok) , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) , ("sloppypar", env "sloppypar" blocks) @@ -1633,13 +1612,6 @@ environments = M.fromList , ("figure", env "figure" $ skipopts *> figure) , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) , ("center", divWith ("", ["center"], []) <$> env "center" blocks) - , ("longtable", env "longtable" $ - resetCaption *> simpTable "longtable" False >>= addTableCaption) - , ("table", env "table" $ - skipopts *> resetCaption *> blocks >>= addTableCaption) - , ("tabular*", env "tabular*" $ simpTable "tabular*" True) - , ("tabularx", env "tabularx" $ simpTable "tabularx" True) - , ("tabular", env "tabular" $ simpTable "tabular" False) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) , ("verse", blockQuote <$> env "verse" blocks) @@ -1805,9 +1777,6 @@ italicize (Para ils) = Para [Emph ils] italicize (Plain ils) = Plain [Emph ils] italicize x = x -env :: PandocMonad m => Text -> LP m a -> LP m a -env name p = p <* end_ name - rawEnv :: PandocMonad m => Text -> LP m Blocks rawEnv name = do exts <- getOption readerExtensions @@ -2045,333 +2014,6 @@ orderedList' = try $ do bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs --- tables - -hline :: PandocMonad m => LP m () -hline = try $ do - spaces - controlSeq "hline" <|> - -- booktabs rules: - controlSeq "toprule" <|> - controlSeq "bottomrule" <|> - controlSeq "midrule" <|> - controlSeq "endhead" <|> - controlSeq "endfirsthead" - spaces - optional opt - return () - -lbreak :: PandocMonad m => LP m Tok -lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") - <* skipopts <* spaces - -amp :: PandocMonad m => LP m Tok -amp = symbol '&' - --- Split a Word into individual Symbols (for parseAligns) -splitWordTok :: PandocMonad m => LP m () -splitWordTok = do - inp <- getInput - case inp of - (Tok spos Word t : rest) -> - setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest - _ -> return () - -parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))] -parseAligns = try $ do - let maybeBar = skipMany - (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced))) - let cAlign = AlignCenter <$ symbol 'c' - let lAlign = AlignLeft <$ symbol 'l' - let rAlign = AlignRight <$ symbol 'r' - let parAlign = AlignLeft <$ symbol 'p' - -- aligns from tabularx - let xAlign = AlignLeft <$ symbol 'X' - let mAlign = AlignLeft <$ symbol 'm' - let bAlign = AlignLeft <$ symbol 'b' - let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign - <|> xAlign <|> mAlign <|> bAlign ) - let alignPrefix = symbol '>' >> braced - let alignSuffix = symbol '<' >> braced - let colWidth = try $ do - symbol '{' - ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") - spaces - symbol '}' - return $ safeRead ds - let alignSpec = do - pref <- option [] alignPrefix - spaces - al <- alignChar - width <- colWidth <|> option Nothing (do s <- untokenize <$> braced - pos <- getPosition - report $ SkippedContent s pos - return Nothing) - spaces - suff <- option [] alignSuffix - return (al, width, (pref, suff)) - let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro - symbol '*' - spaces - ds <- trim . untokenize <$> braced - spaces - spec <- braced - case safeRead ds of - Just n -> - getInput >>= setInput . (mconcat (replicate n spec) ++) - Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" - bgroup - spaces - maybeBar - aligns' <- many $ try $ spaces >> optional starAlign >> - (alignSpec <* maybeBar) - spaces - egroup - spaces - return $ map toSpec aligns' - where - toColWidth (Just w) | w > 0 = ColWidth w - toColWidth _ = ColWidthDefault - toSpec (x, y, z) = (x, toColWidth y, z) - --- N.B. this parser returns a Row that may have erroneous empty cells --- in it. See the note above fixTableHead for details. -parseTableRow :: PandocMonad m - => Text -- ^ table environment name - -> [([Tok], [Tok])] -- ^ pref/suffixes - -> LP m Row -parseTableRow envname prefsufs = do - notFollowedBy (spaces *> end_ envname) - -- add prefixes and suffixes in token stream: - let celltoks (pref, suff) = do - prefpos <- getPosition - contents <- mconcat <$> - many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711 - <|> - snd <$> withRaw (inlineEnvironment <|> dollarsMath) - <|> - (do notFollowedBy - (() <$ amp <|> () <$ lbreak <|> end_ envname) - count 1 anyTok) ) - - suffpos <- getPosition - option [] (count 1 amp) - return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff - rawcells <- mapM celltoks prefsufs - cells <- mapM (parseFromToks parseTableCell) rawcells - spaces - return $ Row nullAttr cells - -parseTableCell :: PandocMonad m => LP m Cell -parseTableCell = do - spaces - updateState $ \st -> st{ sInTableCell = True } - cell' <- multicolumnCell - <|> multirowCell - <|> parseSimpleCell - <|> parseEmptyCell - updateState $ \st -> st{ sInTableCell = False } - spaces - return cell' - where - -- The parsing of empty cells is important in LaTeX, especially when dealing - -- with multirow/multicolumn. See #6603. - parseEmptyCell = spaces $> emptyCell - -cellAlignment :: PandocMonad m => LP m Alignment -cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') - where - alignment = do - c <- untoken <$> singleChar - return $ case c of - "l" -> AlignLeft - "r" -> AlignRight - "c" -> AlignCenter - "*" -> AlignDefault - _ -> AlignDefault - -plainify :: Blocks -> Blocks -plainify bs = case toList bs of - [Para ils] -> plain (fromList ils) - _ -> bs - -multirowCell :: PandocMonad m => LP m Cell -multirowCell = controlSeq "multirow" >> do - -- Full prototype for \multirow macro is: - -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text} - -- However, everything except `nrows` and `text` make - -- sense in the context of the Pandoc AST - _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position - nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced - _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related - _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width - _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning - content <- symbol '{' *> (plainify <$> blocks) <* symbol '}' - return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content - -multicolumnCell :: PandocMonad m => LP m Cell -multicolumnCell = controlSeq "multicolumn" >> do - span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced - alignment <- symbol '{' *> cellAlignment <* symbol '}' - - let singleCell = do - content <- plainify <$> blocks - return $ cell alignment (RowSpan 1) (ColSpan span') content - - -- Two possible contents: either a \multirow cell, or content. - -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}} - -- Note that a \multirow cell can be nested in a \multicolumn, - -- but not the other way around. See #6603 - let nestedCell = do - (Cell _ _ (RowSpan rs) _ bs) <- multirowCell - return $ cell - alignment - (RowSpan rs) - (ColSpan span') - (fromList bs) - - symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' - --- Parse a simple cell, i.e. not multirow/multicol -parseSimpleCell :: PandocMonad m => LP m Cell -parseSimpleCell = simpleCell <$> (plainify <$> blocks) - --- LaTeX tables are stored with empty cells underneath multirow cells --- denoting the grid spaces taken up by them. More specifically, if a --- cell spans m rows, then it will overwrite all the cells in the --- columns it spans for (m-1) rows underneath it, requiring padding --- cells in these places. These padding cells need to be removed for --- proper table reading. See #6603. --- --- These fixTable functions do not otherwise fix up malformed --- input tables: that is left to the table builder. -fixTableHead :: TableHead -> TableHead -fixTableHead (TableHead attr rows) = TableHead attr rows' - where - rows' = fixTableRows rows - -fixTableBody :: TableBody -> TableBody -fixTableBody (TableBody attr rhc th tb) - = TableBody attr rhc th' tb' - where - th' = fixTableRows th - tb' = fixTableRows tb - -fixTableRows :: [Row] -> [Row] -fixTableRows = fixTableRows' $ repeat Nothing - where - fixTableRows' oldHang (Row attr cells : rs) - = let (newHang, cells') = fixTableRow oldHang cells - rs' = fixTableRows' newHang rs - in Row attr cells' : rs' - fixTableRows' _ [] = [] - --- The overhang is represented as Just (relative cell dimensions) or --- Nothing for an empty grid space. -fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell]) -fixTableRow oldHang cells - -- If there's overhang, drop cells until their total width meets the - -- width of the occupied grid spaces (or we run out) - | (n, prefHang, restHang) <- splitHang oldHang - , n > 0 - = let cells' = dropToWidth getCellW n cells - (restHang', cells'') = fixTableRow restHang cells' - in (prefHang restHang', cells'') - -- Otherwise record the overhang of a pending cell and fix the rest - -- of the row - | c@(Cell _ _ h w _):cells' <- cells - = let h' = max 1 h - w' = max 1 w - oldHang' = dropToWidth getHangW w' oldHang - (newHang, cells'') = fixTableRow oldHang' cells' - in (toHang w' h' <> newHang, c : cells'') - | otherwise - = (oldHang, []) - where - getCellW (Cell _ _ _ w _) = w - getHangW = maybe 1 fst - getCS (ColSpan n) = n - - toHang c r - | r > 1 = [Just (c, r)] - | otherwise = replicate (getCS c) Nothing - - -- Take the prefix of the overhang list representing filled grid - -- spaces. Also return the remainder and the length of this prefix. - splitHang = splitHang' 0 id - - splitHang' !n l (Just (c, r):xs) - = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs - splitHang' n l xs = (n, l, xs) - - -- Drop list items until the total width of the dropped items - -- exceeds the passed width. - dropToWidth _ n l | n < 1 = l - dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs - dropToWidth _ _ [] = [] - -simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks -simpTable envname hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces >> tok) - skipopts - colspecs <- parseAligns - let (aligns, widths, prefsufs) = unzip3 colspecs - optional $ controlSeq "caption" *> setCaption - spaces - optional label - spaces - optional lbreak - spaces - skipMany hline - spaces - header' <- option [] . try . fmap (:[]) $ - parseTableRow envname prefsufs <* lbreak <* many1 hline - spaces - rows <- sepEndBy (parseTableRow envname prefsufs) - (lbreak <* optional (skipMany hline)) - spaces - optional $ controlSeq "caption" *> setCaption - spaces - optional label - spaces - optional lbreak - spaces - lookAhead $ controlSeq "end" -- make sure we're at end - let th = fixTableHead $ TableHead nullAttr header' - let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows] - let tf = TableFoot nullAttr [] - return $ table emptyCaption (zip aligns widths) th tbs tf - -addTableCaption :: PandocMonad m => Blocks -> LP m Blocks -addTableCaption = walkM go - where go (Table attr c spec th tb tf) = do - st <- getState - let mblabel = sLastLabel st - capt <- case (sCaption st, mblabel) of - (Just ils, Nothing) -> return $ caption Nothing (plain ils) - (Just ils, Just lab) -> do - num <- getNextNumber sLastTableNum - setState - st{ sLastTableNum = num - , sLabels = M.insert lab - [Str (renderDottedNum num)] - (sLabels st) } - return $ caption Nothing (plain ils) -- add number?? - (Nothing, _) -> return c - let attr' = case (attr, mblabel) of - ((_,classes,kvs), Just ident) -> - (ident,classes,kvs) - _ -> attr - return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf - go x = return x - --- TODO: For now we add a Div to contain table attributes, since --- most writers don't do anything yet with attributes on Table. --- This can be removed when that changes. -addAttrDiv :: Attr -> Block -> Block -addAttrDiv ("",[],[]) b = b -addAttrDiv attr b = Div attr [b] - block :: PandocMonad m => LP m Blocks block = do res <- (mempty <$ spaces1) diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index c2e10570d..4a9fa03ad 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -54,6 +54,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , comment , anyTok , singleChar + , tokWith , specialChars , endline , blankline @@ -80,6 +81,10 @@ module Text.Pandoc.Readers.LaTeX.Parsing , rawopt , overlaySpecification , getNextNumber + , label + , setCaption + , resetCaption + , env ) where import Control.Applicative (many, (<|>)) @@ -914,3 +919,31 @@ getNextNumber getCurrentNum = do Just n -> [n, 1] Nothing -> [1] +label :: PandocMonad m => LP m () +label = do + controlSeq "label" + t <- braced + updateState $ \st -> st{ sLastLabel = Just $ untokenize t } + +setCaption :: PandocMonad m => LP m Inlines -> LP m () +setCaption inline = try $ do + skipopts + ils <- tokWith inline + optional $ try $ spaces *> label + updateState $ \st -> st{ sCaption = Just ils } + +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption = Nothing + , sLastLabel = Nothing } + +env :: PandocMonad m => Text -> LP m a -> LP m a +env name p = p <* end_ name + +tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines +tokWith inlineParser = try $ spaces >> + grouped inlineParser + <|> (lookAhead anyControlSeq >> inlineParser) + <|> singleChar' + where singleChar' = do + Tok _ _ t <- singleChar + return $ str t diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs new file mode 100644 index 000000000..2ea9caf58 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs @@ -0,0 +1,373 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Table + ( tableEnvironments ) +where + +import Data.Functor (($>)) +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Data.Maybe (fromMaybe) +import qualified Data.Text as T +import Control.Applicative ((<|>), optional, many) +import Control.Monad (when, void) +import Text.Pandoc.Shared (safeRead, trim) +import Text.Pandoc.Logging (LogMessage(SkippedContent)) +import Text.Pandoc.Walk (walkM) +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) + +tableEnvironments :: PandocMonad m + => LP m Blocks + -> LP m Inlines + -> M.Map Text (LP m Blocks) +tableEnvironments blocks inline = + M.fromList + [ ("longtable", env "longtable" $ + resetCaption *> + simpTable blocks inline "longtable" False >>= addTableCaption) + , ("table", env "table" $ + skipopts *> resetCaption *> blocks >>= addTableCaption) + , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True) + , ("tabular", env "tabular" $ simpTable blocks inline "tabular" False) + ] + +hline :: PandocMonad m => LP m () +hline = try $ do + spaces + controlSeq "hline" <|> + -- booktabs rules: + controlSeq "toprule" <|> + controlSeq "bottomrule" <|> + controlSeq "midrule" <|> + controlSeq "endhead" <|> + controlSeq "endfirsthead" + spaces + optional rawopt + return () + +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") + <* skipopts <* spaces + +amp :: PandocMonad m => LP m Tok +amp = symbol '&' + +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do + inp <- getInput + case inp of + (Tok spos Word t : rest) -> + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest + _ -> return () + +parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))] +parseAligns = try $ do + let maybeBar = skipMany + (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced))) + let cAlign = AlignCenter <$ symbol 'c' + let lAlign = AlignLeft <$ symbol 'l' + let rAlign = AlignRight <$ symbol 'r' + let parAlign = AlignLeft <$ symbol 'p' + -- aligns from tabularx + let xAlign = AlignLeft <$ symbol 'X' + let mAlign = AlignLeft <$ symbol 'm' + let bAlign = AlignLeft <$ symbol 'b' + let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign ) + let alignPrefix = symbol '>' >> braced + let alignSuffix = symbol '<' >> braced + let colWidth = try $ do + symbol '{' + ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") + spaces + symbol '}' + return $ safeRead ds + let alignSpec = do + pref <- option [] alignPrefix + spaces + al <- alignChar + width <- colWidth <|> option Nothing (do s <- untokenize <$> braced + pos <- getPosition + report $ SkippedContent s pos + return Nothing) + spaces + suff <- option [] alignSuffix + return (al, width, (pref, suff)) + let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro + symbol '*' + spaces + ds <- trim . untokenize <$> braced + spaces + spec <- braced + case safeRead ds of + Just n -> + getInput >>= setInput . (mconcat (replicate n spec) ++) + Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" + bgroup + spaces + maybeBar + aligns' <- many $ try $ spaces >> optional starAlign >> + (alignSpec <* maybeBar) + spaces + egroup + spaces + return $ map toSpec aligns' + where + toColWidth (Just w) | w > 0 = ColWidth w + toColWidth _ = ColWidthDefault + toSpec (x, y, z) = (x, toColWidth y, z) + +-- N.B. this parser returns a Row that may have erroneous empty cells +-- in it. See the note above fixTableHead for details. +parseTableRow :: PandocMonad m + => LP m Blocks -- ^ block parser + -> LP m Inlines -- ^ inline parser + -> Text -- ^ table environment name + -> [([Tok], [Tok])] -- ^ pref/suffixes + -> LP m Row +parseTableRow blocks inline envname prefsufs = do + notFollowedBy (spaces *> end_ envname) + -- add prefixes and suffixes in token stream: + let celltoks (pref, suff) = do + prefpos <- getPosition + contents <- mconcat <$> + many ( snd <$> withRaw + ((lookAhead (controlSeq "parbox") >> + void blocks) -- #5711 + <|> + (lookAhead (controlSeq "begin") >> void inline) + <|> + (lookAhead (symbol '$') >> void inline)) + <|> + (do notFollowedBy + (() <$ amp <|> () <$ lbreak <|> end_ envname) + count 1 anyTok) ) + + suffpos <- getPosition + option [] (count 1 amp) + return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff + rawcells <- mapM celltoks prefsufs + cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells + spaces + return $ Row nullAttr cells + +parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell +parseTableCell blocks = do + spaces + updateState $ \st -> st{ sInTableCell = True } + cell' <- multicolumnCell blocks + <|> multirowCell blocks + <|> parseSimpleCell + <|> parseEmptyCell + updateState $ \st -> st{ sInTableCell = False } + spaces + return cell' + where + -- The parsing of empty cells is important in LaTeX, especially when dealing + -- with multirow/multicolumn. See #6603. + parseEmptyCell = spaces $> emptyCell + parseSimpleCell = simpleCell <$> (plainify <$> blocks) + + +cellAlignment :: PandocMonad m => LP m Alignment +cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') + where + alignment = do + c <- untoken <$> singleChar + return $ case c of + "l" -> AlignLeft + "r" -> AlignRight + "c" -> AlignCenter + "*" -> AlignDefault + _ -> AlignDefault + +plainify :: Blocks -> Blocks +plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + +multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell +multirowCell blocks = controlSeq "multirow" >> do + -- Full prototype for \multirow macro is: + -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text} + -- However, everything except `nrows` and `text` make + -- sense in the context of the Pandoc AST + _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position + nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced + _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related + _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width + _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning + content <- symbol '{' *> (plainify <$> blocks) <* symbol '}' + return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content + +multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell +multicolumnCell blocks = controlSeq "multicolumn" >> do + span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced + alignment <- symbol '{' *> cellAlignment <* symbol '}' + + let singleCell = do + content <- plainify <$> blocks + return $ cell alignment (RowSpan 1) (ColSpan span') content + + -- Two possible contents: either a \multirow cell, or content. + -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}} + -- Note that a \multirow cell can be nested in a \multicolumn, + -- but not the other way around. See #6603 + let nestedCell = do + (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks + return $ cell + alignment + (RowSpan rs) + (ColSpan span') + (fromList bs) + + symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' + +-- LaTeX tables are stored with empty cells underneath multirow cells +-- denoting the grid spaces taken up by them. More specifically, if a +-- cell spans m rows, then it will overwrite all the cells in the +-- columns it spans for (m-1) rows underneath it, requiring padding +-- cells in these places. These padding cells need to be removed for +-- proper table reading. See #6603. +-- +-- These fixTable functions do not otherwise fix up malformed +-- input tables: that is left to the table builder. +fixTableHead :: TableHead -> TableHead +fixTableHead (TableHead attr rows) = TableHead attr rows' + where + rows' = fixTableRows rows + +fixTableBody :: TableBody -> TableBody +fixTableBody (TableBody attr rhc th tb) + = TableBody attr rhc th' tb' + where + th' = fixTableRows th + tb' = fixTableRows tb + +fixTableRows :: [Row] -> [Row] +fixTableRows = fixTableRows' $ repeat Nothing + where + fixTableRows' oldHang (Row attr cells : rs) + = let (newHang, cells') = fixTableRow oldHang cells + rs' = fixTableRows' newHang rs + in Row attr cells' : rs' + fixTableRows' _ [] = [] + +-- The overhang is represented as Just (relative cell dimensions) or +-- Nothing for an empty grid space. +fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell]) +fixTableRow oldHang cells + -- If there's overhang, drop cells until their total width meets the + -- width of the occupied grid spaces (or we run out) + | (n, prefHang, restHang) <- splitHang oldHang + , n > 0 + = let cells' = dropToWidth getCellW n cells + (restHang', cells'') = fixTableRow restHang cells' + in (prefHang restHang', cells'') + -- Otherwise record the overhang of a pending cell and fix the rest + -- of the row + | c@(Cell _ _ h w _):cells' <- cells + = let h' = max 1 h + w' = max 1 w + oldHang' = dropToWidth getHangW w' oldHang + (newHang, cells'') = fixTableRow oldHang' cells' + in (toHang w' h' <> newHang, c : cells'') + | otherwise + = (oldHang, []) + where + getCellW (Cell _ _ _ w _) = w + getHangW = maybe 1 fst + getCS (ColSpan n) = n + + toHang c r + | r > 1 = [Just (c, r)] + | otherwise = replicate (getCS c) Nothing + + -- Take the prefix of the overhang list representing filled grid + -- spaces. Also return the remainder and the length of this prefix. + splitHang = splitHang' 0 id + + splitHang' !n l (Just (c, r):xs) + = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs + splitHang' n l xs = (n, l, xs) + + -- Drop list items until the total width of the dropped items + -- exceeds the passed width. + dropToWidth _ n l | n < 1 = l + dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs + dropToWidth _ _ [] = [] + +simpTable :: PandocMonad m + => LP m Blocks + -> LP m Inlines + -> Text + -> Bool + -> LP m Blocks +simpTable blocks inline envname hasWidthParameter = try $ do + when hasWidthParameter $ () <$ tokWith inline + skipopts + colspecs <- parseAligns + let (aligns, widths, prefsufs) = unzip3 colspecs + optional $ controlSeq "caption" *> setCaption inline + spaces + optional label + spaces + optional lbreak + spaces + skipMany hline + spaces + header' <- option [] . try . fmap (:[]) $ + parseTableRow blocks inline envname prefsufs <* + lbreak <* many1 hline + spaces + rows <- sepEndBy (parseTableRow blocks inline envname prefsufs) + (lbreak <* optional (skipMany hline)) + spaces + optional $ controlSeq "caption" *> setCaption inline + spaces + optional label + spaces + optional lbreak + spaces + lookAhead $ controlSeq "end" -- make sure we're at end + let th = fixTableHead $ TableHead nullAttr header' + let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows] + let tf = TableFoot nullAttr [] + return $ table emptyCaption (zip aligns widths) th tbs tf + +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go + where go (Table attr c spec th tb tf) = do + st <- getState + let mblabel = sLastLabel st + capt <- case (sCaption st, mblabel) of + (Just ils, Nothing) -> return $ caption Nothing (plain ils) + (Just ils, Just lab) -> do + num <- getNextNumber sLastTableNum + setState + st{ sLastTableNum = num + , sLabels = M.insert lab + [Str (renderDottedNum num)] + (sLabels st) } + return $ caption Nothing (plain ils) -- add number?? + (Nothing, _) -> return c + let attr' = case (attr, mblabel) of + ((_,classes,kvs), Just ident) -> + (ident,classes,kvs) + _ -> attr + return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf + go x = return x + +-- TODO: For now we add a Div to contain table attributes, since +-- most writers don't do anything yet with attributes on Table. +-- This can be removed when that changes. +addAttrDiv :: Attr -> Block -> Block +addAttrDiv ("",[],[]) b = b +addAttrDiv attr b = Div attr [b] |