{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.HTML.Table Copyright : © 2006-2020 John MacFarlane, 2020 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel Stability : alpha Portability : portable HTML table parser. -} module Text.Pandoc.Readers.HTML.Table (pTable') where import Control.Monad (guard) import Data.Maybe (fromMaybe) import Data.Text (Text) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks) import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Parsing ( (<|>), eof, many, many1, manyTill, option, optional, skipMany, try) import Text.Pandoc.Readers.HTML.Parsing import Text.Pandoc.Readers.HTML.Types (TagParser) import Text.Pandoc.Shared (onlySimpleTableCells, safeRead) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -- | Parses a @@ element, returning the column's width. Defaults to -- @'ColWidthDefault'@ if the width is not set or cannot be determined. pCol :: PandocMonad m => TagParser m ColWidth pCol = try $ do TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" []) let attribs = toStringAttr attribs' skipMany pBlank optional $ pSatisfy (matchTagClose "col") skipMany pBlank let width = case lookup "width" attribs of Nothing -> case lookup "style" attribs of Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> fromMaybe 0.0 $ safeRead (T.filter (`notElem` (" \t\r\n%'\";" :: [Char])) xs) _ -> 0.0 Just (T.unsnoc -> Just (xs, '%')) -> fromMaybe 0.0 $ safeRead xs _ -> 0.0 if width > 0.0 then return $ ColWidth $ width / 100.0 else return ColWidthDefault pColgroup :: PandocMonad m => TagParser m [ColWidth] pColgroup = try $ do pSatisfy (matchTagOpen "colgroup" []) skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank -- | Parses a simple HTML table pTable' :: PandocMonad m => TagParser m Blocks -- ^ Caption parser -> (Text -> TagParser m [(Alignment, Blocks)]) -- ^ Table cell parser -> TagParser m Blocks pTable' block pCell = try $ do TagOpen _ attribs' <- pSatisfy (matchTagOpen "table" []) let attribs = toAttr attribs' skipMany pBlank caption <- option mempty $ pInTags "caption" block <* skipMany pBlank widths' <- (mconcat <$> many1 pColgroup) <|> many pCol let pTh = option [] $ pInTags "tr" (pCell "th") pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th") pTBody = pInTag True "tbody" $ many1 pTr head'' <- pInTag False "thead" (option [] pTr) <|> pInTag True "thead" pTh head' <- map snd <$> pInTag True "tbody" (if null head'' then pTh else return head'') topfoot <- option [] $ pInTag False "tfoot" $ many pTr rowsLs <- many pTBody bottomfoot <- option [] $ pInTag False "tfoot" $ many pTr TagClose _ <- pSatisfy (matchTagClose "table") let rows'' = concat rowsLs <> topfoot <> bottomfoot let rows''' = map (map snd) rows'' -- fail on empty table guard $ not $ null head' && null rows''' let isSimple = onlySimpleTableCells $ fmap B.toList <$> head':rows''' let cols = if null head' then maximum (map length rows''') else length head' -- add empty cells to short rows let addEmpties r = case cols - length r of n | n > 0 -> r <> replicate n mempty | otherwise -> r let rows = map addEmpties rows''' let aligns = case rows'' of (cs:_) -> take cols $ map fst cs ++ repeat AlignDefault _ -> replicate cols AlignDefault let widths = if null widths' then if isSimple then replicate cols ColWidthDefault else replicate cols (ColWidth (1.0 / fromIntegral cols)) else widths' let toRow = Row nullAttr . map B.simpleCell toHeaderRow l = [toRow l | not (null l)] return $ B.tableWith attribs (B.simpleCaption caption) (zip aligns widths) (TableHead nullAttr $ toHeaderRow head') [TableBody nullAttr 0 [] $ map toRow rows] (TableFoot nullAttr [])