{-# LANGUAGE LambdaCase        #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns      #-}
{- |
   Module      : Text.Pandoc.Readers.HTML.Table
   Copyright   : © 2006-2021 John MacFarlane,
                   2020-2021 Albert Krewinkel
   License     : GNU GPL, version 2 or above

   Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
   Stability   : alpha
   Portability : portable

HTML table parser.
-}
module Text.Pandoc.Readers.HTML.Table (pTable) where

import Control.Applicative ((<|>))
import Data.Maybe (fromMaybe, isJust)
import Data.Either (lefts, rights)
import Data.List.NonEmpty (nonEmpty)
import Data.Text (Text)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks)
import Text.Pandoc.CSS (cssAttributes)
import Text.Pandoc.Definition
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Parsing
  ( eof, lookAhead, many, many1, manyTill, option, optional
  , optionMaybe, skipMany, try )
import Text.Pandoc.Readers.HTML.Parsing
import Text.Pandoc.Readers.HTML.Types (TagParser)
import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import Control.Monad (guard)

-- | Parses a @<col>@ element, returning the column's width.
-- An Either value is used:  Left i means a "relative length" with
-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6);
-- Right w means a regular width.  Defaults to @'Right ColWidthDefault'@
-- if the width is not set or cannot be determined.
pCol :: PandocMonad m => TagParser m (Either Int ColWidth)
pCol = try $ do
  TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
  let attribs = toStringAttr attribs'
  skipMany pBlank
  optional $ pSatisfy (matchTagClose "col")
  skipMany pBlank
  return $ case lookup "width" attribs of
                Nothing -> case lookup "style" attribs of
                  Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
                    maybe (Right ColWidthDefault) (Right . ColWidth . (/ 100.0))
                      $ safeRead (T.filter
                                   (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
                  _ -> Right ColWidthDefault
                Just (T.unsnoc -> Just (xs, '*')) ->
                  maybe (Left 1) Left $ safeRead xs
                Just (T.unsnoc -> Just (xs, '%')) ->
                  maybe (Right ColWidthDefault)
                        (Right . ColWidth . (/ 100.0)) $ safeRead xs
                _ -> Right ColWidthDefault

pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth]
pColgroup = try $ do
  pSatisfy (matchTagOpen "colgroup" [])
  skipMany pBlank
  manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank

resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
resolveRelativeLengths ws =
  let remaining = 1 - sum (map getColWidth $ rights ws)
      relatives = sum $ lefts ws
      relUnit = remaining / fromIntegral relatives
      toColWidth (Right x) = x
      toColWidth (Left i) = ColWidth (fromIntegral i * relUnit)
  in  map toColWidth ws

getColWidth :: ColWidth -> Double
getColWidth ColWidthDefault = 0
getColWidth (ColWidth w) = w

data CellType
  = HeaderCell
  | BodyCell
  deriving Eq

pCell :: PandocMonad m
      => TagParser m Blocks
      -> CellType
      -> TagParser m (CellType, Cell)
pCell block celltype = try $ do
  let celltype' = case celltype of
        HeaderCell -> "th"
        BodyCell   -> "td"
  skipMany pBlank
  TagOpen _ attribs <- lookAhead $ pSatisfy (matchTagOpen celltype' [])
  let cssAttribs = maybe [] cssAttributes $ lookup "style" attribs
  let align = case lookup "align" attribs <|>
                   lookup "text-align" cssAttribs of
                Just "left"   -> AlignLeft
                Just "right"  -> AlignRight
                Just "center" -> AlignCenter
                _             -> AlignDefault
  let rowspan = RowSpan . fromMaybe 1 $
                safeRead =<< lookup "rowspan" attribs
  let colspan = ColSpan . fromMaybe 1 $
                safeRead =<< lookup "colspan" attribs
  res <- pInTags celltype' block
  skipMany pBlank
  let handledAttribs = ["align", "colspan", "rowspan", "text-align"]
      attribs' = foldr go [] attribs
      go kv@(k, _) acc = case k of
        "style" -> case filter ((/= "text-align") . fst) cssAttribs of
                     [] -> acc
                     cs -> ("style", toStyleString cs) : acc
        -- drop attrib if it's already handled
        _ | k `elem` handledAttribs -> acc
        _ -> kv : acc
  return (celltype, B.cellWith (toAttr attribs') align rowspan colspan res)

-- | Create a style attribute string from a list of CSS attributes
toStyleString :: [(Text, Text)] -> Text
toStyleString = T.intercalate "; " . map (\(k, v) -> k <> ": " <> v)

-- | Parses a normal table row; returns the row together with the number
-- of header cells at the beginning of the row.
pRow :: PandocMonad m
     => TagParser m Blocks
     -> TagParser m (RowHeadColumns, B.Row)
pRow block = try $ do
  skipMany pBlank
  TagOpen _ attribs <- pSatisfy (matchTagOpen "tr" []) <* skipMany pBlank
  cells <- many (pCell block BodyCell <|> pCell block HeaderCell)
  TagClose _ <- pSatisfy (matchTagClose "tr")
  return ( RowHeadColumns $ length (takeWhile ((== HeaderCell) . fst) cells)
         , Row (toAttr attribs) $ map snd cells
         )

-- | Parses a header row, i.e., a row which containing nothing but
-- @<th>@ elements.
pHeaderRow :: PandocMonad m
           => TagParser m Blocks
           -> TagParser m B.Row
pHeaderRow block = try $ do
  skipMany pBlank
  let pThs = map snd <$> many (pCell block HeaderCell)
  let mkRow (attribs, cells) = Row (toAttr attribs) cells
  mkRow <$> pInTagWithAttribs TagsRequired "tr" pThs

-- | Parses a table head. If there is no @thead@ element, this looks for
-- a row of @<th>@-only elements as the first line of the table.
pTableHead :: PandocMonad m
           => TagParser m Blocks
           -> TagParser m TableHead
pTableHead block = try $ do
  skipMany pBlank
  let pRows = many (pRow block)
  let pThead = pInTagWithAttribs ClosingTagOptional "thead" pRows
  optionMaybe pThead >>= \case
    Just (attribs, rows) ->
      return $ TableHead (toAttr attribs) $ map snd rows
    Nothing -> mkTableHead <$> optionMaybe (pHeaderRow block)
               where
                 mkTableHead = TableHead nullAttr . \case
                   -- Use row as header only if it's non-empty
                   Just row@(Row _ (_:_)) -> [row]
                   _                      -> []

-- | Parses a table foot
pTableFoot :: PandocMonad m
           => TagParser m Blocks
           -> TagParser m TableFoot
pTableFoot block = try $ do
  skipMany pBlank
  TagOpen _ attribs <- pSatisfy (matchTagOpen "tfoot" []) <* skipMany pBlank
  rows <- many (fmap snd $ pRow block <* skipMany pBlank)
  optional $ pSatisfy (matchTagClose "tfoot")
  return $ TableFoot (toAttr attribs) rows

-- | Parses a table body
pTableBody :: PandocMonad m
           => TagParser m Blocks
           -> TagParser m TableBody
pTableBody block = try $ do
  skipMany pBlank
  mbattribs <- option Nothing $ Just . getAttribs <$>
                 pSatisfy (matchTagOpen "tbody" []) <* skipMany pBlank
  bodyheads <- many (pHeaderRow block)
  (rowheads, rows) <- unzip <$> many (pRow block <* skipMany pBlank)
  optional $ pSatisfy (matchTagClose "tbody")
  guard $ isJust mbattribs || not (null bodyheads && null rows)
  let attribs = fromMaybe [] mbattribs
  return $ TableBody (toAttr attribs) (foldr max 0 rowheads) bodyheads rows
  where
    getAttribs (TagOpen _ attribs) = attribs
    getAttribs _ = []

-- | Parses a simple HTML table
pTable :: PandocMonad m
       => TagParser m Blocks -- ^ Caption and cell contents parser
       -> TagParser m Blocks
pTable block = try $ do
  TagOpen _ attribs <- pSatisfy (matchTagOpen "table" [])  <* skipMany pBlank
  caption <- option mempty $ pInTags "caption" block       <* skipMany pBlank
  widths <- resolveRelativeLengths <$>
              ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
  thead   <- pTableHead block               <* skipMany pBlank
  topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank
  tbodies <- many (pTableBody block)        <* skipMany pBlank
  botfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank
  TagClose _ <- pSatisfy (matchTagClose "table")
  let tfoot = fromMaybe (TableFoot nullAttr []) $ topfoot <|> botfoot
  case normalize widths thead tbodies tfoot of
    Left err -> fail err
    Right (colspecs, thead', tbodies', tfoot') -> return $
      B.tableWith (toAttr attribs)
                  (B.simpleCaption caption)
                  colspecs
                  thead'
                  tbodies'
                  tfoot'
data TableType
  = SimpleTable
  | NormalTable

tableType :: [[Cell]] -> TableType
tableType cells =
  if onlySimpleTableCells $ map (map cellContents) cells
  then SimpleTable
  else NormalTable
  where
    cellContents :: Cell -> [Block]
    cellContents (Cell _ _ _ _ bs) = bs

normalize :: [ColWidth] -> TableHead -> [TableBody] -> TableFoot
          -> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
normalize widths head' bodies foot = do
  let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot
  let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs
  let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells
  let ncols = maybe 0 maximum $ nonEmpty $ map rowLength rows
  let tblType = tableType (map rowCells rows)
  -- fail on empty table
  if null rows
    then Left "empty table"
    else Right
         ( zip (calculateAlignments ncols bodies)
               (normalizeColWidths ncols tblType widths)
         , head'
         , bodies
         , foot
         )

normalizeColWidths :: Int -> TableType -> [ColWidth] -> [ColWidth]
normalizeColWidths ncols tblType = \case
  [] -> case tblType of
          SimpleTable -> replicate ncols ColWidthDefault
          NormalTable -> replicate ncols (ColWidth $ 1 / fromIntegral ncols)
  widths -> widths

calculateAlignments :: Int -> [TableBody] -> [Alignment]
calculateAlignments cols tbodies =
  case cells of
    cs:_ -> take cols $ concatMap cellAligns cs ++ repeat AlignDefault
    _    -> replicate cols AlignDefault
  where
    cells :: [[Cell]]
    cells = concatMap bodyRowCells tbodies
    cellAligns :: Cell -> [Alignment]
    cellAligns (Cell _ align _ (ColSpan cs) _) = replicate cs align

bodyRowCells :: TableBody -> [[Cell]]
bodyRowCells = map rowCells . bodyRows

headRows :: TableHead -> [B.Row]
headRows (TableHead _ rows) = rows

bodyRows :: TableBody -> [B.Row]
bodyRows (TableBody _ _ headerRows bodyRows') = headerRows <> bodyRows'

footRows :: TableFoot -> [B.Row]
footRows (TableFoot _ rows) = rows

rowCells :: B.Row -> [Cell]
rowCells (Row _ cells) = cells