From 1af2cfb2873c5bb6ddd9fc00d076088b2e62af30 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 22 May 2021 21:56:10 -0700
Subject: Handle relative lengths (e.g. `2*`) in HTML column widths.

See <https://www.w3.org/TR/html4/types.html#h-6.6>.

"A relative length has the form "i*", where "i" is an integer. When
allotting space among elements competing for that space, user agents
allot pixel and percentage lengths first, then divide up remaining
available space among relative lengths. Each relative length receives a
portion of the available space that is proportional to the integer
preceding the "*". The value "*" is equivalent to "1*". Thus, if 60
pixels of space are available after the user agent allots pixel and
percentage space, and the competing relative lengths are 1*, 2*, and 3*,
the 1* will be alloted 10 pixels, the 2* will be alloted 20 pixels, and
the 3* will be alloted 30 pixels."

Closes #4063.
---
 src/Text/Pandoc/Readers/HTML/Table.hs | 47 ++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 14 deletions(-)

(limited to 'src/Text')

diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index ad0b51253..3a569dd0a 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -17,6 +17,7 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where
 
 import Control.Applicative ((<|>))
 import Data.Maybe (fromMaybe)
+import Data.Either (lefts, rights)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Text (Text)
 import Text.HTML.TagSoup
@@ -33,34 +34,51 @@ import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
 
--- | Parses a @<col>@ element, returning the column's width. Defaults to
--- @'ColWidthDefault'@ if the width is not set or cannot be determined.
-pCol :: PandocMonad m => TagParser m ColWidth
+-- | Parses a @<col>@ element, returning the column's width.
+-- An Either value is used:  Left i means a "relative length" with
+-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6);
+-- Right w means a regular width.  Defaults to @'Right ColWidthDefault'@
+-- if the width is not set or cannot be determined.
+pCol :: PandocMonad m => TagParser m (Either Int ColWidth)
 pCol = try $ do
   TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" [])
   let attribs = toStringAttr attribs'
   skipMany pBlank
   optional $ pSatisfy (matchTagClose "col")
   skipMany pBlank
-  let width = case lookup "width" attribs of
+  return $ case lookup "width" attribs of
                 Nothing -> case lookup "style" attribs of
                   Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
-                    fromMaybe 0.0 $ safeRead (T.filter
-                      (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
-                  _ -> 0.0
+                    maybe (Right ColWidthDefault) (Right . ColWidth)
+                      $ safeRead (T.filter
+                                   (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+                  _ -> Right ColWidthDefault
+                Just (T.unsnoc -> Just (xs, '*')) ->
+                  maybe (Left 1) Left $ safeRead xs
                 Just (T.unsnoc -> Just (xs, '%')) ->
-                  fromMaybe 0.0 $ safeRead xs
-                _ -> 0.0
-  if width > 0.0
-    then return $ ColWidth $ width / 100.0
-    else return ColWidthDefault
+                  maybe (Right ColWidthDefault)
+                        (Right . ColWidth . (/ 100.0)) $ safeRead xs
+                _ -> Right ColWidthDefault
 
-pColgroup :: PandocMonad m => TagParser m [ColWidth]
+pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth]
 pColgroup = try $ do
   pSatisfy (matchTagOpen "colgroup" [])
   skipMany pBlank
   manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
 
+resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
+resolveRelativeLengths ws =
+  let remaining = 1 - sum (map getColWidth $ rights ws)
+      relatives = sum $ lefts ws
+      relUnit = remaining / fromIntegral relatives
+      toColWidth (Right x) = x
+      toColWidth (Left i) = ColWidth (fromIntegral i * relUnit)
+  in  map toColWidth ws
+
+getColWidth :: ColWidth -> Double
+getColWidth ColWidthDefault = 0
+getColWidth (ColWidth w) = w
+
 data CellType
   = HeaderCell
   | BodyCell
@@ -182,7 +200,8 @@ pTable :: PandocMonad m
 pTable block = try $ do
   TagOpen _ attribs <- pSatisfy (matchTagOpen "table" [])  <* skipMany pBlank
   caption <- option mempty $ pInTags "caption" block       <* skipMany pBlank
-  widths  <- ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
+  widths <- resolveRelativeLengths <$>
+              ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank
   thead   <- pTableHead block               <* skipMany pBlank
   topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank
   tbodies <- many (pTableBody block)        <* skipMany pBlank
-- 
cgit v1.2.3