diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-05-22 21:56:10 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-05-22 22:03:54 -0700 |
commit | 1af2cfb2873c5bb6ddd9fc00d076088b2e62af30 (patch) | |
tree | bedfe3a41ffd0fb4f88560d8e834b74a486ccbba | |
parent | 80b4b3fe82a19a4ea1e76fc4a81c9c88676c7ce0 (diff) | |
download | pandoc-1af2cfb2873c5bb6ddd9fc00d076088b2e62af30.tar.gz |
Handle relative lengths (e.g. `2*`) in HTML column widths.
See <https://www.w3.org/TR/html4/types.html#h-6.6>.
"A relative length has the form "i*", where "i" is an integer. When
allotting space among elements competing for that space, user agents
allot pixel and percentage lengths first, then divide up remaining
available space among relative lengths. Each relative length receives a
portion of the available space that is proportional to the integer
preceding the "*". The value "*" is equivalent to "1*". Thus, if 60
pixels of space are available after the user agent allots pixel and
percentage space, and the competing relative lengths are 1*, 2*, and 3*,
the 1* will be alloted 10 pixels, the 2* will be alloted 20 pixels, and
the 3* will be alloted 30 pixels."
Closes #4063.
-rw-r--r-- | src/Text/Pandoc/Readers/HTML/Table.hs | 47 | ||||
-rw-r--r-- | test/command/4063.md | 29 |
2 files changed, 62 insertions, 14 deletions
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index ad0b51253..3a569dd0a 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -17,6 +17,7 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where import Control.Applicative ((<|>)) import Data.Maybe (fromMaybe) +import Data.Either (lefts, rights) import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import Text.HTML.TagSoup @@ -33,34 +34,51 @@ import Text.Pandoc.Shared (onlySimpleTableCells, safeRead) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B --- | Parses a @<col>@ element, returning the column's width. Defaults to --- @'ColWidthDefault'@ if the width is not set or cannot be determined. -pCol :: PandocMonad m => TagParser m ColWidth +-- | Parses a @<col>@ element, returning the column's width. +-- An Either value is used: Left i means a "relative length" with +-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6); +-- Right w means a regular width. Defaults to @'Right ColWidthDefault'@ +-- if the width is not set or cannot be determined. +pCol :: PandocMonad m => TagParser m (Either Int ColWidth) pCol = try $ do TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" []) let attribs = toStringAttr attribs' skipMany pBlank optional $ pSatisfy (matchTagClose "col") skipMany pBlank - let width = case lookup "width" attribs of + return $ case lookup "width" attribs of Nothing -> case lookup "style" attribs of Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> - fromMaybe 0.0 $ safeRead (T.filter - (`notElem` (" \t\r\n%'\";" :: [Char])) xs) - _ -> 0.0 + maybe (Right ColWidthDefault) (Right . ColWidth) + $ safeRead (T.filter + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) + _ -> Right ColWidthDefault + Just (T.unsnoc -> Just (xs, '*')) -> + maybe (Left 1) Left $ safeRead xs Just (T.unsnoc -> Just (xs, '%')) -> - fromMaybe 0.0 $ safeRead xs - _ -> 0.0 - if width > 0.0 - then return $ ColWidth $ width / 100.0 - else return ColWidthDefault + maybe (Right ColWidthDefault) + (Right . ColWidth . (/ 100.0)) $ safeRead xs + _ -> Right ColWidthDefault -pColgroup :: PandocMonad m => TagParser m [ColWidth] +pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth] pColgroup = try $ do pSatisfy (matchTagOpen "colgroup" []) skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank +resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth] +resolveRelativeLengths ws = + let remaining = 1 - sum (map getColWidth $ rights ws) + relatives = sum $ lefts ws + relUnit = remaining / fromIntegral relatives + toColWidth (Right x) = x + toColWidth (Left i) = ColWidth (fromIntegral i * relUnit) + in map toColWidth ws + +getColWidth :: ColWidth -> Double +getColWidth ColWidthDefault = 0 +getColWidth (ColWidth w) = w + data CellType = HeaderCell | BodyCell @@ -182,7 +200,8 @@ pTable :: PandocMonad m pTable block = try $ do TagOpen _ attribs <- pSatisfy (matchTagOpen "table" []) <* skipMany pBlank caption <- option mempty $ pInTags "caption" block <* skipMany pBlank - widths <- ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank + widths <- resolveRelativeLengths <$> + ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank thead <- pTableHead block <* skipMany pBlank topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank tbodies <- many (pTableBody block) <* skipMany pBlank diff --git a/test/command/4063.md b/test/command/4063.md new file mode 100644 index 000000000..838472b46 --- /dev/null +++ b/test/command/4063.md @@ -0,0 +1,29 @@ +``` +% pandoc -f html -t native +<table> +<colgroup> + <col width="30%" /> + <col width="*" /> +</colgroup> +<tr> + <td>1</td> + <td>2</td> +</tr> +</table> +^D +[Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidth 0.3) + ,(AlignDefault,ColWidth 0.7)] + (TableHead ("",[],[]) + []) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]]]])] + (TableFoot ("",[],[]) + [])] +``` |