From aa89f6be186e2a442920860e5bf53149aabdac55 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 23 Sep 2021 09:25:37 -0700 Subject: HTML reader: handle empty tbody element in table. Closes #7589. --- src/Text/Pandoc/Readers/HTML/Table.hs | 13 ++++--- test/command/7589.md | 73 +++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 test/command/7589.md diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index 6e62e12f5..b23a2abc8 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -16,7 +16,7 @@ HTML table parser. module Text.Pandoc.Readers.HTML.Table (pTable) where import Control.Applicative ((<|>)) -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, isJust) import Data.Either (lefts, rights) import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) @@ -27,12 +27,13 @@ import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Parsing ( eof, lookAhead, many, many1, manyTill, option, optional - , optionMaybe, skipMany, try) + , optionMaybe, skipMany, try ) import Text.Pandoc.Readers.HTML.Parsing import Text.Pandoc.Readers.HTML.Types (TagParser) import Text.Pandoc.Shared (onlySimpleTableCells, safeRead) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B +import Control.Monad (guard) -- | Parses a @@ element, returning the column's width. -- An Either value is used: Left i means a "relative length" with @@ -183,11 +184,13 @@ pTableBody :: PandocMonad m -> TagParser m TableBody pTableBody block = try $ do skipMany pBlank - attribs <- option [] $ getAttribs <$> pSatisfy (matchTagOpen "tbody" []) - <* skipMany pBlank + mbattribs <- option Nothing $ Just . getAttribs <$> + pSatisfy (matchTagOpen "tbody" []) <* skipMany pBlank bodyheads <- many (pHeaderRow block) - (rowheads, rows) <- unzip <$> many1 (pRow block <* skipMany pBlank) + (rowheads, rows) <- unzip <$> many (pRow block <* skipMany pBlank) optional $ pSatisfy (matchTagClose "tbody") + guard $ isJust mbattribs || not (null bodyheads && null rows) + let attribs = fromMaybe [] mbattribs return $ TableBody (toAttr attribs) (foldr max 0 rowheads) bodyheads rows where getAttribs (TagOpen _ attribs) = attribs diff --git a/test/command/7589.md b/test/command/7589.md new file mode 100644 index 000000000..f9e8fb14f --- /dev/null +++ b/test/command/7589.md @@ -0,0 +1,73 @@ +``` +% pandoc -f html -t native + + + + + + + + + + + + +
experienceexpertiseparadigmshaskellnameimage
+^D +[ Table + ( "", [], [] ) + ( Caption Nothing [] ) + [ + ( AlignDefault, ColWidthDefault ) + , + ( AlignDefault, ColWidthDefault ) + , + ( AlignDefault, ColWidthDefault ) + , + ( AlignDefault, ColWidthDefault ) + , + ( AlignDefault, ColWidthDefault ) + , + ( AlignDefault, ColWidthDefault ) + ] + ( TableHead + ( "", [], [] ) + [ Row + ( "", [], [] ) + [ Cell + ( "", [], [] ) AlignDefault + ( RowSpan 1 ) + ( ColSpan 1 ) + [ Plain [ Str "experience" ] ] + , Cell + ( "", [], [] ) AlignDefault + ( RowSpan 1 ) + ( ColSpan 1 ) + [ Plain [ Str "expertise" ] ] + , Cell + ( "", [], [] ) AlignDefault + ( RowSpan 1 ) + ( ColSpan 1 ) + [ Plain [ Str "paradigms" ] ] + , Cell + ( "", [], [] ) AlignDefault + ( RowSpan 1 ) + ( ColSpan 1 ) + [ Plain [ Str "haskell" ] ] + , Cell + ( "", [], [] ) AlignDefault + ( RowSpan 1 ) + ( ColSpan 1 ) + [ Plain [ Str "name" ] ] + , Cell + ( "", [], [] ) AlignDefault + ( RowSpan 1 ) + ( ColSpan 1 ) + [ Plain [ Str "image" ] ] + ] + ] + ) + [ TableBody ( "", [], [] ) ( RowHeadColumns 0 ) [] [] ] + ( TableFoot ( "", [], [] ) [] ) +] +``` -- cgit v1.2.3