From a5cbcdfe3a6e64bffe7bf95ee783f1abdfc207e7 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 14 Jan 2011 20:48:10 -0800
Subject: HTML reader: parse simple tables.

Resolves Issue #106.
Thanks to Rodja Trappe for the idea and some sample code.
---
 src/Text/Pandoc/Readers/HTML.hs | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b1a03a4bd..0cbdf72b0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -85,6 +85,7 @@ block = choice
             , pCodeBlock
             , pList
             , pHrule
+            , pSimpleTable
             , pPlain
             , pRawHtmlBlock
             ]
@@ -194,6 +195,27 @@ pHrule = do
   pSelfClosing (=="hr") (const True)
   return [HorizontalRule]
 
+pSimpleTable :: TagParser [Block]
+pSimpleTable = try $ do
+  TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
+  skipMany pBlank
+  head' <- option [] $ pInTags "th" pTd
+  rows <- many1 $ try $
+           skipMany pBlank >> pInTags "tr" pTd
+  skipMany pBlank
+  TagClose _ <- pSatisfy (~== TagClose "table")
+  let cols = maximum $ map length rows
+  let aligns = replicate cols AlignLeft
+  let widths = replicate cols 0
+  return [Table [] aligns widths head' rows]
+
+pTd :: TagParser [TableCell]
+pTd = try $ do
+  skipMany pBlank
+  res <- pInTags "td" pPlain
+  skipMany pBlank
+  return [res]
+
 pBlockQuote :: TagParser [Block]
 pBlockQuote = do
   contents <- pInTags "blockquote" block
@@ -437,10 +459,8 @@ _ `closes` "html" = False
 "a" `closes` "a" = True
 "li" `closes` "li" = True
 "th" `closes` t | t `elem` ["th","td"] = True
-"td" `closes` t | t `elem` ["th","td"] = True
 "tr" `closes` t | t `elem` ["th","td","tr"] = True
 "dt" `closes` t | t `elem` ["dt","dd"] = True
-"dd" `closes` t | t `elem` ["dt","dd"] = True
 "hr" `closes` "p" = True
 "p" `closes` "p" = True
 "meta" `closes` "meta" = True
-- 
cgit v1.2.3