diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML/Parsing.hs | 22 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML/Table.hs | 15 |
3 files changed, 26 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 95f034521..fa996d2f0 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -514,7 +514,7 @@ pFigure = try $ do TagOpen _ _ <- pSatisfy (matchTagOpen "figure" []) skipMany pBlank let pImg = (\x -> (Just x, Nothing)) <$> - (pInTag True "p" pImage <* skipMany pBlank) + (pInTag TagsOmittable "p" pImage <* skipMany pBlank) pCapt = (\x -> (Nothing, Just x)) <$> do bs <- pInTags "figcaption" block return $ blocksToInlines' $ B.toList bs diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs index e68e43b25..8788a933e 100644 --- a/src/Text/Pandoc/Readers/HTML/Parsing.hs +++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs @@ -11,7 +11,8 @@ Parsing functions and utilities. -} module Text.Pandoc.Readers.HTML.Parsing - ( pInTags + ( TagOmission (..) + , pInTags , pInTags' , pInTag , pAny @@ -43,6 +44,13 @@ import Text.Pandoc.XML (html5Attributes, html4Attributes, rdfaAttributes) import qualified Data.Set as Set import qualified Data.Text as T +-- | Whether no tag, the closing tag, or both tags can be omitted. +data TagOmission + = TagsRequired -- ^ Opening and closing tags are both required + | ClosingTagOptional -- ^ The closing tag can be omitted + | TagsOmittable -- ^ Both tags, opening and closing, can be omitted. + deriving (Eq) + pInTags :: (PandocMonad m, Monoid a) => Text -> TagParser m a -> TagParser m a pInTags tagtype parser = pInTags' tagtype (const True) parser @@ -57,14 +65,18 @@ pInTags' tagtype tagtest parser = try $ do -- parses p, preceded by an opening tag (optional if tagsOptional) -- and followed by a closing tag (optional if tagsOptional) -pInTag :: PandocMonad m => Bool -> Text -> TagParser m a -> TagParser m a -pInTag tagsOptional tagtype p = try $ do +pInTag :: PandocMonad m => TagOmission -> Text -> TagParser m a -> TagParser m a +pInTag tagOmission tagtype p = try $ do skipMany pBlank - (if tagsOptional then optional else void) $ pSatisfy (matchTagOpen tagtype []) + let openingOptional = tagOmission == TagsOmittable + let closingOptional = tagOmission /= TagsRequired + (if openingOptional then optional else void) $ + pSatisfy (matchTagOpen tagtype []) skipMany pBlank x <- p skipMany pBlank - (if tagsOptional then optional else void) $ pSatisfy (matchTagClose tagtype) + (if closingOptional then optional else void) $ + pSatisfy (matchTagClose tagtype) skipMany pBlank return x diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index eba84884f..e6d0a9097 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -69,15 +69,16 @@ pTable' block pCell = try $ do caption <- option mempty $ pInTags "caption" block <* skipMany pBlank widths' <- (mconcat <$> many1 pColgroup) <|> many pCol let pTh = option [] $ pInTags "tr" (pCell "th") - pTr = try $ skipMany pBlank >> - pInTags "tr" (pCell "td" <|> pCell "th") - pTBody = pInTag True "tbody" $ many1 pTr - head'' <- pInTag False "thead" (option [] pTr) <|> pInTag True "thead" pTh - head' <- pInTag True "tbody" + pTr = try $ skipMany pBlank + *> pInTags "tr" (pCell "td" <|> pCell "th") + pTBody = pInTag TagsOmittable "tbody" $ many1 pTr + head'' <- pInTag ClosingTagOptional "thead" (option [] pTr) + <|> pInTag TagsOmittable "thead" pTh + head' <- pInTag TagsOmittable "tbody" (if null head'' then pTh else return head'') - topfoot <- option [] $ pInTag False "tfoot" $ many pTr + topfoot <- option [] $ pInTag TagsRequired "tfoot" $ many pTr rowsLs <- many pTBody - bottomfoot <- option [] $ pInTag False "tfoot" $ many pTr + bottomfoot <- option [] $ pInTag ClosingTagOptional "tfoot" $ many pTr TagClose _ <- pSatisfy (matchTagClose "table") let rows = concat rowsLs <> topfoot <> bottomfoot rows''' = map (map cellContents) rows |