aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2020-11-26 07:22:01 +0100
committerAlbert Krewinkel <albert@zeitkraut.de>2020-11-26 07:22:01 +0100
commit3e01ae405f9bf5f40e1b8e519029825aa4880602 (patch)
tree252046d7cd71e91b380cb0d3c3afcb6f7760e460 /src/Text/Pandoc/Readers
parentce606cb688a703b133ab7188fee45679e356a07d (diff)
downloadpandoc-3e01ae405f9bf5f40e1b8e519029825aa4880602.tar.gz
HTML reader: allow finer grained options for tag omission
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs2
-rw-r--r--src/Text/Pandoc/Readers/HTML/Parsing.hs22
-rw-r--r--src/Text/Pandoc/Readers/HTML/Table.hs15
3 files changed, 26 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 95f034521..fa996d2f0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -514,7 +514,7 @@ pFigure = try $ do
TagOpen _ _ <- pSatisfy (matchTagOpen "figure" [])
skipMany pBlank
let pImg = (\x -> (Just x, Nothing)) <$>
- (pInTag True "p" pImage <* skipMany pBlank)
+ (pInTag TagsOmittable "p" pImage <* skipMany pBlank)
pCapt = (\x -> (Nothing, Just x)) <$> do
bs <- pInTags "figcaption" block
return $ blocksToInlines' $ B.toList bs
diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs
index e68e43b25..8788a933e 100644
--- a/src/Text/Pandoc/Readers/HTML/Parsing.hs
+++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs
@@ -11,7 +11,8 @@
Parsing functions and utilities.
-}
module Text.Pandoc.Readers.HTML.Parsing
- ( pInTags
+ ( TagOmission (..)
+ , pInTags
, pInTags'
, pInTag
, pAny
@@ -43,6 +44,13 @@ import Text.Pandoc.XML (html5Attributes, html4Attributes, rdfaAttributes)
import qualified Data.Set as Set
import qualified Data.Text as T
+-- | Whether no tag, the closing tag, or both tags can be omitted.
+data TagOmission
+ = TagsRequired -- ^ Opening and closing tags are both required
+ | ClosingTagOptional -- ^ The closing tag can be omitted
+ | TagsOmittable -- ^ Both tags, opening and closing, can be omitted.
+ deriving (Eq)
+
pInTags :: (PandocMonad m, Monoid a) => Text -> TagParser m a -> TagParser m a
pInTags tagtype parser = pInTags' tagtype (const True) parser
@@ -57,14 +65,18 @@ pInTags' tagtype tagtest parser = try $ do
-- parses p, preceded by an opening tag (optional if tagsOptional)
-- and followed by a closing tag (optional if tagsOptional)
-pInTag :: PandocMonad m => Bool -> Text -> TagParser m a -> TagParser m a
-pInTag tagsOptional tagtype p = try $ do
+pInTag :: PandocMonad m => TagOmission -> Text -> TagParser m a -> TagParser m a
+pInTag tagOmission tagtype p = try $ do
skipMany pBlank
- (if tagsOptional then optional else void) $ pSatisfy (matchTagOpen tagtype [])
+ let openingOptional = tagOmission == TagsOmittable
+ let closingOptional = tagOmission /= TagsRequired
+ (if openingOptional then optional else void) $
+ pSatisfy (matchTagOpen tagtype [])
skipMany pBlank
x <- p
skipMany pBlank
- (if tagsOptional then optional else void) $ pSatisfy (matchTagClose tagtype)
+ (if closingOptional then optional else void) $
+ pSatisfy (matchTagClose tagtype)
skipMany pBlank
return x
diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs
index eba84884f..e6d0a9097 100644
--- a/src/Text/Pandoc/Readers/HTML/Table.hs
+++ b/src/Text/Pandoc/Readers/HTML/Table.hs
@@ -69,15 +69,16 @@ pTable' block pCell = try $ do
caption <- option mempty $ pInTags "caption" block <* skipMany pBlank
widths' <- (mconcat <$> many1 pColgroup) <|> many pCol
let pTh = option [] $ pInTags "tr" (pCell "th")
- pTr = try $ skipMany pBlank >>
- pInTags "tr" (pCell "td" <|> pCell "th")
- pTBody = pInTag True "tbody" $ many1 pTr
- head'' <- pInTag False "thead" (option [] pTr) <|> pInTag True "thead" pTh
- head' <- pInTag True "tbody"
+ pTr = try $ skipMany pBlank
+ *> pInTags "tr" (pCell "td" <|> pCell "th")
+ pTBody = pInTag TagsOmittable "tbody" $ many1 pTr
+ head'' <- pInTag ClosingTagOptional "thead" (option [] pTr)
+ <|> pInTag TagsOmittable "thead" pTh
+ head' <- pInTag TagsOmittable "tbody"
(if null head'' then pTh else return head'')
- topfoot <- option [] $ pInTag False "tfoot" $ many pTr
+ topfoot <- option [] $ pInTag TagsRequired "tfoot" $ many pTr
rowsLs <- many pTBody
- bottomfoot <- option [] $ pInTag False "tfoot" $ many pTr
+ bottomfoot <- option [] $ pInTag ClosingTagOptional "tfoot" $ many pTr
TagClose _ <- pSatisfy (matchTagClose "table")
let rows = concat rowsLs <> topfoot <> bottomfoot
rows''' = map (map cellContents) rows