From e56e2b0e0be9256ddef798d28f5d2af6e756508d Mon Sep 17 00:00:00 2001 From: Michael Hoffmann Date: Wed, 7 Jul 2021 01:06:29 +0200 Subject: Recognize data-external when reading HTML img tags (#7429) Preserve all attributes in img tags. If attributes have a `data-` prefix, it will be stripped. In particular, this preserves a `data-external` attribute as an `external` attribute in the pandoc AST. --- src/Text/Pandoc/Readers/HTML.hs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index fc4575f2d..fdf4f28e0 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -722,17 +722,12 @@ pLink = try $ do pImage :: PandocMonad m => TagParser m Inlines pImage = do - tag <- pSelfClosing (=="img") (isJust . lookup "src") + tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src") url <- canonicalizeUrl $ fromAttrib "src" tag let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - let uid = fromAttrib "id" tag - let cls = T.words $ fromAttrib "class" tag - let getAtt k = case fromAttrib k tag of - "" -> [] - v -> [(k, v)] - let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"] - return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) + let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr' + return $ B.imageWith attr (escapeURI url) title (B.text alt) pSvg :: PandocMonad m => TagParser m Inlines pSvg = do -- cgit v1.2.3