diff options
author | Michael Hoffmann <brennan.brisad@gmail.com> | 2021-07-07 01:06:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-06 16:06:29 -0700 |
commit | e56e2b0e0be9256ddef798d28f5d2af6e756508d (patch) | |
tree | c2b5645a48f2cac569c990eef8659dfd8e0f79c1 | |
parent | e7f8cc57866b61ed354c4c3812aaced33832a0e0 (diff) | |
download | pandoc-e56e2b0e0be9256ddef798d28f5d2af6e756508d.tar.gz |
Recognize data-external when reading HTML img tags (#7429)
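Preserve all attributes in img tags, apart from `src`, `title` and `alt`,
which are still mapped to the image's URL, title and alt text. If an
attribute has a `data-` prefix, the prefix is stripped. In particular, this
preserves a `data-external` attribute as an `external` attribute in the
pandoc AST.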
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 11 | ||||
-rw-r--r-- | test/Tests/Readers/HTML.hs | 6 |
2 files changed, 9 insertions, 8 deletions
```diff
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fc4575f2d..fdf4f28e0 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -722,17 +722,12 @@ pLink = try $ do
 
 pImage :: PandocMonad m => TagParser m Inlines
 pImage = do
-  tag <- pSelfClosing (=="img") (isJust . lookup "src")
+  tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src")
   url <- canonicalizeUrl $ fromAttrib "src" tag
   let title = fromAttrib "title" tag
   let alt = fromAttrib "alt" tag
-  let uid = fromAttrib "id" tag
-  let cls = T.words $ fromAttrib "class" tag
-  let getAtt k = case fromAttrib k tag of
-                   "" -> []
-                   v -> [(k, v)]
-  let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
-  return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
+  let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr'
+  return $ B.imageWith attr (escapeURI url) title (B.text alt)
 
 pSvg :: PandocMonad m => TagParser m Inlines
 pSvg = do
diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs
index 9bf567194..4ed1e44af 100644
--- a/test/Tests/Readers/HTML.hs
+++ b/test/Tests/Readers/HTML.hs
@@ -74,6 +74,12 @@ tests = [ testGroup "base tag"
           [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?>
             plain (spanWith ("anchor",[],[]) mempty)
           ]
+        , testGroup "img"
+          [ test html "data-external attribute" $ "<img data-external=\"1\" src=\"http://example.com/stickman.gif\">" =?>
+            plain (imageWith ("", [], [("external", "1")]) "http://example.com/stickman.gif" "" "")
+          , test html "title" $ "<img title=\"The title\" src=\"http://example.com/stickman.gif\">" =?>
+            plain (imageWith ("", [], []) "http://example.com/stickman.gif" "The title" "")
+          ]
         , testGroup "lang"
           [ test html "lang on <html>" $ "<html lang=\"es\">hola" =?>
             setMeta "lang" (text "es") (doc (plain (text "hola")))
```