From 052684712b8a36854c622468f5d4ddd367199c1c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 25 Mar 2019 08:43:59 -0700 Subject: HTML reader: read `data-foo` attribute into `foo`. The HTML writer adds the `data-` prefix for HTML5 for nonstandard attributes. But the attributes are represented in the AST without the `data-` prefix, so we should strip this when reading HTML. Closes #5392. --- src/Text/Pandoc/Readers/HTML.hs | 3 ++- test/Tests/Readers/HTML.hs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 45af20ca8..dfc31f7af 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -807,7 +807,8 @@ mathMLToTeXMath s = writeTeX <$> readMathML s toStringAttr :: [(Text, Text)] -> [(String, String)] toStringAttr = map go - where go (x,y) = (T.unpack x, T.unpack y) + where go (x,y) = (T.unpack (fromMaybe x $ T.stripPrefix "data-" x), + T.unpack y) pScriptMath :: PandocMonad m => TagParser m Inlines pScriptMath = try $ do diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index 69078d955..be5178e07 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -83,7 +83,7 @@ tests = [ testGroup "base tag" , test htmlNativeDivs "
<main role=X> becomes <div role=X>" $ "<main role=foobar>hello</main>" =?> doc (divWith ("", [], [("role", "foobar")]) (plain (text "hello"))) , test htmlNativeDivs "
<main> has attributes preserved" $ "<main id=foo class=bar data-baz=qux>hello</main>" =?> - doc (divWith ("foo", ["bar"], [("role", "main"), ("data-baz", "qux")]) (plain (text "hello"))) + doc (divWith ("foo", ["bar"], [("role", "main"), ("baz", "qux")]) (plain (text "hello"))) , test htmlNativeDivs "
<main> closes <p>" $ "<p>hello<main>main content</main>" =?> doc (para (text "hello") <> divWith ("", [], [("role", "main")]) (plain (text "main content"))) , test htmlNativeDivs "
<main> followed by text" $ "<main>main content</main>non-main content" =?> -- cgit v1.2.3