aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2020-02-12 08:46:54 -0800
committer John MacFarlane <jgm@berkeley.edu> 2020-02-12 08:47:32 -0800
commit 1433aaa4c35af84fbe00ecf971acd1414da6dea8 (patch)
tree 4ea6aab556d7c9bc0164b46645e58f528fe962b0 /src/Text
parent fae8ea36f0452eca093b76a1093e7b5c1cea9a5e (diff)
download pandoc-1433aaa4c35af84fbe00ecf971acd1414da6dea8.tar.gz
HTML reader: don't parse `data-id` as `id` attribute.
And similarly don't parse any `data-X` as `X` when `X` is a valid HTML attribute. Reported in comment on #5415.
Diffstat (limited to 'src/Text')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 10
1 files changed, 9 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index aa73cd9a1..057ff1d31 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -52,6 +52,7 @@ import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Definition
import Text.Pandoc.Readers.LaTeX (rawLaTeXInline)
import Text.Pandoc.Readers.LaTeX.Types (Macro)
+import Text.Pandoc.XML (html5Attributes, html4Attributes, rdfaAttributes)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options (
@@ -835,7 +836,14 @@ mathMLToTeXMath s = writeTeX <$> readMathML s
toStringAttr :: [(Text, Text)] -> [(Text, Text)]
toStringAttr = map go
- where go (x,y) = (fromMaybe x $ T.stripPrefix "data-" x, y)
+ where
+ go (x,y) =
+ case T.stripPrefix "data-" x of
+ Nothing -> (x,y)
+ Just x' -> if x' `Set.member` (html5Attributes <>
+ html4Attributes <> rdfaAttributes)
+ then (x,y)
+ else (x',y)
pScriptMath :: PandocMonad m => TagParser m Inlines
pScriptMath = try $ do