diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-07-25 13:13:24 +0200 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-07-25 13:13:24 +0200 |
commit | d441e656db576f266c4866e65ff9e4705d376381 (patch) | |
tree | f790918d6eb5b920aaba07444559d7b0f1585583 | |
parent | fe0ffd272ea14b3f1a40c891403ef03f09f6294f (diff) | |
download | pandoc-d441e656db576f266c4866e65ff9e4705d376381.tar.gz |
HTML writer: insert data- in front of unsupported attributes.
Thus, a span with attribute 'foo' gets written to HTML5
with 'data-foo', so it is valid HTML5.
HTML4 is not affected.
This will allow us to use custom attributes in pandoc without
producing invalid HTML.
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 357 | ||||
-rw-r--r-- | test/command/custom-attributes.html | 16 |
2 files changed, 325 insertions, 48 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 451123a6d..d09158c42 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -50,6 +50,7 @@ import qualified Data.Text.Lazy as TL import Data.List (intersperse, isPrefixOf) import Data.Maybe (catMaybes, fromMaybe, isJust, isNothing) import Data.Monoid ((<>)) +import qualified Data.Set as Set import Data.String (fromString) import Network.HTTP (urlEncode) import Network.URI (URI (..), parseURIReference, unEscapeString) @@ -434,16 +435,19 @@ elementToHtml slideLevel opts (Sec level num (id',classes,keyvals) title' elemen then H5.section else H.div let attr = (id',classes',keyvals) - return $ if titleSlide - then (if slideVariant == RevealJsSlides - then H5.section - else id) $ mconcat $ - (addAttrs opts attr $ secttag $ header') : innerContents - else if writerSectionDivs opts || slide - then addAttrs opts attr - $ secttag $ inNl $ header' : innerContents - else mconcat $ intersperse (nl opts) - $ addAttrs opts attr header' : innerContents + if titleSlide + then do + t <- addAttrs opts attr $ secttag $ header' + return $ + (if slideVariant == RevealJsSlides + then H5.section + else id) $ mconcat $ t : innerContents + else if writerSectionDivs opts || slide + then addAttrs opts attr + $ secttag $ inNl $ header' : innerContents + else do + t <- addAttrs opts attr header' + return $ mconcat $ intersperse (nl opts) (t : innerContents) -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. @@ -476,9 +480,11 @@ parseMailto s = do _ -> fail "not a mailto: URL" -- | Obfuscate a "mailto:" link. -obfuscateLink :: PandocMonad m => WriterOptions -> Attr -> Html -> String -> m Html +obfuscateLink :: PandocMonad m + => WriterOptions -> Attr -> Html -> String + -> StateT WriterState m Html obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = - return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ txt + addAttrs opts attr $ H.a ! A.href (toValue s) $ txt obfuscateLink opts attr (TL.unpack . renderHtml -> txt) s = let meth = writerEmailObfuscation opts s' = map toLower (take 7 s) ++ drop 7 s @@ -510,7 +516,7 @@ obfuscateLink opts attr (TL.unpack . renderHtml -> txt) s = linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >> H.noscript (preEscapedString $ obfuscateString altText) _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " ++ show meth - _ -> return $ addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email + _ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String @@ -523,21 +529,34 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . fromEntities -addAttrs :: WriterOptions -> Attr -> Html -> Html -addAttrs opts attr h = foldl (!) h (attrsToHtml opts attr) +addAttrs :: PandocMonad m + => WriterOptions -> Attr -> Html -> StateT WriterState m Html +addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr -toAttrs :: [(String, String)] -> [Attribute] -toAttrs kvs = map (\(x,y) -> customAttribute (fromString x) (toValue y)) kvs - -attrsToHtml :: WriterOptions -> Attr -> [Attribute] -attrsToHtml opts (id',classes',keyvals) = - [prefixedId opts id' | not (null id')] ++ - [A.class_ (toValue $ unwords classes') | not (null classes')] ++ toAttrs keyvals - -imgAttrsToHtml :: WriterOptions -> Attr -> [Attribute] -imgAttrsToHtml opts attr = - attrsToHtml opts (ident,cls,kvs') ++ - toAttrs (dimensionsToAttrList attr) +toAttrs :: PandocMonad m + => [(String, String)] -> StateT WriterState m [Attribute] +toAttrs kvs = do + html5 <- gets stHtml5 + return $ map (\(x,y) -> + customAttribute + (fromString (if not html5 || x `Set.member` html5Attributes + then x + else "data-" ++ x)) (toValue y)) kvs + +attrsToHtml :: PandocMonad m + => WriterOptions -> Attr -> StateT WriterState m [Attribute] +attrsToHtml opts (id',classes',keyvals) = do + attrs <- toAttrs keyvals + return $ + [prefixedId opts id' | not (null id')] ++ + [A.class_ (toValue $ unwords classes') | not (null classes')] ++ attrs + +imgAttrsToHtml :: PandocMonad m + => WriterOptions -> Attr -> StateT WriterState m [Attribute] +imgAttrsToHtml opts attr = do + attrs <- attrsToHtml opts (ident,cls,kvs') + dimattrs <- toAttrs (dimensionsToAttrList attr) + return $ attrs ++ dimattrs where (ident,cls,kvs) = attr kvs' = filter isNotDim kvs @@ -628,15 +647,15 @@ blockToHtml opts (Div attr@(ident, classes, kvs) bs) = do then (H5.section, filter (/= "section") classes) else (H.div, classes) slideVariant <- gets stSlideVariant - return $ - if speakerNotes - then case slideVariant of - RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' - DZSlides -> (addAttrs opts' attr $ H5.div $ contents') - ! (H5.customAttribute "role" "note") - NoSlides -> addAttrs opts' attr $ H.div $ contents' - _ -> mempty - else addAttrs opts (ident, classes', kvs) $ divtag $ contents' + if speakerNotes + then case slideVariant of + RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' + DZSlides -> do + t <- addAttrs opts' attr $ H5.div $ contents' + return $ t ! (H5.customAttribute "role" "note") + NoSlides -> addAttrs opts' attr $ H.div $ contents' + _ -> return mempty + else addAttrs opts (ident, classes', kvs) $ divtag $ contents' blockToHtml opts (RawBlock f str) = do ishtml <- isRawHtml f if ishtml @@ -671,10 +690,10 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do Left msg -> do unless (null msg) $ report $ CouldNotHighlight msg - return $ addAttrs opts (id',classes,keyvals) - $ H.pre $ H.code $ toHtml adjCode + addAttrs opts (id',classes,keyvals) + $ H.pre $ H.code $ toHtml adjCode Right h -> modify (\st -> st{ stHighlighting = True }) >> - return (addAttrs opts (id',[],keyvals) h) + addAttrs opts (id',[],keyvals) h blockToHtml opts (BlockQuote blocks) = do -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; @@ -706,7 +725,7 @@ blockToHtml opts (Header level attr@(_,classes,_) lst) = do $ showSecNum secnum) >> strToHtml " " >> contents else contents inElement <- gets stElement - return $ (if inElement then id else addAttrs opts attr) + (if inElement then return else addAttrs opts attr) $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' @@ -880,7 +899,7 @@ inlineToHtml opts inline = do <> strToHtml "\n" (Span (id',classes,kvs) ils) -> inlineListToHtml opts ils >>= - return . addAttrs opts attr' . H.span + addAttrs opts attr' . H.span where attr' = (id',classes',kvs') classes' = filter (`notElem` ["csl-no-emph", "csl-no-strong", @@ -900,11 +919,10 @@ inlineToHtml opts inline = do Left msg -> do unless (null msg) $ report $ CouldNotHighlight msg - return $ addAttrs opts attr - $ H.code $ strToHtml str + addAttrs opts attr $ H.code $ strToHtml str Right h -> do modify $ \st -> st{ stHighlighting = True } - return $ addAttrs opts (id',[],keyvals) h + addAttrs opts (id',[],keyvals) h where (id',_,keyvals) = attr hlCode = if isJust (writerHighlightStyle opts) then highlight @@ -994,7 +1012,7 @@ inlineToHtml opts inline = do return mempty (Link attr txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt - lift $ obfuscateLink opts attr linkText s + obfuscateLink opts attr linkText s (Link (ident,classes,kvs) txt (s,tit)) -> do linkText <- inlineListToHtml opts txt slideVariant <- gets stSlideVariant @@ -1008,7 +1026,7 @@ inlineToHtml opts inline = do let attr = if txt == [Str (unEscapeString s)] then (ident, "uri" : classes, kvs) else (ident, classes, kvs) - let link' = addAttrs opts attr link + link' <- addAttrs opts attr link return $ if null tit then link' else link' ! A.title (toValue tit) @@ -1016,6 +1034,7 @@ inlineToHtml opts inline = do let alternate' = stringify txt slideVariant <- gets stSlideVariant let isReveal = slideVariant == RevealJsSlides + attrs <- imgAttrsToHtml opts attr let attributes = -- reveal.js uses data-src for lazy loading (if isReveal @@ -1023,19 +1042,20 @@ inlineToHtml opts inline = do else A.src $ toValue s) : [A.title $ toValue tit | not (null tit)] ++ [A.alt $ toValue alternate' | not (null txt)] ++ - imgAttrsToHtml opts attr + attrs let tag = if html5 then H5.img else H.img return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image attr _ (s,tit)) -> do slideVariant <- gets stSlideVariant let isReveal = slideVariant == RevealJsSlides + attrs <- imgAttrsToHtml opts attr let attributes = (if isReveal then customAttribute "data-src" $ toValue s else A.src $ toValue s) : [A.title $ toValue tit | not (null tit)] ++ - imgAttrsToHtml opts attr + attrs return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl (Note contents) -> do @@ -1145,3 +1165,244 @@ isRawHtml f = do html5 <- gets stHtml5 return $ f == Format "html" || ((html5 && f == Format "html5") || f == Format "html4") + +html5Attributes :: Set.Set String +html5Attributes = Set.fromList + [ "abbr" + , "accept" + , "accept-charset" + , "accesskey" + , "action" + , "allowfullscreen" + , "allowpaymentrequest" + , "allowusermedia" + , "alt" + , "as" + , "async" + , "autocomplete" + , "autocomplete" + , "autofocus" + , "autoplay" + , "charset" + , "charset" + , "checked" + , "cite" + , "class" + , "color" + , "cols" + , "colspan" + , "content" + , "contenteditable" + , "controls" + , "coords" + , "crossorigin" + , "data" + , "datetime" + , "datetime" + , "default" + , "defer" + , "dir" + , "dir" + , "dirname" + , "disabled" + , "download" + , "draggable" + , "enctype" + , "for" + , "for" + , "form" + , "formaction" + , "formenctype" + , "formmethod" + , "formnovalidate" + , "formtarget" + , "headers" + , "height" + , "hidden" + , "high" + , "href" + , "href" + , "href" + , "hreflang" + , "http-equiv" + , "id" + , "inputmode" + , "integrity" + , "is" + , "ismap" + , "itemid" + , "itemprop" + , "itemref" + , "itemscope" + , "itemtype" + , "kind" + , "label" + , "lang" + , "list" + , "loop" + , "low" + , "manifest" + , "max" + , "max" + , "maxlength" + , "media" + , "method" + , "min" + , "min" + , "minlength" + , "multiple" + , "muted" + , "name" + , "name" + , "name" + , "name" + , "name" + , "name" + , "name" + , "nomodule" + , "nonce" + , "novalidate" + , "open" + , "open" + , "optimum" + , "pattern" + , "ping" + , "placeholder" + , "playsinline" + , "poster" + , "preload" + , "readonly" + , "referrerpolicy" + , "rel" + , "rel" + , "required" + , "reversed" + , "rows" + , "rowspan" + , "sandbox" + , "scope" + , "scope" + , "selected" + , "shape" + , "size" + , "sizes" + , "sizes" + , "slot" + , "span" + , "spellcheck" + , "src" + , "srcdoc" + , "srclang" + , "srcset" + , "start" + , "step" + , "style" + , "tabindex" + , "target" + , "target" + , "target" + , "title" + , "title" + , "title" + , "title" + , "title" + , "translate" + , "type" + , "type" + , "type" + , "type" + , "type" + , "typemustmatch" + , "updateviacache" + , "usemap" + , "value" + , "value" + , "value" + , "value" + , "value" + , "value" + , "width" + , "workertype" + , "wrap" + , "onabort" + , "onauxclick" + , "onafterprint" + , "onbeforeprint" + , "onbeforeunload" + , "onblur" + , "oncancel" + , "oncanplay" + , "oncanplaythrough" + , "onchange" + , "onclick" + , "onclose" + , "oncontextmenu" + , "oncopy" + , "oncuechange" + , "oncut" + , "ondblclick" + , "ondrag" + , "ondragend" + , "ondragenter" + , "ondragexit" + , "ondragleave" + , "ondragover" + , "ondragstart" + , "ondrop" + , "ondurationchange" + , "onemptied" + , "onended" + , "onerror" + , "onfocus" + , "onhashchange" + , "oninput" + , "oninvalid" + , "onkeydown" + , "onkeypress" + , "onkeyup" + , "onlanguagechange" + , "onload" + , "onloadeddata" + , "onloadedmetadata" + , "onloadend" + , "onloadstart" + , "onmessage" + , "onmessageerror" + , "onmousedown" + , "onmouseenter" + , "onmouseleave" + , "onmousemove" + , "onmouseout" + , "onmouseover" + , "onmouseup" + , "onwheel" + , "onoffline" + , "ononline" + , "onpagehide" + , "onpageshow" + , "onpaste" + , "onpause" + , "onplay" + , "onplaying" + , "onpopstate" + , "onprogress" + , "onratechange" + , "onreset" + , "onresize" + , "onrejectionhandled" + , "onscroll" + , "onsecuritypolicyviolation" + , "onseeked" + , "onseeking" + , "onselect" + , "onstalled" + , "onstorage" + , "onsubmit" + , "onsuspend" + , "ontimeupdate" + , "ontoggle" + , "onunhandledrejection" + , "onunload" + , "onvolumechange" + , "onwaiting" + ] diff --git a/test/command/custom-attributes.html b/test/command/custom-attributes.html new file mode 100644 index 000000000..67dccc1b8 --- /dev/null +++ b/test/command/custom-attributes.html @@ -0,0 +1,16 @@ +Custom attributes should automatically have data- added +in HTML5: +``` +% pandoc -t html5 +[hello]{foo="bar"} +^D +<span data-foo="bar">hello</span> +``` + +but not in HTML4: +``` +% pandoc -t html4 +[hello]{foo="bar"} +^D +<span foo="bar">hello</span> +``` |