diff options
author | bucklereed <horridimpfoobarbaz@chammy.info> | 2017-08-09 17:10:12 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-08-09 09:10:12 -0700 |
commit | db55f7c1b243cbc82c70276c7dfb9c0403e369b0 (patch) | |
tree | 0a4621dc208124353217f4f46cf3e78ac731c28f /src/Text | |
parent | cfa597fc2ac2a6dceb0b3387a2ee885ec75bc7d1 (diff) | |
download | pandoc-db55f7c1b243cbc82c70276c7dfb9c0403e369b0.tar.gz |
HTML reader: parse `<main>` like `<div role=main>`. (#3791)
* HTML reader: parse `<main>` like `<div role=main>`.
* `<main>` closes `<p>` and behaves like a block element generally
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 18 |
1 file changed, 11 insertions, 7 deletions
```diff
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 3a0d6eb14..7b9ab38fd 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -54,7 +54,7 @@ import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Walk
 import qualified Data.Map as M
 import Data.Foldable ( for_ )
-import Data.Maybe ( fromMaybe, isJust)
+import Data.Maybe ( fromMaybe, isJust, isNothing )
 import Data.List ( intercalate, isPrefixOf )
 import Data.Char ( isDigit, isLetter, isAlphaNum )
 import Control.Monad ( guard, mzero, void, unless )
@@ -377,6 +377,7 @@ pDiv = try $ do
   guardEnabled Ext_native_divs
   let isDivLike "div" = True
       isDivLike "section" = True
+      isDivLike "main" = True
       isDivLike _ = False
   TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
   let attr = toStringAttr attr'
@@ -385,7 +386,10 @@ pDiv = try $ do
   let classes' = if tag == "section"
                     then "section":classes
                     else classes
-  return $ B.divWith (ident, classes', kvs) contents
+      kvs' = if tag == "main" && isNothing (lookup "role" kvs)
+               then ("role", "main"):kvs
+               else kvs
+  return $ B.divWith (ident, classes', kvs') contents
 
 pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
 pRawHtmlBlock = do
@@ -940,7 +944,7 @@ blockHtmlTags = Set.fromList
     "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure",
     "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6",
     "head", "header", "hgroup", "hr", "html",
-    "isindex", "menu", "noframes", "ol", "output", "p", "pre",
+    "isindex", "main", "menu", "noframes", "ol", "output", "p", "pre",
     "section", "table", "tbody", "textarea",
     "thead", "tfoot", "ul", "dd",
     "dt", "frameset", "li", "tbody", "td", "tfoot",
@@ -1022,10 +1026,10 @@ _ `closes` "html" = False
 "optgroup" `closes` "optgroup" = True
 "optgroup" `closes` "option" = True
 "option" `closes` "option" = True
--- http://www.w3.org/TR/html-markup/p.html
+-- https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
 x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote",
    "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4",
-   "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section",
+   "h5", "h6", "header", "hr", "main", "menu", "nav", "ol", "p", "pre", "section",
    "table", "ul"] = True
 "meta" `closes` "meta" = True
 "form" `closes` "form" = True
@@ -1038,8 +1042,8 @@ t `closes` "select" | t /= "option" = True
 "tfoot" `closes` t | t `elem` ["thead","colgroup"] = True
 "tbody" `closes` t | t `elem` ["tbody","tfoot","thead","colgroup"] = True
 t `closes` t2 |
-   t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
-   t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
+   t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","main","p"] &&
+   t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" or "main"
 t1 `closes` t2 |
    t1 `Set.member` blockTags &&
    t2 `Set.notMember` blockTags &&
```