diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2014-04-05 23:05:11 -0700 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2014-04-05 23:05:11 -0700 |
commit | 971e4c43647cf28d1de0dbc109a6ec6f269fa563 (patch) | |
tree | ae3991b09edc0c3931ab515e4d73a9a9695c1471 /src/Text/Pandoc | |
parent | 24f438aa5f230464d510fae034c94644c0e181ca (diff) | |
download | pandoc-971e4c43647cf28d1de0dbc109a6ec6f269fa563.tar.gz |
HTML reader: Updated `closes` with rules from HTML5 spec.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 17 |
1 files changed, 12 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 38619d040..2101b2fc2 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -469,7 +469,7 @@ pCloses :: String -> TagParser () pCloses tagtype = try $ do t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag case t of - (TagClose t') | t' == tagtype -> pAnyTag >> return () + (TagClose t') | t' == tagtype -> pAnyTag >> return () (TagOpen t' _) | t' `closes` tagtype -> return () (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () @@ -627,7 +627,7 @@ isCommentTag :: Tag String -> Bool isCommentTag = tagComment (const True) -- taken from HXT and extended - +-- See http://www.w3.org/TR/html5/syntax.html sec 8.1.2.4 optional tags closes :: String -> String -> Bool _ `closes` "body" = False _ `closes` "html" = False @@ -635,11 +635,18 @@ _ `closes` "html" = False "li" `closes` "li" = True "th" `closes` t | t `elem` ["th","td"] = True "tr" `closes` t | t `elem` ["th","td","tr"] = True +"dd" `closes` t | t `elem` ["dt", "dd"] = True "dt" `closes` t | t `elem` ["dt","dd"] = True -"hr" `closes` "p" = True -"p" `closes` "p" = True +"rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True +"optgroup" `closes` "optgroup" = True +"optgroup" `closes` "option" = True +"option" `closes` "option" = True +-- http://www.w3.org/TR/html-markup/p.html +x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote", + "dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4", + "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section", + "table", "ul"] = True "meta" `closes` "meta" = True -"colgroup" `closes` "colgroup" = True "form" `closes` "form" = True "label" `closes` "label" = True "map" `closes` "map" = True |