diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-10-11 09:58:25 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-10-11 09:58:25 -0700 |
commit | f5c64c3060ba40181a86d6c82a67b5b975f74446 (patch) | |
tree | ea932c2df1e02c424c7290ea8a016e18b88166a4 /src | |
parent | 3e04c2f3af08aa7927e38469eb594adf554a50ec (diff) | |
download | pandoc-f5c64c3060ba40181a86d6c82a67b5b975f74446.tar.gz |
HTML reader: fix htmlTag and isInlineTag to accept processing instructions.
Fixes regression #3123 (since 2.0). Added regression test.
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 18 |
1 files changed, 10 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b06e07a80..dab3d5db2 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1069,11 +1069,11 @@ instance NamedTag (Tag String) where getTagName _ = Nothing isInlineTag :: NamedTag (Tag a) => Tag a -> Bool -isInlineTag t = isInlineTagName || isCommentTag t - where isInlineTagName = case getTagName t of - Just x -> x - `Set.notMember` blockTags - Nothing -> False +isInlineTag t = + isCommentTag t || case getTagName t of + Nothing -> False + Just x -> x `Set.notMember` blockTags || + T.take 1 x == "?" -- processing instr. isBlockTag :: NamedTag (Tag a) => Tag a -> Bool isBlockTag t = isBlockTagName || isTagComment t @@ -1208,8 +1208,10 @@ htmlTag f = try $ do let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_' let isName s = case s of [] -> False - ('?':_) -> True -- processing instruction (c:cs) -> isLetter c && all isNameChar cs + let isPI s = case s of + ('?':_) -> True -- processing instruction + _ -> False let endpos = if ln == 1 then setSourceColumn startpos @@ -1225,7 +1227,7 @@ htmlTag f = try $ do let handleTag tagname = do -- basic sanity check, since the parser is very forgiving -- and finds tags in stuff like x<y) - guard $ isName tagname + guard $ isName tagname || isPI tagname guard $ not $ null tagname -- <https://example.org> should NOT be a tag either. -- tagsoup will parse it as TagOpen "https:" [("example.org","")] @@ -1245,7 +1247,7 @@ htmlTag f = try $ do else return (next, "<!--" <> s <> "-->") | otherwise -> fail "bogus comment mode, HTML5 parse error" TagOpen tagname attr -> do - guard $ all (isName . fst) attr + guard $ isPI tagname || all (isName . fst) attr handleTag tagname TagClose tagname -> handleTag tagname |