about | summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu>, 2018-10-11 09:58:25 -0700
committer: John MacFarlane <jgm@berkeley.edu>, 2018-10-11 09:58:25 -0700
commit: f5c64c3060ba40181a86d6c82a67b5b975f74446 (patch)
tree: ea932c2df1e02c424c7290ea8a016e18b88166a4 /src/Text/Pandoc/Readers/HTML.hs
parent: 3e04c2f3af08aa7927e38469eb594adf554a50ec (diff)
download: pandoc-f5c64c3060ba40181a86d6c82a67b5b975f74446.tar.gz
HTML reader: fix htmlTag and isInlineTag to accept processing instructions.
Fixes regression #3123 (since 2.0). Added regression test.
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs | 18
1 files changed, 10 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b06e07a80..dab3d5db2 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1069,11 +1069,11 @@ instance NamedTag (Tag String) where
getTagName _ = Nothing
isInlineTag :: NamedTag (Tag a) => Tag a -> Bool
-isInlineTag t = isInlineTagName || isCommentTag t
- where isInlineTagName = case getTagName t of
- Just x -> x
- `Set.notMember` blockTags
- Nothing -> False
+isInlineTag t =
+ isCommentTag t || case getTagName t of
+ Nothing -> False
+ Just x -> x `Set.notMember` blockTags ||
+ T.take 1 x == "?" -- processing instr.
isBlockTag :: NamedTag (Tag a) => Tag a -> Bool
isBlockTag t = isBlockTagName || isTagComment t
@@ -1208,8 +1208,10 @@ htmlTag f = try $ do
let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_'
let isName s = case s of
[] -> False
- ('?':_) -> True -- processing instruction
(c:cs) -> isLetter c && all isNameChar cs
+ let isPI s = case s of
+ ('?':_) -> True -- processing instruction
+ _ -> False
let endpos = if ln == 1
then setSourceColumn startpos
@@ -1225,7 +1227,7 @@ htmlTag f = try $ do
let handleTag tagname = do
-- basic sanity check, since the parser is very forgiving
-- and finds tags in stuff like x<y)
- guard $ isName tagname
+ guard $ isName tagname || isPI tagname
guard $ not $ null tagname
-- <https://example.org> should NOT be a tag either.
-- tagsoup will parse it as TagOpen "https:" [("example.org","")]
@@ -1245,7 +1247,7 @@ htmlTag f = try $ do
else return (next, "<!--" <> s <> "-->")
| otherwise -> fail "bogus comment mode, HTML5 parse error"
TagOpen tagname attr -> do
- guard $ all (isName . fst) attr
+ guard $ isPI tagname || all (isName . fst) attr
handleTag tagname
TagClose tagname ->
handleTag tagname