From 887fc14f3d6f2909a2201769e4b8a54a8f6c8793 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 15 Sep 2012 15:46:16 -0400 Subject: HTML reader: Modified htmlTag for fewer false positives. A tag must start with `<` followed by `!`, `?`, `/`, or a letter. This makes it more useful in the MediaWiki and markdown parsers. --- src/Text/Pandoc/Readers/HTML.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 295171ca8..424d9bdec 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -599,7 +599,7 @@ htmlInBalanced f = try $ do -- | Matches a tag meeting a certain condition. htmlTag :: (Tag String -> Bool) -> Parser [Char] st (Tag String, String) htmlTag f = try $ do - lookAhead (char '<') + lookAhead $ char '<' >> (oneOf "/!?" <|> letter) (next : _) <- getInput >>= return . canonicalizeTags . parseTags guard $ f next -- advance the parser -- cgit v1.2.3