From 99fe8594d94573b8ba8ec1d1e47b57444de4e4cb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 10 Jul 2015 10:28:39 -0700 Subject: Avoid parsing partial URLs as HTML tags. Closes #2277. --- src/Text/Pandoc/Readers/HTML.hs | 9 ++++++++- tests/Tests/Readers/Markdown.hs | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f2f97dbc4..361d64361 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -911,8 +911,15 @@ htmlTag :: Monad m htmlTag f = try $ do lookAhead (char '<') inp <- getInput - let (next : _) = canonicalizeTags $ parseTags inp + let hasTagWarning (TagWarning _:_) = True + hasTagWarning _ = False + let (next : rest) = canonicalizeTags $ parseTagsOptions + parseOptions{ optTagWarning = True } inp guard $ f next + -- we get a TagWarning on things like + -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66> + -- which should NOT be parsed as an HTML tag, see #2277 + guard $ not $ hasTagWarning rest case next of TagComment s | "