aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-07-10 10:28:39 -0700
committer John MacFarlane <jgm@berkeley.edu> 2015-07-10 10:33:27 -0700
commit 99fe8594d94573b8ba8ec1d1e47b57444de4e4cb (patch)
tree aceaded36d6dd830095ef5b759373b8292e9abad /src
parent 5f730ee804be35d9ff52adcb929e8b8d7422bfea (diff)
download pandoc-99fe8594d94573b8ba8ec1d1e47b57444de4e4cb.tar.gz
Avoid parsing partial URLs as HTML tags.
Closes #2277.
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 9
1 file changed, 8 insertions, 1 deletion
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f2f97dbc4..361d64361 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -911,8 +911,15 @@ htmlTag :: Monad m
htmlTag f = try $ do
lookAhead (char '<')
inp <- getInput
- let (next : _) = canonicalizeTags $ parseTags inp
+ let hasTagWarning (TagWarning _:_) = True
+ hasTagWarning _ = False
+ let (next : rest) = canonicalizeTags $ parseTagsOptions
+ parseOptions{ optTagWarning = True } inp
guard $ f next
+ -- we get a TagWarning on things like
+ -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
+ -- which should NOT be parsed as an HTML tag, see #2277
+ guard $ not $ hasTagWarning rest
case next of
TagComment s
| "<!--" `isPrefixOf` inp -> do