diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-07-10 10:28:39 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-07-10 10:33:27 -0700 |
commit | 99fe8594d94573b8ba8ec1d1e47b57444de4e4cb (patch) | |
tree | aceaded36d6dd830095ef5b759373b8292e9abad | |
parent | 5f730ee804be35d9ff52adcb929e8b8d7422bfea (diff) | |
download | pandoc-99fe8594d94573b8ba8ec1d1e47b57444de4e4cb.tar.gz |
Avoid parsing partial URLs as HTML tags.
Closes #2277.
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 9 | ||||
-rw-r--r-- | tests/Tests/Readers/Markdown.hs | 3 |
2 files changed, 11 insertions, 1 deletion
```diff
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f2f97dbc4..361d64361 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -911,8 +911,15 @@ htmlTag :: Monad m
 htmlTag f = try $ do
   lookAhead (char '<')
   inp <- getInput
-  let (next : _) = canonicalizeTags $ parseTags inp
+  let hasTagWarning (TagWarning _:_) = True
+      hasTagWarning _ = False
+  let (next : rest) = canonicalizeTags $ parseTagsOptions
+                       parseOptions{ optTagWarning = True } inp
   guard $ f next
+  -- we get a TagWarning on things like
+  -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
+  -- which should NOT be parsed as an HTML tag, see #2277
+  guard $ not $ hasTagWarning rest
   case next of
        TagComment s
          | "<!--" `isPrefixOf` inp -> do
diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs
index ed79f9e3d..e08ac1607 100644
--- a/tests/Tests/Readers/Markdown.hs
+++ b/tests/Tests/Readers/Markdown.hs
@@ -195,6 +195,9 @@ tests = [ testGroup "inline code"
           [ "with unicode dash following" =:
             "<http://foo.bar>\8212" =?>
             para (autolink "http://foo.bar" <> str "\8212")
+          , "a partial URL (#2277)" =:
+            "<www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>" =?>
+            para (text "<www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>")
           ]
         , testGroup "Headers"
           [ "blank line before header" =:
```