From ca51bbbf166b8fd5d835426e1f499d251f2c110c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Nov 2010 20:02:37 -0800 Subject: HTML reader: don't parse raw HTML inside <code> tag. Previously '<code><a>x</a></code>' would be parsed as Code "<a>x</a>", which is not what you want. --- src/Text/Pandoc/Readers/HTML.hs | 4 ++-- tests/html-reader.html | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f47309d3f..5ccbc4fb1 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -686,8 +686,8 @@ inline = choice [ charRef code :: GenParser Char ParserState Inline code = try $ do - result <- (htmlOpenTag "code" >> manyTill anyChar (htmlEndTag "code")) - <|> (htmlOpenTag "tt" >> manyTill anyChar (htmlEndTag "tt")) + result <- (htmlOpenTag "code" >> manyTill (noneOf "<>") (htmlEndTag "code")) + <|> (htmlOpenTag "tt" >> manyTill (noneOf "<>") (htmlEndTag "tt")) -- remove internal line breaks, leading and trailing space, -- and decode character references return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ diff --git a/tests/html-reader.html b/tests/html-reader.html index da6c075b3..a51ee3307 100644 --- a/tests/html-reader.html +++ b/tests/html-reader.html @@ -340,7 +340,7 @@ foo

This should just be an HTML comment:

So is this word.

This is strong and em.

So is this word.

-

This is code: >, $, \, \$, <html>.

+

This is code: >, $, \, \$, <html>.


Smart quotes, ellipses, dashes

"Hello," said the spider. "'Shelob' is my name."

@@ -442,8 +442,8 @@ Email link (nobody [at] nowhere.net)

Empty.

An e-mail address: nobody [at] nowhere.net

Blockquoted: http://example.com/

-

Auto-links should not occur here: <http://example.com/>

-
or here: <http://example.com/>
+

Auto-links should not occur here: <http://example.com/>

+
or here: <http://example.com/>
 

Images

-- cgit v1.2.3