From 97c9691696744a6e56a28dea0221e6230b029ce4 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sat, 2 Jan 2016 22:28:07 -0800
Subject: Textile reader: don't allow block HTML tags in inline contexts.
The reader previously did allow this, following redcloth,
which happily parses
    Html blocks can be <div>inlined</div>
    as well.
as
    <p>Html blocks can be
    <div>inlined</div>
    as well.</p>
This is invalid HTML, and this kind of thing can lead
to parsing problems (stack overflows) as well. So this
commit undoes this behavior. The above sample now produces:
    <p>Html blocks can be</p>
    <div>
    <p>inlined</p>
    </div>
    <p>as well.</p>
---
src/Text/Pandoc/Readers/Textile.hs | 4 ++--
tests/textile-reader.native | 9 +++++----
tests/textile-reader.textile | 5 +----
3 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 355285f54..dd1d289a3 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -57,7 +57,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
+import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
import Text.Pandoc.Shared (trim)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
@@ -504,7 +504,7 @@ endline = try $ do
return B.linebreak
rawHtmlInline :: Parser [Char] ParserState Inlines
-rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True)
+rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
rawLaTeXInline' :: Parser [Char] ParserState Inlines
diff --git a/tests/textile-reader.native b/tests/textile-reader.native
index df727a8bb..fe2c7be24 100644
--- a/tests/textile-reader.native
+++ b/tests/textile-reader.native
@@ -150,10 +150,11 @@ Pandoc (Meta {unMeta = fromList []})
,RawBlock (Format "html") ""
,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"]
,RawBlock (Format "html") "
"
-,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be",Space,RawInline (Format "html") "",Str "inlined",RawInline (Format "html") "
",Space,Str "as",Space,Str "well."]
-,BulletList
- [[Plain [Str "this",Space,RawInline (Format "html") "",Space,Str "won't",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,RawInline (Format "html") "
"]]
- ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") ""]]]
+,Para [Str "Html",Space,Str "blocks",Space,Str "can"]
+,RawBlock (Format "html") ""
+,Para [Str "interrupt",Space,Str "paragraphs"]
+,RawBlock (Format "html") "
"
+,Para [Str "as",Space,Str "well."]
,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
,Header 1 ("acronyms-and-marks",[],[]) [Str "Acronyms",Space,Str "and",Space,Str "marks"]
,Para [Str "PBS (Public Broadcasting System)"]
diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile
index dab73b39f..d5d7378b0 100644
--- a/tests/textile-reader.textile
+++ b/tests/textile-reader.textile
@@ -228,10 +228,7 @@ However, raw HTML inlines are accepted, as well as :
any *Raw HTML Block* with bold
-Html blocks can be inlined
as well.
-
-* this won't produce raw html blocks
-* but this will produce inline html
+Html blocks can interrupt paragraphs
as well.
Can you prove that 2 < 3 ?
--
cgit v1.2.3