aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-08-11 11:21:49 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-08-11 11:22:39 -0700
commit6fae136cbb1ed28047e615217b5ce082875f5b19 (patch)
treedbc355f83904bde976f67cbff188ed8cededde70
parent4a535211d8a9ef80e859925694e1b06e76f62196 (diff)
downloadpandoc-6fae136cbb1ed28047e615217b5ce082875f5b19.tar.gz
Textile reader: list and HTML block parsing improvements.
Closes #1513. Lists can now start without an intervening blank line. Also, html block-level tags that don't start a line are parsed as RawInline and don't interrupt paragraphs, as in RedCloth.
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs29
-rw-r--r--tests/textile-reader.native13
-rw-r--r--tests/textile-reader.textile6
3 files changed, 26 insertions, 22 deletions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index cd34da942..ee64e8f2a 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -56,7 +56,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
+import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
import Text.HTML.TagSoup.Match
@@ -133,12 +133,9 @@ blockParsers = [ codeBlock
, rawLaTeXBlock'
, maybeExplicitBlock "table" table
, maybeExplicitBlock "p" para
- , endBlock
+ , mempty <$ blanklines
]
-endBlock :: Parser [Char] ParserState Blocks
-endBlock = string "\n\n" >> return mempty
-
-- | Any block in the order of definition of blockParsers
block :: Parser [Char] ParserState Blocks
block = do
@@ -193,7 +190,7 @@ header = try $ do
attr <- attributes
char '.'
lookAhead whitespace
- name <- trimInlines . mconcat <$> manyTill inline blockBreak
+ name <- trimInlines . mconcat <$> many inline
attr' <- registerHeader attr name
return $ B.headerWith attr' level name
@@ -304,17 +301,12 @@ definitionListItem = try $ do
ds <- parseFromString parseBlocks (s ++ "\n\n")
return [ds]
--- | This terminates a block such as a paragraph. Because of raw html
--- blocks support, we have to lookAhead for a rawHtmlBlock.
-blockBreak :: Parser [Char] ParserState ()
-blockBreak = try (newline >> blanklines >> return ()) <|>
- try (optional spaces >> lookAhead rawHtmlBlock >> return ())
-
-- raw content
-- | A raw Html Block, optionally followed by blanklines
rawHtmlBlock :: Parser [Char] ParserState Blocks
rawHtmlBlock = try $ do
+ skipMany spaceChar
(_,b) <- htmlTag isBlockTag
optional blanklines
return $ B.rawBlock "html" b
@@ -328,7 +320,7 @@ rawLaTeXBlock' = do
-- | In textile, paragraphs are separated by blank lines.
para :: Parser [Char] ParserState Blocks
-para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak
+para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@@ -505,11 +497,14 @@ whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
endline :: Parser [Char] ParserState Inlines
endline = try $ do
- newline >> notFollowedBy blankline
+ newline
+ notFollowedBy blankline
+ notFollowedBy listStart
+ notFollowedBy rawHtmlBlock
return B.linebreak
rawHtmlInline :: Parser [Char] ParserState Inlines
-rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
+rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True)
-- | Raw LaTeX Inline
rawLaTeXInline' :: Parser [Char] ParserState Inlines
@@ -561,7 +556,9 @@ escapedTag = B.str <$>
-- | Any special symbol defined in wordBoundaries
symbol :: Parser [Char] ParserState Inlines
-symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
+symbol = B.str . singleton <$> (notFollowedBy newline *>
+ notFollowedBy rawHtmlBlock *>
+ oneOf wordBoundaries)
-- | Inline code
code :: Parser [Char] ParserState Inlines
diff --git a/tests/textile-reader.native b/tests/textile-reader.native
index f82c4a896..0a0b10bd3 100644
--- a/tests/textile-reader.native
+++ b/tests/textile-reader.native
@@ -67,6 +67,11 @@ Pandoc (Meta {unMeta = fromList []})
,BulletList
[[Plain [Str "one"]]
,[Plain [Str "two",LineBreak,Str "->",Space,Str "and",Space,Str "more"]]]
+,Header 2 ("issue-1513",[],[]) [Str "Issue",Space,Str "#1513"]
+,Para [Str "List:"]
+,BulletList
+ [[Plain [Str "one"]]
+ ,[Plain [Str "two"]]]
,Header 2 ("definition-list",[],[]) [Str "Definition",Space,Str "List"]
,DefinitionList
[([Str "coffee"],
@@ -145,13 +150,9 @@ Pandoc (Meta {unMeta = fromList []})
,RawBlock (Format "html") "<div class=\"foobar\">"
,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"]
,RawBlock (Format "html") "</div>"
-,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"]
-,RawBlock (Format "html") "<div>"
-,Para [Str "inlined"]
-,RawBlock (Format "html") "</div>"
-,Para [Str "as",Space,Str "well."]
+,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be",Space,RawInline (Format "html") "<div>",Str "inlined",RawInline (Format "html") "</div>",Space,Str "as",Space,Str "well."]
,BulletList
- [[Plain [Str "this",Space,Str "<div>",Space,Str "won\8217t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "</div>"]]
+ [[Plain [Str "this",Space,RawInline (Format "html") "<div>",Space,Str "won\8217t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,RawInline (Format "html") "</div>"]]
,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") "</strong>"]]]
,Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"]
,Header 1 ("raw-latex",[],[]) [Str "Raw",Space,Str "LaTeX"]
diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile
index c0c0659b7..e1e143531 100644
--- a/tests/textile-reader.textile
+++ b/tests/textile-reader.textile
@@ -123,6 +123,12 @@ h2. Issue #1500
* two
-> and more
+h2. Issue #1513
+
+List:
+* one
+* two
+
h2. Definition List
- coffee := Hot and black