diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-08-11 11:21:49 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-08-11 11:22:39 -0700 |
commit | 6fae136cbb1ed28047e615217b5ce082875f5b19 (patch) | |
tree | dbc355f83904bde976f67cbff188ed8cededde70 | |
parent | 4a535211d8a9ef80e859925694e1b06e76f62196 (diff) | |
download | pandoc-6fae136cbb1ed28047e615217b5ce082875f5b19.tar.gz |
Textile reader: list and HTML block parsing improvements.
Closes #1513.
Lists can now start without an intervening blank line.
Also, HTML block-level tags that don't start a line are parsed
as RawInline and don't interrupt paragraphs, as in RedCloth.
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 29 | ||||
-rw-r--r-- | tests/textile-reader.native | 13 | ||||
-rw-r--r-- | tests/textile-reader.textile | 6 |
3 files changed, 26 insertions, 22 deletions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index cd34da942..ee64e8f2a 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -56,7 +56,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) +import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match @@ -133,12 +133,9 @@ blockParsers = [ codeBlock , rawLaTeXBlock' , maybeExplicitBlock "table" table , maybeExplicitBlock "p" para - , endBlock + , mempty <$ blanklines ] -endBlock :: Parser [Char] ParserState Blocks -endBlock = string "\n\n" >> return mempty - -- | Any block in the order of definition of blockParsers block :: Parser [Char] ParserState Blocks block = do @@ -193,7 +190,7 @@ header = try $ do attr <- attributes char '.' lookAhead whitespace - name <- trimInlines . mconcat <$> manyTill inline blockBreak + name <- trimInlines . mconcat <$> many inline attr' <- registerHeader attr name return $ B.headerWith attr' level name @@ -304,17 +301,12 @@ definitionListItem = try $ do ds <- parseFromString parseBlocks (s ++ "\n\n") return [ds] --- | This terminates a block such as a paragraph. Because of raw html --- blocks support, we have to lookAhead for a rawHtmlBlock. 
-blockBreak :: Parser [Char] ParserState () -blockBreak = try (newline >> blanklines >> return ()) <|> - try (optional spaces >> lookAhead rawHtmlBlock >> return ()) - -- raw content -- | A raw Html Block, optionally followed by blanklines rawHtmlBlock :: Parser [Char] ParserState Blocks rawHtmlBlock = try $ do + skipMany spaceChar (_,b) <- htmlTag isBlockTag optional blanklines return $ B.rawBlock "html" b @@ -328,7 +320,7 @@ rawLaTeXBlock' = do -- | In textile, paragraphs are separated by blank lines. para :: Parser [Char] ParserState Blocks -para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak +para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -505,11 +497,14 @@ whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break endline :: Parser [Char] ParserState Inlines endline = try $ do - newline >> notFollowedBy blankline + newline + notFollowedBy blankline + notFollowedBy listStart + notFollowedBy rawHtmlBlock return B.linebreak rawHtmlInline :: Parser [Char] ParserState Inlines -rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag +rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True) -- | Raw LaTeX Inline rawLaTeXInline' :: Parser [Char] ParserState Inlines @@ -561,7 +556,9 @@ escapedTag = B.str <$> -- | Any special symbol defined in wordBoundaries symbol :: Parser [Char] ParserState Inlines -symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars) +symbol = B.str . 
singleton <$> (notFollowedBy newline *> + notFollowedBy rawHtmlBlock *> + oneOf wordBoundaries) -- | Inline code code :: Parser [Char] ParserState Inlines diff --git a/tests/textile-reader.native b/tests/textile-reader.native index f82c4a896..0a0b10bd3 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -67,6 +67,11 @@ Pandoc (Meta {unMeta = fromList []}) ,BulletList [[Plain [Str "one"]] ,[Plain [Str "two",LineBreak,Str "->",Space,Str "and",Space,Str "more"]]] +,Header 2 ("issue-1513",[],[]) [Str "Issue",Space,Str "#1513"] +,Para [Str "List:"] +,BulletList + [[Plain [Str "one"]] + ,[Plain [Str "two"]]] ,Header 2 ("definition-list",[],[]) [Str "Definition",Space,Str "List"] ,DefinitionList [([Str "coffee"], @@ -145,13 +150,9 @@ Pandoc (Meta {unMeta = fromList []}) ,RawBlock (Format "html") "<div class=\"foobar\">" ,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] ,RawBlock (Format "html") "</div>" -,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"] -,RawBlock (Format "html") "<div>" -,Para [Str "inlined"] -,RawBlock (Format "html") "</div>" -,Para [Str "as",Space,Str "well."] +,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be",Space,RawInline (Format "html") "<div>",Str "inlined",RawInline (Format "html") "</div>",Space,Str "as",Space,Str "well."] ,BulletList - [[Plain [Str "this",Space,Str "<div>",Space,Str "won\8217t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "</div>"]] + [[Plain [Str "this",Space,RawInline (Format "html") "<div>",Space,Str "won\8217t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,RawInline (Format "html") "</div>"]] ,[Plain [Str "but",Space,Str "this",Space,RawInline (Format "html") "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,RawInline (Format "html") "</strong>"]]] ,Para [Str "Can",Space,Str "you",Space,Str 
"prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ,Header 1 ("raw-latex",[],[]) [Str "Raw",Space,Str "LaTeX"] diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile index c0c0659b7..e1e143531 100644 --- a/tests/textile-reader.textile +++ b/tests/textile-reader.textile @@ -123,6 +123,12 @@ h2. Issue #1500 * two -> and more +h2. Issue #1513 + +List: +* one +* two + h2. Definition List - coffee := Hot and black |