aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-08-11 11:21:49 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-08-11 11:22:39 -0700
commit6fae136cbb1ed28047e615217b5ce082875f5b19 (patch)
treedbc355f83904bde976f67cbff188ed8cededde70 /src/Text
parent4a535211d8a9ef80e859925694e1b06e76f62196 (diff)
downloadpandoc-6fae136cbb1ed28047e615217b5ce082875f5b19.tar.gz
Textile reader: list and HTML block parsing improvements.
Closes #1513. Lists can now start without an intervening blank line. Also, html block-level tags that don't start a line are parsed as RawInline and don't interrupt paragraphs, as in RedCloth.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs29
1 files changed, 13 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index cd34da942..ee64e8f2a 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -56,7 +56,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
+import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
import Text.HTML.TagSoup.Match
@@ -133,12 +133,9 @@ blockParsers = [ codeBlock
, rawLaTeXBlock'
, maybeExplicitBlock "table" table
, maybeExplicitBlock "p" para
- , endBlock
+ , mempty <$ blanklines
]
-endBlock :: Parser [Char] ParserState Blocks
-endBlock = string "\n\n" >> return mempty
-
-- | Any block in the order of definition of blockParsers
block :: Parser [Char] ParserState Blocks
block = do
@@ -193,7 +190,7 @@ header = try $ do
attr <- attributes
char '.'
lookAhead whitespace
- name <- trimInlines . mconcat <$> manyTill inline blockBreak
+ name <- trimInlines . mconcat <$> many inline
attr' <- registerHeader attr name
return $ B.headerWith attr' level name
@@ -304,17 +301,12 @@ definitionListItem = try $ do
ds <- parseFromString parseBlocks (s ++ "\n\n")
return [ds]
--- | This terminates a block such as a paragraph. Because of raw html
--- blocks support, we have to lookAhead for a rawHtmlBlock.
-blockBreak :: Parser [Char] ParserState ()
-blockBreak = try (newline >> blanklines >> return ()) <|>
- try (optional spaces >> lookAhead rawHtmlBlock >> return ())
-
-- raw content
-- | A raw Html Block, optionally followed by blanklines
rawHtmlBlock :: Parser [Char] ParserState Blocks
rawHtmlBlock = try $ do
+ skipMany spaceChar
(_,b) <- htmlTag isBlockTag
optional blanklines
return $ B.rawBlock "html" b
@@ -328,7 +320,7 @@ rawLaTeXBlock' = do
-- | In textile, paragraphs are separated by blank lines.
para :: Parser [Char] ParserState Blocks
-para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak
+para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@@ -505,11 +497,14 @@ whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
endline :: Parser [Char] ParserState Inlines
endline = try $ do
- newline >> notFollowedBy blankline
+ newline
+ notFollowedBy blankline
+ notFollowedBy listStart
+ notFollowedBy rawHtmlBlock
return B.linebreak
rawHtmlInline :: Parser [Char] ParserState Inlines
-rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
+rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True)
-- | Raw LaTeX Inline
rawLaTeXInline' :: Parser [Char] ParserState Inlines
@@ -561,7 +556,9 @@ escapedTag = B.str <$>
-- | Any special symbol defined in wordBoundaries
symbol :: Parser [Char] ParserState Inlines
-symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars)
+symbol = B.str . singleton <$> (notFollowedBy newline *>
+ notFollowedBy rawHtmlBlock *>
+ oneOf wordBoundaries)
-- | Inline code
code :: Parser [Char] ParserState Inlines