diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-08-11 11:21:49 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-08-11 11:22:39 -0700 |
commit | 6fae136cbb1ed28047e615217b5ce082875f5b19 (patch) | |
tree | dbc355f83904bde976f67cbff188ed8cededde70 /src/Text | |
parent | 4a535211d8a9ef80e859925694e1b06e76f62196 (diff) | |
download | pandoc-6fae136cbb1ed28047e615217b5ce082875f5b19.tar.gz |
Textile reader: list and HTML block parsing improvements.
Closes #1513.
Lists can now start without an intervening blank line.
Also, html block-level tags that don't start a line are parsed
as RawInline and don't interrupt paragraphs, as in RedCloth.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 29 |
1 files changed, 13 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index cd34da942..ee64e8f2a 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -56,7 +56,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) +import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match @@ -133,12 +133,9 @@ blockParsers = [ codeBlock , rawLaTeXBlock' , maybeExplicitBlock "table" table , maybeExplicitBlock "p" para - , endBlock + , mempty <$ blanklines ] -endBlock :: Parser [Char] ParserState Blocks -endBlock = string "\n\n" >> return mempty - -- | Any block in the order of definition of blockParsers block :: Parser [Char] ParserState Blocks block = do @@ -193,7 +190,7 @@ header = try $ do attr <- attributes char '.' lookAhead whitespace - name <- trimInlines . mconcat <$> manyTill inline blockBreak + name <- trimInlines . mconcat <$> many inline attr' <- registerHeader attr name return $ B.headerWith attr' level name @@ -304,17 +301,12 @@ definitionListItem = try $ do ds <- parseFromString parseBlocks (s ++ "\n\n") return [ds] --- | This terminates a block such as a paragraph. Because of raw html --- blocks support, we have to lookAhead for a rawHtmlBlock. -blockBreak :: Parser [Char] ParserState () -blockBreak = try (newline >> blanklines >> return ()) <|> - try (optional spaces >> lookAhead rawHtmlBlock >> return ()) - -- raw content -- | A raw Html Block, optionally followed by blanklines rawHtmlBlock :: Parser [Char] ParserState Blocks rawHtmlBlock = try $ do + skipMany spaceChar (_,b) <- htmlTag isBlockTag optional blanklines return $ B.rawBlock "html" b @@ -328,7 +320,7 @@ rawLaTeXBlock' = do -- | In textile, paragraphs are separated by blank lines. para :: Parser [Char] ParserState Blocks -para = B.para . trimInlines . mconcat <$> manyTill inline blockBreak +para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -505,11 +497,14 @@ whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break endline :: Parser [Char] ParserState Inlines endline = try $ do - newline >> notFollowedBy blankline + newline + notFollowedBy blankline + notFollowedBy listStart + notFollowedBy rawHtmlBlock return B.linebreak rawHtmlInline :: Parser [Char] ParserState Inlines -rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag +rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True) -- | Raw LaTeX Inline rawLaTeXInline' :: Parser [Char] ParserState Inlines @@ -561,7 +556,9 @@ escapedTag = B.str <$> -- | Any special symbol defined in wordBoundaries symbol :: Parser [Char] ParserState Inlines -symbol = B.str . singleton <$> (oneOf wordBoundaries <|> oneOf markupChars) +symbol = B.str . singleton <$> (notFollowedBy newline *> + notFollowedBy rawHtmlBlock *> + oneOf wordBoundaries) -- | Inline code code :: Parser [Char] ParserState Inlines |