aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Textile.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2010-12-22 20:25:15 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2010-12-30 13:55:40 -0800
commit904050fa36715e18522d80432a2666fcbaacd105 (patch)
tree4745876e797d400539dd80309d31c330a013e969 /src/Text/Pandoc/Readers/Textile.hs
parent220fe5fab89ce84fcb98f0430c4126281ca8362d (diff)
downloadpandoc-904050fa36715e18522d80432a2666fcbaacd105.tar.gz
New HTML reader using tagsoup as a lexer.
* The new reader is faster and more accurate. * API changes for Text.Pandoc.Readers.HTML: - removed rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType, htmlBlockElement, htmlComment - added htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag * tagsoup is a new dependency. * Text.Pandoc.Parsing: Generalized type on readWith. * Benchmark.hs: Added length calculation to force full evaluation. * Updated HTML reader tests. * Updated markdown and textile readers to use the functions from the HTML reader. * Note: The markdown reader now correctly handles some cases it did not before. For example: <hr/> is reproduced without adding a space. <script> a = '<b>'; </script> is parsed correctly.
Diffstat (limited to 'src/Text/Pandoc/Readers/Textile.hs')
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs30
1 files changed, 15 insertions, 15 deletions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 52a9e12c8..8362c542c 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -58,10 +58,9 @@ module Text.Pandoc.Readers.Textile ( readTextile) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag, -- find code blocks
- rawHtmlBlock, rawHtmlInline )
--- import Text.Pandoc.Readers.Markdown (smartPunctuation)
+import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
import Text.ParserCombinators.Parsec
+import Text.HTML.TagSoup.Match
import Data.Char ( digitToInt, isLetter )
import Control.Monad ( guard, liftM )
@@ -127,7 +126,7 @@ blockParsers = [ codeBlock
, blockQuote
, hrule
, anyList
- , rawHtmlBlock'
+ , rawHtmlBlock
, maybeExplicitBlock "table" table
, maybeExplicitBlock "p" para
, nullBlock ]
@@ -139,8 +138,8 @@ block = choice blockParsers <?> "block"
-- | Code Blocks in Textile are between <pre> and </pre>
codeBlock :: GenParser Char ParserState Block
codeBlock = try $ do
- htmlTag False "pre"
- result' <- manyTill anyChar (try $ htmlEndTag "pre" >> blockBreak)
+ htmlTag (tagOpen (=="pre") null)
+ result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak)
-- drop leading newline if any
let result'' = case result' of
'\n':xs -> xs
@@ -261,21 +260,19 @@ definitionListItem = try $ do
-- this ++ "\n\n" does not look very good
ds <- parseFromString parseBlocks (s ++ "\n\n")
return [ds]
-
-
+
-- | This terminates a block such as a paragraph. Because of raw html
-- blocks support, we have to lookAhead for a rawHtmlBlock.
blockBreak :: GenParser Char ParserState ()
-blockBreak = try $ choice
- [newline >> blanklines >> return (),
- lookAhead rawHtmlBlock' >> return ()]
+blockBreak = try (newline >> blanklines >> return ()) <|>
+ (lookAhead rawHtmlBlock >> return ())
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock' :: GenParser Char ParserState Block
-rawHtmlBlock' = try $ do
- b <- rawHtmlBlock
+rawHtmlBlock :: GenParser Char ParserState Block
+rawHtmlBlock = try $ do
+ (_,b) <- htmlTag isBlockTag
optional blanklines
- return b
+ return $ RawHtml b
-- | In textile, paragraphs are separated by blank lines.
para :: GenParser Char ParserState Block
@@ -450,6 +447,9 @@ endline = try $ do
newline >> notFollowedBy blankline
return LineBreak
+rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline = liftM (HtmlInline . snd) $ htmlTag isInlineTag
+
-- | Textile standard link syntax is label:"target"
link :: GenParser Char ParserState Inline
link = try $ do