diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2013-07-25 09:45:23 -0700 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2013-07-25 09:45:23 -0700 |
commit | 85cc140744b01148da944a58948d9e4a87cb64c4 (patch) | |
tree | 5e455f550c39b8766629d0abcf4194613c859077 | |
parent | 2e5edbb27837372f658b1abbe05371be57415847 (diff) | |
download | pandoc-85cc140744b01148da944a58948d9e4a87cb64c4.tar.gz |
Textile reader: Improved handling of `<pre>` blocks.
* Closed #927 (a bug in which `<pre>` in certain contexts was
not recognized as a code block).
* Remove internal HTML tags in code blocks, rather than printing
them verbatim.
* Parse attributes on `<pre>` tag for code blocks.
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 14 | ||||
-rw-r--r-- | tests/textile-reader.native | 2 |
2 files changed, 11 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index a1687a691..9191f6908 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -57,6 +57,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) +import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..)) import Text.HTML.TagSoup.Match import Data.List ( intercalate ) import Data.Char ( digitToInt, isUpper ) @@ -152,8 +153,10 @@ codeBlockBc = try $ do -- | Code Blocks in Textile are between <pre> and </pre> codeBlockPre :: Parser [Char] ParserState Block codeBlockPre = try $ do - htmlTag (tagOpen (=="pre") null) - result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak) + (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) + result' <- (innerText . parseTags) `fmap` -- remove internal tags + manyTill anyChar (htmlTag (tagClose (=="pre"))) + optional blanklines -- drop leading newline if any let result'' = case result' of '\n':xs -> xs @@ -162,7 +165,10 @@ codeBlockPre = try $ do let result''' = case reverse result'' of '\n':_ -> init result'' _ -> result'' - return $ CodeBlock ("",[],[]) result''' + let classes = words $ fromAttrib "class" t + let ident = fromAttrib "id" t + let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ CodeBlock (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 header :: Parser [Char] ParserState Block @@ -275,7 +281,7 @@ definitionListItem = try $ do -- blocks support, we have to lookAhead for a rawHtmlBlock. blockBreak :: Parser [Char] ParserState () blockBreak = try (newline >> blanklines >> return ()) <|> - (lookAhead rawHtmlBlock >> return ()) + try (optional spaces >> lookAhead rawHtmlBlock >> return ()) -- raw content diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 22a338d38..d14ae02c8 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -139,7 +139,7 @@ Pandoc (Meta {unMeta = fromList []}) ,Header 1 ("",[],[]) [Str "Raw",Space,Str "HTML"] ,Para [Str "However",Str ",",Space,RawInline "html" "<strong>",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,RawInline "html" "</strong>",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] ,RawBlock "html" "<div class=\"foobar\">" -,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold",LineBreak] +,Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold"] ,RawBlock "html" "</div>" ,Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"] ,RawBlock "html" "<div>" |