diff options
-rw-r--r-- | pandoc.cabal | 4 | ||||
-rw-r--r-- | src/Text/Pandoc.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 421 | ||||
-rw-r--r-- | src/pandoc.hs | 2 | ||||
-rw-r--r-- | tests/RunTests.hs | 3 | ||||
-rw-r--r-- | tests/textile-reader.native | 128 | ||||
-rw-r--r-- | tests/textile-reader.textile | 180 |
9 files changed, 743 insertions, 4 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index bacb2790f..d8c281b09 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -16,7 +16,7 @@ Synopsis: Conversion between markup formats Description: Pandoc is a Haskell library for converting from one markup format to another, and a command-line tool that uses this library. It can read markdown and (subsets of) - reStructuredText, HTML, and LaTeX, and it can write + reStructuredText, HTML, LaTeX and Textile, and it can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, OpenDocument, ODT, RTF, MediaWiki, Textile, groff man pages, EPUB, and S5 and Slidy HTML slide shows. @@ -81,6 +81,7 @@ Extra-Source-Files: tests/latex-reader.native, tests/markdown-reader-more.txt, tests/markdown-reader-more.native, + tests/textile-reader.textile, tests/rst-reader.native, tests/rst-reader.rst, tests/s5.basic.html, @@ -190,6 +191,7 @@ Library Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.RST, Text.Pandoc.Readers.TeXMath, + Text.Pandoc.Readers.Textile, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.HTML, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 6cb8130a4..d11f084a5 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -62,6 +62,7 @@ module Text.Pandoc , readRST , readLaTeX , readHtml + , readTextile -- * Parser state used in readers , ParserState (..) , defaultParserState @@ -104,6 +105,7 @@ import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.HTML +import Text.Pandoc.Readers.Textile import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 462267d89..fdc727170 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -30,12 +30,12 @@ Conversion of HTML to 'Pandoc' document. module Text.Pandoc.Readers.HTML ( readHtml, rawHtmlInline, - rawHtmlBlock, + rawHtmlBlock, + htmlTag, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, - htmlTag, htmlEndTag, extractTagType, htmlBlockElement, diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b664476b4..30012eaa5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -28,7 +28,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Markdown ( - readMarkdown + readMarkdown, + smartPunctuation ) where import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs new file mode 100644 index 000000000..7044cbad4 --- /dev/null +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -0,0 +1,421 @@ +{- +Copyright (C) 2010 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Textile + Copyright : Copyright (C) 2010 Paul Rivier + License : GNU GPL, version 2 or above + + Maintainer : Paul Rivier <paul*rivier#demotera*com> + Stability : alpha + Portability : portable + +Conversion from Textile to 'Pandoc' document, based on the spec +available at http://redcloth.org/textile. + +Implemented and parsed: + - Paragraphs + - Code blocks + - Lists + - blockquote + - Inlines : strong, emph, cite, code, deleted, superscript, + subscript, links, smart punctuation + +Implemented but discarded: + - HTML-specific and CSS-specific attributes + +Left to be implemented: + - Pandoc Meta Information (title, author, date) + - footnotes + - dimension sign + - registered, trademark, and copyright symbols + - acronyms + - uppercase + - definition lists + - continued blocks (ex bq..) + - + + + +TODO : refactor common patterns across readers : + - autolink + - smartPunctuation + - more ... + +-} + + +module Text.Pandoc.Readers.Textile ( + readTextile + ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Parsing +import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag, -- find code blocks + rawHtmlBlock, rawHtmlInline ) +-- import Text.Pandoc.Readers.Markdown (smartPunctuation) +import Text.ParserCombinators.Parsec +import Data.Char ( digitToInt, isLetter ) +import Control.Monad ( guard ) + +-- | Parse a Textile text and return a Pandoc document. +readTextile :: ParserState -- ^ Parser state, including options for parser + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readTextile state s = (readWith parseTextile) state (s ++ "\n\n") + + +-- +-- Constants and data structure definitions +-- + +-- | Special chars border strings parsing +specialChars :: [Char] +specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%" + +-- | Generate a Pandoc ADT from a textile document +parseTextile :: GenParser Char ParserState Pandoc +parseTextile = do + -- textile allows raw HTML and does smart punctuation by default + updateState (\state -> state { stateParseRaw = True, stateSmart = True }) + many blankline + blocks <- parseBlocks + return $ Pandoc (Meta [Str ""] [[Str ""]] [Str ""]) blocks -- FIXME + +-- | Parse document blocks +parseBlocks :: GenParser Char ParserState [Block] +parseBlocks = manyTill block eof + +-- | Block parsers list tried in definition order +blockParsers :: [GenParser Char ParserState Block] +blockParsers = [ codeBlock + , header + , blockQuote + , hrule + , anyList + , rawHtmlBlock' + , maybeExplicitBlock "table" table + , maybeExplicitBlock "p" para + , nullBlock ] + +-- | Any block in the order of definition of blockParsers +block :: GenParser Char ParserState Block +block = choice blockParsers <?> "block" + +-- | Code Blocks in Textile are between <pre> and </pre> +codeBlock :: GenParser Char ParserState Block +codeBlock = try $ do + htmlTag False "pre" + result' <- manyTill anyChar (try $ htmlEndTag "pre" >> blockBreak) + -- drop leading newline if any + let result'' = case result' of + '\n':xs -> xs + _ -> result' + -- drop trailing newline if any + let result''' = case reverse result'' of + '\n':_ -> init result'' + _ -> result'' + return $ CodeBlock ("",[],[]) result''' + +-- | Header of the form "hN. content" with N in 1..6 +header :: GenParser Char ParserState Block +header = try $ do + char 'h' + level <- oneOf "123456" >>= return . digitToInt + optional attributes + char '.' + whitespace + name <- manyTill inline blockBreak + return $ Header level (normalizeSpaces name) + +-- | Blockquote of the form "bq. content" +blockQuote :: GenParser Char ParserState Block +blockQuote = try $ do + string "bq" + optional attributes + char '.' + whitespace + para >>= return . BlockQuote . (:[]) + +-- Horizontal rule + +hrule :: GenParser Char st Block +hrule = try $ do + skipSpaces + start <- oneOf "-*" + count 2 (skipSpaces >> char start) + skipMany (spaceChar <|> char start) + newline + optional blanklines + return HorizontalRule + +-- Lists handling + +-- | Can be a bullet list or an ordered list. This implementation is +-- strict in the nesting, sublist must start at exactly "parent depth +-- plus one" +anyList :: GenParser Char ParserState Block +anyList = try $ do + l <- anyListAtDepth 1 + blanklines + return l + +-- | This allow one type of list to be nested into an other type, +-- provided correct nesting +anyListAtDepth :: Int -> GenParser Char ParserState Block +anyListAtDepth depth = choice [ bulletListAtDepth depth, + orderedListAtDepth depth ] + +-- | Bullet List of given depth, depth being the number of leading '*' +bulletListAtDepth :: Int -> GenParser Char ParserState Block +bulletListAtDepth depth = try $ do + items <- many1 (bulletListItemAtDepth depth) + return (BulletList items) + +-- | Bullet List Item of given depth, depth being the number of +-- leading '*' +bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block] +bulletListItemAtDepth depth = try $ do + count depth (char '*') + optional attributes + whitespace + p <- inlines >>= return . Plain + sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[])) + return (p:sublist) + +-- | Ordered List of given depth, depth being the number of +-- leadingĀ '#' +orderedListAtDepth :: Int -> GenParser Char ParserState Block +orderedListAtDepth depth = try $ do + items <- many1 (orderedListItemAtDepth depth) + return (OrderedList (1, DefaultStyle, DefaultDelim) items) + +-- | Ordered List Item of given depth, depth being the number of +-- leadingĀ '#' +orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block] +orderedListItemAtDepth depth = try $ do + count depth (char '#') + optional attributes + whitespace + p <- inlines >>= return . Plain + sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[])) + return (p:sublist) + +-- | This terminates a block such as a paragraph. Because of raw html +-- blocks support, we have to lookAhead for a rawHtmlBlock. +blockBreak :: GenParser Char ParserState () +blockBreak = try $ choice + [newline >> blanklines >> return (), + lookAhead rawHtmlBlock' >> return ()] + +-- | A raw Html Block, optionally followed by blanklines +rawHtmlBlock' :: GenParser Char ParserState Block +rawHtmlBlock' = try $ do + b <- rawHtmlBlock + optional blanklines + return b + +-- | In textile, paragraphs are separated by blank lines. +para :: GenParser Char ParserState Block +para = try $ do + content <- manyTill inline blockBreak + return $ Para $ normalizeSpaces content + + +-- Tables + +-- | A table cell spans until a pipe | +tableCell :: GenParser Char ParserState TableCell +tableCell = do + c <- many1 (noneOf "|\n") + content <- parseFromString (many1 inline) c + return $ [ Plain $ normalizeSpaces content ] + +-- | A table row is made of many table cells +tableRow :: GenParser Char ParserState [TableCell] +tableRow = try $ do + char '|' + cells <- endBy1 tableCell (char '|') + newline + return cells + +-- | Many table rows +tableRows :: GenParser Char ParserState [[TableCell]] +tableRows = many1 tableRow + +-- | Table headers are made of cells separated by a tag "|_." +tableHeaders :: GenParser Char ParserState [TableCell] +tableHeaders = try $ do + let separator = (try $ string "|_.") + separator + headers <- sepBy1 tableCell separator + char '|' + newline + return headers + +-- | A table with an optional header. Current implementation can +-- handle tables with and without header, but will parse cells +-- alignment attributes as content. +table :: GenParser Char ParserState Block +table = try $ do + headers <- option [] tableHeaders + rows <- tableRows + blanklines + let nbOfCols = max (length headers) (length $ head rows) + return $ Table [] + (replicate nbOfCols AlignDefault) + (replicate nbOfCols 0.0) + headers + rows + + +-- | Blocks like 'p' and 'table' do not need explicit block tag. +-- However, they can be used to set HTML/CSS attributes when needed. +maybeExplicitBlock :: String -- ^ block tag name + -> GenParser Char ParserState Block -- ^ implicit block + -> GenParser Char ParserState Block +maybeExplicitBlock name blk = try $ do + optional $ string name >> optional attributes >> char '.' >> + ((try whitespace) <|> endline) + blk + + + +---------- +-- Inlines +---------- + + +-- | Any inline element +inline :: GenParser Char ParserState Inline +inline = choice inlineParsers <?> "inline" + +-- | List of consecutive inlines before a newline +inlines :: GenParser Char ParserState [Inline] +inlines = manyTill inline newline + +-- | Inline parsers tried in order +inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers = [ autoLink + , str + , htmlSpan +-- , smartPunctuation -- from markdown reader + , whitespace + , endline + , rawHtmlInline + , code + , simpleInline (string "??") (Cite []) + , simpleInline (string "**") Strong + , simpleInline (string "__") Emph + , simpleInline (char '*') Strong + , simpleInline (char '_') Emph + , simpleInline (char '-') Strikeout + , simpleInline (char '^') Superscript + , simpleInline (char '~') Subscript + , link + , image + , symbol + ] + +-- | Any string +str :: GenParser Char ParserState Inline +str = do + xs <- many1 (noneOf (specialChars ++ "\t\n ")) + optional $ charsInBalanced '(' ')' -- drop acronym explanation + -- e.g. PBS(Public Broadcasting Service) + -- parse a following hyphen if followed by a letter + -- (this prevents unwanted interpretation as starting a strikeout section) + result <- option xs $ try $ do + guard $ not . null $ xs + char '-' + next <- lookAhead letter + guard $ isLetter (last xs) || isLetter next + return $ xs ++ "-" + return $ Str result + +-- | Textile allows HTML span infos, we discard them +htmlSpan :: GenParser Char ParserState Inline +htmlSpan = try $ do + char '%' + _ <- attributes + content <- manyTill anyChar (char '%') + return $ Str content + +-- | Some number of space chars +whitespace :: GenParser Char ParserState Inline +whitespace = many1 spaceChar >> return Space <?> "whitespace" + +-- | In Textile, an isolated endline character is a line break +endline :: GenParser Char ParserState Inline +endline = try $ do + newline >> notFollowedBy blankline + return LineBreak + +-- | Textile standard link syntax is label:"target" +link :: GenParser Char ParserState Inline +link = try $ do + name <- surrounded (char '"') inline + char ':' + url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;," >> (space <|> newline)))) + return $ Link name (url, "") + +-- | Detect plain links to http or email. +autoLink :: GenParser Char ParserState Inline +autoLink = do + (orig, src) <- (try uri <|> try emailAddress) + return $ Link [Str orig] (src, "") + +-- | image embedding +image :: GenParser Char ParserState Inline +image = try $ do + char '!' >> notFollowedBy space + src <- manyTill anyChar (lookAhead $ oneOf "!(") + alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')'))) + char '!' + return $ Image [Str alt] (src, alt) + +-- | Any special symbol defined in specialChars +symbol :: GenParser Char ParserState Inline +symbol = do + result <- oneOf specialChars + return $ Str [result] + +-- | Inline code +code :: GenParser Char ParserState Inline +code = surrounded (char '@') anyChar >>= + return . Code + +-- | Html / CSS attributes +attributes :: GenParser Char ParserState String +attributes = choice [ enclosed (char '(') (char ')') anyChar, + enclosed (char '{') (char '}') anyChar, + enclosed (char '[') (char ']') anyChar] + +-- | Parses material surrounded by a parser. +surrounded :: GenParser Char st t -- ^ surrounding parser + -> GenParser Char st a -- ^ content parser (to be used repeatedly) + -> GenParser Char st [a] +surrounded border = enclosed border border + +-- | Inlines are most of the time of the same form +simpleInline :: GenParser Char ParserState t -- ^ surrounding parser + -> ([Inline] -> Inline) -- ^ Inline constructor + -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly) +simpleInline border construct = surrounded border (inlineWithAttribute) >>= + return . construct . normalizeSpaces + where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/pandoc.hs b/src/pandoc.hs index f73391b6b..0cf694873 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -86,6 +86,7 @@ readers = [("native" , readPandoc) ,("markdown" , readMarkdown) ,("markdown+lhs" , readMarkdown) ,("rst" , readRST) + ,("textile" , readTextile) -- TODO : textile+lhs ,("rst+lhs" , readRST) ,("html" , readHtml) ,("latex" , readLaTeX) @@ -581,6 +582,7 @@ defaultReaderName fallback (x:xs) = ".ltx" -> "latex" ".rst" -> "rst" ".lhs" -> "markdown+lhs" + ".textile" -> "textile" ".native" -> "native" _ -> defaultReaderName fallback xs diff --git a/tests/RunTests.hs b/tests/RunTests.hs index 50ebcfc8e..b56b492ae 100644 --- a/tests/RunTests.hs +++ b/tests/RunTests.hs @@ -105,6 +105,8 @@ main = do "html-reader.html" "html-reader.native" r10 <- runTest "latex reader" ["-r", "latex", "-w", "native", "-s", "-R"] "latex-reader.latex" "latex-reader.native" + rTextile1 <- runTest "textile reader" ["-r", "textile", "-w", "native", "-s"] + "textile-reader.textile" "textile-reader.native" r11 <- runTest "native reader" ["-r", "native", "-w", "native", "-s"] "testsuite.native" "testsuite.native" r14s <- mapM (\style -> runTest ("markdown reader (citations) (" ++ style ++ ")") ["-r", "markdown", "-w", "markdown", "--bibliography", "biblio.bib", "--csl", style ++ ".csl", "--no-wrap"] "markdown-citations.txt" ("markdown-citations." ++ style ++ ".txt")) ["chicago-author-date","ieee","mhra"] @@ -121,6 +123,7 @@ main = do , r8, r8a -- rst , r9 -- html , r10 -- latex + , rTextile1 -- textile , r11 -- native ] ++ r12s ++ r13s ++ r14s if all id results diff --git a/tests/textile-reader.native b/tests/textile-reader.native new file mode 100644 index 000000000..d657521e6 --- /dev/null +++ b/tests/textile-reader.native @@ -0,0 +1,128 @@ +Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]}) +[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] +, HorizontalRule +, Header 1 [Str "Headers"] +, Header 2 [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] +, Header 3 [Str "Level",Space,Str "3",Space,Str "with",Space,Strong [Str "emphasis"]] +, Header 4 [Str "Level",Space,Str "4"] +, Header 5 [Str "Level",Space,Str "5"] +, Header 6 [Str "Level",Space,Str "6"] +, Header 1 [Str "Paragraphs"] +, Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."] +, Para [Str "Line",Space,Str "breaks",Space,Str "are",Space,Str "preserved",Space,Str "in",Space,Str "textile",Str ",",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "not",Space,Str "wrap",Space,Str "your",Space,Str "very",LineBreak,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor",Space,Str "and",Space,Str "have",Space,Str "it",Space,Str "rendered",LineBreak,Str "with",Space,Str "no",Space,Str "break",Str "."] +, Para [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."] +, BulletList + [ [ Plain [Str "criminey",Str "."] ] + ] +, Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "paragraph",Space,Str "break",Space,Str "between",Space,Str "here"] +, Para [Str "and",Space,Str "here",Str "."] +, Header 1 [Str "Block",Space,Str "Quotes"] +, BlockQuote + [ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "famous",Space,Str "quote",Space,Str "from",Space,Str "somebody",Str ".",Space,Str "He",Space,Str "had",Space,Str "a",Space,Str "lot",Space,Str "of",Space,Str "things",Space,Str "to",LineBreak,Str "say",Str ",",Space,Str "so",Space,Str "the",Space,Str "text",Space,Str "is",Space,Str "really",Space,Str "really",Space,Str "long",Space,Str "and",Space,Str "spans",Space,Str "on",Space,Str "multiple",Space,Str "lines",Str "."] ] + +, Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph",Str "."] +, Header 1 [Str "Code",Space,Str "Blocks"] +, Para [Str "Code",Str ":"] +, CodeBlock ("",[],[]) " ---- (should be four hyphens)\n\n sub status {\n print \"working\";\n }\n\n this code block is indented by one tab" +, Para [Str "And",Str ":"] +, CodeBlock ("",[],[]) " this code block is indented by two tabs\n\n These should not be escaped: \\$ \\\\ \\> \\[ \\{" +, Header 1 [Str "Lists"] +, Header 2 [Str "Unordered"] +, Para [Str "Asterisks",Space,Str "tight",Str ":"] +, BulletList + [ [ Plain [Str "asterisk",Space,Str "1"] ] + , [ Plain [Str "asterisk",Space,Str "2"] ] + , [ Plain [Str "asterisk",Space,Str "3"] ] ] +, Header 2 [Str "Ordered"] +, Para [Str "Tight",Str ":"] +, OrderedList (1,DefaultStyle,DefaultDelim) + [ [ Plain [Str "First"] ] + , [ Plain [Str "Second"] ] + , [ Plain [Str "Third"] ] ] +, Header 2 [Str "Nested"] +, BulletList + [ [ Plain [Str "ui",Space,Str "1"] + , BulletList + [ [ Plain [Str "ui",Space,Str "1",Str ".",Str "1"] + , OrderedList (1,DefaultStyle,DefaultDelim) + [ [ Plain [Str "oi",Space,Str "1",Str ".",Str "1",Str ".",Str "1"] ] + , [ Plain [Str "oi",Space,Str "1",Str ".",Str "1",Str ".",Str "2"] ] ] ], [ Plain [Str "ui",Space,Str "1",Str ".",Str "2"] ] ] ], [ Plain [Str "ui",Space,Str "2"] + , OrderedList (1,DefaultStyle,DefaultDelim) + [ [ Plain [Str "oi",Space,Str "2",Str ".",Str "1"] + , BulletList + [ [ Plain [Str "ui",Space,Str "2",Str ".",Str "1",Str ".",Str "1"] ] + , [ Plain [Str "ui",Space,Str "2",Str ".",Str "1",Str ".",Str "2"] ] ] ] ] ] ] +, Header 1 [Str "Inline",Space,Str "Markup"] +, Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str ".",LineBreak,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str ".",LineBreak,Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."] +, Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]],LineBreak,Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str ".",LineBreak,Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]] +, Para [Str "Superscripts",Str ":",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str ".",LineBreak,Str "Subscripts",Str ":",Space,Str "H",Subscript [Str "2"],Str "O",Str ",",Space,Str "H",Subscript [Str "23"],Str "O",Str ",",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O",Str "."] +, Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,Str "-",Str "-",Space,Str "automatic",Space,Str "dashes",Str "."] +, Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Str ".",Str ".",Str ".",Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."] +, Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Str "\"",Str "I",Str "'",Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you",Str "\"",Space,Str "for",Space,Str "example",Str "."] +, Header 1 [Str "Links"] +, Header 2 [Str "Explicit"] +, Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")] +, Para [Link [Str "Email",Space,Str "link"] ("mailto:nobody@nowhere.net","")] +, Para [Str "Automatic",Space,Str "linking",Space,Str "to",Space,Link [Str "http://www.example.com"] ("http://www.example.com",""),Space,Str "and",Space,Link [Str "foobar@example.com"] ("mailto:foobar@example.com",""),Str "."] +, Header 1 [Str "Tables"] +, Para [Str "Textile",Space,Str "allows",Space,Str "tables",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "headers",Space,Str ":"] +, Header 2 [Str "Without",Space,Str "headers"] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [ + ] [ + [ [ Plain [Str "name"] ] + , [ Plain [Str "age"] ] + , [ Plain [Str "sex"] ] ], + [ [ Plain [Str "joan"] ] + , [ Plain [Str "24"] ] + , [ Plain [Str "f"] ] ], + [ [ Plain [Str "archie"] ] + , [ Plain [Str "29"] ] + , [ Plain [Str "m"] ] ], + [ [ Plain [Str "bella"] ] + , [ Plain [Str "45"] ] + , [ Plain [Str "f"] ] ] ] +, Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Str ".",Str ".",Str "."] +, Header 2 [Str "With",Space,Str "headers"] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [ [ Plain [Str "name"] ] + , [ Plain [Str "age"] ] + , [ Plain [Str "sex"] ] ] [ + [ [ Plain [Str "joan"] ] + , [ Plain [Str "24"] ] + , [ Plain [Str "f"] ] ], + [ [ Plain [Str "archie"] ] + , [ Plain [Str "29"] ] + , [ Plain [Str "m"] ] ], + [ [ Plain [Str "bella"] ] + , [ Plain [Str "45"] ] + , [ Plain [Str "f"] ] ] ] +, Header 1 [Str "Images"] +, Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax",Str ",",Space,Str "like",Space,LineBreak,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),LineBreak,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] +, Header 1 [Str "Attributes"] +, Header 2 [Str "HTML",Space,Str "and",Space,Str "CSS",Space,Str "attributes",Space,Str "are",Space,Str "ignored"] +, Para [Str "as",Space,Str "well",Space,Str "as",Space,Strong [Str "inline",Space,Str "attributes"],Space,Str "of",Space,Str " all kind"] +, Para [Str "and",Space,Str "paragraph",Space,Str "attributes",Str ",",Space,Str "and",Space,Str "table",Space,Str "attributes",Str "."] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [ + ] [ + [ [ Plain [Str "name"] ] + , [ Plain [Str "age"] ] + , [ Plain [Str "sex"] ] ], + [ [ Plain [Str "joan"] ] + , [ Plain [Str "24"] ] + , [ Plain [Str "f"] ] ] ] +, Header 1 [Str "Raw",Space,Str "HTML"] +, Para [Str "However",Str ",",Space,HtmlInline "<strong>",Space,Str "raw",Space,Str "HTML",Space,Str "inlines",Space,HtmlInline "</strong>",Space,Str "are",Space,Str "accepted",Str ",",Space,Str "as",Space,Str "well",Space,Str "as",Space,Str ":"] +, RawHtml "<div class=\"foobar\">" +, Para [Str "any",Space,Strong [Str "Raw",Space,Str "HTML",Space,Str "Block"],Space,Str "with",Space,Str "bold",LineBreak] +, RawHtml "</div>" +, Para [Str "Html",Space,Str "blocks",Space,Str "can",Space,Str "be"] +, RawHtml "<div>" +, Para [Str "inlined"] +, RawHtml "</div>" +, Para [Str "as",Space,Str "well",Str "."] +, BulletList + [ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ] + , [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ] +, Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] ] diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile new file mode 100644 index 000000000..0b65e11bb --- /dev/null +++ b/tests/textile-reader.textile @@ -0,0 +1,180 @@ +This is a set of tests for pandoc Textile Reader. Part of it comes +from John Gruber's markdown test suite. + +----- + +h1. Headers + +h2. Level 2 with an "embeded link":http://www.example.com + +h3. Level 3 with *emphasis* + +h4. Level 4 + +h5. Level 5 + +h6. Level 6 + + +h1. Paragraphs + +Here's a regular paragraph. + +Line breaks are preserved in textile, so you can not wrap your very +long paragraph with your favourite text editor and have it rendered +with no break. + + +Here's one with a bullet. + +* criminey. + +There should be a paragraph break between here + +and here. + +h1. Block Quotes + +bq. This is a famous quote from somebody. He had a lot of things to +say, so the text is really really long and spans on multiple lines. + +And a following paragraph. + +h1. Code Blocks + +Code: + +<pre> + ---- (should be four hyphens) + + sub status { + print "working"; + } + + this code block is indented by one tab +</pre> + +And: + +<pre> + this code block is indented by two tabs + + These should not be escaped: \$ \\ \> \[ \{ +</pre> + + +h1. Lists + +h2. Unordered + +Asterisks tight: + +* asterisk 1 +* asterisk 2 +* asterisk 3 + +h2. Ordered + +Tight: + +# First +# Second +# Third + +h2. Nested + +* ui 1 +** ui 1.1 +### oi 1.1.1 +### oi 1.1.2 +** ui 1.2 +* ui 2 +## oi 2.1 +*** ui 2.1.1 +*** ui 2.1.2 + + +h1. Inline Markup + +This is _emphasized_, and so __is this__. +This is *strong*, and so **is this**. +A "*strong link*":http://www.foobar.com. + +_*This is strong and em.*_ +So is *_this_* word and __**that one**__. +-This is strikeout and *strong*- + +Superscripts: a^bc^d a^*hello*^ a^hello there^. +Subscripts: H~2~O, H~23~O, H~many of them~O. + +Dashes : How cool -- automatic dashes. + +Elipses : He thought and thought ... and then thought some more. + +Quotes and apostrophes : "I'd like to thank you" for example. + + +h1. Links + +h2. Explicit + +Just a "url":http://www.url.com + +"Email link":mailto:nobody@nowhere.net + +Automatic linking to http://www.example.com and foobar@example.com. + +h1. Tables + +Textile allows tables with and without headers : + +h2. Without headers + +| name | age | sex | +| joan | 24 | f | +| archie | 29 | m | +| bella | 45 | f | + +and some text following ... + +h2. With headers + +|_. name |_. age |_. sex | +| joan | 24 | f | +| archie | 29 | m | +| bella | 45 | f | + + + +h1. Images + +Textile inline image syntax, like +here !this_is_an_image.png(this is the alt text)! +and here !this_is_an_image.png!. + +h1. Attributes + +h2{color:red}. HTML and CSS attributes are ignored + +as well as *(foo)inline attributes* of %{color:red} all kind% + +p{color:green}. and paragraph attributes, and table attributes. + +table{foo:bar}. +| name | age | sex | +| joan | 24 | f | + +h1. Raw HTML + +However, <strong> raw HTML inlines </strong> are accepted, as well as : + +<div class="foobar"> + any *Raw HTML Block* with bold +</div> + +Html blocks can be <div>inlined</div> as well. + +* this <div> won't produce raw html blocks </div> +* but this <strong> will produce inline html </strong> + +Can you prove that 2 < 3 ? |