diff options
-rw-r--r-- | pandoc.cabal | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 61 | ||||
-rw-r--r-- | tests/RunTests.hs | 3 | ||||
-rw-r--r-- | tests/textile-reader.native | 120 | ||||
-rw-r--r-- | tests/textile-reader.textile | 156 |
5 files changed, 326 insertions, 18 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index ddcb94ee0..c49a42ca8 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -16,7 +16,7 @@ Synopsis: Conversion between markup formats Description: Pandoc is a Haskell library for converting from one markup format to another, and a command-line tool that uses this library. It can read markdown and (subsets of) - reStructuredText, HTML, and LaTeX, and it can write + reStructuredText, HTML, LaTeX and Textile, and it can write markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook, OpenDocument, ODT, RTF, MediaWiki, Textile, groff man pages, EPUB, and S5 and Slidy HTML slide shows. @@ -79,6 +79,7 @@ Extra-Source-Files: tests/latex-reader.native, tests/markdown-reader-more.txt, tests/markdown-reader-more.native, + tests/textile-reader.textile, tests/rst-reader.native, tests/rst-reader.rst, tests/s5.basic.html, @@ -193,6 +194,7 @@ Library Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.RST, Text.Pandoc.Readers.TeXMath, + Text.Pandoc.Readers.Textile, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.HTML, diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 3d759a944..5e4609c01 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -25,15 +25,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Stability : alpha Portability : portable -Conversion from Textile to 'Pandoc' document. 
+Conversion from Textile to 'Pandoc' document, based on the spec +available at http://redcloth.org/hobix.com/textile/ Implemented : - Paragraphs - Code blocks - Lists - blockquote - - Inlines : strong, emph, cite, code, deleted, inserted, superscript, subscript - + - Inlines : strong, emph, cite, code, deleted, inserted, superscript, + subscript, links Not implemented : - HTML-specific and CSS-specific inlines @@ -189,6 +190,7 @@ tableRow :: GenParser Char ParserState [TableCell] tableRow = try $ do char '|' cells <- endBy1 tableCell (char '|') + -- TODO : don't eat the last newline newline return cells @@ -206,25 +208,27 @@ tableHeaders = try $ do newline return headers --- | A table with an optional header +-- | A table with an optional header. Current implementation can +-- handle tables with and without header, but will parse cells +-- alignment attributes as content. table :: GenParser Char ParserState Block table = try $ do headers <- option [] tableHeaders rows <- tableRows + let nbOfCols = max (length headers) (length $ head rows) return $ Table [] - (replicate (length headers) AlignDefault) - (replicate (length headers) 0.0) + (replicate nbOfCols AlignDefault) + (replicate nbOfCols 0.0) headers rows + ---------- -- Inlines ---------- - - -- | Any inline element inline :: GenParser Char ParserState Inline inline = choice inlineParsers <?> "inline" @@ -248,10 +252,10 @@ inlineParsers = [ str , simpleInline (char '+') Inserted , simpleInline (char '^') Superscript , simpleInline (char '~') Subscript - -- , link - -- , image - -- , math - -- , autoLink + , link + , autoLink + , image + , image , symbol ] @@ -270,6 +274,29 @@ endline = try $ do newline >> notFollowedBy blankline return Space +link :: GenParser Char ParserState Inline +link = try $ do + name <- surrounded (char '"') inline + char ':' + url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;," >> (space <|> newline)))) + return $ Link name (url, "") + +-- | Detect plain links to http or 
email. +autoLink :: GenParser Char ParserState Inline +autoLink = do + (orig, src) <- uri -- (try uri <|> try emailAddress) + return $ Link [Str orig] (src, "") + +-- | image embedding +image :: GenParser Char ParserState Inline +image = try $ do + char '!' >> notFollowedBy space + src <- manyTill anyChar (lookAhead $ oneOf "!(") + alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')'))) + char '!' + return $ Image [Str alt] (src, alt) + + -- | Any special symbol defined in specialChars symbol :: GenParser Char ParserState Inline symbol = do @@ -297,9 +324,9 @@ simpleInline border construct = surrounded border inline >>= -- TODO -- --- - Pandoc Meta Information +-- - Pandoc Meta Information (title, author, date) -- - footnotes --- - hyperlink "label":target --- - tables alignments --- - tests --- - Inserted inline handling in writers
\ No newline at end of file +-- - autolink is not called +-- - should autolink be shared through Parsing.hs? +-- - Inserted inline handling in writers +-- - table parser is a bit too greedy and requires a double newline after tables
\ No newline at end of file diff --git a/tests/RunTests.hs b/tests/RunTests.hs index cf2997a06..bd19c10bd 100644 --- a/tests/RunTests.hs +++ b/tests/RunTests.hs @@ -105,6 +105,8 @@ main = do "html-reader.html" "html-reader.native" r10 <- runTest "latex reader" ["-r", "latex", "-w", "native", "-s", "-R"] "latex-reader.latex" "latex-reader.native" + rTextile1 <- runTest "textile reader" ["-r", "textile", "-w", "native", "-s", "-R"] + "textile-reader.textile" "textile-reader.native" r11 <- runTest "native reader" ["-r", "native", "-w", "native", "-s"] "testsuite.native" "testsuite.native" r12s <- if runLhsTests @@ -119,6 +121,7 @@ main = do , r8, r8a -- rst , r9 -- html , r10 -- latex + , rTextile1 -- textile , r11 -- native ] ++ r12s ++ r13s if all id results diff --git a/tests/textile-reader.native b/tests/textile-reader.native new file mode 100644 index 000000000..788a79e51 --- /dev/null +++ b/tests/textile-reader.native @@ -0,0 +1,120 @@ +Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]}) +[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite."] +, Para [Strikeout [Str "-"],Str "-",Str "-"] +, Header 1 [Str "Headers"] +, Header 2 [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] +, Header 3 [Str "Level",Space,Str "3",Space,Str "with",Space,Strong [Str "emphasis"]] +, Header 4 [Str "Level",Space,Str "4"] +, Header 5 [Str "Level",Space,Str "5"] +, Header 6 [Str "Level",Space,Str "6"] +, Header 1 [Str "Paragraphs"] +, Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph."] +, Para [Str "Line",Space,Str "break",Space,Str "are",Space,Str 
"not",Space,Str "paragraph",Space,Str "break",Space,Str "in",Space,Str "textile,",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "wrap",Space,Str "your",Space,Str "very",Space,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor,",Space,Str "it",Space,Str "will",Space,Str "be",Space,Str "rendered",Space,Str "as",Space,Str "a",Space,Str "single",Space,Str "one."] +, Para [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet."] +, BulletList + [ [ Plain [Str "criminey."] ] + ] +, Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str "line",Space,Str "break"] +, Para [Str "here."] +, Header 1 [Str "Block",Space,Str "Quotes"] +, BlockQuote + [ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "famous",Space,Str "quote",Space,Str "from",Space,Str "somebody.",Space,Str "He",Space,Str "had",Space,Str "a",Space,Str "lot",Space,Str "of",Space,Str "things",Space,Str "to",Space,Str "say,",Space,Str "so",Space,Str "the",Space,Str "text",Space,Str "is",Space,Str "really",Space,Str "really",Space,Str "long",Space,Str "and",Space,Str "spans",Space,Str "on",Space,Str "multiple",Space,Str "lines."] ] + +, Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."] +, Header 1 [Str "Code",Space,Str "Blocks"] +, Para [Str "Code",Str ":"] +, CodeBlock ("",[],[]) "\n ---- (should be four hyphens)\n\n sub status {\n print \"working\";\n }\n\n this code block is indented by one tab\n" +, Para [Str "And",Str ":"] +, CodeBlock ("",[],[]) "\n this code block is indented by two tabs\n\n These should not be escaped: \\$ \\\\ \\> \\[ \\{\n" +, Header 1 [Str "Lists"] +, Header 2 [Str "Unordered"] +, Para [Str "Asterisks",Space,Str "tight",Str ":"] +, BulletList + [ [ Plain [Str "asterisk",Space,Str "1"] ] + , [ Plain [Str "asterisk",Space,Str "2"] ] + , [ Plain [Str "asterisk",Space,Str "3"] ] ] +, Header 2 [Str 
"Ordered"] +, Para [Str "Tight",Str ":"] +, OrderedList (1,DefaultStyle,DefaultDelim) + [ [ Plain [Str "First"] ] + , [ Plain [Str "Second"] ] + , [ Plain [Str "Third"] ] ] +, Header 2 [Str "Nested"] +, BulletList + [ [ Plain [Str "ui",Space,Str "1"] + , BulletList + [ [ Plain [Str "ui",Space,Str "1.1"] + , OrderedList (1,DefaultStyle,DefaultDelim) + [ [ Plain [Str "oi",Space,Str "1.1.1"] ] + , [ Plain [Str "oi",Space,Str "1.1.2"] ] ] ], [ Plain [Str "ui",Space,Str "1.2"] ] ] ], [ Plain [Str "ui",Space,Str "2"] + , OrderedList (1,DefaultStyle,DefaultDelim) + [ [ Plain [Str "oi",Space,Str "2.1"] + , BulletList + [ [ Plain [Str "ui",Space,Str "2.1.1"] ] + , [ Plain [Str "ui",Space,Str "2.1.2"] ] ] ] ] ] ] +, Header 1 [Str "Inline",Space,Str "Markup"] +, Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."] +, Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str "."] +, Para [Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."] +, Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]] +, Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str "."] +, Para [Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]] +, Para [Str "Superscripts",Str ":",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str "."] +, Para [Str "Subscripts",Str ":",Space,Str "H",Subscript [Str "2"],Str "O,",Space,Str "H",Subscript [Str "23"],Str "O,",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O."] +, Header 1 [Str "Links"] +, Header 2 [Str "Explicit"] +, 
Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")] +, Para [Link [Str "Email",Space,Str "link"] ("mailto:nobody@nowhere.net","")] +, Para [Str "Automatic",Space,Str "linking",Space,Str "to",Space,Str "http",Str ":",Str "//www.example.com",Space,Str "and",Space,Str "foobar",Str "@",Str "example.com."] +, Header 1 [Str "Tables"] +, Para [Str "Textile",Space,Str "allows",Space,Str "tables",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "headers",Space,Str ":"] +, Header 2 [Str "Without",Space,Str "headers"] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [ + ] [ + [ [ Plain [Str "name"] ] + , [ Plain [Str "age"] ] + , [ Plain [Str "sex"] ] ], + [ [ Plain [Str "joan"] ] + , [ Plain [Str "24"] ] + , [ Plain [Str "f"] ] ], + [ [ Plain [Str "archie"] ] + , [ Plain [Str "29"] ] + , [ Plain [Str "m"] ] ], + [ [ Plain [Str "bella"] ] + , [ Plain [Str "45"] ] + , [ Plain [Str "f"] ] ] ] +, Para [Str "And",Space,Str "some",Space,Str "text",Space,Str "..."] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [ + ] [ + [ [ Plain [Str "name"] ] + , [ Plain [Str "age"] ] + , [ Plain [Str "sex"] ] ], + [ [ Plain [Str "joan"] ] + , [ Plain [Str "24"] ] + , [ Plain [Str "f"] ] ], + [ [ Plain [Str "archie"] ] + , [ Plain [Str "29"] ] + , [ Plain [Str "m"] ] ], + [ [ Plain [Str "bella"] ] + , [ Plain [Str "45"] ] + , [ Plain [Str "f"] ] ] ] +, Para [] +, Header 2 [Str "With",Space,Str "headers"] +, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] + [ [ Plain [Str "name"] ] + , [ Plain [Str "age"] ] + , [ Plain [Str "sex"] ] ] [ + [ [ Plain [Str "joan"] ] + , [ Plain [Str "24"] ] + , [ Plain [Str "f"] ] ], + [ [ Plain [Str "archie"] ] + , [ Plain [Str "29"] ] + , [ Plain [Str "m"] ] ], + [ [ Plain [Str "bella"] ] + , [ Plain [Str "45"] ] + , [ Plain [Str "f"] ] ] ] +, Para [] +, Header 1 [Str "Images"] +, Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str 
"syntax,",Space,Str "like",Space,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),Space,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] ] diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile new file mode 100644 index 000000000..b4c10b1b1 --- /dev/null +++ b/tests/textile-reader.textile @@ -0,0 +1,156 @@ +This is a set of tests for pandoc. Most of them are adapted from John +Gruber's markdown test suite. + +----- + +h1. Headers + +h2. Level 2 with an "embeded link":http://www.example.com + +h3. Level 3 with *emphasis* + +h4. Level 4 + +h5. Level 5 + +h6. Level 6 + + +h1. Paragraphs + +Here's a regular paragraph. + +Line break are not paragraph break in textile, so you can wrap your +very long paragraph with your favourite text editor, it will be +rendered as a single one. + +Here's one with a bullet. + +* criminey. + +There should be a hard line break + +here. + +h1. Block Quotes + +bq. This is a famous quote from somebody. He had a lot of things to +say, so the text is really really long and spans on multiple lines. + +And a following paragraph. + +h1. Code Blocks + +Code: + +<pre> + ---- (should be four hyphens) + + sub status { + print "working"; + } + + this code block is indented by one tab +</pre> + +And: + +<pre> + this code block is indented by two tabs + + These should not be escaped: \$ \\ \> \[ \{ +</pre> + + +h1. Lists + +h2. Unordered + +Asterisks tight: + +* asterisk 1 +* asterisk 2 +* asterisk 3 + +h2. Ordered + +Tight: + +# First +# Second +# Third + +h2. Nested + +* ui 1 +** ui 1.1 +### oi 1.1.1 +### oi 1.1.2 +** ui 1.2 +* ui 2 +## oi 2.1 +*** ui 2.1.1 +*** ui 2.1.2 + + +h1. Inline Markup + +This is _emphasized_, and so __is this__. + +This is *strong*, and so **is this**. + +A "*strong link*":http://www.foobar.com. + +_*This is strong and em.*_ + +So is *_this_* word and __**that one**__. 
+ +-This is strikeout and *strong*- + +Superscripts: a^bc^d a^*hello*^ a^hello there^. + +Subscripts: H~2~O, H~23~O, H~many of them~O. + + +h1. Links + +h2. Explicit + +Just a "url":http://www.url.com + +"Email link":mailto:nobody@nowhere.net + +Automatic linking to http://www.example.com and foobar@example.com. + +h1. Tables + +Textile allows tables with and without headers : + +h2. Without headers + +| name | age | sex | +| joan | 24 | f | +| archie | 29 | m | +| bella | 45 | f | + +And some text ... + +| name | age | sex | +| joan | 24 | f | +| archie| 29 | m | +| bella | 45 | f | + + +h2. With headers + +|_. name |_. age |_. sex | +| joan | 24 | f | +| archie | 29 | m | +| bella | 45 | f | + + +h1. Images + +Textile inline image syntax, like +here !this_is_an_image.png(this is the alt text)! +and here !this_is_an_image.png!. |