aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorpaul.rivier <paul.r.ml@gmail.com>2010-09-16 11:26:12 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2010-12-03 23:10:50 -0800
commitd532c72c5b3249ae83733d9fe064764d123603c0 (patch)
treeaac4917b7e23496d37ce5a2033dfb6bc59940044 /src/Text
parent3ffd7246173b8bdc0d25167355f55e3193aaa989 (diff)
downloadpandoc-d532c72c5b3249ae83733d9fe064764d123603c0.tar.gz
Basic Textile Reader
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs2
-rw-r--r--src/Text/Pandoc/Definition.hs1
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs3
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs253
-rw-r--r--src/Text/Pandoc/Shared.hs1
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs2
6 files changed, 261 insertions, 1 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 6cb8130a4..d11f084a5 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -62,6 +62,7 @@ module Text.Pandoc
, readRST
, readLaTeX
, readHtml
+ , readTextile
-- * Parser state used in readers
, ParserState (..)
, defaultParserState
@@ -104,6 +105,7 @@ import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
+import Text.Pandoc.Readers.Textile
import Text.Pandoc.Writers.Native
import Text.Pandoc.Writers.Markdown
import Text.Pandoc.Writers.RST
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
index fffca3b2e..0972d5f4c 100644
--- a/src/Text/Pandoc/Definition.hs
+++ b/src/Text/Pandoc/Definition.hs
@@ -108,6 +108,7 @@ data Inline
| Emph [Inline] -- ^ Emphasized text (list of inlines)
| Strong [Inline] -- ^ Strongly emphasized text (list of inlines)
| Strikeout [Inline] -- ^ Strikeout text (list of inlines)
+ | Inserted [Inline] -- ^ Underlined text (list of inlines)
| Superscript [Inline] -- ^ Superscripted text (list of inlines)
| Subscript [Inline] -- ^ Subscripted text (list of inlines)
| SmallCaps [Inline] -- ^ Small caps text (list of inlines)
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 5ccbc4fb1..fdc727170 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -30,7 +30,8 @@ Conversion of HTML to 'Pandoc' document.
module Text.Pandoc.Readers.HTML (
readHtml,
rawHtmlInline,
- rawHtmlBlock,
+ rawHtmlBlock,
+ htmlTag,
anyHtmlBlockTag,
anyHtmlInlineTag,
anyHtmlTag,
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
new file mode 100644
index 000000000..dba92ecd8
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -0,0 +1,253 @@
+{-
+Copyright (C) 2010 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Textile
+ Copyright : Copyright (C) 2010 Paul Rivier
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Paul Rivier <paul*rivier#demotera*com>
+ Stability : alpha
+ Portability : portable
+
+Conversion from Textile to 'Pandoc' document.
+
+Implemented :
+ - Paragraphs
+ - Code blocks
+ - Lists
+ - blockquote
+ - Inlines : strong, emph, cite, code, deleted, inserted, superscript, subscript
+
+
+Not implemented :
+ - HTML-specific and CSS-specific inlines
+-}
+
+
+module Text.Pandoc.Readers.Textile (
+ readTextile
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.HTML ( htmlTag, htmlEndTag )
+import Text.ParserCombinators.Parsec
+import Data.Char ( digitToInt )
+
+-- | Parse a Textile text and return a Pandoc document.
+readTextile :: ParserState -- ^ Parser state, including options for parser
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
+readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
+
+
+--
+-- Constants and data structure definitions
+--
+
+-- | Special chars border strings parsing
+specialChars :: [Char]
+specialChars = "\\[]*#_@~<>!?-+^&'\";:"
+
+-- | Generate a Pandoc ADT from a textile document
+parseTextile :: GenParser Char ParserState Pandoc
+parseTextile = do
+ many blankline
+ blocks <- parseBlocks
+ return $ Pandoc (Meta [Str ""] [[Str ""]] [Str ""]) blocks -- FIXME
+
+-- | Parse document blocks
+parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks = manyTill block eof
+
+-- | Block parsers list tried in definition order
+blockParsers :: [GenParser Char ParserState Block]
+blockParsers = [ codeBlock
+ , header
+ , blockQuote
+ , anyList
+ , para
+ , nullBlock ]
+
+-- | Any block in the order of definition of blockParsers
+block :: GenParser Char ParserState Block
+block = choice blockParsers <?> "block"
+
+-- | Code Blocks in Textile are between <pre> and </pre>
+codeBlock :: GenParser Char ParserState Block
+codeBlock = try $ do
+ htmlTag "pre"
+ content <- manyTill anyChar (try $ htmlEndTag "pre" >> blockBreak)
+ return $ CodeBlock ("",[],[]) content
+
+-- | Header of the form "hN. content" with N in 1..6
+header :: GenParser Char ParserState Block
+header = try $ do
+ char 'h'
+ level <- oneOf "123456" >>= return . digitToInt
+ char '.'
+ whitespace
+ name <- manyTill inline blockBreak
+ return $ Header level (normalizeSpaces name)
+
+-- | Blockquote of the form "bq. content"
+blockQuote :: GenParser Char ParserState Block
+blockQuote = try $ do
+ string "bq."
+ whitespace
+ para >>= return . BlockQuote . (:[])
+
+-- Lists handling
+
+-- | Can be a bullet list or an ordered list. This implementation is
+-- strict in the nesting, sublist must start at exactly "parent depth
+-- plus one"
+anyList :: GenParser Char ParserState Block
+anyList = try $ do
+ l <- anyListAtDepth 1
+ blanklines
+ return l
+
+-- | This allow one type of list to be nested into an other type,
+-- provided correct nesting
+anyListAtDepth :: Int -> GenParser Char ParserState Block
+anyListAtDepth depth = choice [ bulletListAtDepth depth,
+ orderedListAtDepth depth ]
+
+-- | Bullet List of given depth, depth being the number of leading '*'
+bulletListAtDepth :: Int -> GenParser Char ParserState Block
+bulletListAtDepth depth = try $ do
+ items <- many1 (bulletListItemAtDepth depth)
+ return (BulletList items)
+
+-- | Bullet List Item of given depth, depth being the number of
+-- leading '*'
+bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block]
+bulletListItemAtDepth depth = try $ do
+ count depth (char '*')
+ whitespace
+ p <- inlines >>= return . Plain
+ sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
+ return (p:sublist)
+
+-- | Ordered List of given depth, depth being the number of
+-- leadingĀ '#'
+orderedListAtDepth :: Int -> GenParser Char ParserState Block
+orderedListAtDepth depth = try $ do
+ items <- many1 (orderedListItemAtDepth depth)
+ return (OrderedList (1, DefaultStyle, DefaultDelim) items)
+
+-- | Ordered List Item of given depth, depth being the number of
+-- leadingĀ '#'
+orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block]
+orderedListItemAtDepth depth = try $ do
+ count depth (char '#')
+ whitespace
+ p <- inlines >>= return . Plain
+ sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
+ return (p:sublist)
+
+-- | This terminates a block such as a paragraph.
+blockBreak :: GenParser Char ParserState ()
+blockBreak = try $ newline >> blanklines >> return ()
+
+-- | In textile, paragraphs are separated by blank lines.
+para :: GenParser Char ParserState Block
+para = try $ do
+ content <- manyTill inline blockBreak
+ return $ Para $ normalizeSpaces content
+
+-- | Any inline element
+inline :: GenParser Char ParserState Inline
+inline = choice inlineParsers <?> "inline"
+
+-- | List of consecutive inlines before a newline
+inlines :: GenParser Char ParserState [Inline]
+inlines = manyTill inline newline
+
+-- | Inline parsers tried in order
+inlineParsers :: [GenParser Char ParserState Inline]
+inlineParsers = [ str
+ , whitespace
+ , endline
+ , code
+ , simpleInline (string "??") (Cite [])
+ , simpleInline (char '*') Strong
+ , simpleInline (char '_') Emph
+ , simpleInline (string "**") Strong
+ , simpleInline (string "__") Emph
+ , simpleInline (char '-') Strikeout
+ , simpleInline (char '+') Inserted
+ , simpleInline (char '^') Superscript
+ , simpleInline (char '~') Subscript
+ -- , link
+ -- , image
+ -- , math
+ -- , autoLink
+ , symbol
+ ]
+
+-- | Any string
+str :: GenParser Char ParserState Inline
+str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+
+-- | Some number of space chars
+whitespace :: GenParser Char ParserState Inline
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
+
+-- | In Textile, an endline character that can be treated as a space,
+-- not a structural break
+endline :: GenParser Char ParserState Inline
+endline = try $ do
+ newline >> notFollowedBy blankline
+ return Space
+
+-- | Any special symbol defined in specialChars
+symbol :: GenParser Char ParserState Inline
+symbol = do
+ result <- oneOf specialChars
+ return $ Str [result]
+
+-- | Inline code
+code :: GenParser Char ParserState Inline
+code = surrounded (char '@') anyChar >>=
+ return . Code
+
+-- | Parses material surrounded by a parser.
+surrounded :: GenParser Char st t -- ^ surrounding parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+surrounded border = enclosed border border
+
+-- | Inlines are most of the time of the same form
+simpleInline :: GenParser Char ParserState t -- ^ surrounding parser
+ -> ([Inline] -> Inline) -- ^ Inline constructor
+ -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
+simpleInline border construct = surrounded border inline >>=
+ return . construct . normalizeSpaces
+
+
+-- TODO
+--
+-- - Pandoc Meta Information
+-- - footnotes
+-- - hyperlink "label":target
+-- - tables
+-- - doc
+-- - tests
+-- - Inserted inline handling in writers \ No newline at end of file
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 0fdaf42f3..006c11c63 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -377,6 +377,7 @@ inlineListToIdentifier =
Str s -> s
Emph lst -> concatMap extractText lst
Strikeout lst -> concatMap extractText lst
+ Inserted lst -> concatMap extractText lst
Superscript lst -> concatMap extractText lst
SmallCaps lst -> concatMap extractText lst
Subscript lst -> concatMap extractText lst
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index b8da4bec0..55403af42 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -454,6 +454,8 @@ inlineToHtml opts inline =
(Code str) -> return $ thecode << str
(Strikeout lst) -> inlineListToHtml opts lst >>=
return . (thespan ! [thestyle "text-decoration: line-through;"])
+ (Inserted lst) -> inlineListToHtml opts lst >>=
+ return . (thespan ! [thestyle "text-decoration: underline;"])
(SmallCaps lst) -> inlineListToHtml opts lst >>=
return . (thespan ! [thestyle "font-variant: small-caps;"])
(Superscript lst) -> inlineListToHtml opts lst >>= return . sup