aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs110
-rw-r--r--src/Text/Pandoc/Blocks.hs145
-rw-r--r--src/Text/Pandoc/CharacterReferences.hs327
-rw-r--r--src/Text/Pandoc/Definition.hs116
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs496
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs651
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs909
-rw-r--r--src/Text/Pandoc/Readers/RST.hs640
-rw-r--r--src/Text/Pandoc/Shared.hs792
-rw-r--r--src/Text/Pandoc/UTF8.hs45
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs248
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs299
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs458
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs310
-rw-r--r--src/Text/Pandoc/Writers/Man.hs293
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs373
-rw-r--r--src/Text/Pandoc/Writers/RST.hs325
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs286
18 files changed, 6823 insertions, 0 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
new file mode 100644
index 000000000..7633bf7ef
--- /dev/null
+++ b/src/Text/Pandoc.hs
@@ -0,0 +1,110 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+This helper module exports the main writers, readers, and data
+structure definitions from the Pandoc libraries.
+
+A typical application will chain together a reader and a writer
+to convert strings from one format to another. For example, the
+following simple program will act as a filter converting markdown
+fragments to reStructuredText, using reference-style links instead of
+inline links:
+
+> module Main where
+> import Text.Pandoc
+>
+> markdownToRST :: String -> String
+> markdownToRST = toUTF8 .
+> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
+> (readMarkdown defaultParserState) . fromUTF8
+>
+> main = interact markdownToRST
+
+-}
+
+module Text.Pandoc
+ (
+ -- * Definitions
+ module Text.Pandoc.Definition
+ -- * Readers: converting /to/ Pandoc format
+ , readMarkdown
+ , readRST
+ , readLaTeX
+ , readHtml
+ -- * Parser state used in readers
+ , ParserState (..)
+ , defaultParserState
+ , ParserContext (..)
+ , QuoteContext (..)
+ , KeyTable
+ , NoteTable
+ , HeaderType (..)
+ -- * Writers: converting /from/ Pandoc format
+ , writeMarkdown
+ , writeRST
+ , writeLaTeX
+ , writeConTeXt
+ , writeHtml
+ , writeHtmlString
+ , writeS5
+ , writeS5String
+ , writeDocbook
+ , writeMan
+ , writeRTF
+ , prettyPandoc
+ -- * Writer options used in writers
+ , WriterOptions (..)
+ , defaultWriterOptions
+ -- * Default headers for various output formats
+ , module Text.Pandoc.Writers.DefaultHeaders
+ -- * Functions for converting to and from UTF-8
+ , module Text.Pandoc.UTF8
+ -- * Version
+ , pandocVersion
+ ) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Readers.Markdown
+import Text.Pandoc.Readers.RST
+import Text.Pandoc.Readers.LaTeX
+import Text.Pandoc.Readers.HTML
+import Text.Pandoc.Writers.Markdown
+import Text.Pandoc.Writers.RST
+import Text.Pandoc.Writers.LaTeX
+import Text.Pandoc.Writers.ConTeXt
+import Text.Pandoc.Writers.HTML
+import Text.Pandoc.Writers.S5
+import Text.Pandoc.Writers.Docbook
+import Text.Pandoc.Writers.Man
+import Text.Pandoc.Writers.RTF
+import Text.Pandoc.Writers.DefaultHeaders
+import Text.Pandoc.UTF8
+import Text.Pandoc.Shared
+
+-- | Version number of pandoc library.
+pandocVersion :: String
+pandocVersion = "0.45"
diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs
new file mode 100644
index 000000000..cfc22cb3e
--- /dev/null
+++ b/src/Text/Pandoc/Blocks.hs
@@ -0,0 +1,145 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Blocks
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for the manipulation of fixed-width blocks of text.
+These are used in the construction of plain-text tables.
+-}
+
+module Text.Pandoc.Blocks
+ (
+ TextBlock (..),
+ docToBlock,
+ blockToDoc,
+ widthOfBlock,
+ heightOfBlock,
+ hcatBlocks,
+ hsepBlocks,
+ centerAlignBlock,
+ leftAlignBlock,
+ rightAlignBlock
+ )
+where
+import Text.PrettyPrint
+import Data.List ( intersperse )
+
+-- | A fixed-width block of text. Parameters are width of block,
+-- height of block, and list of lines.
+data TextBlock = TextBlock Int Int [String]
+instance Show TextBlock where
+ show x = show $ blockToDoc x
+
+-- | Break lines in a list of lines so that none are greater than
+-- a given width.
+breakLines :: Int -- ^ Maximum length of lines.
+ -> [String] -- ^ List of lines.
+ -> [String]
+breakLines width [] = []
+breakLines width (l:ls) =
+ if length l > width
+ then (take width l):(breakLines width ((drop width l):ls))
+ else l:(breakLines width ls)
+
+-- | Convert a @Doc@ element into a @TextBlock@ with a specified width.
+docToBlock :: Int -- ^ Width of text block.
+ -> Doc -- ^ @Doc@ to convert.
+ -> TextBlock
+docToBlock width doc =
+ let rendered = renderStyle (style {lineLength = width,
+ ribbonsPerLine = 1}) doc
+ lns = breakLines width $ lines rendered
+ in TextBlock width (length lns) lns
+
+-- | Convert a @TextBlock@ to a @Doc@ element.
+blockToDoc :: TextBlock -> Doc
+blockToDoc (TextBlock _ _ lns) =
+ if null lns
+ then empty
+ else vcat $ map text lns
+
+-- | Returns width of a @TextBlock@ (number of columns).
+widthOfBlock :: TextBlock -> Int
+widthOfBlock (TextBlock width _ _) = width
+
+-- | Returns height of a @TextBlock@ (number of rows).
+heightOfBlock :: TextBlock -> Int
+heightOfBlock (TextBlock _ height _) = height
+
+-- | Pads a string out to a given width using spaces.
+hPad :: Int -- ^ Desired width.
+ -> String -- ^ String to pad.
+ -> String
+hPad width line =
+ let lineLength = length line
+ in if lineLength <= width
+ then line ++ replicate (width - lineLength) ' '
+ else take width line
+
+-- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in
+-- which they appear side by side.
+hcatBlocks :: [TextBlock] -> TextBlock
+hcatBlocks [] = TextBlock 0 0 []
+hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd.
+hcatBlocks ((TextBlock width1 height1 lns1):xs) =
+ let (TextBlock width2 height2 lns2) = hcatBlocks xs
+ height = max height1 height2
+ width = width1 + width2
+ lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) ""
+ lns2' = lns2 ++ replicate (height - height2) ""
+ lns = zipWith (++) lns1' lns2'
+ in TextBlock width height lns
+
+-- | Like @hcatBlocks@, but inserts space between the @TextBlock@s.
+hsepBlocks :: [TextBlock] -> TextBlock
+hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "]))
+
+isWhitespace x = x `elem` " \t"
+
+-- | Left-aligns the contents of a @TextBlock@ within the block.
+leftAlignBlock :: TextBlock -> TextBlock
+leftAlignBlock (TextBlock width height lns) =
+ TextBlock width height $ map (dropWhile isWhitespace) lns
+
+-- | Right-aligns the contents of a @TextBlock@ within the block.
+rightAlignBlock :: TextBlock -> TextBlock
+rightAlignBlock (TextBlock width height lns) =
+ let rightAlignLine ln =
+ let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln
+ in reverse (rest ++ spaces)
+ in TextBlock width height $ map rightAlignLine lns
+
+-- | Centers the contents of a @TextBlock@ within the block.
+centerAlignBlock :: TextBlock -> TextBlock
+centerAlignBlock (TextBlock width height lns) =
+ let centerAlignLine ln =
+ let ln' = hPad width ln
+ (startSpaces, rest) = span isWhitespace ln'
+ endSpaces = takeWhile isWhitespace (reverse ln')
+ numSpaces = length (startSpaces ++ endSpaces)
+ startSpaces' = replicate (quot numSpaces 2) ' '
+ in startSpaces' ++ rest
+ in TextBlock width height $ map centerAlignLine lns
+
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
new file mode 100644
index 000000000..466f5d8f4
--- /dev/null
+++ b/src/Text/Pandoc/CharacterReferences.hs
@@ -0,0 +1,327 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.CharacterReferences
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for parsing character references.
+-}
+module Text.Pandoc.CharacterReferences (
+ characterReference,
+ decodeCharacterReferences,
+ ) where
+import Data.Char ( chr )
+import Text.ParserCombinators.Parsec
+import qualified Data.Map as Map
+
+-- | Parse character entity.
+characterReference :: GenParser Char st Char
+characterReference = try $ do
+ st <- char '&'
+ character <- numRef <|> entity
+ end <- char ';'
+ return character
+
+numRef :: GenParser Char st Char
+numRef = do
+ char '#'
+ num <- hexNum <|> decNum
+ return $ chr $ num
+
+hexNum :: GenParser Char st Int
+hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++)
+
+decNum :: GenParser Char st Int
+decNum = many1 digit >>= return . read
+
+entity :: GenParser Char st Char
+entity = do
+ body <- many1 alphaNum
+ return $ Map.findWithDefault '?' body entityTable
+
+-- | Convert entities in a string to characters.
+decodeCharacterReferences :: String -> String
+decodeCharacterReferences str =
+ case parse (many (characterReference <|> anyChar)) str str of
+ Left err -> error $ "\nError: " ++ show err
+ Right result -> result
+
+entityTable :: Map.Map String Char
+entityTable = Map.fromList entityTableList
+
+entityTableList :: [(String, Char)]
+entityTableList = [
+ ("quot", chr 34),
+ ("amp", chr 38),
+ ("lt", chr 60),
+ ("gt", chr 62),
+ ("nbsp", chr 160),
+ ("iexcl", chr 161),
+ ("cent", chr 162),
+ ("pound", chr 163),
+ ("curren", chr 164),
+ ("yen", chr 165),
+ ("brvbar", chr 166),
+ ("sect", chr 167),
+ ("uml", chr 168),
+ ("copy", chr 169),
+ ("ordf", chr 170),
+ ("laquo", chr 171),
+ ("not", chr 172),
+ ("shy", chr 173),
+ ("reg", chr 174),
+ ("macr", chr 175),
+ ("deg", chr 176),
+ ("plusmn", chr 177),
+ ("sup2", chr 178),
+ ("sup3", chr 179),
+ ("acute", chr 180),
+ ("micro", chr 181),
+ ("para", chr 182),
+ ("middot", chr 183),
+ ("cedil", chr 184),
+ ("sup1", chr 185),
+ ("ordm", chr 186),
+ ("raquo", chr 187),
+ ("frac14", chr 188),
+ ("frac12", chr 189),
+ ("frac34", chr 190),
+ ("iquest", chr 191),
+ ("Agrave", chr 192),
+ ("Aacute", chr 193),
+ ("Acirc", chr 194),
+ ("Atilde", chr 195),
+ ("Auml", chr 196),
+ ("Aring", chr 197),
+ ("AElig", chr 198),
+ ("Ccedil", chr 199),
+ ("Egrave", chr 200),
+ ("Eacute", chr 201),
+ ("Ecirc", chr 202),
+ ("Euml", chr 203),
+ ("Igrave", chr 204),
+ ("Iacute", chr 205),
+ ("Icirc", chr 206),
+ ("Iuml", chr 207),
+ ("ETH", chr 208),
+ ("Ntilde", chr 209),
+ ("Ograve", chr 210),
+ ("Oacute", chr 211),
+ ("Ocirc", chr 212),
+ ("Otilde", chr 213),
+ ("Ouml", chr 214),
+ ("times", chr 215),
+ ("Oslash", chr 216),
+ ("Ugrave", chr 217),
+ ("Uacute", chr 218),
+ ("Ucirc", chr 219),
+ ("Uuml", chr 220),
+ ("Yacute", chr 221),
+ ("THORN", chr 222),
+ ("szlig", chr 223),
+ ("agrave", chr 224),
+ ("aacute", chr 225),
+ ("acirc", chr 226),
+ ("atilde", chr 227),
+ ("auml", chr 228),
+ ("aring", chr 229),
+ ("aelig", chr 230),
+ ("ccedil", chr 231),
+ ("egrave", chr 232),
+ ("eacute", chr 233),
+ ("ecirc", chr 234),
+ ("euml", chr 235),
+ ("igrave", chr 236),
+ ("iacute", chr 237),
+ ("icirc", chr 238),
+ ("iuml", chr 239),
+ ("eth", chr 240),
+ ("ntilde", chr 241),
+ ("ograve", chr 242),
+ ("oacute", chr 243),
+ ("ocirc", chr 244),
+ ("otilde", chr 245),
+ ("ouml", chr 246),
+ ("divide", chr 247),
+ ("oslash", chr 248),
+ ("ugrave", chr 249),
+ ("uacute", chr 250),
+ ("ucirc", chr 251),
+ ("uuml", chr 252),
+ ("yacute", chr 253),
+ ("thorn", chr 254),
+ ("yuml", chr 255),
+ ("OElig", chr 338),
+ ("oelig", chr 339),
+ ("Scaron", chr 352),
+ ("scaron", chr 353),
+ ("Yuml", chr 376),
+ ("fnof", chr 402),
+ ("circ", chr 710),
+ ("tilde", chr 732),
+ ("Alpha", chr 913),
+ ("Beta", chr 914),
+ ("Gamma", chr 915),
+ ("Delta", chr 916),
+ ("Epsilon", chr 917),
+ ("Zeta", chr 918),
+ ("Eta", chr 919),
+ ("Theta", chr 920),
+ ("Iota", chr 921),
+ ("Kappa", chr 922),
+ ("Lambda", chr 923),
+ ("Mu", chr 924),
+ ("Nu", chr 925),
+ ("Xi", chr 926),
+ ("Omicron", chr 927),
+ ("Pi", chr 928),
+ ("Rho", chr 929),
+ ("Sigma", chr 931),
+ ("Tau", chr 932),
+ ("Upsilon", chr 933),
+ ("Phi", chr 934),
+ ("Chi", chr 935),
+ ("Psi", chr 936),
+ ("Omega", chr 937),
+ ("alpha", chr 945),
+ ("beta", chr 946),
+ ("gamma", chr 947),
+ ("delta", chr 948),
+ ("epsilon", chr 949),
+ ("zeta", chr 950),
+ ("eta", chr 951),
+ ("theta", chr 952),
+ ("iota", chr 953),
+ ("kappa", chr 954),
+ ("lambda", chr 955),
+ ("mu", chr 956),
+ ("nu", chr 957),
+ ("xi", chr 958),
+ ("omicron", chr 959),
+ ("pi", chr 960),
+ ("rho", chr 961),
+ ("sigmaf", chr 962),
+ ("sigma", chr 963),
+ ("tau", chr 964),
+ ("upsilon", chr 965),
+ ("phi", chr 966),
+ ("chi", chr 967),
+ ("psi", chr 968),
+ ("omega", chr 969),
+ ("thetasym", chr 977),
+ ("upsih", chr 978),
+ ("piv", chr 982),
+ ("ensp", chr 8194),
+ ("emsp", chr 8195),
+ ("thinsp", chr 8201),
+ ("zwnj", chr 8204),
+ ("zwj", chr 8205),
+ ("lrm", chr 8206),
+ ("rlm", chr 8207),
+ ("ndash", chr 8211),
+ ("mdash", chr 8212),
+ ("lsquo", chr 8216),
+ ("rsquo", chr 8217),
+ ("sbquo", chr 8218),
+ ("ldquo", chr 8220),
+ ("rdquo", chr 8221),
+ ("bdquo", chr 8222),
+ ("dagger", chr 8224),
+ ("Dagger", chr 8225),
+ ("bull", chr 8226),
+ ("hellip", chr 8230),
+ ("permil", chr 8240),
+ ("prime", chr 8242),
+ ("Prime", chr 8243),
+ ("lsaquo", chr 8249),
+ ("rsaquo", chr 8250),
+ ("oline", chr 8254),
+ ("frasl", chr 8260),
+ ("euro", chr 8364),
+ ("image", chr 8465),
+ ("weierp", chr 8472),
+ ("real", chr 8476),
+ ("trade", chr 8482),
+ ("alefsym", chr 8501),
+ ("larr", chr 8592),
+ ("uarr", chr 8593),
+ ("rarr", chr 8594),
+ ("darr", chr 8595),
+ ("harr", chr 8596),
+ ("crarr", chr 8629),
+ ("lArr", chr 8656),
+ ("uArr", chr 8657),
+ ("rArr", chr 8658),
+ ("dArr", chr 8659),
+ ("hArr", chr 8660),
+ ("forall", chr 8704),
+ ("part", chr 8706),
+ ("exist", chr 8707),
+ ("empty", chr 8709),
+ ("nabla", chr 8711),
+ ("isin", chr 8712),
+ ("notin", chr 8713),
+ ("ni", chr 8715),
+ ("prod", chr 8719),
+ ("sum", chr 8721),
+ ("minus", chr 8722),
+ ("lowast", chr 8727),
+ ("radic", chr 8730),
+ ("prop", chr 8733),
+ ("infin", chr 8734),
+ ("ang", chr 8736),
+ ("and", chr 8743),
+ ("or", chr 8744),
+ ("cap", chr 8745),
+ ("cup", chr 8746),
+ ("int", chr 8747),
+ ("there4", chr 8756),
+ ("sim", chr 8764),
+ ("cong", chr 8773),
+ ("asymp", chr 8776),
+ ("ne", chr 8800),
+ ("equiv", chr 8801),
+ ("le", chr 8804),
+ ("ge", chr 8805),
+ ("sub", chr 8834),
+ ("sup", chr 8835),
+ ("nsub", chr 8836),
+ ("sube", chr 8838),
+ ("supe", chr 8839),
+ ("oplus", chr 8853),
+ ("otimes", chr 8855),
+ ("perp", chr 8869),
+ ("sdot", chr 8901),
+ ("lceil", chr 8968),
+ ("rceil", chr 8969),
+ ("lfloor", chr 8970),
+ ("rfloor", chr 8971),
+ ("lang", chr 9001),
+ ("rang", chr 9002),
+ ("loz", chr 9674),
+ ("spades", chr 9824),
+ ("clubs", chr 9827),
+ ("hearts", chr 9829),
+ ("diams", chr 9830)
+ ]
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
new file mode 100644
index 000000000..7d1125c5a
--- /dev/null
+++ b/src/Text/Pandoc/Definition.hs
@@ -0,0 +1,116 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Definition
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definition of 'Pandoc' data structure for format-neutral representation
+of documents.
+-}
+module Text.Pandoc.Definition where
+
+data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show)
+
+-- | Bibliographic information for the document: title (list of 'Inline'),
+-- authors (list of strings), date (string).
+data Meta = Meta [Inline] -- title
+ [String] -- authors
+ String -- date
+ deriving (Eq, Show, Read)
+
+-- | Alignment of a table column.
+data Alignment = AlignLeft
+ | AlignRight
+ | AlignCenter
+ | AlignDefault deriving (Eq, Show, Read)
+
+-- | List attributes.
+type ListAttributes = (Int, ListNumberStyle, ListNumberDelim)
+
+-- | Style of list numbers.
+data ListNumberStyle = DefaultStyle
+ | Decimal
+ | LowerRoman
+ | UpperRoman
+ | LowerAlpha
+ | UpperAlpha deriving (Eq, Show, Read)
+
+-- | Delimiter of list numbers.
+data ListNumberDelim = DefaultDelim
+ | Period
+ | OneParen
+ | TwoParens deriving (Eq, Show, Read)
+
+-- | Block element.
+data Block
+ = Plain [Inline] -- ^ Plain text, not a paragraph
+ | Para [Inline] -- ^ Paragraph
+ | CodeBlock String -- ^ Code block (literal)
+ | RawHtml String -- ^ Raw HTML block (literal)
+ | BlockQuote [Block] -- ^ Block quote (list of blocks)
+ | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes
+ -- and a list of items, each a list of blocks)
+ | BulletList [[Block]] -- ^ Bullet list (list of items, each
+ -- a list of blocks)
+ | DefinitionList [([Inline],[Block])] -- ^ Definition list
+ -- (list of items, each a pair of an inline list,
+ -- the term, and a block list)
+ | Header Int [Inline] -- ^ Header - level (integer) and text (inlines)
+ | HorizontalRule -- ^ Horizontal rule
+ | Table [Inline] [Alignment] [Float] [[Block]] [[[Block]]] -- ^ Table,
+ -- with caption, column alignments,
+ -- relative column widths, column headers
+ -- (each a list of blocks), and rows
+ -- (each a list of lists of blocks)
+ | Null -- ^ Nothing
+ deriving (Eq, Read, Show)
+
+-- | Type of quotation marks to use in Quoted inline.
+data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read)
+
+type Target = (String, String) -- ^ Link target (URL, title)
+
+-- | Inline elements.
+data Inline
+ = Str String -- ^ Text (string)
+ | Emph [Inline] -- ^ Emphasized text (list of inlines)
+ | Strong [Inline] -- ^ Strongly emphasized text (list of inlines)
+ | Strikeout [Inline] -- ^ Strikeout text (list of inlines)
+ | Superscript [Inline] -- ^ Superscripted text (list of inlines)
+ | Subscript [Inline] -- ^ Subscripted text (list of inlines)
+ | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines)
+ | Code String -- ^ Inline code (literal)
+ | Space -- ^ Inter-word space
+ | EmDash -- ^ Em dash
+ | EnDash -- ^ En dash
+ | Apostrophe -- ^ Apostrophe
+ | Ellipses -- ^ Ellipses
+ | LineBreak -- ^ Hard line break
+ | TeX String -- ^ LaTeX code (literal)
+ | HtmlInline String -- ^ HTML code (literal)
+ | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
+ | Image [Inline] Target -- ^ Image: alt text (list of inlines), target
+ -- and target
+ | Note [Block] -- ^ Footnote or endnote
+ deriving (Show, Eq, Read)
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
new file mode 100644
index 000000000..70a071152
--- /dev/null
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -0,0 +1,496 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.HTML
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of HTML to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.HTML (
+ readHtml,
+ rawHtmlInline,
+ rawHtmlBlock,
+ anyHtmlBlockTag,
+ anyHtmlInlineTag,
+ anyHtmlTag,
+ anyHtmlEndTag,
+ htmlEndTag,
+ extractTagType,
+ htmlBlockElement
+ ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.CharacterReferences ( characterReference,
+ decodeCharacterReferences )
+import Data.Maybe ( fromMaybe )
+import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf )
+import Data.Char ( toUpper, toLower, isAlphaNum )
+
+-- | Convert HTML-formatted string to 'Pandoc' document.
+readHtml :: ParserState -- ^ Parser state
+ -> String -- ^ String to parse
+ -> Pandoc
+readHtml = readWith parseHtml
+
+--
+-- Constants
+--
+
+eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
+ "map", "area", "object", "script"]
+
+inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
+ "br", "cite", "code", "dfn", "em", "font", "i", "img",
+ "input", "kbd", "label", "q", "s", "samp", "select",
+ "small", "span", "strike", "strong", "sub", "sup",
+ "textarea", "tt", "u", "var"] ++ eitherBlockOrInline
+
+blockHtmlTags = ["address", "blockquote", "center", "dir", "div",
+ "dl", "fieldset", "form", "h1", "h2", "h3", "h4",
+ "h5", "h6", "hr", "isindex", "menu", "noframes",
+ "noscript", "ol", "p", "pre", "table", "ul", "dd",
+ "dt", "frameset", "li", "tbody", "td", "tfoot",
+ "th", "thead", "tr"] ++ eitherBlockOrInline
+
+--
+-- HTML utility functions
+--
+
+-- | Read blocks until end tag.
+blocksTilEnd tag = do
+ blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
+ return $ filter (/= Null) blocks
+
+-- | Read inlines until end tag.
+inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
+
+-- | Parse blocks between open and close tag.
+blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
+
+-- | Parse inlines between open and close tag.
+inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
+
+-- | Extract type from a tag: e.g. @br@ from @\<br\>@
+extractTagType :: String -> String
+extractTagType ('<':rest) =
+ let isSpaceOrSlash c = c `elem` "/ \n\t" in
+ map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
+extractTagType _ = ""
+
+-- | Parse any HTML tag (opening or self-closing) and return text of tag
+anyHtmlTag = try $ do
+ char '<'
+ spaces
+ tag <- many1 alphaNum
+ attribs <- many htmlAttribute
+ spaces
+ ender <- option "" (string "/")
+ let ender' = if null ender then "" else " /"
+ spaces
+ char '>'
+ return $ "<" ++ tag ++
+ concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
+
+anyHtmlEndTag = try $ do
+ char '<'
+ spaces
+ char '/'
+ spaces
+ tagType <- many1 alphaNum
+ spaces
+ char '>'
+ return $ "</" ++ tagType ++ ">"
+
+htmlTag :: String -> GenParser Char st (String, [(String, String)])
+htmlTag tag = try $ do
+ char '<'
+ spaces
+ stringAnyCase tag
+ attribs <- many htmlAttribute
+ spaces
+ optional (string "/")
+ spaces
+ char '>'
+ return (tag, (map (\(name, content, raw) -> (name, content)) attribs))
+
+-- parses a quoted html attribute value
+quoted quoteChar = do
+ result <- between (char quoteChar) (char quoteChar)
+ (many (noneOf [quoteChar]))
+ return (result, [quoteChar])
+
+htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute
+
+-- minimized boolean attribute
+htmlMinimizedAttribute = try $ do
+ many1 space
+ name <- many1 (choice [letter, oneOf ".-_:"])
+ return (name, name, name)
+
+htmlRegularAttribute = try $ do
+ many1 space
+ name <- many1 (choice [letter, oneOf ".-_:"])
+ spaces
+ char '='
+ spaces
+ (content, quoteStr) <- choice [ (quoted '\''),
+ (quoted '"'),
+ (do
+ a <- many (alphaNum <|> (oneOf "-._:"))
+ return (a,"")) ]
+ return (name, content,
+ (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
+
+-- | Parse an end tag of type 'tag'
+htmlEndTag tag = try $ do
+ char '<'
+ spaces
+ char '/'
+ spaces
+ stringAnyCase tag
+ spaces
+ char '>'
+ return $ "</" ++ tag ++ ">"
+
+-- | Returns @True@ if the tag is (or can be) an inline tag.
+isInline tag = (extractTagType tag) `elem` inlineHtmlTags
+
+-- | Returns @True@ if the tag is (or can be) a block tag.
+isBlock tag = (extractTagType tag) `elem` blockHtmlTags
+
+anyHtmlBlockTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isBlock tag then return tag else fail "inline tag"
+
+anyHtmlInlineTag = try $ do
+ tag <- anyHtmlTag <|> anyHtmlEndTag
+ if isInline tag then return tag else fail "not an inline tag"
+
+-- | Parses material between script tags.
+-- Scripts must be treated differently, because they can contain '<>' etc.
+htmlScript = try $ do
+ open <- string "<script"
+ rest <- manyTill anyChar (htmlEndTag "script")
+ return $ open ++ rest ++ "</script>"
+
+htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ]
+
+rawHtmlBlock = try $ do
+ notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
+ body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag
+ sp <- many space
+ state <- getState
+ if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null
+
+-- | Parses an HTML comment.
+htmlComment = try $ do
+ string "<!--"
+ comment <- manyTill anyChar (try (string "-->"))
+ return $ "<!--" ++ comment ++ "-->"
+
+--
+-- parsing documents
+--
+
+xmlDec = try $ do
+ string "<?"
+ rest <- manyTill anyChar (char '>')
+ return $ "<?" ++ rest ++ ">"
+
+definition = try $ do
+ string "<!"
+ rest <- manyTill anyChar (char '>')
+ return $ "<!" ++ rest ++ ">"
+
+nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >>
+ ((rawHtmlBlock >> return ' ') <|> anyChar)
+
+parseTitle = try $ do
+ (tag, _) <- htmlTag "title"
+ contents <- inlinesTilEnd tag
+ spaces
+ return contents
+
+-- parse header and return meta-information (for now, just title)
+parseHead = try $ do
+ htmlTag "head"
+ spaces
+ skipMany nonTitleNonHead
+ contents <- option [] parseTitle
+ skipMany nonTitleNonHead
+ htmlTag "/head"
+ return (contents, [], "")
+
+skipHtmlTag tag = optional (htmlTag tag)
+
+-- h1 class="title" representation of title in body
+bodyTitle = try $ do
+ (tag, attribs) <- htmlTag "h1"
+ cl <- case (extractAttribute "class" attribs) of
+ Just "title" -> return ""
+ otherwise -> fail "not title"
+ inlinesTilEnd "h1"
+
+parseHtml = do
+ sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
+ skipHtmlTag "html"
+ spaces
+ (title, authors, date) <- option ([], [], "") parseHead
+ spaces
+ skipHtmlTag "body"
+ spaces
+ optional bodyTitle -- skip title in body, because it's represented in meta
+ blocks <- parseBlocks
+ spaces
+ optional (htmlEndTag "body")
+ spaces
+ optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html>
+ eof
+ return $ Pandoc (Meta title authors date) blocks
+
+--
+-- parsing blocks
+--
+
+parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
+
+block = choice [ codeBlock
+ , header
+ , hrule
+ , list
+ , blockQuote
+ , para
+ , plain
+ , rawHtmlBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
+
+headerLevel n = try $ do
+ let level = "h" ++ show n
+ (tag, attribs) <- htmlTag level
+ contents <- inlinesTilEnd level
+ return $ Header n (normalizeSpaces contents)
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+ (tag, attribs) <- htmlTag "hr"
+ state <- getState
+ if not (null attribs) && stateParseRaw state
+ then unexpected "attributes in hr" -- parse as raw in this case
+ else return HorizontalRule
+
+--
+-- code blocks
+--
+
+-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
+-- skipped, because they are not portable to output formats other than HTML.
+codeBlock = try $ do
+ htmlTag "pre"
+ result <- manyTill
+ (many1 (satisfy (/= '<')) <|>
+ ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
+ (htmlEndTag "pre")
+ let result' = concat result
+ -- drop leading newline if any
+ let result'' = if "\n" `isPrefixOf` result'
+ then drop 1 result'
+ else result'
+ -- drop trailing newline if any
+ let result''' = if "\n" `isSuffixOf` result''
+ then init result''
+ else result''
+ return $ CodeBlock $ decodeCharacterReferences result'''
+
+--
+-- block quotes
+--
+
+blockQuote = try $ htmlTag "blockquote" >> spaces >>
+ blocksTilEnd "blockquote" >>= (return . BlockQuote)
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+orderedList = try $ do
+ (_, attribs) <- htmlTag "ol"
+ (start, style) <- option (1, DefaultStyle) $
+ do failIfStrict
+ let sta = fromMaybe "1" $
+ lookup "start" attribs
+ let sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ let sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
+ return (read sta, sty')
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ol"
+ return $ OrderedList (start, style, DefaultDelim) items
+
+bulletList = try $ do
+ htmlTag "ul"
+ spaces
+ items <- sepEndBy1 (blocksIn "li") spaces
+ htmlEndTag "ul"
+ return $ BulletList items
+
+definitionList = try $ do
+ failIfStrict -- def lists not part of standard markdown
+ tag <- htmlTag "dl"
+ spaces
+ items <- sepEndBy1 definitionListItem spaces
+ htmlEndTag "dl"
+ return $ DefinitionList items
+
+definitionListItem = try $ do
+ terms <- sepEndBy1 (inlinesIn "dt") spaces
+ defs <- sepEndBy1 (blocksIn "dd") spaces
+ let term = joinWithSep [LineBreak] terms
+ return (term, concat defs)
+
+--
+-- paragraph block
+--
+
+para = try $ htmlTag "p" >> inlinesTilEnd "p" >>=
+ return . Para . normalizeSpaces
+
+--
+-- plain block
+--
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- inline
+--
+
+inline = choice [ charRef
+ , strong
+ , emph
+ , superscript
+ , subscript
+ , strikeout
+ , spanStrikeout
+ , code
+ , str
+ , linebreak
+ , whitespace
+ , link
+ , image
+ , rawHtmlInline
+ ] <?> "inline"
+
+code = try $ do
+ htmlTag "code"
+ result <- manyTill anyChar (htmlEndTag "code")
+ -- remove internal line breaks, leading and trailing space,
+ -- and decode character references
+ return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
+ joinWithSep " " $ lines result
+
+rawHtmlInline = do
+ result <- htmlScript <|> htmlComment <|> anyHtmlInlineTag
+ state <- getState
+ if stateParseRaw state then return (HtmlInline result) else return (Str "")
+
+betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
+ return . normalizeSpaces
+
+emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph
+
+strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
+
+superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
+
+subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
+
+strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
+ return . Strikeout
+
+spanStrikeout = try $ do
+ failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
+ (tag, attributes) <- htmlTag "span"
+ result <- case (extractAttribute "class" attributes) of
+ Just "strikeout" -> inlinesTilEnd "span"
+ _ -> fail "not a strikeout"
+ return $ Strikeout result
+
+whitespace = many1 space >> return Space
+
+-- hard line break
+linebreak = htmlTag "br" >> optional newline >> return LineBreak
+
+str = many1 (noneOf "<& \t\n") >>= return . Str
+
+--
+-- links and images
+--
+
+-- extract contents of attribute (attribute names are case-insensitive)
+extractAttribute name [] = Nothing
+extractAttribute name ((attrName, contents):rest) =
+ let name' = map toLower name
+ attrName' = map toLower attrName
+ in if attrName' == name'
+ then Just (decodeCharacterReferences contents)
+ else extractAttribute name rest
+
+link = try $ do
+ (tag, attributes) <- htmlTag "a"
+ url <- case (extractAttribute "href" attributes) of
+ Just url -> return url
+ Nothing -> fail "no href"
+ let title = fromMaybe "" $ extractAttribute "title" attributes
+ label <- inlinesTilEnd "a"
+ return $ Link (normalizeSpaces label) (url, title)
+
+image = try $ do
+ (tag, attributes) <- htmlTag "img"
+ url <- case (extractAttribute "src" attributes) of
+ Just url -> return url
+ Nothing -> fail "no src"
+ let title = fromMaybe "" $ extractAttribute "title" attributes
+ let alt = fromMaybe "" (extractAttribute "alt" attributes)
+ return $ Image [Str alt] (url, title)
+
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
new file mode 100644
index 000000000..37cc2bfe4
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -0,0 +1,651 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.LaTeX
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of LaTeX to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.LaTeX (
+ readLaTeX,
+ rawLaTeXInline,
+ rawLaTeXEnvironment
+ ) where
+
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Data.Maybe ( fromMaybe )
+import Data.Char ( chr )
+import Data.List ( isPrefixOf, isSuffixOf )
+
+-- | Parse LaTeX from string and return 'Pandoc' document.
+readLaTeX :: ParserState -- ^ Parser state, including options for parser
+ -> String -- ^ String to parse
+ -> Pandoc
+readLaTeX = readWith parseLaTeX
+
+-- characters with special meaning
+specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
+
+--
+-- utility functions
+--
+
+-- | Returns text between brackets and its matching pair.
+bracketedText openB closeB = do
+ result <- charsInBalanced' openB closeB
+ return $ [openB] ++ result ++ [closeB]
+
+-- | Returns an option or argument of a LaTeX command.
+optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
+
+-- | True if the string begins with '{'.
+isArg ('{':rest) = True
+isArg other = False
+
+-- | Returns list of options and arguments of a LaTeX command.
+commandArgs = many optOrArg
+
+-- | Parses LaTeX command, returns (name, star, list of options or arguments).
+command = do
+ char '\\'
+ name <- many1 letter
+ star <- option "" (string "*") -- some commands have starred versions
+ args <- commandArgs
+ return (name, star, args)
+
+begin name = try $ do
+ string $ "\\begin{" ++ name ++ "}"
+ optional commandArgs
+ spaces
+ return name
+
+end name = try $ do
+ string $ "\\end{" ++ name ++ "}"
+ spaces
+ return name
+
+-- | Returns a list of block elements containing the contents of an
+-- environment.
+environment name = try $ begin name >> spaces >> manyTill block (end name)
+
+anyEnvironment = try $ do
+ string "\\begin{"
+ name <- many letter
+ star <- option "" (string "*") -- some environments have starred variants
+ char '}'
+ optional commandArgs
+ spaces
+ contents <- manyTill block (end (name ++ star))
+ return $ BlockQuote contents
+
+--
+-- parsing documents
+--
+
+-- | Process LaTeX preamble, extracting metadata.
+processLaTeXPreamble = try $ manyTill
+ (choice [bibliographic, comment, unknownCommand, nullBlock])
+ (try (string "\\begin{document}")) >>
+ spaces
+
+-- | Parse LaTeX and return 'Pandoc'.
+parseLaTeX = do
+ optional processLaTeXPreamble -- preamble might not be present (fragment)
+ spaces
+ blocks <- parseBlocks
+ spaces
+ optional $ try (string "\\end{document}" >> many anyChar)
+ -- might not be present (fragment)
+ spaces
+ eof
+ state <- getState
+ let blocks' = filter (/= Null) blocks
+ let title' = stateTitle state
+ let authors' = stateAuthors state
+ let date' = stateDate state
+ return $ Pandoc (Meta title' authors' date') blocks'
+
+--
+-- parsing blocks
+--
+
+parseBlocks = spaces >> many block
+
+block = choice [ hrule
+ , codeBlock
+ , header
+ , list
+ , blockQuote
+ , mathBlock
+ , comment
+ , bibliographic
+ , para
+ , specialEnvironment
+ , itemBlock
+ , unknownEnvironment
+ , unknownCommand ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = try $ do
+ char '\\'
+ subs <- many (try (string "sub"))
+ string "section"
+ optional (char '*')
+ char '{'
+ title <- manyTill inline (char '}')
+ spaces
+ return $ Header (length subs + 1) (normalizeSpaces title)
+
+--
+-- hrule block
+--
+
+hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
+ "\\newpage" ] >> spaces >> return HorizontalRule
+
+--
+-- code blocks
+--
+
+codeBlock = codeBlock1 <|> codeBlock2
+
+codeBlock1 = try $ do
+ string "\\begin{verbatim}" -- don't use begin function because it
+ -- gobbles whitespace
+ optional blanklines -- we want to gobble blank lines, but not
+ -- leading space
+ contents <- manyTill anyChar (try (string "\\end{verbatim}"))
+ spaces
+ return $ CodeBlock (stripTrailingNewlines contents)
+
+codeBlock2 = try $ do
+ string "\\begin{Verbatim}" -- used by fancyvrb package
+ option "" blanklines
+ contents <- manyTill anyChar (try (string "\\end{Verbatim}"))
+ spaces
+ return $ CodeBlock (stripTrailingNewlines contents)
+
+--
+-- block quotes
+--
+
+blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
+ return . BlockQuote
+
+--
+-- math block
+--
+
+mathBlock = mathBlockWith (begin "equation") (end "equation") <|>
+ mathBlockWith (begin "displaymath") (end "displaymath") <|>
+ mathBlockWith (string "\\[") (string "\\]") <?> "math block"
+
+mathBlockWith start end = try $ do
+ start
+ spaces
+ result <- manyTill anyChar end
+ spaces
+ return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]]
+
+--
+-- list blocks
+--
+
+list = bulletList <|> orderedList <|> definitionList <?> "list"
+
+listItem = try $ do
+ ("item", _, args) <- command
+ spaces
+ state <- getState
+ let oldParserContext = stateParserContext state
+ updateState (\state -> state {stateParserContext = ListItemState})
+ blocks <- many block
+ updateState (\state -> state {stateParserContext = oldParserContext})
+ opt <- case args of
+ ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
+ parseFromString (many inline) $ tail $ init x
+ _ -> return []
+ return (opt, blocks)
+
+orderedList = try $ do
+ string "\\begin{enumerate}"
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+ try $ do failIfStrict
+ char '['
+ res <- anyOrderedListMarker
+ char ']'
+ return res
+ spaces
+ option "" $ try $ do string "\\setlength{\\itemindent}"
+ char '{'
+ manyTill anyChar (char '}')
+ spaces
+ start <- option 1 $ try $ do failIfStrict
+ string "\\setcounter{enum"
+ many1 (oneOf "iv")
+ string "}{"
+ num <- many1 digit
+ char '}'
+ spaces
+ return $ (read num) + 1
+ items <- many listItem
+ end "enumerate"
+ spaces
+ return $ OrderedList (start, style, delim) $ map snd items
+
+bulletList = try $ do
+ begin "itemize"
+ spaces
+ items <- many listItem
+ end "itemize"
+ spaces
+ return (BulletList $ map snd items)
+
+definitionList = try $ do
+ begin "description"
+ spaces
+ items <- many listItem
+ end "description"
+ spaces
+ return (DefinitionList items)
+
+--
+-- paragraph block
+--
+
+para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces
+
+--
+-- title authors date
+--
+
+bibliographic = choice [ maketitle, title, authors, date ]
+
+maketitle = try (string "\\maketitle") >> spaces >> return Null
+
+title = try $ do
+ string "\\title{"
+ tit <- manyTill inline (char '}')
+ spaces
+ updateState (\state -> state { stateTitle = tit })
+ return Null
+
+authors = try $ do
+ string "\\author{"
+ authors <- manyTill anyChar (char '}')
+ spaces
+ let authors' = map removeLeadingTrailingSpace $ lines $
+ substitute "\\\\" "\n" authors
+ updateState (\state -> state { stateAuthors = authors' })
+ return Null
+
+date = try $ do
+ string "\\date{"
+ date' <- manyTill anyChar (char '}')
+ spaces
+ updateState (\state -> state { stateDate = date' })
+ return Null
+
+--
+-- item block
+-- for use in unknown environments that aren't being parsed as raw latex
+--
+
+-- this forces items to be parsed in different blocks
+itemBlock = try $ do
+ ("item", _, args) <- command
+ state <- getState
+ if (stateParserContext state == ListItemState)
+ then fail "item should be handled by list block"
+ else if null args
+ then return Null
+ else return $ Plain [Str (stripFirstAndLast (head args))]
+
+--
+-- raw LaTeX
+--
+
+specialEnvironment = do -- these are always parsed as raw
+ lookAhead (choice (map (\name -> begin name) ["tabular", "figure",
+ "tabbing", "eqnarry", "picture", "table", "verse", "theorem"]))
+ rawLaTeXEnvironment
+
+-- | Parse any LaTeX environment and return a Para block containing
+-- the whole literal environment as raw TeX.
+rawLaTeXEnvironment :: GenParser Char st Block
+rawLaTeXEnvironment = try $ do
+ string "\\begin{"
+ name <- many1 letter
+ star <- option "" (string "*") -- for starred variants
+ let name' = name ++ star
+ char '}'
+ args <- option [] commandArgs
+ let argStr = concat args
+ contents <- manyTill (choice [ (many1 (noneOf "\\")),
+ (do
+ (Para [TeX str]) <- rawLaTeXEnvironment
+ return str),
+ string "\\" ])
+ (end name')
+ spaces
+ return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++
+ concat contents ++ "\\end{" ++ name' ++ "}"]
+
+unknownEnvironment = try $ do
+ state <- getState
+ result <- if stateParseRaw state -- check whether we should include raw TeX
+ then rawLaTeXEnvironment -- if so, get whole raw environment
+ else anyEnvironment -- otherwise just the contents
+ return result
+
+unknownCommand = try $ do
+ notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
+ "document"]
+ (name, star, args) <- command
+ spaces
+ let argStr = concat args
+ state <- getState
+ if name == "item" && (stateParserContext state) == ListItemState
+ then fail "should not be parsed as raw"
+ else string ""
+ if stateParseRaw state
+ then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)]
+ else return $ Plain [Str (joinWithSep " " args)]
+
+-- latex comment
+comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
+
+--
+-- inline
+--
+
+inline = choice [ str
+ , endline
+ , whitespace
+ , quoted
+ , apostrophe
+ , spacer
+ , strong
+ , math
+ , ellipses
+ , emDash
+ , enDash
+ , hyphen
+ , emph
+ , strikeout
+ , superscript
+ , subscript
+ , ref
+ , lab
+ , code
+ , url
+ , link
+ , image
+ , footnote
+ , linebreak
+ , accentedChar
+ , specialChar
+ , rawLaTeXInline
+ , escapedChar
+ , unescapedChar
+ ] <?> "inline"
+
+accentedChar = normalAccentedChar <|> specialAccentedChar
+
+normalAccentedChar = try $ do
+ char '\\'
+ accent <- oneOf "'`^\"~"
+ character <- (try $ char '{' >> letter >>~ char '}') <|> letter
+ let table = fromMaybe [] $ lookup character accentTable
+ let result = case lookup accent table of
+ Just num -> chr num
+ Nothing -> '?'
+ return $ Str [result]
+
+-- an association list of letters and association list of accents
+-- and decimal character numbers.
+accentTable =
+ [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
+ ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
+ ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
+ ('N', [('~', 209)]),
+ ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
+ ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
+ ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
+ ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
+ ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
+ ('n', [('~', 241)]),
+ ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
+ ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
+
+specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
+ oslash, pound, euro, copyright, sect ]
+
+ccedil = try $ do
+ char '\\'
+ letter <- oneOfStrings ["cc", "cC"]
+ let num = if letter == "cc" then 231 else 199
+ return $ Str [chr num]
+
+aring = try $ do
+ char '\\'
+ letter <- oneOfStrings ["aa", "AA"]
+ let num = if letter == "aa" then 229 else 197
+ return $ Str [chr num]
+
+iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 239])
+
+icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >>
+ return (Str [chr 238])
+
+szlig = try (string "\\ss") >> return (Str [chr 223])
+
+oslash = try $ do
+ char '\\'
+ letter <- choice [char 'o', char 'O']
+ let num = if letter == 'o' then 248 else 216
+ return $ Str [chr num]
+
+aelig = try $ do
+ char '\\'
+ letter <- oneOfStrings ["ae", "AE"]
+ let num = if letter == "ae" then 230 else 198
+ return $ Str [chr num]
+
+pound = try (string "\\pounds") >> return (Str [chr 163])
+
+euro = try (string "\\euro") >> return (Str [chr 8364])
+
+copyright = try (string "\\copyright") >> return (Str [chr 169])
+
+sect = try (string "\\S") >> return (Str [chr 167])
+
+escapedChar = do
+ result <- escaped (oneOf " $%&_#{}\n")
+ return $ if result == Str "\n" then Str " " else result
+
+-- ignore standalone, nonescaped special characters
+unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "")
+
+specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ]
+
+backslash = try (string "\\textbackslash") >> return (Str "\\")
+
+tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
+
+caret = try (string "\\^{}") >> return (Str "^")
+
+bar = try (string "\\textbar") >> return (Str "\\")
+
+lt = try (string "\\textless") >> return (Str "<")
+
+gt = try (string "\\textgreater") >> return (Str ">")
+
+doubleQuote = char '"' >> return (Str "\"")
+
+code = code1 <|> code2
+
+code1 = try $ do
+ string "\\verb"
+ marker <- anyChar
+ result <- manyTill anyChar (char marker)
+ return $ Code $ removeLeadingTrailingSpace result
+
+code2 = try $ do
+ string "\\texttt{"
+ result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
+ return $ Code result
+
+emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
+ manyTill inline (char '}') >>= return . Emph
+
+strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
+ return . Strikeout
+
+superscript = try $ string "\\textsuperscript{" >>
+ manyTill inline (char '}') >>= return . Superscript
+
+-- note: \textsubscript isn't a standard latex command, but we use
+-- a defined version in pandoc.
+subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
+ return . Subscript
+
+apostrophe = char '\'' >> return Apostrophe
+
+quoted = doubleQuoted <|> singleQuoted
+
+singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
+ return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
+ return . Quoted DoubleQuote . normalizeSpaces
+
+singleQuoteStart = char '`'
+
+singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
+
+doubleQuoteStart = string "``"
+
+doubleQuoteEnd = try $ string "''"
+
+ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >>
+ return Ellipses
+
+enDash = try (string "--") >> return EnDash
+
+emDash = try (string "---") >> return EmDash
+
+hyphen = char '-' >> return (Str "-")
+
+lab = try $ do
+ string "\\label{"
+ result <- manyTill anyChar (char '}')
+ return $ Str $ "(" ++ result ++ ")"
+
+ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
+
+strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
+ return . Strong
+
+whitespace = many1 (oneOf "~ \t") >> return Space
+
+-- hard line break
+linebreak = try (string "\\\\") >> return LineBreak
+
+spacer = try (string "\\,") >> return (Str "")
+
+str = many1 (noneOf specialChars) >>= return . Str
+
+-- endline internal to paragraph
+endline = try $ newline >> notFollowedBy blankline >> return Space
+
+-- math
+math = math1 <|> math2 <?> "math"
+
+math1 = try $ do
+ char '$'
+ result <- many (noneOf "$")
+ char '$'
+ return $ TeX ("$" ++ result ++ "$")
+
+math2 = try $ do
+ string "\\("
+ result <- many (noneOf "$")
+ string "\\)"
+ return $ TeX ("$" ++ result ++ "$")
+
+--
+-- links and images
+--
+
+url = try $ do
+ string "\\url"
+ url <- charsInBalanced '{' '}'
+ return $ Link [Code url] (url, "")
+
+link = try $ do
+ string "\\href{"
+ url <- manyTill anyChar (char '}')
+ char '{'
+ label <- manyTill inline (char '}')
+ return $ Link (normalizeSpaces label) (url, "")
+
+image = try $ do
+ ("includegraphics", _, args) <- command
+ let args' = filter isArg args -- filter out options
+ let src = if null args' then
+ ("", "")
+ else
+ (stripFirstAndLast (head args'), "")
+ return $ Image [Str "image"] src
+
+footnote = try $ do
+ (name, _, (contents:[])) <- command
+ if ((name == "footnote") || (name == "thanks"))
+ then string ""
+ else fail "not a footnote or thanks command"
+ let contents' = stripFirstAndLast contents
+ -- parse the extracted block, which may contain various block elements:
+ rest <- getInput
+ setInput $ contents'
+ blocks <- parseBlocks
+ setInput rest
+ return $ Note blocks
+
+-- | Parse any LaTeX command and return it in a raw TeX inline element.
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = try $ do
+ (name, star, args) <- command
+ state <- getState
+ if ((name == "begin") || (name == "end") || (name == "item"))
+ then fail "not an inline command"
+ else string ""
+ return $ TeX ("\\" ++ name ++ star ++ concat args)
+
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
new file mode 100644
index 000000000..df84c0ac7
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -0,0 +1,909 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Markdown
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of markdown-formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Markdown (
+ readMarkdown
+ ) where
+
+import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy )
+import Data.Ord ( comparing )
+import Data.Char ( isAlphaNum )
+import Network.URI ( isURI )
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
+import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
+ anyHtmlInlineTag, anyHtmlTag,
+ anyHtmlEndTag, htmlEndTag, extractTagType,
+ htmlBlockElement )
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.ParserCombinators.Parsec
+
+-- | Read markdown from an input string and return a Pandoc document.
+readMarkdown :: ParserState -> String -> Pandoc
+readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+--
+
+spaceChars = " \t"
+bulletListMarkers = "*+-"
+hruleChars = "*-_"
+setextHChars = "=-"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = "\\[]*_~`<>$!^-.&'\""
+
+--
+-- auxiliary functions
+--
+
+indentSpaces = try $ do
+ state <- getState
+ let tabStop = stateTabStop state
+ try (count tabStop (char ' ')) <|>
+ (many (char ' ') >> string "\t") <?> "indentation"
+
+nonindentSpaces = do
+ state <- getState
+ let tabStop = stateTabStop state
+ sps <- many (char ' ')
+ if length sps < tabStop
+ then return sps
+ else unexpected "indented line"
+
+-- | Fail unless we're at beginning of a line.
+failUnlessBeginningOfLine = do
+ pos <- getPosition
+ if sourceColumn pos == 1 then return () else fail "not beginning of line"
+
+-- | Fail unless we're in "smart typography" mode.
+failUnlessSmart = do
+ state <- getState
+ if stateSmart state then return () else fail "Smart typography feature"
+
+-- | Parse an inline Str element with a given content.
+inlineString str = try $ do
+ (Str res) <- inline
+ if res == str then return res else fail $ "unexpected Str content"
+
+-- | Parse a sequence of inline elements between a string
+-- @opener@ and a string @closer@, including inlines
+-- between balanced pairs of @opener@ and a @closer@.
+inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
+inlinesInBalanced opener closer = try $ do
+ string opener
+ result <- manyTill ( (do lookAhead (inlineString opener)
+ -- because it might be a link...
+ bal <- inlinesInBalanced opener closer
+ return $ [Str opener] ++ bal ++ [Str closer])
+ <|> (count 1 inline))
+ (try (string closer))
+ return $ concat result
+
+--
+-- document structure
+--
+
+titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
+
+authorsLine = try $ do
+ char '%'
+ skipSpaces
+ authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
+ newline
+ return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
+
+dateLine = try $ do
+ char '%'
+ skipSpaces
+ date <- many (noneOf "\n")
+ newline
+ return $ decodeCharacterReferences $ removeTrailingSpace date
+
+titleBlock = try $ do
+ failIfStrict
+ title <- option [] titleLine
+ author <- option [] authorsLine
+ date <- option "" dateLine
+ optional blanklines
+ return (title, author, date)
+
+parseMarkdown = do
+ -- markdown allows raw HTML
+ updateState (\state -> state { stateParseRaw = True })
+ startPos <- getPosition
+ -- go through once just to get list of reference keys
+ -- docMinusKeys is the raw document with blanks where the keys were...
+ docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
+ return . concat
+ setInput docMinusKeys
+ setPosition startPos
+ st <- getState
+ -- go through again for notes unless strict...
+ if stateStrict st
+ then return ()
+ else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
+ return . concat
+ st <- getState
+ let reversedNotes = stateNotes st
+ updateState $ \st -> st { stateNotes = reverse reversedNotes }
+ setInput docMinusNotes
+ setPosition startPos
+ -- now parse it for real...
+ (title, author, date) <- option ([],[],"") titleBlock
+ blocks <- parseBlocks
+ return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
+
+--
+-- initial pass for references and notes
+--
+
+referenceKey = try $ do
+ startPos <- getPosition
+ nonindentSpaces
+ label <- reference
+ char ':'
+ skipSpaces
+ optional (char '<')
+ src <- many (noneOf "> \n\t")
+ optional (char '>')
+ tit <- option "" referenceTitle
+ blanklines
+ endPos <- getPosition
+ let newkey = (label, (removeTrailingSpace src, tit))
+ st <- getState
+ let oldkeys = stateKeys st
+ updateState $ \st -> st { stateKeys = newkey : oldkeys }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+referenceTitle = try $ do
+ (many1 spaceChar >> option '\n' newline) <|> newline
+ skipSpaces
+ tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
+ <|> do delim <- char '\'' <|> char '"'
+ manyTill anyChar (try (char delim >> skipSpaces >>
+ notFollowedBy (noneOf ")\n")))
+ return $ decodeCharacterReferences tit
+
+noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
+
+rawLine = do
+ notFollowedBy blankline
+ notFollowedBy' noteMarker
+ contents <- many1 nonEndline
+ end <- option "" (newline >> optional indentSpaces >> return "\n")
+ return $ contents ++ end
+
+rawLines = many1 rawLine >>= return . concat
+
+noteBlock = try $ do
+ startPos <- getPosition
+ ref <- noteMarker
+ char ':'
+ optional blankline
+ optional indentSpaces
+ raw <- sepBy rawLines (try (blankline >> indentSpaces))
+ optional blanklines
+ endPos <- getPosition
+ -- parse the extracted text, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
+ let newnote = (ref, contents)
+ st <- getState
+ let oldnotes = stateNotes st
+ updateState $ \st -> st { stateNotes = newnote : oldnotes }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+--
+-- parsing blocks
+--
+
+parseBlocks = manyTill block eof
+
+block = choice [ header
+ , table
+ , codeBlock
+ , hrule
+ , list
+ , blockQuote
+ , htmlBlock
+ , rawLaTeXEnvironment'
+ , para
+ , plain
+ , nullBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = atxHeader <|> setextHeader <?> "header"
+
+atxHeader = try $ do
+ level <- many1 (char '#') >>= return . length
+ notFollowedBy (char '.' <|> char ')') -- this would be a list
+ skipSpaces
+ text <- manyTill inline atxClosing >>= return . normalizeSpaces
+ return $ Header level text
+
+atxClosing = try $ skipMany (char '#') >> blanklines
+
+setextHeader = try $ do
+ -- first, see if this block has any chance of being a setextHeader:
+ lookAhead (anyLine >> oneOf setextHChars)
+ text <- many1Till inline newline >>= return . normalizeSpaces
+ level <- choice $ zipWith
+ (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev)
+ setextHChars [1..(length setextHChars)]
+ return $ Header level text
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+ skipSpaces
+ start <- oneOf hruleChars
+ count 2 (skipSpaces >> char start)
+ skipMany (skipSpaces >> char start)
+ newline
+ optional blanklines
+ return HorizontalRule
+
+--
+-- code blocks
+--
+
+indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
+
+codeBlock = do
+ contents <- many1 (indentedLine <|>
+ try (do b <- blanklines
+ l <- indentedLine
+ return $ b ++ l))
+ optional blanklines
+ return $ CodeBlock $ stripTrailingNewlines $ concat contents
+
+--
+-- block quotes
+--
+
+emacsBoxQuote = try $ do
+ failIfStrict
+ string ",----"
+ manyTill anyChar newline
+ raw <- manyTill
+ (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
+ (try (string "`----"))
+ blanklines
+ return raw
+
+emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
+
+emailBlockQuote = try $ do
+ emailBlockQuoteStart
+ raw <- sepBy (many (nonEndline <|>
+ (try (endline >> notFollowedBy emailBlockQuoteStart >>
+ return '\n'))))
+ (try (newline >> emailBlockQuoteStart))
+ newline <|> (eof >> return '\n')
+ optional blanklines
+ return raw
+
+blockQuote = do
+ raw <- emailBlockQuote <|> emacsBoxQuote
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
+ return $ BlockQuote contents
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+bulletListStart = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ nonindentSpaces
+ notFollowedBy' hrule -- because hrules start out just like lists
+ oneOf bulletListMarkers
+ spaceChar
+ skipSpaces
+
+anyOrderedListStart = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ nonindentSpaces
+ notFollowedBy $ string "p." >> spaceChar >> digit -- page number
+ state <- getState
+ if stateStrict state
+ then do many1 digit
+ char '.'
+ spaceChar
+ return (1, DefaultStyle, DefaultDelim)
+ else anyOrderedListMarker >>~ spaceChar
+
+orderedListStart style delim = try $ do
+ optional newline -- if preceded by a Plain block in a list context
+ nonindentSpaces
+ state <- getState
+ num <- if stateStrict state
+ then do many1 digit
+ char '.'
+ return 1
+ else orderedListMarker style delim
+ if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
+ num `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then char '\t' <|> (spaceChar >> spaceChar)
+ else spaceChar
+ skipSpaces
+
+-- parse a line of a list item (start = parser for beginning of list item)
+listLine start = try $ do
+ notFollowedBy' start
+ notFollowedBy blankline
+ notFollowedBy' (do indentSpaces
+ many (spaceChar)
+ bulletListStart <|> (anyOrderedListStart >> return ()))
+ line <- manyTill anyChar newline
+ return $ line ++ "\n"
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start = try $ do
+ start
+ result <- many1 (listLine start)
+ blanks <- many blankline
+ return $ concat result ++ blanks
+
+-- continuation of a list item - indented and separated by blankline
+-- or (in compact lists) endline.
+-- note: nested lists are parsed as continuations
+listContinuation start = try $ do
+ lookAhead indentSpaces
+ result <- many1 (listContinuationLine start)
+ blanks <- many blankline
+ return $ concat result ++ blanks
+
+listContinuationLine start = try $ do
+ notFollowedBy blankline
+ notFollowedBy' start
+ optional indentSpaces
+ result <- manyTill anyChar newline
+ return $ result ++ "\n"
+
+listItem start = try $ do
+ first <- rawListItem start
+ continuations <- many (listContinuation start)
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state {stateParserContext = ListItemState}
+ -- parse the extracted block, which may contain various block elements:
+ let raw = concat (first:continuations)
+ contents <- parseFromString parseBlocks raw
+ updateState (\st -> st {stateParserContext = oldContext})
+ return contents
+
+orderedList = try $ do
+ (start, style, delim) <- lookAhead anyOrderedListStart
+ items <- many1 (listItem (orderedListStart style delim))
+ return $ OrderedList (start, style, delim) $ compactify items
+
+bulletList = many1 (listItem bulletListStart) >>=
+ return . BulletList . compactify
+
+-- definition lists
+
+definitionListItem = try $ do
+ notFollowedBy blankline
+ notFollowedBy' indentSpaces
+ -- first, see if this has any chance of being a definition list:
+ lookAhead (anyLine >> char ':')
+ term <- manyTill inline newline
+ raw <- many1 defRawBlock
+ state <- getState
+ let oldContext = stateParserContext state
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ concat raw
+ updateState (\st -> st {stateParserContext = oldContext})
+ return ((normalizeSpaces term), contents)
+
+defRawBlock = try $ do
+ char ':'
+ state <- getState
+ let tabStop = stateTabStop state
+ try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
+ firstline <- anyLine
+ rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
+ trailing <- option "" blanklines
+ return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
+
+definitionList = do
+ failIfStrict
+ items <- many1 definitionListItem
+ let (terms, defs) = unzip items
+ let defs' = compactify defs
+ let items' = zip terms defs'
+ return $ DefinitionList items'
+
+--
+-- paragraph block
+--
+
+para = try $ do
+ result <- many1 inline
+ newline
+ blanklines <|> do st <- getState
+ if stateStrict st
+ then lookAhead (blockQuote <|> header) >> return ""
+ else lookAhead emacsBoxQuote >> return ""
+ return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- raw html
+--
+
+htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
+
+htmlBlock = do
+ st <- getState
+ if stateStrict st
+ then try $ do failUnlessBeginningOfLine
+ first <- htmlElement
+ finalSpace <- many (oneOf spaceChars)
+ finalNewlines <- many newline
+ return $ RawHtml $ first ++ finalSpace ++ finalNewlines
+ else rawHtmlBlocks
+
+-- True if tag is self-closing
+isSelfClosing tag =
+ isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
+
+strictHtmlBlock = try $ do
+ tag <- anyHtmlBlockTag
+ let tag' = extractTagType tag
+ if isSelfClosing tag || tag' == "hr"
+ then return tag
+ else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
+ (htmlElement <|> (count 1 anyChar)))
+ end <- htmlEndTag tag'
+ return $ tag ++ concat contents ++ end
+
+rawHtmlBlocks = do
+ htmlBlocks <- many1 rawHtmlBlock
+ let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
+ let combined' = if not (null combined) && last combined == '\n'
+ then init combined -- strip extra newline
+ else combined
+ return $ RawHtml combined'
+
+--
+-- LaTeX
+--
+
+rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
+
+--
+-- Tables
+--
+
+-- Parse a dashed line with optional trailing spaces; return its length
+-- and the length including trailing space.
+dashedLine ch = do
+ dashes <- many1 (char ch)
+ sp <- many spaceChar
+ return $ (length dashes, length $ dashes ++ sp)
+
+-- Parse a table header with dashed lines of '-' preceded by
+-- one line of text.
+simpleTableHeader = try $ do
+ rawContent <- anyLine
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeads = tail $ splitByIndices (init indices) rawContent
+ let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+ return (rawHeads, aligns, indices)
+
+-- Parse a table footer - dashed lines followed by blank line.
+tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
+
+-- Parse a table separator - dashed line.
+tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
+
+-- Parse a raw line and split it into chunks by indices.
+rawTableLine indices = do
+ notFollowedBy' (blanklines <|> tableFooter)
+ line <- many1Till anyChar newline
+ return $ map removeLeadingTrailingSpace $ tail $
+ splitByIndices (init indices) line
+
+-- Parse a table line and return a list of lists of blocks (columns).
+tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
+
+-- Parse a multiline table row and return a list of blocks (columns).
+multilineRow indices = do
+ colLines <- many1 (rawTableLine indices)
+ optional blanklines
+ let cols = map unlines $ transpose colLines
+ mapM (parseFromString (many plain)) cols
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int -- Number of columns on terminal
+ -> [Int] -- Indices
+ -> [Float] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices =
+ let lengths = zipWith (-) indices (0:indices)
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
+
+-- Parses a table caption: inlines beginning with 'Table:'
+-- and followed by blank lines.
+tableCaption = try $ do
+ nonindentSpaces
+ string "Table:"
+ result <- many1 inline
+ blanklines
+ return $ normalizeSpaces result
+
+-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
+tableWith headerParser lineParser footerParser = try $ do
+ (rawHeads, aligns, indices) <- headerParser
+ lines <- many1Till (lineParser indices) footerParser
+ caption <- option [] tableCaption
+ heads <- mapM (parseFromString (many plain)) rawHeads
+ state <- getState
+ let numColumns = stateColumns state
+ let widths = widthsFromIndices numColumns indices
+ return $ Table caption aligns widths heads lines
+
+-- Parse a simple table with '---' header and one line per row.
+simpleTable = tableWith simpleTableHeader tableLine blanklines
+
+-- Parse a multiline table: starts with row of '-' on top, then header
+-- (which may be multiline), then the rows,
+-- which may be multiline, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+
+multilineTableHeader = try $ do
+ tableSep
+ rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+ initSp <- nonindentSpaces
+ dashes <- many1 (dashedLine '-')
+ newline
+ let (lengths, lines) = unzip dashes
+ let indices = scanl (+) (length initSp) lines
+ let rawHeadsList = transpose $ map
+ (\ln -> tail $ splitByIndices (init indices) ln)
+ rawContent
+ let rawHeads = map (joinWithSep " ") rawHeadsList
+ let aligns = zipWith alignType rawHeadsList lengths
+ return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String] -> Int -> Alignment
+alignType [] len = AlignDefault
+alignType strLst len =
+ let str = head $ sortBy (comparing length) $
+ map removeTrailingSpace strLst
+ leftSpace = if null str then False else (str !! 0) `elem` " \t"
+ rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
+ in case (leftSpace, rightSpace) of
+ (True, False) -> AlignRight
+ (False, True) -> AlignLeft
+ (True, True) -> AlignCenter
+ (False, False) -> AlignDefault
+
+table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
+
+--
+-- inline
+--
+
+inline = choice [ str
+ , smartPunctuation
+ , whitespace
+ , endline
+ , code
+ , charRef
+ , strong
+ , emph
+ , note
+ , inlineNote
+ , link
+ , image
+ , math
+ , strikeout
+ , superscript
+ , subscript
+ , autoLink
+ , rawHtmlInline'
+ , rawLaTeXInline'
+ , escapedChar
+ , symbol
+ , ltSign ] <?> "inline"
+
+escapedChar = do
+ char '\\'
+ state <- getState
+ result <- option '\\' $ if stateStrict state
+ then oneOf "\\`*_{}[]()>#+-.!~"
+ else satisfy (not . isAlphaNum)
+ return $ Str [result]
+
+ltSign = do
+ st <- getState
+ if stateStrict st
+ then char '<'
+ else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
+ return $ Str ['<']
+
+specialCharsMinusLt = filter (/= '<') specialChars
+
+symbol = do
+ result <- oneOf specialCharsMinusLt
+ return $ Str [result]
+
+-- parses inline code, between n `s and n `s
+code = try $ do
+ starts <- many1 (char '`')
+ skipSpaces
+ result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ (char '\n' >> return " "))
+ (try (skipSpaces >> count (length starts) (char '`') >>
+ notFollowedBy (char '`')))
+ return $ Code $ removeLeadingTrailingSpace $ concat result
+
+mathWord = many1 ((noneOf " \t\n\\$") <|>
+ (try (char '\\') >>~ notFollowedBy (char '$')))
+
+math = try $ do
+ failIfStrict
+ char '$'
+ notFollowedBy space
+ words <- sepBy1 mathWord (many1 space)
+ char '$'
+ return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
+
+emph = ((enclosed (char '*') (char '*') inline) <|>
+ (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=
+ return . Emph . normalizeSpaces
+
+strong = ((enclosed (string "**") (try $ string "**") inline) <|>
+ (enclosed (string "__") (try $ string "__") inline)) >>=
+ return . Strong . normalizeSpaces
+
+strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
+ return . Strikeout . normalizeSpaces
+
+superscript = failIfStrict >> enclosed (char '^') (char '^')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Superscript
+
+subscript = failIfStrict >> enclosed (char '~') (char '~')
+ (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+ return . Subscript
+
+smartPunctuation = failUnlessSmart >>
+ choice [ quoted, apostrophe, dash, ellipses ]
+
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+
+quoted = doubleQuoted <|> singleQuoted
+
+withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
+ return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
+ return . Quoted DoubleQuote . normalizeSpaces
+
+failIfInQuoteContext context = do
+ st <- getState
+ if stateQuoteContext st == context
+ then fail "already inside quotes"
+ else return ()
+
+singleQuoteStart = do
+ failIfInQuoteContext InSingleQuote
+ char '\8216' <|>
+ do char '\''
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum))) -- possess/contraction
+ return '\''
+
+singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
+
+doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
+ (char '"' <|> char '\8220') >>
+ notFollowedBy (oneOf " \t\n")
+
+doubleQuoteEnd = char '"' <|> char '\8221'
+
+ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
+
+dash = enDash <|> emDash
+
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
+
+emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
+ skipSpaces >> return EmDash
+
+whitespace = do
+ sps <- many1 (oneOf spaceChars)
+ if length sps >= 2
+ then option Space (endline >> return LineBreak)
+ else return Space <?> "whitespace"
+
+nonEndline = satisfy (/='\n')
+
+strChar = noneOf (specialChars ++ spaceChars ++ "\n")
+
+str = many1 strChar >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ st <- getState
+ if stateStrict st
+ then do notFollowedBy emailBlockQuoteStart
+ notFollowedBy (char '#') -- atx header
+ else return ()
+ -- parse potential list-starts differently if in a list:
+ if stateParserContext st == ListItemState
+ then notFollowedBy' (bulletListStart <|>
+ (anyOrderedListStart >> return ()))
+ else return ()
+ return Space
+
+--
+-- links
+--
+
+-- a reference label for a link
+reference = notFollowedBy' (string "[^") >> -- footnote reference
+ inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
+
+-- source for a link, with optional title
+source = try $ do
+ char '('
+ optional (char '<')
+ src <- many (noneOf ")> \t\n")
+ optional (char '>')
+ tit <- option "" linkTitle
+ skipSpaces
+ char ')'
+ return (removeTrailingSpace src, tit)
+
+linkTitle = try $ do
+ (many1 spaceChar >> option '\n' newline) <|> newline
+ skipSpaces
+ delim <- char '\'' <|> char '"'
+ tit <- manyTill anyChar (try (char delim >> skipSpaces >>
+ notFollowedBy (noneOf ")\n")))
+ return $ decodeCharacterReferences tit
+
+link = try $ do
+ label <- reference
+ src <- source <|> referenceLink label
+ return $ Link label src
+
+-- a link like [this][ref] or [this][] or [this]
+referenceLink label = do
+ ref <- option [] (try (optional (char ' ') >>
+ optional (newline >> skipSpaces) >> reference))
+ let ref' = if null ref then label else ref
+ state <- getState
+ case lookupKeySrc (stateKeys state) ref' of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+
+emailAddress = try $ do
+ name <- many1 (alphaNum <|> char '+')
+ char '@'
+ first <- many1 alphaNum
+ rest <- many1 (char '.' >> many1 alphaNum)
+ return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
+
+uri = try $ do
+ str <- many1 (noneOf "\n\t >")
+ if isURI str
+ then return str
+ else fail "not a URI"
+
+autoLink = try $ do
+ char '<'
+ src <- uri <|> emailAddress
+ char '>'
+ let src' = if "mailto:" `isPrefixOf` src
+ then drop 7 src
+ else src
+ st <- getState
+ return $ if stateStrict st
+ then Link [Str src'] (src, "")
+ else Link [Code src'] (src, "")
+
+image = try $ do
+ char '!'
+ (Link label src) <- link
+ return $ Image label src
+
+note = try $ do
+ failIfStrict
+ ref <- noteMarker
+ state <- getState
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just contents -> return $ Note contents
+
+inlineNote = try $ do
+ failIfStrict
+ char '^'
+ contents <- inlinesInBalanced "[" "]"
+ return $ Note [Para contents]
+
+rawLaTeXInline' = failIfStrict >> rawLaTeXInline
+
+rawHtmlInline' = do
+ st <- getState
+ result <- choice $ if stateStrict st
+ then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
+ else [htmlBlockElement, anyHtmlInlineTag]
+ return $ HtmlInline result
+
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
new file mode 100644
index 000000000..1239eb688
--- /dev/null
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -0,0 +1,640 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.RST
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion from reStructuredText to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.RST (
+ readRST
+ ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.ParserCombinators.Parsec
+import Data.List ( findIndex, delete )
+
+-- | Parse reStructuredText string and return Pandoc document.
+readRST :: ParserState -> String -> Pandoc
+readRST state str = (readWith parseRST) state (str ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+---
+
+bulletListMarkers = "*+-"
+underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = "\\`|*_<>$:[-"
+
+--
+-- parsing documents
+--
+
+isAnonKey (ref, src) = ref == [Str "_"]
+
+isHeader :: Int -> Block -> Bool
+isHeader n (Header x _) = x == n
+isHeader _ _ = False
+
+-- | Promote all headers in a list of blocks. (Part of
+-- title transformation for RST.)
+promoteHeaders :: Int -> [Block] -> [Block]
+promoteHeaders num ((Header level text):rest) =
+ (Header (level - num) text):(promoteHeaders num rest)
+promoteHeaders num (other:rest) = other:(promoteHeaders num rest)
+promoteHeaders num [] = []
+
+-- | If list of blocks starts with a header (or a header and subheader)
+-- of level that are not found elsewhere, return it as a title and
+-- promote all the other headers.
+titleTransform :: [Block] -- ^ list of blocks
+ -> ([Block], [Inline]) -- ^ modified list of blocks, title
+titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle
+ if (any (isHeader 1) rest) || (any (isHeader 2) rest)
+ then ((Header 1 head1):(Header 2 head2):rest, [])
+ else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2)
+titleTransform ((Header 1 head1):rest) = -- title, no subtitle
+ if (any (isHeader 1) rest)
+ then ((Header 1 head1):rest, [])
+ else ((promoteHeaders 1 rest), head1)
+titleTransform blocks = (blocks, [])
+
+parseRST = do
+ startPos <- getPosition
+ -- go through once just to get list of reference keys
+ -- docMinusKeys is the raw document with blanks where the keys were...
+ docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
+ setInput docMinusKeys
+ setPosition startPos
+ st <- getState
+ let reversedKeys = stateKeys st
+ updateState $ \st -> st { stateKeys = reverse reversedKeys }
+ -- now parse it for real...
+ blocks <- parseBlocks
+ let blocks' = filter (/= Null) blocks
+ state <- getState
+ let (blocks'', title) = if stateStandalone state
+ then titleTransform blocks'
+ else (blocks', [])
+ let authors = stateAuthors state
+ let date = stateDate state
+ let title' = if (null title) then (stateTitle state) else title
+ return $ Pandoc (Meta title' authors date) blocks''
+
+--
+-- parsing blocks
+--
+
+parseBlocks = manyTill block eof
+
+block = choice [ codeBlock
+ , rawHtmlBlock
+ , rawLaTeXBlock
+ , fieldList
+ , blockQuote
+ , imageBlock
+ , unknownDirective
+ , header
+ , hrule
+ , list
+ , lineBlock
+ , para
+ , plain
+ , nullBlock ] <?> "block"
+
+--
+-- field list
+--
+
+fieldListItem indent = try $ do
+ string indent
+ char ':'
+ name <- many1 alphaNum
+ string ": "
+ skipSpaces
+ first <- manyTill anyChar newline
+ rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>
+ indentedBlock
+ return (name, joinWithSep " " (first:(lines rest)))
+
+fieldList = try $ do
+ indent <- lookAhead $ many (oneOf " \t")
+ items <- many1 $ fieldListItem indent
+ blanklines
+ let authors = case lookup "Authors" items of
+ Just auth -> [auth]
+ Nothing -> map snd (filter (\(x,y) -> x == "Author") items)
+ if null authors
+ then return ()
+ else updateState $ \st -> st {stateAuthors = authors}
+ case (lookup "Date" items) of
+ Just dat -> updateState $ \st -> st {stateDate = dat}
+ Nothing -> return ()
+ case (lookup "Title" items) of
+ Just tit -> parseFromString (many inline) tit >>=
+ \t -> updateState $ \st -> st {stateTitle = t}
+ Nothing -> return ()
+ let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") &&
+ (x /= "Date") && (x /= "Title")) items
+ if null remaining
+ then return Null
+ else do terms <- mapM (return . (:[]) . Str . fst) remaining
+ defs <- mapM (parseFromString (many block) . snd)
+ remaining
+ return $ DefinitionList $ zip terms defs
+
+--
+-- line block
+--
+
+lineBlockLine = try $ do
+ string "| "
+ white <- many (oneOf " \t")
+ line <- manyTill inline newline
+ return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak]
+
+lineBlock = try $ do
+ lines <- many1 lineBlockLine
+ blanklines
+ return $ Para (concat lines)
+
+--
+-- paragraph block
+--
+
+para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph"
+
+codeBlockStart = string "::" >> blankline >> blankline
+
+-- paragraph that ends in a :: starting a code block
+paraBeforeCodeBlock = try $ do
+ result <- many1 (notFollowedBy' codeBlockStart >> inline)
+ lookAhead (string "::")
+ return $ Para $ if last result == Space
+ then normalizeSpaces result
+ else (normalizeSpaces result) ++ [Str ":"]
+
+-- regular paragraph
+paraNormal = try $ do
+ result <- many1 inline
+ newline
+ blanklines
+ return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces
+
+--
+-- image block
+--
+
+imageBlock = try $ do
+ string ".. image:: "
+ src <- manyTill anyChar newline
+ fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
+ many1 $ fieldListItem indent
+ optional blanklines
+ case lookup "alt" fields of
+ Just alt -> return $ Plain [Image [Str alt] (src, alt)]
+ Nothing -> return $ Plain [Image [Str "image"] (src, "")]
+--
+-- header blocks
+--
+
+header = doubleHeader <|> singleHeader <?> "header"
+
+-- a header with lines on top and bottom
+doubleHeader = try $ do
+ c <- oneOf underlineChars
+ rest <- many (char c) -- the top line
+ let lenTop = length (c:rest)
+ skipSpaces
+ newline
+ txt <- many1 (notFollowedBy blankline >> inline)
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ if (len > lenTop) then fail "title longer than border" else return ()
+ blankline -- spaces and newline
+ count lenTop (char c) -- the bottom line
+ blanklines
+ -- check to see if we've had this kind of header before.
+ -- if so, get appropriate level. if not, add to list.
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return $ Header level (normalizeSpaces txt)
+
+-- a header with line on the bottom only
+singleHeader = try $ do
+ notFollowedBy' whitespace
+ txt <- many1 (do {notFollowedBy blankline; inline})
+ pos <- getPosition
+ let len = (sourceColumn pos) - 1
+ blankline
+ c <- oneOf underlineChars
+ rest <- count (len - 1) (char c)
+ many (char c)
+ blanklines
+ state <- getState
+ let headerTable = stateHeaderTable state
+ let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of
+ Just ind -> (headerTable, ind + 1)
+ Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1)
+ setState (state { stateHeaderTable = headerTable' })
+ return $ Header level (normalizeSpaces txt)
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+ chr <- oneOf underlineChars
+ count 3 (char chr)
+ skipMany (char chr)
+ blankline
+ blanklines
+ return HorizontalRule
+
+--
+-- code blocks
+--
+
+-- read a line indented by a given string
+indentedLine indents = try $ do
+ string indents
+ result <- manyTill anyChar newline
+ return $ result ++ "\n"
+
+-- two or more indented lines, possibly separated by blank lines.
+-- any amount of indentation will work.
+indentedBlock = do
+ indents <- lookAhead $ many1 (oneOf " \t")
+ lns <- many $ choice $ [ indentedLine indents,
+ try $ do b <- blanklines
+ l <- indentedLine indents
+ return (b ++ l) ]
+ optional blanklines
+ return $ concat lns
+
+codeBlock = try $ do
+ codeBlockStart
+ result <- indentedBlock
+ return $ CodeBlock $ stripTrailingNewlines result
+
+--
+-- raw html
+--
+
+rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
+ indentedBlock >>= return . RawHtml
+
+--
+-- raw latex
+--
+
+rawLaTeXBlock = try $ do
+ string ".. raw:: latex"
+ blanklines
+ result <- indentedBlock
+ return $ Para [(TeX result)]
+
+--
+-- block quotes
+--
+
+blockQuote = do
+ raw <- indentedBlock
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ return $ BlockQuote contents
+
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+definitionListItem = try $ do
+ term <- many1Till inline endline
+ raw <- indentedBlock
+ -- parse the extracted block, which may contain various block elements:
+ contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ return (normalizeSpaces term, contents)
+
+definitionList = many1 definitionListItem >>= return . DefinitionList
+
+-- parses bullet list start and returns its length (inc. following whitespace)
+bulletListStart = try $ do
+ notFollowedBy' hrule -- because hrules start out just like lists
+ marker <- oneOf bulletListMarkers
+ white <- many1 spaceChar
+ return $ length (marker:white)
+
+-- parses ordered list start and returns its length (inc following whitespace)
+orderedListStart style delim = try $ do
+ (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
+ white <- many1 spaceChar
+ return $ markerLen + length white
+
+-- parse a line of a list item
+listLine markerLength = try $ do
+ notFollowedBy blankline
+ indentWith markerLength
+ line <- manyTill anyChar newline
+ return $ line ++ "\n"
+
+-- indent by specified number of spaces (or equiv. tabs)
+indentWith num = do
+ state <- getState
+ let tabStop = stateTabStop state
+ if (num < tabStop)
+ then count num (char ' ')
+ else choice [ try (count num (char ' ')),
+ (try (char '\t' >> count (num - tabStop) (char ' '))) ]
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start = do
+ markerLength <- start
+ firstLine <- manyTill anyChar newline
+ restLines <- many (listLine markerLength)
+ return (markerLength, (firstLine ++ "\n" ++ (concat restLines)))
+
+-- continuation of a list item - indented and separated by blankline or
+-- (in compact lists) endline.
+-- Note: nested lists are parsed as continuations.
+listContinuation markerLength = try $ do
+ blanks <- many1 blankline
+ result <- many1 (listLine markerLength)
+ return $ blanks ++ concat result
+
+listItem start = try $ do
+ (markerLength, first) <- rawListItem start
+ rest <- many (listContinuation markerLength)
+ blanks <- choice [ try (many blankline >>~ lookAhead start),
+ many1 blankline ] -- whole list must end with blank.
+ -- parsing with ListItemState forces markers at beginning of lines to
+ -- count as list item markers, even if not separated by blank space.
+ -- see definition of "endline"
+ state <- getState
+ let oldContext = stateParserContext state
+ setState $ state {stateParserContext = ListItemState}
+ -- parse the extracted block, which may itself contain block elements
+ parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks
+ updateState (\st -> st {stateParserContext = oldContext})
+ return parsed
+
+orderedList = do
+ (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar)
+ items <- many1 (listItem (orderedListStart style delim))
+ let items' = compactify items
+ return $ OrderedList (start, style, delim) items'
+
+bulletList = many1 (listItem bulletListStart) >>=
+ return . BulletList . compactify
+
+--
+-- unknown directive (e.g. comment)
+--
+
+unknownDirective = try $ do
+ string ".. "
+ manyTill anyChar newline
+ many (string " :" >> many1 (noneOf "\n:") >> char ':' >>
+ many1 (noneOf "\n") >> newline)
+ optional blanklines
+ return Null
+
+--
+-- reference key
+--
+
+referenceKey = do
+ startPos <- getPosition
+ key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey]
+ st <- getState
+ let oldkeys = stateKeys st
+ updateState $ \st -> st { stateKeys = key : oldkeys }
+ optional blanklines
+ endPos <- getPosition
+ -- return enough blanks to replace key
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+targetURI = do
+ skipSpaces
+ optional newline
+ contents <- many1 (try (many spaceChar >> newline >>
+ many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n")
+ blanklines
+ return contents
+
+imageKey = try $ do
+ string ".. |"
+ ref <- manyTill inline (char '|')
+ skipSpaces
+ string "image::"
+ src <- targetURI
+ return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+anonymousKey = try $ do
+ oneOfStrings [".. __:", "__"]
+ src <- targetURI
+ state <- getState
+ return ([Str "_"], (removeLeadingTrailingSpace src, ""))
+
+regularKeyQuoted = try $ do
+ string ".. _`"
+ ref <- manyTill inline (char '`')
+ char ':'
+ src <- targetURI
+ return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+regularKey = try $ do
+ string ".. _"
+ ref <- manyTill inline (char ':')
+ src <- targetURI
+ return (normalizeSpaces ref, (removeLeadingTrailingSpace src, ""))
+
+ --
+ -- inline
+ --
+
+inline = choice [ link
+ , str
+ , whitespace
+ , endline
+ , strong
+ , emph
+ , code
+ , image
+ , hyphens
+ , superscript
+ , subscript
+ , escapedChar
+ , symbol ] <?> "inline"
+
+hyphens = do
+ result <- many1 (char '-')
+ option Space endline
+ -- don't want to treat endline after hyphen or dash as a space
+ return $ Str result
+
+escapedChar = escaped anyChar
+
+symbol = do
+ result <- oneOf specialChars
+ return $ Str [result]
+
+-- parses inline code, between codeStart and codeEnd
+code = try $ do
+ string "``"
+ result <- manyTill anyChar (try (string "``"))
+ return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result
+
+emph = enclosed (char '*') (char '*') inline >>=
+ return . Emph . normalizeSpaces
+
+strong = enclosed (string "**") (try $ string "**") inline >>=
+ return . Strong . normalizeSpaces
+
+interpreted role = try $ do
+ optional $ try $ string "\\ "
+ result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
+ nextChar <- lookAhead anyChar
+ try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
+ return [Str result]
+
+superscript = interpreted "sup" >>= (return . Superscript)
+
+subscript = interpreted "sub" >>= (return . Subscript)
+
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
+
+str = notFollowedBy' oneWordReference >>
+ many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline = try $ do
+ newline
+ notFollowedBy blankline
+ -- parse potential list-starts at beginning of line differently in a list:
+ st <- getState
+ if (stateParserContext st) == ListItemState
+ then notFollowedBy (anyOrderedListMarker >> spaceChar) >>
+ notFollowedBy' bulletListStart
+ else return ()
+ return Space
+
+--
+-- links
+--
+
+link = choice [explicitLink, referenceLink, autoLink] <?> "link"
+
+explicitLink = try $ do
+ char '`'
+ notFollowedBy (char '`') -- `` is marks start of inline code
+ label <- manyTill inline (try (do {spaces; char '<'}))
+ src <- manyTill (noneOf ">\n ") (char '>')
+ skipSpaces
+ string "`_"
+ return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "")
+
+reference = try $ do
+ char '`'
+ notFollowedBy (char '`')
+ label <- many1Till inline (char '`')
+ char '_'
+ return label
+
+oneWordReference = do
+ raw <- many1 alphaNum
+ char '_'
+ notFollowedBy alphaNum -- because this_is_not a link
+ return [Str raw]
+
+referenceLink = try $ do
+ label <- reference <|> oneWordReference
+ key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable key of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ -- if anonymous link, remove first anon key so it won't be used again
+ let keyTable' = if (key == [Str "_"]) -- anonymous link?
+ then delete ([Str "_"], src) keyTable -- remove first anon key
+ else keyTable
+ setState $ state { stateKeys = keyTable' }
+ return $ Link (normalizeSpaces label) src
+
+uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://",
+ "mailto:", "news:", "telnet:" ]
+
+uri = try $ do
+ scheme <- uriScheme
+ identifier <- many1 (noneOf " \t\n")
+ return $ scheme ++ identifier
+
+autoURI = do
+ src <- uri
+ return $ Link [Str src] (src, "")
+
+emailChar = alphaNum <|> oneOf "-+_."
+
+emailAddress = try $ do
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ return $ addr ++ '@':dom
+
+domainChar = alphaNum <|> char '-'
+
+domain = do
+ first <- many1 domainChar
+ dom <- many1 (try (do{ char '.'; many1 domainChar }))
+ return $ joinWithSep "." (first:dom)
+
+autoEmail = do
+ src <- emailAddress
+ return $ Link [Str src] ("mailto:" ++ src, "")
+
+autoLink = autoURI <|> autoEmail
+
+-- For now, we assume that all substitution references are for images.
+image = try $ do
+ char '|'
+ ref <- manyTill inline (char '|')
+ state <- getState
+ let keyTable = stateKeys state
+ src <- case lookupKeySrc keyTable ref of
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
+ return $ Image (normalizeSpaces ref) src
+
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
new file mode 100644
index 000000000..f27c3ae75
--- /dev/null
+++ b/src/Text/Pandoc/Shared.hs
@@ -0,0 +1,792 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Shared
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions and definitions used by the various Pandoc modules.
+-}
+module Text.Pandoc.Shared (
+ -- * List processing
+ splitBy,
+ splitByIndices,
+ substitute,
+ joinWithSep,
+ -- * Text processing
+ backslashEscapes,
+ escapeStringUsing,
+ stripTrailingNewlines,
+ removeLeadingTrailingSpace,
+ removeLeadingSpace,
+ removeTrailingSpace,
+ stripFirstAndLast,
+ camelCaseToHyphenated,
+ toRomanNumeral,
+ wrapped,
+ wrapIfNeeded,
+ -- * Parsing
+ (>>~),
+ anyLine,
+ many1Till,
+ notFollowedBy',
+ oneOfStrings,
+ spaceChar,
+ skipSpaces,
+ blankline,
+ blanklines,
+ enclosed,
+ stringAnyCase,
+ parseFromString,
+ lineClump,
+ charsInBalanced,
+ charsInBalanced',
+ romanNumeral,
+ withHorizDisplacement,
+ nullBlock,
+ failIfStrict,
+ escaped,
+ anyOrderedListMarker,
+ orderedListMarker,
+ charRef,
+ readWith,
+ testStringWith,
+ ParserState (..),
+ defaultParserState,
+ HeaderType (..),
+ ParserContext (..),
+ QuoteContext (..),
+ NoteTable,
+ KeyTable,
+ lookupKeySrc,
+ refsMatch,
+ -- * Native format prettyprinting
+ prettyPandoc,
+ -- * Pandoc block and inline list processing
+ orderedListMarkers,
+ normalizeSpaces,
+ compactify,
+ Element (..),
+ hierarchicalize,
+ isHeaderBlock,
+ -- * Writer options
+ WriterOptions (..),
+ defaultWriterOptions
+ ) where
+
+import Text.Pandoc.Definition
+import Text.ParserCombinators.Parsec
+import Text.PrettyPrint.HughesPJ ( Doc, fsep )
+import Text.Pandoc.CharacterReferences ( characterReference )
+import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
+import Data.List ( find, isPrefixOf )
+import Control.Monad ( join )
+
+--
+-- List processing
+--
+
+-- | Split list by groups of one or more sep.
+splitBy :: (Eq a) => a -> [a] -> [[a]]
+splitBy _ [] = []
+splitBy sep lst =
+ let (first, rest) = break (== sep) lst
+ rest' = dropWhile (== sep) rest
+ in first:(splitBy sep rest')
+
+-- | Split list into chunks divided at specified indices.
+splitByIndices :: [Int] -> [a] -> [[a]]
+splitByIndices [] lst = [lst]
+splitByIndices (x:xs) lst =
+ let (first, rest) = splitAt x lst in
+ first:(splitByIndices (map (\y -> y - x) xs) rest)
+
+-- | Replace each occurrence of one sublist in a list with another.
+substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
+substitute _ _ [] = []
+substitute [] _ lst = lst
+substitute target replacement lst =
+ if target `isPrefixOf` lst
+ then replacement ++ (substitute target replacement $ drop (length target) lst)
+ else (head lst):(substitute target replacement $ tail lst)
+
+-- | Joins a list of lists, separated by another list.
+joinWithSep :: [a] -- ^ List to use as separator
+ -> [[a]] -- ^ Lists to join
+ -> [a]
+joinWithSep _ [] = []
+joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
+
+--
+-- Text processing
+--
+
+-- | Returns an association list of backslash escapes for the
+-- designated characters.
+backslashEscapes :: [Char] -- ^ list of special characters to escape
+ -> [(Char, String)]
+backslashEscapes = map (\ch -> (ch, ['\\',ch]))
+
+-- | Escape a string of characters, using an association list of
+-- characters and strings.
+escapeStringUsing :: [(Char, String)] -> String -> String
+escapeStringUsing _ [] = ""
+escapeStringUsing escapeTable (x:xs) =
+ case (lookup x escapeTable) of
+ Just str -> str ++ rest
+ Nothing -> x:rest
+ where rest = escapeStringUsing escapeTable xs
+
+-- | Strip trailing newlines from string.
+stripTrailingNewlines :: String -> String
+stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
+
+-- | Remove leading and trailing space (including newlines) from string.
+removeLeadingTrailingSpace :: String -> String
+removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
+
+-- | Remove leading space (including newlines) from string.
+removeLeadingSpace :: String -> String
+removeLeadingSpace = dropWhile (`elem` " \n\t")
+
+-- | Remove trailing space (including newlines) from string.
+removeTrailingSpace :: String -> String
+removeTrailingSpace = reverse . removeLeadingSpace . reverse
+
+-- | Strip leading and trailing characters from string
+stripFirstAndLast :: String -> String
+stripFirstAndLast str =
+ drop 1 $ take ((length str) - 1) str
+
+-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
+camelCaseToHyphenated :: String -> String
+camelCaseToHyphenated [] = ""
+camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
+ a:'-':(toLower b):(camelCaseToHyphenated rest)
+camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
+
+-- | Convert number < 4000 to uppercase roman numeral.
+toRomanNumeral :: Int -> String
+toRomanNumeral x =
+ if x >= 4000 || x < 0
+ then "?"
+ else case x of
+ _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
+ _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
+ _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
+ _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
+ _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
+ _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
+ _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
+ _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
+ _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
+ _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
+ _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
+ _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
+ _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
+ _ -> ""
+
+-- | Wrap inlines to line length.
+wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
+wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
+ return . fsep
+
+wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
+ [Inline] -> m Doc
+wrapIfNeeded opts = if writerWrapText opts
+ then wrapped
+ else ($)
+
+--
+-- Parsing
+--
+
+-- | Like >>, but returns the operation on the left.
+-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
+(>>~) :: (Monad m) => m a -> m b -> m a
+a >>~ b = a >>= \x -> b >> return x
+
+-- | Parse any line of text
+anyLine :: GenParser Char st [Char]
+anyLine = manyTill anyChar newline
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: GenParser tok st a
+ -> GenParser tok st end
+ -> GenParser tok st [a]
+many1Till p end = do
+ first <- p
+ rest <- manyTill p end
+ return (first:rest)
+
+-- | A more general form of @notFollowedBy@. This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' p = try $ join $ do a <- try p
+ return (unexpected (show a))
+ <|>
+ return (return ())
+-- (This version due to Andrew Pimlott on the Haskell mailing list.)
+
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> GenParser Char st String
+oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+
+-- | Parses a space or tab.
+spaceChar :: CharParser st Char
+spaceChar = char ' ' <|> char '\t'
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: GenParser Char st ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: GenParser Char st Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: GenParser Char st [Char]
+blanklines = many1 blankline
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: GenParser Char st t -- ^ start parser
+ -> GenParser Char st end -- ^ end parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+enclosed start end parser = try $
+ start >> notFollowedBy space >> many1Till parser end
+
+-- | Parse string, case insensitive.
+stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase [] = string ""
+stringAnyCase (x:xs) = do
+ firstChar <- char (toUpper x) <|> char (toLower x)
+ rest <- stringAnyCase xs
+ return (firstChar:rest)
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString parser str = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ setInput oldInput
+ setPosition oldPos
+ return result
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: GenParser Char st String
+lineClump = blanklines
+ <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close, which must be different. For example,
+-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
+-- and return "hello (there)". Stop if a blank line is
+-- encountered.
+charsInBalanced :: Char -> Char -> GenParser Char st String
+charsInBalanced open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close, '\n']))
+ <|> (do res <- charsInBalanced open close
+ return $ [open] ++ res ++ [close])
+ <|> try (string "\n" >>~ notFollowedBy' blanklines)
+ char close
+ return $ concat raw
+
+-- | Like @charsInBalanced@, but allow blank lines in the content.
+charsInBalanced' :: Char -> Char -> GenParser Char st String
+charsInBalanced' open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close]))
+ <|> (do res <- charsInBalanced' open close
+ return $ [open] ++ res ++ [close])
+ char close
+ return $ concat raw
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: Bool -- ^ Uppercase if true
+ -> GenParser Char st Int
+romanNumeral upperCase = do
+ let charAnyCase c = char (if upperCase then toUpper c else c)
+ let one = charAnyCase 'i'
+ let five = charAnyCase 'v'
+ let ten = charAnyCase 'x'
+ let fifty = charAnyCase 'l'
+ let hundred = charAnyCase 'c'
+ let fivehundred = charAnyCase 'd'
+ let thousand = charAnyCase 'm'
+ thousands <- many thousand >>= (return . (1000 *) . length)
+ ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+ fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+ hundreds <- many hundred >>= (return . (100 *) . length)
+ nineties <- option 0 $ try $ ten >> hundred >> return 90
+ fifties <- many fifty >>= (return . (50 *) . length)
+ forties <- option 0 $ try $ ten >> fifty >> return 40
+ tens <- many ten >>= (return . (10 *) . length)
+ nines <- option 0 $ try $ one >> ten >> return 9
+ fives <- many five >>= (return . (5 *) . length)
+ fours <- option 0 $ try $ one >> five >> return 4
+ ones <- many one >>= (return . length)
+ let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+ hundreds + nineties + fifties + forties + tens + nines +
+ fives + fours + ones
+ if total == 0
+ then fail "not a roman numeral"
+ else return total
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
+ -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+ pos1 <- getPosition
+ result <- parser
+ pos2 <- getPosition
+ return (result, sourceColumn pos2 - sourceColumn pos1)
+
+-- | Parses a character and returns 'Null' (so that the parser can move on
+-- if it gets stuck).
+nullBlock :: GenParser Char st Block
+nullBlock = anyChar >> return Null
+
+-- | Fail if reader is in strict markdown syntax mode.
+failIfStrict :: GenParser Char ParserState ()
+failIfStrict = do
+ state <- getState
+ if stateStrict state then fail "strict mode" else return ()
+
+-- | Parses backslash, then applies character parser.
+escaped :: GenParser Char st Char -- ^ Parser for character to escape
+ -> GenParser Char st Inline
+escaped parser = try $ do
+ char '\\'
+ result <- parser
+ return (Str [result])
+
+-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
+upperRoman :: GenParser Char st (ListNumberStyle, Int)
+upperRoman = do
+ num <- romanNumeral True
+ return (UpperRoman, num)
+
+-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
+lowerRoman :: GenParser Char st (ListNumberStyle, Int)
+lowerRoman = do
+ num <- romanNumeral False
+ return (LowerRoman, num)
+
+-- | Parses a decimal numeral and returns (Decimal, number).
+decimal :: GenParser Char st (ListNumberStyle, Int)
+decimal = do
+ num <- many1 digit
+ return (Decimal, read num)
+
+-- | Parses a '#' returns (DefaultStyle, 1).
+defaultNum :: GenParser Char st (ListNumberStyle, Int)
+defaultNum = do
+ char '#'
+ return (DefaultStyle, 1)
+
+-- | Parses a lowercase letter and returns (LowerAlpha, number).
+lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
+lowerAlpha = do
+ ch <- oneOf ['a'..'z']
+ return (LowerAlpha, ord ch - ord 'a' + 1)
+
+-- | Parses an uppercase letter and returns (UpperAlpha, number).
+upperAlpha :: GenParser Char st (ListNumberStyle, Int)
+upperAlpha = do
+ ch <- oneOf ['A'..'Z']
+ return (UpperAlpha, ord ch - ord 'A' + 1)
+
+-- | Parses a roman numeral i or I
+romanOne :: GenParser Char st (ListNumberStyle, Int)
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+ (char 'I' >> return (UpperRoman, 1))
+
+-- | Parses an ordered list marker and returns list attributes.
+anyOrderedListMarker :: GenParser Char st ListAttributes
+anyOrderedListMarker = choice $
+ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+ numParser <- [decimal, defaultNum, romanOne,
+ lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+
+-- | Parses a list number (num) followed by a period, returns list attributes.
+inPeriod :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inPeriod num = try $ do
+ (style, start) <- num
+ char '.'
+ let delim = if style == DefaultStyle
+ then DefaultDelim
+ else Period
+ return (start, style, delim)
+
+-- | Parses a list number (num) followed by a paren, returns list attributes.
+inOneParen :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inOneParen num = try $ do
+ (style, start) <- num
+ char ')'
+ return (start, style, OneParen)
+
+-- | Parses a list number (num) enclosed in parens, returns list attributes.
+inTwoParens :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inTwoParens num = try $ do
+ char '('
+ (style, start) <- num
+ char ')'
+ return (start, style, TwoParens)
+
+-- | Parses an ordered list marker with a given style and delimiter,
+-- returns number.
+orderedListMarker :: ListNumberStyle
+ -> ListNumberDelim
+ -> GenParser Char st Int
+orderedListMarker style delim = do
+ let num = case style of
+ DefaultStyle -> decimal <|> defaultNum
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ let context = case delim of
+ DefaultDelim -> inPeriod
+ Period -> inPeriod
+ OneParen -> inOneParen
+ TwoParens -> inTwoParens
+ (start, _, _) <- context num
+ return start
+
+-- | Parses a character reference and returns a Str element.
+charRef :: GenParser Char st Inline
+charRef = do
+ c <- characterReference
+ return $ Str [c]
+
+-- | Parse a string with a given parser and state.
+readWith :: GenParser Char ParserState a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> String -- ^ input string
+ -> a
+readWith parser state input =
+ case runParser parser state "source" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> result
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a) => GenParser Char ParserState a
+ -> String
+ -> IO ()
+testStringWith parser str = putStrLn $ show $
+ readWith parser defaultParserState str
+
+-- | Parsing options.
+data ParserState = ParserState
+ { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateKeys :: KeyTable, -- ^ List of reference keys
+ stateNotes :: NoteTable, -- ^ List of notes
+ stateTabStop :: Int, -- ^ Tab stop
+ stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateTitle :: [Inline], -- ^ Title of document
+ stateAuthors :: [String], -- ^ Authors of document
+ stateDate :: String, -- ^ Date of document
+ stateStrict :: Bool, -- ^ Use strict markdown syntax?
+ stateSmart :: Bool, -- ^ Use smart typography?
+ stateColumns :: Int, -- ^ Number of columns in terminal
+ stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
+ }
+ deriving Show
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateParseRaw = False,
+ stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
+ stateKeys = [],
+ stateNotes = [],
+ stateTabStop = 4,
+ stateStandalone = False,
+ stateTitle = [],
+ stateAuthors = [],
+ stateDate = [],
+ stateStrict = False,
+ stateSmart = False,
+ stateColumns = 80,
+ stateHeaderTable = [] }
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data QuoteContext
+ = InSingleQuote -- ^ Used when parsing inside single quotes
+ | InDoubleQuote -- ^ Used when parsing inside double quotes
+ | NoQuote -- ^ Used when not parsing inside quotes
+ deriving (Eq, Show)
+
+type NoteTable = [(String, [Block])]
+
+type KeyTable = [([Inline], Target)]
+
+-- | Look up key in key table and return target object.
+lookupKeySrc :: KeyTable -- ^ Key table
+ -> [Inline] -- ^ Key
+ -> Maybe Target
+lookupKeySrc table key = case find (refsMatch key . fst) table of
+ Nothing -> Nothing
+ Just (_, src) -> Just src
+
+-- | Returns @True@ if keys match (case insensitive).
+refsMatch :: [Inline] -> [Inline] -> Bool
+refsMatch ((Str x):restx) ((Str y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Emph x):restx) ((Emph y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strong x):restx) ((Strong y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Superscript x):restx) ((Superscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Subscript x):restx) ((Subscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
+ t == u && refsMatch x y && refsMatch restx resty
+refsMatch ((Code x):restx) ((Code y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((TeX x):restx) ((TeX y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
+refsMatch [] x = null x
+refsMatch x [] = null x
+
+--
+-- Native format prettyprinting
+--
+
+-- | Indent string as a block.
+indentBy :: Int -- ^ Number of spaces to indent the block
+ -> Int -- ^ Number of spaces (rel to block) to indent first line
+ -> String -- ^ Contents of block to indent
+ -> String
+indentBy _ _ [] = ""
+indentBy num first str =
+ let (firstLine:restLines) = lines str
+ firstLineIndent = num + first
+ in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
+ (joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines)
+
+-- | Prettyprint list of Pandoc blocks elements.
+prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
+ -> [Block] -- ^ List of blocks
+ -> String
+prettyBlockList indent [] = indentBy indent 0 "[]"
+prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
+ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]"
+
+-- | Prettyprint Pandoc block element.
+prettyBlock :: Block -> String
+prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
+ (prettyBlockList 2 blocks)
+prettyBlock (OrderedList attribs blockLists) =
+ "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
+ (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks)
+ blockLists)) ++ " ]"
+prettyBlock (BulletList blockLists) = "BulletList\n" ++
+ indentBy 2 0 ("[ " ++ (joinWithSep ", "
+ (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++
+ indentBy 2 0 ("[" ++ (joinWithSep ",\n"
+ (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
+ indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
+prettyBlock (Table caption aligns widths header rows) =
+ "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
+ show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
+ (joinWithSep ",\n" (map prettyRow rows)) ++ " ]"
+ where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", "
+ (map (\blocks -> prettyBlockList 2 blocks)
+ cols))) ++ " ]"
+prettyBlock block = show block
+
+-- | Prettyprint Pandoc document.
+prettyPandoc :: Pandoc -> String
+prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
+ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
+
+--
+-- Pandoc block and inline list processing
+--
+
+-- | Generate infinite lazy list of markers for an ordered list,
+-- depending on list attributes.
+orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
+orderedListMarkers (start, numstyle, numdelim) =
+ let singleton c = [c]
+ nums = case numstyle of
+ DefaultStyle -> map show [start..]
+ Decimal -> map show [start..]
+ UpperAlpha -> drop (start - 1) $ cycle $
+ map singleton ['A'..'Z']
+ LowerAlpha -> drop (start - 1) $ cycle $
+ map singleton ['a'..'z']
+ UpperRoman -> map toRomanNumeral [start..]
+ LowerRoman -> map (map toLower . toRomanNumeral) [start..]
+ inDelim str = case numdelim of
+ DefaultDelim -> str ++ "."
+ Period -> str ++ "."
+ OneParen -> str ++ ")"
+ TwoParens -> "(" ++ str ++ ")"
+ in map inDelim nums
+
+-- | Normalize a list of inline elements: remove leading and trailing
+-- @Space@ elements, collapse double @Space@s into singles, and
+-- remove empty Str elements.
+normalizeSpaces :: [Inline] -> [Inline]
+normalizeSpaces [] = []
+normalizeSpaces list =
+ let removeDoubles [] = []
+ removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
+ removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
+ removeDoubles ((Str ""):rest) = removeDoubles rest
+ removeDoubles (x:rest) = x:(removeDoubles rest)
+ removeLeading (Space:xs) = removeLeading xs
+ removeLeading x = x
+ removeTrailing [] = []
+ removeTrailing lst = if (last lst == Space)
+ then init lst
+ else lst
+ in removeLeading $ removeTrailing $ removeDoubles list
+
+-- | Change final list item from @Para@ to @Plain@ if the list should
+-- be compact.
+compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
+ -> [[Block]]
+compactify [] = []
+compactify items =
+ let final = last items
+ others = init items
+ in case final of
+ [Para a] -> if any containsPara others
+ then items
+ else others ++ [[Plain a]]
+ _ -> items
+
+containsPara :: [Block] -> Bool
+containsPara [] = False
+containsPara ((Para _):_) = True
+containsPara ((BulletList items):rest) = any containsPara items ||
+ containsPara rest
+containsPara ((OrderedList _ items):rest) = any containsPara items ||
+ containsPara rest
+containsPara ((DefinitionList items):rest) = any containsPara (map snd items) ||
+ containsPara rest
+containsPara (_:rest) = containsPara rest
+
+-- | Data structure for defining hierarchical Pandoc documents
+data Element = Blk Block
+ | Sec [Inline] [Element] deriving (Eq, Read, Show)
+
+-- | Returns @True@ on Header block with at least the specified level
+headerAtLeast :: Int -> Block -> Bool
+headerAtLeast level (Header x _) = x <= level
+headerAtLeast _ _ = False
+
+-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
+hierarchicalize :: [Block] -> [Element]
+hierarchicalize [] = []
+hierarchicalize (block:rest) =
+ case block of
+ (Header level title) ->
+ let (thisSection, rest') = break (headerAtLeast level) rest
+ in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
+ x -> (Blk x):(hierarchicalize rest)
+
+-- | True if block is a Header block.
+isHeaderBlock :: Block -> Bool
+isHeaderBlock (Header _ _) = True
+isHeaderBlock _ = False
+
+--
+-- Writer options
+--
+
+-- | Options for writers
+data WriterOptions = WriterOptions
+ { writerStandalone :: Bool -- ^ Include header and footer
+ , writerHeader :: String -- ^ Header for the document
+ , writerTitlePrefix :: String -- ^ Prefix for HTML titles
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerS5 :: Bool -- ^ We're writing S5
+ , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML
+ , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js
+ , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
+ , writerIncremental :: Bool -- ^ Incremental S5 lists
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerIncludeBefore :: String -- ^ String to include before the body
+ , writerIncludeAfter :: String -- ^ String to include after the body
+ , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , writerWrapText :: Bool -- ^ Wrap text to line length
+ } deriving Show
+
+-- | Default writer options.
+defaultWriterOptions :: WriterOptions
+defaultWriterOptions =
+ WriterOptions { writerStandalone = False
+ , writerHeader = ""
+ , writerTitlePrefix = ""
+ , writerTabStop = 4
+ , writerTableOfContents = False
+ , writerS5 = False
+ , writerUseASCIIMathML = False
+ , writerASCIIMathMLURL = Nothing
+ , writerIgnoreNotes = False
+ , writerIncremental = False
+ , writerNumberSections = False
+ , writerIncludeBefore = ""
+ , writerIncludeAfter = ""
+ , writerStrictMarkdown = False
+ , writerReferenceLinks = False
+ , writerWrapText = True
+ }
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
new file mode 100644
index 000000000..16bdb9218
--- /dev/null
+++ b/src/Text/Pandoc/UTF8.hs
@@ -0,0 +1,45 @@
+-- | Functions for converting Unicode strings to UTF-8 and vice versa.
+--
+-- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
+-- (c) 2003, OGI School of Science & Engineering, Oregon Health and
+-- Science University.
+--
+-- Modified by Martin Norbaeck
+-- to pass illegal UTF-8 sequences through unchanged.
+module Text.Pandoc.UTF8 (
+ fromUTF8,
+ toUTF8
+ ) where
+
+-- From the Char module supplied with HBC.
+
+-- | Take a UTF-8 string and decode it into a Unicode string.
+fromUTF8 :: String -> String
+fromUTF8 "" = ""
+fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
+fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
+ '\x80' <= c' && c' <= '\xbf' =
+ toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
+fromUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' &&
+ '\x80' <= c' && c' <= '\xbf' &&
+ '\x80' <= c'' && c'' <= '\xbf' =
+ toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs
+fromUTF8 (c:cs) = c : fromUTF8 cs
+
+-- | Take a Unicode string and encode it as a UTF-8 string.
+toUTF8 :: String -> String
+toUTF8 "" = ""
+toUTF8 (c:cs) =
+ if c > '\x0000' && c < '\x0080' then
+ c : toUTF8 cs
+ else if c < toEnum 0x0800 then
+ let i = fromEnum c
+ in toEnum (0xc0 + i `div` 0x40) :
+ toEnum (0x80 + i `mod` 0x40) :
+ toUTF8 cs
+ else
+ let i = fromEnum c
+ in toEnum (0xe0 + i `div` 0x1000) :
+ toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :
+ toEnum (0x80 + i `mod` 0x40) :
+ toUTF8 cs
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
new file mode 100644
index 000000000..13912a9f3
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -0,0 +1,248 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.ConTeXt
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into ConTeXt.
+-}
+module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( (\\), intersperse )
+import Control.Monad.State
+
+type WriterState = Int -- number of next URL reference
+
+-- | Convert Pandoc to ConTeXt.
+writeConTeXt :: WriterOptions -> Pandoc -> String
+writeConTeXt options document = evalState (pandocToConTeXt options document) 1
+
+pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
+pandocToConTeXt options (Pandoc meta blocks) = do
+ main <- blockListToConTeXt blocks
+ let body = writerIncludeBefore options ++ main ++ writerIncludeAfter options
+ head <- if writerStandalone options
+ then contextHeader options meta
+ else return ""
+ let toc = if writerTableOfContents options
+ then "\\placecontent\n\n"
+ else ""
+ let foot = if writerStandalone options
+ then "\n\\stoptext\n"
+ else ""
+ return $ head ++ toc ++ body ++ foot
+
+-- | Insert bibliographic information into ConTeXt header.
+contextHeader :: WriterOptions -- ^ Options, including ConTeXt header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState String
+contextHeader options (Meta title authors date) = do
+ titletext <- if null title
+ then return ""
+ else inlineListToConTeXt title
+ let authorstext = if null authors
+ then ""
+ else if length authors == 1
+ then stringToConTeXt $ head authors
+ else stringToConTeXt $ (joinWithSep ", " $
+ init authors) ++ " & " ++ last authors
+ let datetext = if date == ""
+ then ""
+ else stringToConTeXt date
+ let titleblock = "\\doctitle{" ++ titletext ++ "}\n\
+ \ \\author{" ++ authorstext ++ "}\n\
+ \ \\date{" ++ datetext ++ "}\n\n"
+ let setupheads = if (writerNumberSections options)
+ then "\\setupheads[sectionnumber=yes, style=\\bf]\n"
+ else "\\setupheads[sectionnumber=no, style=\\bf]\n"
+ let header = writerHeader options
+ return $ header ++ setupheads ++ titleblock ++ "\\starttext\n\\maketitle\n\n"
+
+-- escape things as needed for ConTeXt
+
+escapeCharForConTeXt :: Char -> String
+escapeCharForConTeXt ch =
+ case ch of
+ '{' -> "\\letteropenbrace{}"
+ '}' -> "\\letterclosebrace{}"
+ '\\' -> "\\letterbackslash{}"
+ '$' -> "\\$"
+ '|' -> "\\letterbar{}"
+ '^' -> "\\letterhat{}"
+ '%' -> "\\%"
+ '~' -> "\\lettertilde{}"
+ '&' -> "\\&"
+ '#' -> "\\#"
+ '<' -> "\\letterless{}"
+ '>' -> "\\lettermore{}"
+ '_' -> "\\letterunderscore{}"
+ x -> [x]
+
+-- | Escape string for ConTeXt
+stringToConTeXt :: String -> String
+stringToConTeXt = concatMap escapeCharForConTeXt
+
+-- | Convert Pandoc block element to ConTeXt.
+blockToConTeXt :: Block -> State WriterState String
+blockToConTeXt Null = return ""
+blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n")
+blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n")
+blockToConTeXt (BlockQuote lst) = do
+ contents <- blockListToConTeXt lst
+ return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n"
+blockToConTeXt (CodeBlock str) =
+ return $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n"
+blockToConTeXt (RawHtml str) = return ""
+blockToConTeXt (BulletList lst) = do
+ contents <- mapM listItemToConTeXt lst
+ return $ "\\startltxitem\n" ++ concat contents ++ "\\stopltxitem\n"
+blockToConTeXt (OrderedList attribs lst) = case attribs of
+ (1, DefaultStyle, DefaultDelim) -> do
+ contents <- mapM listItemToConTeXt lst
+ return $ "\\startltxenum\n" ++ concat contents ++ "\\stopltxenum\n"
+ _ -> do
+ let markers = take (length lst) $ orderedListMarkers attribs
+ contents <- zipWithM orderedListItemToConTeXt markers lst
+ let markerWidth = maximum $ map length markers
+ let markerWidth' = if markerWidth < 3
+ then ""
+ else "[width=" ++
+ show ((markerWidth + 2) `div` 2) ++ "em]"
+ return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++
+ "\\stopitemize\n"
+blockToConTeXt (DefinitionList lst) =
+ mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat
+blockToConTeXt HorizontalRule = return "\\thinrule\n\n"
+blockToConTeXt (Header level lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ if level > 0 && level <= 3
+ then "\\" ++ concat (replicate (level - 1) "sub") ++
+ "section{" ++ contents ++ "}\n\n"
+ else contents ++ "\n\n"
+blockToConTeXt (Table caption aligns widths heads rows) = do
+ let colWidths = map printDecimal widths
+ let colDescriptor colWidth alignment = (case alignment of
+ AlignLeft -> 'l'
+ AlignRight -> 'r'
+ AlignCenter -> 'c'
+ AlignDefault -> 'l'):
+ "p(" ++ colWidth ++ "\\textwidth)|"
+ let colDescriptors = "|" ++ (concat $
+ zipWith colDescriptor colWidths aligns)
+ headers <- tableRowToConTeXt heads
+ captionText <- inlineListToConTeXt caption
+ let captionText' = if null caption then "none" else captionText
+ rows' <- mapM tableRowToConTeXt rows
+ return $ "\\placetable[here]{" ++ captionText' ++ "}\n\\starttable[" ++
+ colDescriptors ++ "]\n" ++ "\\HL\n" ++ headers ++ "\\HL\n" ++
+ concat rows' ++ "\\HL\n\\stoptable\n\n"
+
+printDecimal :: Float -> String
+printDecimal = printf "%.2f"
+
+tableRowToConTeXt cols = do
+ cols' <- mapM blockListToConTeXt cols
+ return $ "\\NC " ++ (concat $ intersperse "\\NC " cols') ++ "\\NC\\AR\n"
+
+listItemToConTeXt list = do
+ contents <- blockListToConTeXt list
+ return $ "\\item " ++ contents
+
+orderedListItemToConTeXt marker list = do
+ contents <- blockListToConTeXt list
+ return $ "\\sym{" ++ marker ++ "} " ++ contents
+
+defListItemToConTeXt (term, def) = do
+ term' <- inlineListToConTeXt term
+ def' <- blockListToConTeXt def
+ return $ "\\startdescr{" ++ term' ++ "}\n" ++
+ def' ++ "\n\\stopdescr\n"
+
+-- | Convert list of block elements to ConTeXt.
+blockListToConTeXt :: [Block] -> State WriterState String
+blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat
+
+-- | Convert list of inline elements to ConTeXt.
+inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
+ -> State WriterState String
+inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat
+
+isQuoted :: Inline -> Bool
+isQuoted (Quoted _ _) = True
+isQuoted Apostrophe = True
+isQuoted _ = False
+
+-- | Convert inline element to ConTeXt
+inlineToConTeXt :: Inline -- ^ Inline to convert
+ -> State WriterState String
+inlineToConTeXt (Emph lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "{\\em " ++ contents ++ "}"
+inlineToConTeXt (Strong lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "{\\bf " ++ contents ++ "}"
+inlineToConTeXt (Strikeout lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\overstrikes{" ++ contents ++ "}"
+inlineToConTeXt (Superscript lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\high{" ++ contents ++ "}"
+inlineToConTeXt (Subscript lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\low{" ++ contents ++ "}"
+inlineToConTeXt (Code str) = return $ "\\type{" ++ str ++ "}"
+inlineToConTeXt (Quoted SingleQuote lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\quote{" ++ contents ++ "}"
+inlineToConTeXt (Quoted DoubleQuote lst) = do
+ contents <- inlineListToConTeXt lst
+ return $ "\\quotation{" ++ contents ++ "}"
+inlineToConTeXt Apostrophe = return "'"
+inlineToConTeXt EmDash = return "---"
+inlineToConTeXt EnDash = return "--"
+inlineToConTeXt Ellipses = return "\\ldots{}"
+inlineToConTeXt (Str str) = return $ stringToConTeXt str
+inlineToConTeXt (TeX str) = return str
+inlineToConTeXt (HtmlInline str) = return ""
+inlineToConTeXt (LineBreak) = return "\\crlf\n"
+inlineToConTeXt Space = return " "
+inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own
+ inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links...
+inlineToConTeXt (Link text (src, _)) = do
+ next <- get
+ put (next + 1)
+ let ref = show next
+ label <- inlineListToConTeXt text
+ return $ "\\useurl[" ++ ref ++ "][" ++ src ++ "][][" ++ label ++
+ "]\\from[" ++ ref ++ "]"
+inlineToConTeXt (Image alternate (src, tit)) = do
+ alt <- inlineListToConTeXt alternate
+ return $ "\\placefigure\n[]\n[fig:" ++ alt ++ "]\n{" ++
+ tit ++ "}\n{\\externalfigure[" ++ src ++ "]}"
+inlineToConTeXt (Note contents) = do
+ contents' <- blockListToConTeXt contents
+ return $ "\\footnote{" ++ contents' ++ "}"
+
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
new file mode 100644
index 000000000..13dc8585d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -0,0 +1,299 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docbook
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Docbook XML.
+-}
+module Text.Pandoc.Writers.Docbook ( writeDocbook) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Data.List ( isPrefixOf, drop )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+--
+-- code to format XML
+--
+
+-- | Escape one character as needed for XML.
+escapeCharForXML :: Char -> String
+escapeCharForXML x = case x of
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ '\160' -> "&nbsp;"
+ c -> [c]
+
+-- | True if the character needs to be escaped.
+needsEscaping :: Char -> Bool
+needsEscaping c = c `elem` "&<>\"\160"
+
+-- | Escape string as needed for XML. Entity references are not preserved.
+escapeStringForXML :: String -> String
+escapeStringForXML "" = ""
+escapeStringForXML str =
+ case break needsEscaping str of
+ (okay, "") -> okay
+ (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
+
+-- | Return a text object with a string of formatted XML attributes.
+attributeList :: [(String, String)] -> Doc
+attributeList = text . concatMap
+ (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++
+ escapeStringForXML b ++ "\"")
+
+-- | Put the supplied contents between start and end tags of tagType,
+-- with specified attributes and (if specified) indentation.
+inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc
+inTags isIndented tagType attribs contents =
+ let openTag = char '<' <> text tagType <> attributeList attribs <>
+ char '>'
+ closeTag = text "</" <> text tagType <> char '>'
+ in if isIndented
+ then openTag $$ nest 2 contents $$ closeTag
+ else openTag <> contents <> closeTag
+
+-- | Return a self-closing tag of tagType with specified attributes
+selfClosingTag :: String -> [(String, String)] -> Doc
+selfClosingTag tagType attribs =
+ char '<' <> text tagType <> attributeList attribs <> text " />"
+
+-- | Put the supplied contents between start and end tags of tagType.
+inTagsSimple :: String -> Doc -> Doc
+inTagsSimple tagType = inTags False tagType []
+
+-- | Put the supplied contents in indented block btw start and end tags.
+inTagsIndented :: String -> Doc -> Doc
+inTagsIndented tagType = inTags True tagType []
+
+--
+-- Docbook writer
+--
+
+-- | Convert list of authors to a docbook <author> section
+authorToDocbook :: [Char] -> Doc
+authorToDocbook name = inTagsIndented "author" $
+ if ',' `elem` name
+ then -- last name first
+ let (lastname, rest) = break (==',') name
+ firstname = removeLeadingSpace rest in
+ inTagsSimple "firstname" (text $ escapeStringForXML firstname) <>
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+ else -- last name last
+ let namewords = words name
+ lengthname = length namewords
+ (firstname, lastname) = case lengthname of
+ 0 -> ("","")
+ 1 -> ("", name)
+ n -> (joinWithSep " " (take (n-1) namewords), last namewords)
+ in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$
+ inTagsSimple "surname" (text $ escapeStringForXML lastname)
+
+-- | Convert Pandoc document to string in Docbook format.
+writeDocbook :: WriterOptions -> Pandoc -> String
+writeDocbook opts (Pandoc (Meta title authors date) blocks) =
+ let head = if writerStandalone opts
+ then text (writerHeader opts)
+ else empty
+ meta = if writerStandalone opts
+ then inTagsIndented "articleinfo" $
+ (inTagsSimple "title" (wrap opts title)) $$
+ (vcat (map authorToDocbook authors)) $$
+ (inTagsSimple "date" (text $ escapeStringForXML date))
+ else empty
+ elements = hierarchicalize blocks
+ before = writerIncludeBefore opts
+ after = writerIncludeAfter opts
+ body = (if null before then empty else text before) $$
+ vcat (map (elementToDocbook opts) elements) $$
+ (if null after then empty else text after)
+ body' = if writerStandalone opts
+ then inTagsIndented "article" (meta $$ body)
+ else body
+ in render $ head $$ body' $$ text ""
+
+-- | Convert an Element to Docbook.
+elementToDocbook :: WriterOptions -> Element -> Doc
+elementToDocbook opts (Blk block) = blockToDocbook opts block
+elementToDocbook opts (Sec title elements) =
+ -- Docbook doesn't allow sections with no content, so insert some if needed
+ let elements' = if null elements
+ then [Blk (Para [])]
+ else elements
+ in inTagsIndented "section" $
+ inTagsSimple "title" (wrap opts title) $$
+ vcat (map (elementToDocbook opts) elements')
+
+-- | Convert a list of Pandoc blocks to Docbook.
+blocksToDocbook :: WriterOptions -> [Block] -> Doc
+blocksToDocbook opts = vcat . map (blockToDocbook opts)
+
+-- | Auxiliary function to convert Plain block to Para.
+plainToPara (Plain x) = Para x
+plainToPara x = x
+
+-- | Convert a list of pairs of terms and definitions into a list of
+-- Docbook varlistentrys.
+deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc
+deflistItemsToDocbook opts items =
+ vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items
+
+-- | Convert a term and a list of blocks into a Docbook varlistentry.
+deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc
+deflistItemToDocbook opts term def =
+ let def' = map plainToPara def
+ in inTagsIndented "varlistentry" $
+ inTagsIndented "term" (inlinesToDocbook opts term) $$
+ inTagsIndented "listitem" (blocksToDocbook opts def')
+
+-- | Convert a list of lists of blocks to a list of Docbook list items.
+listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc
+listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items
+
+-- | Convert a list of blocks into a Docbook list item.
+listItemToDocbook :: WriterOptions -> [Block] -> Doc
+listItemToDocbook opts item =
+ inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item
+
+-- | Convert a Pandoc block element to Docbook.
+blockToDocbook :: WriterOptions -> Block -> Doc
+blockToDocbook opts Null = empty
+blockToDocbook opts (Plain lst) = wrap opts lst
+blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
+blockToDocbook opts (BlockQuote blocks) =
+ inTagsIndented "blockquote" $ blocksToDocbook opts blocks
+blockToDocbook opts (CodeBlock str) =
+ text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>"
+blockToDocbook opts (BulletList lst) =
+ inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
+blockToDocbook opts (OrderedList _ []) = empty
+blockToDocbook opts (OrderedList (start, numstyle, numdelim) (first:rest)) =
+ let attribs = case numstyle of
+ DefaultStyle -> []
+ Decimal -> [("numeration", "arabic")]
+ UpperAlpha -> [("numeration", "upperalpha")]
+ LowerAlpha -> [("numeration", "loweralpha")]
+ UpperRoman -> [("numeration", "upperroman")]
+ LowerRoman -> [("numeration", "lowerroman")]
+ items = if start == 1
+ then listItemsToDocbook opts (first:rest)
+ else (inTags True "listitem" [("override",show start)]
+ (blocksToDocbook opts $ map plainToPara first)) $$
+ listItemsToDocbook opts rest
+ in inTags True "orderedlist" attribs items
+blockToDocbook opts (DefinitionList lst) =
+ inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
+blockToDocbook opts (RawHtml str) = text str -- raw XML block
+blockToDocbook opts HorizontalRule = empty -- not semantic
+blockToDocbook opts (Table caption aligns widths headers rows) =
+ let alignStrings = map alignmentToString aligns
+ captionDoc = if null caption
+ then empty
+ else inTagsIndented "caption"
+ (inlinesToDocbook opts caption)
+ tableType = if isEmpty captionDoc then "informaltable" else "table"
+ in inTagsIndented tableType $ captionDoc $$
+ (colHeadsToDocbook opts alignStrings widths headers) $$
+ (vcat $ map (tableRowToDocbook opts alignStrings) rows)
+
+colHeadsToDocbook opts alignStrings widths headers =
+ let heads = zipWith3 (\align width item ->
+ tableItemToDocbook opts "th" align width item)
+ alignStrings widths headers
+ in inTagsIndented "tr" $ vcat heads
+
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToDocbook opts aligns cols = inTagsIndented "tr" $
+ vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols
+
+tableItemToDocbook opts tag align width item =
+ let attrib = [("align", align)] ++
+ if width /= 0
+ then [("style", "{width: " ++
+ show (truncate (100*width)) ++ "%;}")]
+ else []
+ in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
+
+-- | Take list of inline elements and return wrapped doc.
+wrap :: WriterOptions -> [Inline] -> Doc
+wrap opts lst = if writerWrapText opts
+ then fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
+ else inlinesToDocbook opts lst
+
+-- | Convert a list of inline elements to Docbook.
+inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
+inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
+
+-- | Convert an inline element to Docbook.
+inlineToDocbook :: WriterOptions -> Inline -> Doc
+inlineToDocbook opts (Str str) = text $ escapeStringForXML str
+inlineToDocbook opts (Emph lst) =
+ inTagsSimple "emphasis" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Strong lst) =
+ inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst
+inlineToDocbook opts (Strikeout lst) =
+ inTags False "emphasis" [("role", "strikethrough")] $
+ inlinesToDocbook opts lst
+inlineToDocbook opts (Superscript lst) =
+ inTagsSimple "superscript" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Subscript lst) =
+ inTagsSimple "subscript" $ inlinesToDocbook opts lst
+inlineToDocbook opts (Quoted _ lst) =
+ inTagsSimple "quote" $ inlinesToDocbook opts lst
+inlineToDocbook opts Apostrophe = char '\''
+inlineToDocbook opts Ellipses = text "&#8230;"
+inlineToDocbook opts EmDash = text "&#8212;"
+inlineToDocbook opts EnDash = text "&#8211;"
+inlineToDocbook opts (Code str) =
+ inTagsSimple "literal" $ text (escapeStringForXML str)
+inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
+inlineToDocbook opts (HtmlInline str) = empty
+inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>"
+inlineToDocbook opts Space = char ' '
+inlineToDocbook opts (Link txt (src, tit)) =
+ if isPrefixOf "mailto:" src
+ then let src' = drop 7 src
+ emailLink = inTagsSimple "email" $ text $
+ escapeStringForXML $ src'
+ in if txt == [Code src']
+ then emailLink
+ else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
+ char ')'
+ else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
+inlineToDocbook opts (Image alt (src, tit)) =
+ let titleDoc = if null tit
+ then empty
+ else inTagsIndented "objectinfo" $
+ inTagsIndented "title" (text $ escapeStringForXML tit)
+ in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
+ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
+inlineToDocbook opts (Note contents) =
+ inTagsIndented "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
new file mode 100644
index 000000000..7ec95d8ef
--- /dev/null
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -0,0 +1,458 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.HTML
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to HTML.
+-}
+module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.ASCIIMathML
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.Pandoc.Shared
+import Text.Regex ( mkRegex, matchRegex )
+import Numeric ( showHex )
+import Data.Char ( ord, toLower )
+import Data.List ( isPrefixOf, intersperse )
+import qualified Data.Set as S
+import Control.Monad.State
+import Text.XHtml.Transitional
+
+data WriterState = WriterState
+ { stNotes :: [Html] -- ^ List of notes
+ , stIds :: [String] -- ^ List of header identifiers
+ , stMath :: Bool -- ^ Math is used in document
+ , stCSS :: S.Set String -- ^ CSS to include in header
+ } deriving Show
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState {stNotes= [], stIds = [],
+ stMath = False, stCSS = S.empty}
+
+-- Helpers to render HTML with the appropriate function.
+render opts = if writerWrapText opts then renderHtml else showHtml
+renderFragment opts = if writerWrapText opts
+ then renderHtmlFragment
+ else showHtmlFragment
+
+-- | Convert Pandoc document to Html string.
+writeHtmlString :: WriterOptions -> Pandoc -> String
+writeHtmlString opts =
+ if writerStandalone opts
+ then render opts . writeHtml opts
+ else renderFragment opts . writeHtml opts
+
+-- | Convert Pandoc document to Html structure.
+writeHtml :: WriterOptions -> Pandoc -> Html
+writeHtml opts (Pandoc (Meta tit authors date) blocks) =
+ let titlePrefix = writerTitlePrefix opts
+ topTitle = evalState (inlineListToHtml opts tit) defaultWriterState
+ topTitle' = if null titlePrefix
+ then topTitle
+ else titlePrefix +++ " - " +++ topTitle
+ metadata = thetitle topTitle' +++
+ meta ! [httpequiv "Content-Type",
+ content "text/html; charset=UTF-8"] +++
+ meta ! [name "generator", content "pandoc"] +++
+ (toHtmlFromList $
+ map (\a -> meta ! [name "author", content a]) authors) +++
+ (if null date
+ then noHtml
+ else meta ! [name "date", content date])
+ titleHeader = if writerStandalone opts && not (null tit) &&
+ not (writerS5 opts)
+ then h1 ! [theclass "title"] $ topTitle
+ else noHtml
+ headerBlocks = filter isHeaderBlock blocks
+ ids = uniqueIdentifiers $
+ map (\(Header _ lst) -> lst) headerBlocks
+ toc = if writerTableOfContents opts
+ then tableOfContents opts headerBlocks ids
+ else noHtml
+ (blocks', newstate) =
+ runState (blockListToHtml opts blocks)
+ (defaultWriterState {stIds = ids})
+ cssLines = stCSS newstate
+ css = if S.null cssLines
+ then noHtml
+ else style ! [thetype "text/css"] $ primHtml $
+ '\n':(unlines $ S.toList cssLines)
+ math = if stMath newstate
+ then case writerASCIIMathMLURL opts of
+ Just path -> script ! [src path,
+ thetype "text/javascript"] $
+ noHtml
+ Nothing -> primHtml asciiMathMLScript
+ else noHtml
+ head = header $ metadata +++ math +++ css +++
+ primHtml (writerHeader opts)
+ notes = reverse (stNotes newstate)
+ before = primHtml $ writerIncludeBefore opts
+ after = primHtml $ writerIncludeAfter opts
+ thebody = before +++ titleHeader +++ toc +++ blocks' +++
+ footnoteSection opts notes +++ after
+ in if writerStandalone opts
+ then head +++ body thebody
+ else thebody
+
+-- | Construct table of contents from list of header blocks and identifiers.
+-- Assumes there are as many identifiers as header blocks.
+tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
+tableOfContents _ [] _ = noHtml
+tableOfContents opts headers ids =
+ let opts' = opts { writerIgnoreNotes = True }
+ contentsTree = hierarchicalize headers
+ contents = evalState (mapM (elementToListItem opts') contentsTree)
+ (defaultWriterState {stIds = ids})
+ in thediv ! [identifier "toc"] $ unordList contents
+
+-- | Converts an Element to a list item for a table of contents,
+-- retrieving the appropriate identifier from state.
+elementToListItem :: WriterOptions -> Element -> State WriterState Html
+elementToListItem opts (Blk _) = return noHtml
+elementToListItem opts (Sec headerText subsecs) = do
+ st <- get
+ let ids = stIds st
+ let (id, rest) = if null ids
+ then ("", [])
+ else (head ids, tail ids)
+ put $ st {stIds = rest}
+ txt <- inlineListToHtml opts headerText
+ subHeads <- mapM (elementToListItem opts) subsecs
+ let subList = if null subHeads
+ then noHtml
+ else unordList subHeads
+ return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++
+ subList
+
+-- | Convert list of Note blocks to a footnote <div>.
+-- Assumes notes are sorted.
+footnoteSection :: WriterOptions -> [Html] -> Html
+footnoteSection opts notes =
+ if null notes
+ then noHtml
+ else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes)
+
+-- | Obfuscate a "mailto:" link using Javascript.
+obfuscateLink :: WriterOptions -> String -> String -> Html
+obfuscateLink opts text src =
+ let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$"
+ src' = map toLower src
+ in case (matchRegex emailRegex src') of
+ (Just [name, domain]) ->
+ let domain' = substitute "." " dot " domain
+ at' = obfuscateChar '@'
+ (linkText, altText) =
+ if text == drop 7 src' -- autolink
+ then ("'<code>'+e+'</code>'", name ++ " at " ++ domain')
+ else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++
+ domain' ++ ")")
+ in if writerStrictMarkdown opts
+ then -- need to use primHtml or &'s are escaped to &amp; in URL
+ primHtml $ "<a href=\"" ++ (obfuscateString src')
+ ++ "\">" ++ (obfuscateString text) ++ "</a>"
+ else (script ! [thetype "text/javascript"] $
+ primHtml ("\n<!--\nh='" ++
+ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
+ obfuscateString name ++ "';e=n+a+h;\n" ++
+ "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
+ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
+ noscript (primHtml $ obfuscateString altText)
+ _ -> anchor ! [href src] $ primHtml text -- malformed email
+
+-- | Obfuscate character as entity.
+obfuscateChar :: Char -> String
+obfuscateChar char =
+ let num = ord char
+ numstr = if even num then show num else "x" ++ showHex num ""
+ in "&#" ++ numstr ++ ";"
+
+-- | Obfuscate string using entities.
+obfuscateString :: String -> String
+obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+ let c' = ord c
+ in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+ c' >= 0xE000 && c' <= 0xE0FF
+ then True
+ else False
+
+-- | Add CSS for document header.
+addToCSS :: String -> State WriterState ()
+addToCSS item = do
+ st <- get
+ let current = stCSS st
+ put $ st {stCSS = S.insert item current}
+
+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier [] = ""
+inlineListToIdentifier (x:xs) =
+ xAsText ++ inlineListToIdentifier xs
+ where xAsText = case x of
+ Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
+ concat $ intersperse "-" $ words $ map toLower s
+ Emph lst -> inlineListToIdentifier lst
+ Strikeout lst -> inlineListToIdentifier lst
+ Superscript lst -> inlineListToIdentifier lst
+ Subscript lst -> inlineListToIdentifier lst
+ Strong lst -> inlineListToIdentifier lst
+ Quoted _ lst -> inlineListToIdentifier lst
+ Code s -> s
+ Space -> "-"
+ EmDash -> "-"
+ EnDash -> "-"
+ Apostrophe -> ""
+ Ellipses -> ""
+ LineBreak -> "-"
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> inlineListToIdentifier lst
+ Image lst _ -> inlineListToIdentifier lst
+ Note _ -> ""
+
+-- | Return unique identifiers for list of inline lists.
+uniqueIdentifiers :: [[Inline]] -> [String]
+uniqueIdentifiers ls =
+ let addIdentifier (nonuniqueIds, uniqueIds) l =
+ let new = inlineListToIdentifier l
+ matches = length $ filter (== new) nonuniqueIds
+ new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+ in (new:nonuniqueIds, new':uniqueIds)
+ in reverse $ snd $ foldl addIdentifier ([],[]) ls
+
+-- | Convert Pandoc block element to HTML.
+blockToHtml :: WriterOptions -> Block -> State WriterState Html
+blockToHtml opts Null = return $ noHtml
+blockToHtml opts (Plain lst) = inlineListToHtml opts lst
+blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph)
+blockToHtml opts (RawHtml str) = return $ primHtml str
+blockToHtml opts (HorizontalRule) = return $ hr
+blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n")
+ -- the final \n for consistency with Markdown.pl
+blockToHtml opts (BlockQuote blocks) =
+ -- in S5, treat list in blockquote specially
+ -- if default is incremental, make it nonincremental;
+ -- otherwise incremental
+ if writerS5 opts
+ then let inc = not (writerIncremental opts) in
+ case blocks of
+ [BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
+ (BulletList lst)
+ [OrderedList attribs lst] ->
+ blockToHtml (opts {writerIncremental = inc})
+ (OrderedList attribs lst)
+ otherwise -> blockListToHtml opts blocks >>=
+ (return . blockquote)
+ else blockListToHtml opts blocks >>= (return . blockquote)
+blockToHtml opts (Header level lst) = do
+ contents <- inlineListToHtml opts lst
+ st <- get
+ let ids = stIds st
+ let (id, rest) = if null ids
+ then ("", [])
+ else (head ids, tail ids)
+ put $ st {stIds = rest}
+ let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts)
+ then []
+ else [identifier id]
+ let contents' = if writerTableOfContents opts
+ then anchor ! [href ("#TOC-" ++ id)] $ contents
+ else contents
+ return $ case level of
+ 1 -> h1 contents' ! attribs
+ 2 -> h2 contents' ! attribs
+ 3 -> h3 contents' ! attribs
+ 4 -> h4 contents' ! attribs
+ 5 -> h5 contents' ! attribs
+ 6 -> h6 contents' ! attribs
+ _ -> paragraph contents' ! attribs
+blockToHtml opts (BulletList lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ unordList ! attribs $ contents
+blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
+ contents <- mapM (blockListToHtml opts) lst
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ let attribs = (if writerIncremental opts
+ then [theclass "incremental"]
+ else []) ++
+ (if startnum /= 1
+ then [start startnum]
+ else []) ++
+ (if numstyle /= DefaultStyle
+ then [theclass numstyle']
+ else [])
+ if numstyle /= DefaultStyle
+ then addToCSS $ "ol." ++ numstyle' ++
+ " { list-style-type: " ++
+ numstyle' ++ "; }"
+ else return ()
+ return $ ordList ! attribs $ contents
+blockToHtml opts (DefinitionList lst) = do
+ contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term
+ def' <- blockListToHtml opts def
+ return $ (term', def')) lst
+ let attribs = if writerIncremental opts
+ then [theclass "incremental"]
+ else []
+ return $ defList ! attribs $ contents
+blockToHtml opts (Table capt aligns widths headers rows) = do
+ let alignStrings = map alignmentToString aligns
+ captionDoc <- if null capt
+ then return noHtml
+ else inlineListToHtml opts capt >>= return . caption
+ colHeads <- colHeadsToHtml opts alignStrings
+ widths headers
+ rows' <- mapM (tableRowToHtml opts alignStrings) rows
+ return $ table $ captionDoc +++ colHeads +++ rows'
+
+colHeadsToHtml opts alignStrings widths headers = do
+ heads <- sequence $ zipWith3
+ (\align width item -> tableItemToHtml opts th align width item)
+ alignStrings widths headers
+ return $ tr $ toHtmlFromList heads
+
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableRowToHtml opts aligns cols =
+ (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>=
+ return . tr . toHtmlFromList
+
+tableItemToHtml opts tag align' width item = do
+ contents <- blockListToHtml opts item
+ let attrib = [align align'] ++
+ if width /= 0
+ then [thestyle ("width: " ++ show (truncate (100*width)) ++
+ "%;")]
+ else []
+ return $ tag ! attrib $ contents
+
+blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
+blockListToHtml opts lst =
+ mapM (blockToHtml opts) lst >>= return . toHtmlFromList
+
+-- | Convert list of Pandoc inline elements to HTML.
+inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
+inlineListToHtml opts lst =
+ mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
+
+-- | Convert Pandoc inline element to HTML.
+inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
+inlineToHtml opts inline =
+ case inline of
+ (Str str) -> return $ stringToHtml str
+ (Space) -> return $ stringToHtml " "
+ (LineBreak) -> return $ br
+ (EmDash) -> return $ primHtmlChar "mdash"
+ (EnDash) -> return $ primHtmlChar "ndash"
+ (Ellipses) -> return $ primHtmlChar "hellip"
+ (Apostrophe) -> return $ primHtmlChar "rsquo"
+ (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
+ (Strong lst) -> inlineListToHtml opts lst >>= return . strong
+ (Code str) -> return $ thecode << str
+ (Strikeout lst) -> addToCSS
+ ".strikeout { text-decoration: line-through; }" >>
+ inlineListToHtml opts lst >>=
+ return . (thespan ! [theclass "strikeout"])
+ (Superscript lst) -> inlineListToHtml opts lst >>= return . sup
+ (Subscript lst) -> inlineListToHtml opts lst >>= return . sub
+ (Quoted quoteType lst) ->
+ let (leftQuote, rightQuote) = case quoteType of
+ SingleQuote -> (primHtmlChar "lsquo",
+ primHtmlChar "rsquo")
+ DoubleQuote -> (primHtmlChar "ldquo",
+ primHtmlChar "rdquo")
+ in do contents <- inlineListToHtml opts lst
+ return $ leftQuote +++ contents +++ rightQuote
+ (TeX str) -> (if writerUseASCIIMathML opts
+ then modify (\st -> st {stMath = True})
+ else return ()) >> return (stringToHtml str)
+ (HtmlInline str) -> return $ primHtml str
+ (Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src ->
+ return $ obfuscateLink opts str src
+ (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do
+ linkText <- inlineListToHtml opts txt
+ return $ obfuscateLink opts (show linkText) src
+ (Link txt (src,tit)) -> do
+ linkText <- inlineListToHtml opts txt
+ return $ anchor ! ([href src] ++
+ if null tit then [] else [title tit]) $
+ linkText
+ (Image txt (source,tit)) -> do
+ alternate <- inlineListToHtml opts txt
+ let alternate' = renderFragment opts alternate
+ let attributes = [src source] ++
+ (if null tit
+ then []
+ else [title tit]) ++
+ if null txt
+ then []
+ else [alt alternate']
+ return $ image ! attributes
+ -- note: null title included, as in Markdown.pl
+ (Note contents) -> do
+ st <- get
+ let notes = stNotes st
+ let number = (length notes) + 1
+ let ref = show number
+ htmlContents <- blockListToNote opts ref contents
+ -- push contents onto front of notes
+ put $ st {stNotes = (htmlContents:notes)}
+ return $ anchor ! [href ("#fn" ++ ref),
+ theclass "footnoteRef",
+ identifier ("fnref" ++ ref)] <<
+ sup << ref
+
+blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
+blockListToNote opts ref blocks =
+ -- If last block is Para or Plain, include the backlink at the end of
+ -- that block. Otherwise, insert a new Plain block with the backlink.
+ let backlink = [HtmlInline $ " <a href=\"#fnref" ++ ref ++
+ "\" class=\"footnoteBackLink\"" ++
+ " title=\"Jump back to footnote " ++ ref ++ "\">&#8617;</a>"]
+ blocks' = if null blocks
+ then []
+ else let lastBlock = last blocks
+ otherBlocks = init blocks
+ in case lastBlock of
+ (Para lst) -> otherBlocks ++
+ [Para (lst ++ backlink)]
+ (Plain lst) -> otherBlocks ++
+ [Plain (lst ++ backlink)]
+ _ -> otherBlocks ++ [lastBlock,
+ Plain backlink]
+ in do contents <- blockListToHtml opts blocks'
+ return $ li ! [identifier ("fn" ++ ref)] $ contents
+
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
new file mode 100644
index 000000000..f64e06e24
--- /dev/null
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -0,0 +1,310 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.LaTeX
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' format into LaTeX.
+-}
+module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( (\\), isInfixOf, isSuffixOf, intersperse )
+import Data.Char ( toLower )
+import qualified Data.Set as S
+import Control.Monad.State
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+
+data WriterState =
+ WriterState { stIncludes :: S.Set String -- strings to include in header
+ , stInNote :: Bool -- @True@ if we're in a note
+ , stOLLevel :: Int } -- level of ordered list nesting
+
+-- | Add line to header.
+addToHeader :: String -> State WriterState ()
+addToHeader str = do
+ st <- get
+ let includes = stIncludes st
+ put st {stIncludes = S.insert str includes}
+
+-- | Convert Pandoc to LaTeX.
+writeLaTeX :: WriterOptions -> Pandoc -> String
+writeLaTeX options document =
+ render $ evalState (pandocToLaTeX options document) $
+ WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1 }
+
+pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToLaTeX options (Pandoc meta blocks) = do
+ main <- blockListToLaTeX blocks
+ head <- if writerStandalone options
+ then latexHeader options meta
+ else return empty
+ let before = if null (writerIncludeBefore options)
+ then empty
+ else text (writerIncludeBefore options)
+ let after = if null (writerIncludeAfter options)
+ then empty
+ else text (writerIncludeAfter options)
+ let body = before $$ main $$ after
+ let toc = if writerTableOfContents options
+ then text "\\tableofcontents\n"
+ else empty
+ let foot = if writerStandalone options
+ then text "\\end{document}"
+ else empty
+ return $ head $$ toc $$ body $$ foot
+
+-- | Insert bibliographic information into LaTeX header.
+latexHeader :: WriterOptions -- ^ Options, including LaTeX header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState Doc
+latexHeader options (Meta title authors date) = do
+ titletext <- if null title
+ then return empty
+ else inlineListToLaTeX title >>= return . inCmd "title"
+ headerIncludes <- get >>= return . S.toList . stIncludes
+ let extras = text $ unlines headerIncludes
+ let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes
+ then text "\\VerbatimFootnotes % allows verbatim text in footnotes"
+ else empty
+ let authorstext = text $ "\\author{" ++
+ joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}"
+ let datetext = if date == ""
+ then empty
+ else text $ "\\date{" ++ stringToLaTeX date ++ "}"
+ let maketitle = if null title then empty else text "\\maketitle"
+ let secnumline = if (writerNumberSections options)
+ then empty
+ else text "\\setcounter{secnumdepth}{0}"
+ let baseHeader = text $ writerHeader options
+ let header = baseHeader $$ extras
+ return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$
+ datetext $$ text "\\begin{document}" $$ maketitle $$ text ""
+
+-- escape things as needed for LaTeX
+
+stringToLaTeX :: String -> String
+stringToLaTeX = escapeStringUsing latexEscapes
+ where latexEscapes = backslashEscapes "{}$%&_#" ++
+ [ ('^', "\\^{}")
+ , ('\\', "\\textbackslash{}")
+ , ('~', "\\ensuremath{\\sim}")
+ , ('|', "\\textbar{}")
+ , ('<', "\\textless{}")
+ , ('>', "\\textgreater{}")
+ ]
+
+-- | Puts contents into LaTeX command.
+inCmd :: String -> Doc -> Doc
+inCmd cmd contents = char '\\' <> text cmd <> braces contents
+
+-- | Remove all code elements from list of inline elements
+-- (because it's illegal to have verbatim inside some command arguments)
+deVerb :: [Inline] -> [Inline]
+deVerb [] = []
+deVerb ((Code str):rest) =
+ (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest)
+deVerb (other:rest) = other:(deVerb rest)
+
+-- | Convert Pandoc block element to LaTeX.
+blockToLaTeX :: Block -- ^ Block to convert
+ -> State WriterState Doc
+blockToLaTeX Null = return empty
+blockToLaTeX (Plain lst) = wrapped inlineListToLaTeX lst >>= return
+blockToLaTeX (Para lst) =
+ wrapped inlineListToLaTeX lst >>= return . (<> char '\n')
+blockToLaTeX (BlockQuote lst) = do
+ contents <- blockListToLaTeX lst
+ return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}"
+blockToLaTeX (CodeBlock str) = do
+ st <- get
+ env <- if stInNote st
+ then do addToHeader "\\usepackage{fancyvrb}"
+ return "Verbatim"
+ else return "verbatim"
+ return $ text ("\\begin{" ++ env ++ "}\n") <> text str <>
+ text ("\n\\end{" ++ env ++ "}")
+blockToLaTeX (RawHtml str) = return empty
+blockToLaTeX (BulletList lst) = do
+ items <- mapM listItemToLaTeX lst
+ return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}"
+blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
+ st <- get
+ let oldlevel = stOLLevel st
+ put $ st {stOLLevel = oldlevel + 1}
+ items <- mapM listItemToLaTeX lst
+ modify (\st -> st {stOLLevel = oldlevel})
+ exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim
+ then do addToHeader "\\usepackage{enumerate}"
+ return $ char '[' <>
+ text (head (orderedListMarkers (1, numstyle,
+ numdelim))) <> char ']'
+ else return empty
+ let resetcounter = if start /= 1 && oldlevel <= 4
+ then text $ "\\setcounter{enum" ++
+ map toLower (toRomanNumeral oldlevel) ++
+ "}{" ++ show (start - 1) ++ "}"
+ else empty
+ return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$
+ vcat items $$ text "\\end{enumerate}"
+blockToLaTeX (DefinitionList lst) = do
+ items <- mapM defListItemToLaTeX lst
+ return $ text "\\begin{description}" $$ vcat items $$
+ text "\\end{description}"
+blockToLaTeX HorizontalRule = return $ text $
+ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n"
+blockToLaTeX (Header level lst) = do
+ txt <- inlineListToLaTeX (deVerb lst)
+ return $ if (level > 0) && (level <= 3)
+ then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++
+ "section{") <> txt <> text "}\n"
+ else txt <> char '\n'
+blockToLaTeX (Table caption aligns widths heads rows) = do
+ headers <- tableRowToLaTeX heads
+ captionText <- inlineListToLaTeX caption
+ rows' <- mapM tableRowToLaTeX rows
+ let colWidths = map (printf "%.2f") widths
+ let colDescriptors = concat $ zipWith
+ (\width align -> ">{\\PBS" ++
+ (case align of
+ AlignLeft -> "\\raggedright"
+ AlignRight -> "\\raggedleft"
+ AlignCenter -> "\\centering"
+ AlignDefault -> "\\raggedright") ++
+ "\\hspace{0pt}}p{" ++ width ++
+ "\\columnwidth}")
+ colWidths aligns
+ let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$
+ headers $$ text "\\hline" $$ vcat rows' $$
+ text "\\end{tabular}"
+ let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}"
+ addToHeader "\\usepackage{array}\n\
+ \% This is needed because raggedright in table elements redefines \\\\:\n\
+ \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\
+ \\\let\\PBS=\\PreserveBackslash"
+ return $ if isEmpty captionText
+ then centered tableBody <> char '\n'
+ else text "\\begin{table}[h]" $$ centered tableBody $$
+ inCmd "caption" captionText $$ text "\\end{table}\n"
+
+blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat
+
+tableRowToLaTeX cols = mapM blockListToLaTeX cols >>=
+ return . ($$ text "\\\\") . foldl (\row item -> row $$
+ (if isEmpty row then empty else text " & ") <> item) empty
+
+listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item " $$) .
+ (nest 2)
+
+defListItemToLaTeX (term, def) = do
+ term' <- inlineListToLaTeX $ deVerb term
+ def' <- blockListToLaTeX def
+ return $ text "\\item[" <> term' <> text "]" $$ def'
+
+-- | Convert list of inline elements to LaTeX.
+inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
+ -> State WriterState Doc
+inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat
+
+isQuoted :: Inline -> Bool
+isQuoted (Quoted _ _) = True
+isQuoted Apostrophe = True
+isQuoted _ = False
+
+-- | Convert inline element to LaTeX
+inlineToLaTeX :: Inline -- ^ Inline to convert
+ -> State WriterState Doc
+inlineToLaTeX (Emph lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph"
+inlineToLaTeX (Strong lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf"
+inlineToLaTeX (Strikeout lst) = do
+ contents <- inlineListToLaTeX $ deVerb lst
+ addToHeader "\\usepackage[normalem]{ulem}"
+ return $ inCmd "sout" contents
+inlineToLaTeX (Superscript lst) =
+ inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript"
+inlineToLaTeX (Subscript lst) = do
+ contents <- inlineListToLaTeX $ deVerb lst
+ -- oddly, latex includes \textsuperscript but not \textsubscript
+ -- so we have to define it:
+ addToHeader "\\newcommand{\\textsubscript}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}"
+ return $ inCmd "textsubscript" contents
+inlineToLaTeX (Code str) = do
+ st <- get
+ if stInNote st
+ then do addToHeader "\\usepackage{fancyvrb}"
+ else return ()
+ let chr = ((enumFromTo '!' '~') \\ str) !! 0
+ return $ text $ "\\verb" ++ [chr] ++ str ++ [chr]
+inlineToLaTeX (Quoted SingleQuote lst) = do
+ contents <- inlineListToLaTeX lst
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then text "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then text "\\,"
+ else empty
+ return $ char '`' <> s1 <> contents <> s2 <> char '\''
+inlineToLaTeX (Quoted DoubleQuote lst) = do
+ contents <- inlineListToLaTeX lst
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then text "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then text "\\,"
+ else empty
+ return $ text "``" <> s1 <> contents <> s2 <> text "''"
+inlineToLaTeX Apostrophe = return $ char '\''
+inlineToLaTeX EmDash = return $ text "---"
+inlineToLaTeX EnDash = return $ text "--"
+inlineToLaTeX Ellipses = return $ text "\\ldots{}"
+inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str
+inlineToLaTeX (TeX str) = return $ text str
+inlineToLaTeX (HtmlInline str) = return empty
+inlineToLaTeX (LineBreak) = return $ text "\\\\"
+inlineToLaTeX Space = return $ char ' '
+inlineToLaTeX (Link txt (src, _)) = do
+ addToHeader "\\usepackage[breaklinks=true]{hyperref}"
+ case txt of
+ [Code x] | x == src -> -- autolink
+ do addToHeader "\\usepackage{url}"
+ return $ text $ "\\url{" ++ x ++ "}"
+ _ -> do contents <- inlineListToLaTeX $ deVerb txt
+ return $ text ("\\href{" ++ src ++ "}{") <> contents <>
+ char '}'
+inlineToLaTeX (Image alternate (source, tit)) = do
+ addToHeader "\\usepackage{graphicx}"
+ return $ text $ "\\includegraphics{" ++ source ++ "}"
+inlineToLaTeX (Note contents) = do
+ st <- get
+ put (st {stInNote = True})
+ contents' <- blockListToLaTeX contents
+ modify (\st -> st {stInNote = False})
+ let rawnote = stripTrailingNewlines $ render contents'
+ -- note: a \n before } is needed when note ends with a Verbatim environment
+ let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote
+ return $ text "%\n\\footnote{" <>
+ text rawnote <> (if optNewline then char '\n' else empty) <> char '}'
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
new file mode 100644
index 000000000..8e14c2bf0
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -0,0 +1,293 @@
+{-
+Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Man
+ Copyright : Copyright (C) 2007 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to groff man page format.
+
+-}
+module Text.Pandoc.Writers.Man ( writeMan) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Printf ( printf )
+import Data.List ( isPrefixOf, drop, nub, intersperse )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Preprocessors = [String] -- e.g. "t" for tbl
+type WriterState = (Notes, Preprocessors)
+
+-- | Convert Pandoc to Man.
+writeMan :: WriterOptions -> Pandoc -> String
+writeMan opts document = render $ evalState (pandocToMan opts document) ([],[])
+
+-- | Return groff man representation of document.
+pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToMan opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ let before' = if null before then empty else text before
+ let after' = if null after then empty else text after
+ (head, foot) <- metaToMan opts meta
+ body <- blockListToMan opts blocks
+ (notes, preprocessors) <- get
+ let preamble = if null preprocessors || not (writerStandalone opts)
+ then empty
+ else text $ ".\\\" " ++ concat (nub preprocessors)
+ notes' <- notesToMan opts (reverse notes)
+ return $ preamble $$ head $$ before' $$ body $$ notes' $$ foot $$ after'
+
+-- | Insert bibliographic information into Man header and footer.
+metaToMan :: WriterOptions -- ^ Options, including Man header
+ -> Meta -- ^ Meta with bibliographic information
+ -> State WriterState (Doc, Doc)
+metaToMan options (Meta title authors date) = do
+ titleText <- inlineListToMan options title
+ let (cmdName, rest) = break (== ' ') $ render titleText
+ let (title', section) = case reverse cmdName of
+ (')':d:'(':xs) | d `elem` ['0'..'9'] ->
+ (text (reverse xs), char d)
+ xs -> (text (reverse xs), doubleQuotes empty)
+ let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $
+ splitBy '|' rest
+ let head = (text ".TH") <+> title' <+> section <+>
+ doubleQuotes (text date) <+> hsep extras
+ let foot = case length authors of
+ 0 -> empty
+ 1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors)
+ 2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors)
+ return $ if writerStandalone options
+ then (head, foot)
+ else (empty, empty)
+
+-- | Return man representation of notes.
+notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToMan opts notes =
+ if null notes
+ then return empty
+ else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>=
+ return . (text ".SH NOTES" $$) . vcat
+
+-- | Return man representation of a note.
+noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToMan opts num note = do
+ contents <- blockListToMan opts note
+ let marker = text "\n.SS [" <> text (show num) <> char ']'
+ return $ marker $$ contents
+
+-- | Association list of characters to escape.
+manEscapes :: [(Char, String)]
+manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\"
+
+-- | Escape special characters for Man.
+escapeString :: String -> String
+escapeString = escapeStringUsing manEscapes
+
+-- | Escape a literal (code) section for Man.
+escapeCode :: String -> String
+escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ")
+
+-- | Convert Pandoc block element to man.
+blockToMan :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToMan opts Null = return empty
+blockToMan opts (Plain inlines) =
+ wrapIfNeeded opts (inlineListToMan opts) inlines
+blockToMan opts (Para inlines) = do
+ contents <- wrapIfNeeded opts (inlineListToMan opts) inlines
+ return $ text ".PP" $$ contents
+blockToMan opts (RawHtml str) = return $ text str
+blockToMan opts HorizontalRule = return $ text $ ".PP\n * * * * *"
+blockToMan opts (Header level inlines) = do
+ contents <- inlineListToMan opts inlines
+ let heading = case level of
+ 1 -> ".SH "
+ _ -> ".SS "
+ return $ text heading <> contents
+blockToMan opts (CodeBlock str) = return $
+ text ".PP" $$ text "\\f[CR]" $$
+ text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]"
+blockToMan opts (BlockQuote blocks) = do
+ contents <- blockListToMan opts blocks
+ return $ text ".RS" $$ contents $$ text ".RE"
+blockToMan opts (Table caption alignments widths headers rows) =
+ let aligncode AlignLeft = "l"
+ aligncode AlignRight = "r"
+ aligncode AlignCenter = "c"
+ aligncode AlignDefault = "l"
+ in do
+ caption' <- inlineListToMan opts caption
+ modify (\(notes, preprocessors) -> (notes, "t":preprocessors))
+ let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths
+ -- 78n default width - 8n indent = 70n
+ let coldescriptions = text $ joinWithSep " "
+ (zipWith (\align width -> aligncode align ++ width)
+ alignments iwidths) ++ "."
+ colheadings <- mapM (blockListToMan opts) headers
+ let makeRow cols = text "T{" $$
+ (vcat $ intersperse (text "T}@T{") cols) $$
+ text "T}"
+ let colheadings' = makeRow colheadings
+ body <- mapM (\row -> do
+ cols <- mapM (blockListToMan opts) row
+ return $ makeRow cols) rows
+ return $ text ".PP" $$ caption' $$
+ text ".TS" $$ text "tab(@);" $$ coldescriptions $$
+ colheadings' $$ char '_' $$ vcat body $$ text ".TE"
+
+blockToMan opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMan opts) items
+ return (vcat contents)
+blockToMan opts (OrderedList attribs items) = do
+ let markers = take (length items) $ orderedListMarkers attribs
+ let indent = 1 + (maximum $ map length markers)
+ contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $
+ zip markers items
+ return (vcat contents)
+blockToMan opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMan opts) items
+ return (vcat contents)
+
+-- | Convert bullet list item (list of blocks) to man.
+bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToMan opts [] = return empty
+bulletListItemToMan opts ((Para first):rest) =
+ bulletListItemToMan opts ((Plain first):rest)
+bulletListItemToMan opts ((Plain first):rest) = do
+ first' <- blockToMan opts (Plain first)
+ rest' <- blockListToMan opts rest
+ let first'' = text ".IP \\[bu] 2" $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 2" $$ rest' $$ text ".RE"
+ return (first'' $$ rest'')
+bulletListItemToMan opts (first:rest) = do
+ first' <- blockToMan opts first
+ rest' <- blockListToMan opts rest
+ return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE"
+
+-- | Convert ordered list item (a list of blocks) to man.
+orderedListItemToMan :: WriterOptions -- ^ options
+ -> String -- ^ order marker for list item
+ -> Int -- ^ number of spaces to indent
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToMan _ _ _ [] = return empty
+orderedListItemToMan opts num indent ((Para first):rest) =
+ orderedListItemToMan opts num indent ((Plain first):rest)
+orderedListItemToMan opts num indent (first:rest) = do
+ first' <- blockToMan opts first
+ rest' <- blockListToMan opts rest
+ let num' = printf ("%" ++ show (indent - 1) ++ "s") num
+ let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first'
+ let rest'' = if null rest
+ then empty
+ else text ".RS 4" $$ rest' $$ text ".RE"
+ return $ first'' $$ rest''
+
+-- | Convert definition list item (label, list of blocks) to man.
+definitionListItemToMan :: WriterOptions
+ -> ([Inline],[Block])
+ -> State WriterState Doc
+definitionListItemToMan opts (label, items) = do
+ labelText <- inlineListToMan opts label
+ contents <- if null items
+ then return empty
+ else do
+ let (first, rest) = case items of
+ ((Para x):y) -> (Plain x,y)
+ (x:y) -> (x,y)
+ rest' <- mapM (\item -> blockToMan opts item)
+ rest >>= (return . vcat)
+ first' <- blockToMan opts first
+ return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
+ return $ text ".TP\n.B " <> labelText $+$ contents
+
+-- | Convert list of Pandoc block elements to man.
+blockListToMan :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToMan opts blocks =
+ mapM (blockToMan opts) blocks >>= (return . vcat)
+
+-- | Convert list of Pandoc inline elements to man.
+inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat)
+
+-- | Convert Pandoc inline element to man.
+inlineToMan :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMan opts (Emph lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\f[I]" <> contents <> text "\\f[]"
+inlineToMan opts (Strong lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\f[B]" <> contents <> text "\\f[]"
+inlineToMan opts (Strikeout lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "[STRIKEOUT:" <> contents <> char ']'
+inlineToMan opts (Superscript lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '^' <> contents <> char '^'
+inlineToMan opts (Subscript lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '~' <> contents <> char '~'
+inlineToMan opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMan opts lst
+ return $ char '`' <> contents <> char '\''
+inlineToMan opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMan opts lst
+ return $ text "\\[lq]" <> contents <> text "\\[rq]"
+inlineToMan opts EmDash = return $ text "\\[em]"
+inlineToMan opts EnDash = return $ text "\\[en]"
+inlineToMan opts Apostrophe = return $ char '\''
+inlineToMan opts Ellipses = return $ text "\\&..."
+inlineToMan opts (Code str) =
+ return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]"
+inlineToMan opts (Str str) = return $ text $ escapeString str
+inlineToMan opts (TeX str) = return $ text $ escapeCode str
+inlineToMan opts (HtmlInline str) = return $ text $ escapeCode str
+inlineToMan opts (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n"
+inlineToMan opts Space = return $ char ' '
+inlineToMan opts (Link txt (src, _)) = do
+ linktext <- inlineListToMan opts txt
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ return $ if txt == [Code srcSuffix]
+ then char '<' <> text srcSuffix <> char '>'
+ else linktext <> text " (" <> text src <> char ')'
+inlineToMan opts (Image alternate (source, tit)) = do
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate == [Str source]) -- to prevent autolinks
+ then [Str "image"]
+ else alternate
+ linkPart <- inlineToMan opts (Link txt (source, tit))
+ return $ char '[' <> text "IMAGE: " <> linkPart <> char ']'
+inlineToMan opts (Note contents) = do
+ modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state
+ (notes, _) <- get
+ let ref = show $ (length notes)
+ return $ char '[' <> text ref <> char ']'
+
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
new file mode 100644
index 000000000..4cecaae5d
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -0,0 +1,373 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Markdown
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to markdown-formatted plain text.
+
+Markdown: <http://daringfireball.net/projects/markdown/>
+-}
+module Text.Pandoc.Writers.Markdown ( writeMarkdown) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Blocks
+import Text.ParserCombinators.Parsec ( parse, (<|>), GenParser )
+import Data.List ( group, isPrefixOf, drop, find, intersperse )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Refs = KeyTable
+type WriterState = (Notes, Refs)
+
+-- | Convert Pandoc to Markdown.
+writeMarkdown :: WriterOptions -> Pandoc -> String
+writeMarkdown opts document =
+ render $ evalState (pandocToMarkdown opts document) ([],[])
+
+-- | Return markdown representation of document.
+pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToMarkdown opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ let before' = if null before then empty else text before
+ let after' = if null after then empty else text after
+ metaBlock <- metaToMarkdown opts meta
+ let head = if writerStandalone opts
+ then metaBlock $+$ text (writerHeader opts)
+ else empty
+ let headerBlocks = filter isHeaderBlock blocks
+ let toc = if writerTableOfContents opts
+ then tableOfContents opts headerBlocks
+ else empty
+ body <- blockListToMarkdown opts blocks
+ (notes, _) <- get
+ notes' <- notesToMarkdown opts (reverse notes)
+ (_, refs) <- get -- note that the notes may contain refs
+ refs' <- keyTableToMarkdown opts (reverse refs)
+ return $ head $+$ before' $+$ toc $+$ body $+$ text "" $+$
+ notes' $+$ text "" $+$ refs' $+$ after'
+
+-- | Return markdown representation of reference key table.
+keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
+keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
+
+-- | Return markdown representation of a reference key.
+keyToMarkdown :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+keyToMarkdown opts (label, (src, tit)) = do
+ label' <- inlineListToMarkdown opts label
+ let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ return $ text " " <> char '[' <> label' <> char ']' <> text ": " <>
+ text src <> tit'
+
+-- | Return markdown representation of notes.
+notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToMarkdown opts notes =
+ mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
+ return . vcat
+
+-- | Return markdown representation of a note.
+noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToMarkdown opts num blocks = do
+ contents <- blockListToMarkdown opts blocks
+ let marker = text "[^" <> text (show num) <> text "]:"
+ return $ hang marker (writerTabStop opts) contents
+
+-- | Escape special characters for Markdown.
+escapeString :: String -> String
+escapeString = escapeStringUsing markdownEscapes
+ where markdownEscapes = ('\160', "&nbsp;"):(backslashEscapes "`<\\*_^~")
+
+-- | Convert bibliographic information into Markdown header.
+metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc
+metaToMarkdown opts (Meta title authors date) = do
+ title' <- titleToMarkdown opts title
+ authors' <- authorsToMarkdown authors
+ date' <- dateToMarkdown date
+ return $ title' $+$ authors' $+$ date'
+
+titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+titleToMarkdown opts [] = return empty
+titleToMarkdown opts lst = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "% " <> contents
+
+authorsToMarkdown :: [String] -> State WriterState Doc
+authorsToMarkdown [] = return empty
+authorsToMarkdown lst = return $
+ text "% " <> text (joinWithSep ", " (map escapeString lst))
+
+dateToMarkdown :: String -> State WriterState Doc
+dateToMarkdown [] = return empty
+dateToMarkdown str = return $ text "% " <> text (escapeString str)
+
+-- | Construct table of contents from list of header blocks.
+tableOfContents :: WriterOptions -> [Block] -> Doc
+tableOfContents opts headers =
+ let opts' = opts { writerIgnoreNotes = True }
+ contents = BulletList $ map elementToListItem $ hierarchicalize headers
+ in evalState (blockToMarkdown opts' contents) ([],[])
+
+-- | Converts an Element to a list item for a table of contents,
+elementToListItem :: Element -> [Block]
+elementToListItem (Blk _) = []
+elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
+ if null subsecs
+ then []
+ else [BulletList $ map elementToListItem subsecs]
+
+-- | Ordered list start parser for use in Para below.
+olMarker :: GenParser Char st Char
+olMarker = do (start, style, delim) <- anyOrderedListMarker
+ if delim == Period &&
+ (style == UpperAlpha || (style == UpperRoman &&
+ start `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then spaceChar >> spaceChar
+ else spaceChar
+
+-- | True if string begins with an ordered list marker
+beginsWithOrderedListMarker :: String -> Bool
+beginsWithOrderedListMarker str =
+ case parse olMarker "para start" str of
+ Left _ -> False
+ Right _ -> True
+
+wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedMarkdown opts inlines = do
+ let chunks = splitBy LineBreak inlines
+ let chunks' = if null chunks
+ then []
+ else (map (++ [Str " "]) $ init chunks) ++ [last chunks]
+ lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks'
+ return $ vcat lns
+
+-- | Convert Pandoc block element to markdown.
+blockToMarkdown :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToMarkdown opts Null = return empty
+blockToMarkdown opts (Plain inlines) =
+ wrappedMarkdown opts inlines
+blockToMarkdown opts (Para inlines) = do
+ contents <- wrappedMarkdown opts inlines
+ -- escape if para starts with ordered list marker
+ let esc = if (not (writerStrictMarkdown opts)) &&
+ beginsWithOrderedListMarker (render contents)
+ then char '\\'
+ else empty
+ return $ esc <> contents <> text "\n"
+blockToMarkdown opts (RawHtml str) = return $ text str
+blockToMarkdown opts HorizontalRule = return $ text "\n* * * * *\n"
+blockToMarkdown opts (Header level inlines) = do
+ contents <- inlineListToMarkdown opts inlines
+ return $ text ((replicate level '#') ++ " ") <> contents <> text "\n"
+blockToMarkdown opts (CodeBlock str) = return $
+ (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
+blockToMarkdown opts (BlockQuote blocks) = do
+ contents <- blockListToMarkdown opts blocks
+ return $ (vcat $ map (text . ("> " ++)) $ lines $ render contents) <>
+ text "\n"
+blockToMarkdown opts (Table caption aligns widths headers rows) = do
+ caption' <- inlineListToMarkdown opts caption
+ let caption'' = if null caption
+ then empty
+ else text "" $+$ (text "Table: " <> caption')
+ headers' <- mapM (blockListToMarkdown opts) headers
+ let widthsInChars = map (floor . (78 *)) widths
+ let alignHeader alignment = case alignment of
+ AlignLeft -> leftAlignBlock
+ AlignCenter -> centerAlignBlock
+ AlignRight -> rightAlignBlock
+ AlignDefault -> leftAlignBlock
+ let makeRow = hsepBlocks . (zipWith alignHeader aligns) .
+ (zipWith docToBlock widthsInChars)
+ let head = makeRow headers'
+ rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row
+ return $ makeRow cols) rows
+ let tableWidth = sum widthsInChars
+ let maxRowHeight = maximum $ map heightOfBlock (head:rows')
+ let isMultilineTable = maxRowHeight > 1
+ let underline = hsep $
+ map (\width -> text $ replicate width '-') widthsInChars
+ let border = if isMultilineTable
+ then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-'
+ else empty
+ let spacer = if isMultilineTable
+ then text ""
+ else empty
+ let body = vcat $ intersperse spacer $ map blockToDoc rows'
+ return $ (nest 2 $ border $+$ (blockToDoc head) $+$ underline $+$ body $+$
+ border $+$ caption'') <> text "\n"
+blockToMarkdown opts (BulletList items) = do
+ contents <- mapM (bulletListItemToMarkdown opts) items
+ return $ (vcat contents) <> text "\n"
+blockToMarkdown opts (OrderedList attribs items) = do
+ let markers = orderedListMarkers attribs
+ let markers' = map (\m -> if length m < 3
+ then m ++ replicate (3 - length m) ' '
+ else m) markers
+ contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
+ zip markers' items
+ return $ (vcat contents) <> text "\n"
+blockToMarkdown opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToMarkdown opts) items
+ return $ (vcat contents) <> text "\n"
+
+-- | Convert bullet list item (list of blocks) to markdown.
+bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToMarkdown opts items = do
+ contents <- blockListToMarkdown opts items
+ return $ hang (text "- ") (writerTabStop opts) contents
+
+-- | Convert ordered list item (a list of blocks) to markdown.
+orderedListItemToMarkdown :: WriterOptions -- ^ options
+ -> String -- ^ list item marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToMarkdown opts marker items = do
+ contents <- blockListToMarkdown opts items
+ -- The complexities here are needed to ensure that if the list
+ -- marker is 4 characters or longer, the second and following
+ -- lines are indented 4 spaces but the list item begins after the marker.
+ return $ sep [nest (min (3 - length marker) 0) (text marker),
+ nest (writerTabStop opts) contents]
+
+-- | Convert definition list item (label, list of blocks) to markdown.
+definitionListItemToMarkdown :: WriterOptions
+ -> ([Inline],[Block])
+ -> State WriterState Doc
+definitionListItemToMarkdown opts (label, items) = do
+ labelText <- inlineListToMarkdown opts label
+ let tabStop = writerTabStop opts
+ let leader = char ':'
+ contents <- mapM (\item -> blockToMarkdown opts item >>=
+ (\txt -> return (leader $$ nest tabStop txt)))
+ items >>= return . vcat
+ return $ labelText $+$ contents
+
+-- | Convert list of Pandoc block elements to markdown.
+blockListToMarkdown :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToMarkdown opts blocks =
+ mapM (blockToMarkdown opts) blocks >>= return . vcat
+
+-- | Get reference for target; if none exists, create unique one and return.
+-- Prefer label if possible; otherwise, generate a unique key.
+getReference :: [Inline] -> Target -> State WriterState [Inline]
+getReference label (src, tit) = do
+ (_,refs) <- get
+ case find ((== (src, tit)) . snd) refs of
+ Just (ref, _) -> return ref
+ Nothing -> do
+ let label' = case find ((== label) . fst) refs of
+ Just _ -> -- label is used; generate numerical label
+ case find (\n -> not (any (== [Str (show n)])
+ (map fst refs))) [1..10000] of
+ Just x -> [Str (show x)]
+ Nothing -> error "no unique label"
+ Nothing -> label
+ modify (\(notes, refs) -> (notes, (label', (src,tit)):refs))
+ return label'
+
+-- | Convert list of Pandoc inline elements to markdown.
+inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToMarkdown opts lst =
+ mapM (inlineToMarkdown opts) lst >>= return . hcat
+
+-- | Convert Pandoc inline element to markdown.
+inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
+inlineToMarkdown opts (Emph lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '*' <> contents <> char '*'
+inlineToMarkdown opts (Strong lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "**" <> contents <> text "**"
+inlineToMarkdown opts (Strikeout lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ text "~~" <> contents <> text "~~"
+inlineToMarkdown opts (Superscript lst) = do
+ contents <- inlineListToMarkdown opts lst
+ let contents' = text $ substitute " " "\\ " $ render contents
+ return $ char '^' <> contents' <> char '^'
+inlineToMarkdown opts (Subscript lst) = do
+ contents <- inlineListToMarkdown opts lst
+ let contents' = text $ substitute " " "\\ " $ render contents
+ return $ char '~' <> contents' <> char '~'
+inlineToMarkdown opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '\'' <> contents <> char '\''
+inlineToMarkdown opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToMarkdown opts lst
+ return $ char '"' <> contents <> char '"'
+inlineToMarkdown opts EmDash = return $ text "--"
+inlineToMarkdown opts EnDash = return $ char '-'
+inlineToMarkdown opts Apostrophe = return $ char '\''
+inlineToMarkdown opts Ellipses = return $ text "..."
+inlineToMarkdown opts (Code str) =
+ let tickGroups = filter (\s -> '`' `elem` s) $ group str
+ longest = if null tickGroups
+ then 0
+ else maximum $ map length tickGroups
+ marker = replicate (longest + 1) '`'
+ spacer = if (longest == 0) then "" else " " in
+ return $ text (marker ++ spacer ++ str ++ spacer ++ marker)
+inlineToMarkdown opts (Str str) = return $ text $ escapeString str
+inlineToMarkdown opts (TeX str) = return $ text str
+inlineToMarkdown opts (HtmlInline str) = return $ text str
+inlineToMarkdown opts (LineBreak) = return $ text " \n"
+inlineToMarkdown opts Space = return $ char ' '
+inlineToMarkdown opts (Link txt (src, tit)) = do
+ linktext <- inlineListToMarkdown opts txt
+ let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let useRefLinks = writerReferenceLinks opts
+ let useAuto = null tit && txt == [Code srcSuffix]
+ ref <- if useRefLinks then getReference txt (src, tit) else return []
+ reftext <- inlineListToMarkdown opts ref
+ return $ if useAuto
+ then char '<' <> text srcSuffix <> char '>'
+ else if useRefLinks
+ then let first = char '[' <> linktext <> char ']'
+ second = if txt == ref
+ then text "[]"
+ else char '[' <> reftext <> char ']'
+ in first <> second
+ else char '[' <> linktext <> char ']' <>
+ char '(' <> text src <> linktitle <> char ')'
+inlineToMarkdown opts (Image alternate (source, tit)) = do
+ let txt = if (null alternate) || (alternate == [Str ""]) ||
+ (alternate == [Str source]) -- to prevent autolinks
+ then [Str "image"]
+ else alternate
+ linkPart <- inlineToMarkdown opts (Link txt (source, tit))
+ return $ char '!' <> linkPart
+inlineToMarkdown opts (Note contents) = do
+ modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state
+ (notes, _) <- get
+ let ref = show $ (length notes)
+ return $ text "[^" <> text ref <> char ']'
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
new file mode 100644
index 000000000..ddcbf95c0
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -0,0 +1,325 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.RST
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to reStructuredText.
+
+reStructuredText: <http://docutils.sourceforge.net/rst.html>
+-}
+module Text.Pandoc.Writers.RST ( writeRST) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Blocks
+import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse )
+import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Control.Monad.State
+
+type Notes = [[Block]]
+type Refs = KeyTable
+type WriterState = (Notes, Refs, Refs) -- first Refs is links, second pictures
+
+-- | Convert Pandoc to RST.
+writeRST :: WriterOptions -> Pandoc -> String
+writeRST opts document =
+ render $ evalState (pandocToRST opts document) ([],[],[])
+
+-- | Return RST representation of document.
+pandocToRST :: WriterOptions -> Pandoc -> State WriterState Doc
+pandocToRST opts (Pandoc meta blocks) = do
+ let before = writerIncludeBefore opts
+ let after = writerIncludeAfter opts
+ before' = if null before then empty else text before
+ after' = if null after then empty else text after
+ metaBlock <- metaToRST opts meta
+ let head = if (writerStandalone opts)
+ then metaBlock $+$ text (writerHeader opts)
+ else empty
+ body <- blockListToRST opts blocks
+ (notes, _, _) <- get
+ notes' <- notesToRST opts (reverse notes)
+ (_, refs, pics) <- get -- note that the notes may contain refs
+ refs' <- keyTableToRST opts (reverse refs)
+ pics' <- pictTableToRST opts (reverse pics)
+ return $ head $+$ before' $+$ body $+$ notes' $+$ text "" $+$ refs' $+$
+ pics' $+$ after'
+
+-- | Return RST representation of reference key table.
+keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
+keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat
+
+-- | Return RST representation of a reference key.
+keyToRST :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+keyToRST opts (label, (src, tit)) = do
+ label' <- inlineListToRST opts label
+ let label'' = if ':' `elem` (render label')
+ then char '`' <> label' <> char '`'
+ else label'
+ return $ text ".. _" <> label'' <> text ": " <> text src
+
+-- | Return RST representation of notes.
+notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc
+notesToRST opts notes =
+ mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>=
+ return . vcat
+
+-- | Return RST representation of a note.
+noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc
+noteToRST opts num note = do
+ contents <- blockListToRST opts note
+ let marker = text ".. [" <> text (show num) <> text "] "
+ return $ hang marker 3 contents
+
+-- | Return RST representation of picture reference table.
+pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc
+pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat
+
+-- | Return RST representation of a picture substitution reference.
+pictToRST :: WriterOptions
+ -> ([Inline], (String, String))
+ -> State WriterState Doc
+pictToRST opts (label, (src, _)) = do
+ label' <- inlineListToRST opts label
+ return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <>
+ text src
+
+-- | Take list of inline elements and return wrapped doc.
+wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
+wrappedRST opts inlines = mapM (wrapIfNeeded opts (inlineListToRST opts))
+ (splitBy LineBreak inlines) >>= return . vcat
+
+-- | Escape special characters for RST.
+escapeString :: String -> String
+escapeString = escapeStringUsing (backslashEscapes "`\\|*_")
+
+-- | Convert bibliographic information into RST header.
+metaToRST :: WriterOptions -> Meta -> State WriterState Doc
+metaToRST opts (Meta title authors date) = do
+ title' <- titleToRST opts title
+ authors' <- authorsToRST authors
+ date' <- dateToRST date
+ let toc = if writerTableOfContents opts
+ then text "" $+$ text ".. contents::"
+ else empty
+ return $ title' $+$ authors' $+$ date' $+$ toc
+
+titleToRST :: WriterOptions -> [Inline] -> State WriterState Doc
+titleToRST opts [] = return empty
+titleToRST opts lst = do
+ contents <- inlineListToRST opts lst
+ let titleLength = length $ render contents
+ let border = text (replicate titleLength '=')
+ return $ border $+$ contents $+$ border <> text "\n"
+
+authorsToRST :: [String] -> State WriterState Doc
+authorsToRST [] = return empty
+authorsToRST (first:rest) = do
+ rest' <- authorsToRST rest
+ return $ (text ":Author: " <> text first) $+$ rest'
+
+dateToRST :: String -> State WriterState Doc
+dateToRST [] = return empty
+dateToRST str = return $ text ":Date: " <> text (escapeString str)
+
+-- | Convert Pandoc block element to RST.
+blockToRST :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToRST opts Null = return empty
+blockToRST opts (Plain inlines) = wrappedRST opts inlines
+blockToRST opts (Para [TeX str]) =
+ let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
+ return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str')
+blockToRST opts (Para inlines) = do
+ contents <- wrappedRST opts inlines
+ return $ contents <> text "\n"
+blockToRST opts (RawHtml str) =
+ let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
+ return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str')
+blockToRST opts HorizontalRule = return $ text "--------------\n"
+blockToRST opts (Header level inlines) = do
+ contents <- inlineListToRST opts inlines
+ let headerLength = length $ render contents
+ let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
+ let border = text $ replicate headerLength headerChar
+ return $ contents $+$ border <> text "\n"
+blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$
+ (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
+blockToRST opts (BlockQuote blocks) = do
+ contents <- blockListToRST opts blocks
+ return $ (nest (writerTabStop opts) contents) <> text "\n"
+blockToRST opts (Table caption aligns widths headers rows) = do
+ caption' <- inlineListToRST opts caption
+ let caption'' = if null caption
+ then empty
+ else text "" $+$ (text "Table: " <> caption')
+ headers' <- mapM (blockListToRST opts) headers
+ let widthsInChars = map (floor . (78 *)) widths
+ let alignHeader alignment = case alignment of
+ AlignLeft -> leftAlignBlock
+ AlignCenter -> centerAlignBlock
+ AlignRight -> rightAlignBlock
+ AlignDefault -> leftAlignBlock
+ let hpipeBlocks blocks = hcatBlocks [beg, middle, end]
+ where height = maximum (map heightOfBlock blocks)
+ sep = TextBlock 3 height (replicate height " | ")
+ beg = TextBlock 2 height (replicate height "| ")
+ end = TextBlock 2 height (replicate height " |")
+ middle = hcatBlocks $ intersperse sep blocks
+ let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars
+ let head = makeRow headers'
+ rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row
+ return $ makeRow cols) rows
+ let tableWidth = sum widthsInChars
+ let maxRowHeight = maximum $ map heightOfBlock (head:rows')
+ let border ch = char '+' <> char ch <>
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ map (\l -> text $ replicate l ch) widthsInChars) <>
+ char ch <> char '+'
+ let body = vcat $ intersperse (border '-') $ map blockToDoc rows'
+ return $ border '-' $+$ blockToDoc head $+$ border '=' $+$ body $+$
+ border '-' $$ caption'' $$ text ""
+blockToRST opts (BulletList items) = do
+ contents <- mapM (bulletListItemToRST opts) items
+ -- ensure that sublists have preceding blank line
+ return $ text "" $+$ vcat contents <> text "\n"
+blockToRST opts (OrderedList (start, style, delim) items) = do
+ let markers = if start == 1 && style == DefaultStyle && delim == DefaultDelim
+ then take (length items) $ repeat "#."
+ else take (length items) $ orderedListMarkers
+ (start, style, delim)
+ let maxMarkerLength = maximum $ map length markers
+ let markers' = map (\m -> let s = maxMarkerLength - length m
+ in m ++ replicate s ' ') markers
+ contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $
+ zip markers' items
+ -- ensure that sublists have preceding blank line
+ return $ text "" $+$ vcat contents <> text "\n"
+blockToRST opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToRST opts) items
+ return $ (vcat contents) <> text "\n"
+
+-- | Convert bullet list item (list of blocks) to RST.
+bulletListItemToRST :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToRST opts items = do
+ contents <- blockListToRST opts items
+ return $ hang (text "- ") 3 contents
+
+-- | Convert ordered list item (a list of blocks) to RST.
+orderedListItemToRST :: WriterOptions -- ^ options
+ -> String -- ^ marker for list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToRST opts marker items = do
+ contents <- blockListToRST opts items
+ return $ hang (text marker) (length marker + 1) contents
+
+-- | Convert defintion list item (label, list of blocks) to RST.
+definitionListItemToRST :: WriterOptions -> ([Inline], [Block]) -> State WriterState Doc
+definitionListItemToRST opts (label, items) = do
+ label <- inlineListToRST opts label
+ contents <- blockListToRST opts items
+ return $ label $+$ nest (writerTabStop opts) contents
+
+-- | Convert list of Pandoc block elements to RST.
+blockListToRST :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToRST opts blocks =
+ mapM (blockToRST opts) blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to RST.
+inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat
+
+-- | Convert Pandoc inline element to RST.
+inlineToRST :: WriterOptions -> Inline -> State WriterState Doc
+inlineToRST opts (Emph lst) = do
+ contents <- inlineListToRST opts lst
+ return $ char '*' <> contents <> char '*'
+inlineToRST opts (Strong lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "**" <> contents <> text "**"
+inlineToRST opts (Strikeout lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "[STRIKEOUT:" <> contents <> char ']'
+inlineToRST opts (Superscript lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "\\ :sup:`" <> contents <> text "`\\ "
+inlineToRST opts (Subscript lst) = do
+ contents <- inlineListToRST opts lst
+ return $ text "\\ :sub:`" <> contents <> text "`\\ "
+inlineToRST opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToRST opts lst
+ return $ char '\'' <> contents <> char '\''
+inlineToRST opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToRST opts lst
+ return $ char '"' <> contents <> char '"'
+inlineToRST opts EmDash = return $ text "--"
+inlineToRST opts EnDash = return $ char '-'
+inlineToRST opts Apostrophe = return $ char '\''
+inlineToRST opts Ellipses = return $ text "..."
+inlineToRST opts (Code str) = return $ text $ "``" ++ str ++ "``"
+inlineToRST opts (Str str) = return $ text $ escapeString str
+inlineToRST opts (TeX str) = return $ text str
+inlineToRST opts (HtmlInline str) = return empty
+inlineToRST opts (LineBreak) = return $ char ' ' -- RST doesn't have linebreaks
+inlineToRST opts Space = return $ char ' '
+inlineToRST opts (Link [Code str] (src, tit)) | src == str ||
+ src == "mailto:" ++ str = do
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ return $ text srcSuffix
+inlineToRST opts (Link txt (src, tit)) = do
+ let useReferenceLinks = writerReferenceLinks opts
+ linktext <- inlineListToRST opts $ normalizeSpaces txt
+ if useReferenceLinks
+ then do (notes, refs, pics) <- get
+ let refs' = if (txt, (src, tit)) `elem` refs
+ then refs
+ else (txt, (src, tit)):refs
+ put (notes, refs', pics)
+ return $ char '`' <> linktext <> text "`_"
+ else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_"
+inlineToRST opts (Image alternate (source, tit)) = do
+ (notes, refs, pics) <- get
+ let labelsUsed = map fst pics
+ let txt = if null alternate || alternate == [Str ""] ||
+ alternate `elem` labelsUsed
+ then [Str $ "image" ++ show (length refs)]
+ else alternate
+ let pics' = if (txt, (source, tit)) `elem` pics
+ then pics
+ else (txt, (source, tit)):pics
+ put (notes, refs, pics')
+ label <- inlineListToRST opts txt
+ return $ char '|' <> label <> char '|'
+inlineToRST opts (Note contents) = do
+ -- add to notes in state
+ modify (\(notes, refs, pics) -> (contents:notes, refs, pics))
+ (notes, _, _) <- get
+ let ref = show $ (length notes)
+ return $ text " [" <> text ref <> text "]_"
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
new file mode 100644
index 000000000..3bd5c63b2
--- /dev/null
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -0,0 +1,286 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.RTF
+ Copyright : Copyright (C) 2006-7 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to RTF (rich text format).
+-}
+module Text.Pandoc.Writers.RTF ( writeRTF ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Regex ( matchRegexAll, mkRegex )
+import Data.List ( isSuffixOf )
+import Data.Char ( ord )
+
+-- | Convert Pandoc to a string in rich text format.
+writeRTF :: WriterOptions -> Pandoc -> String
+writeRTF options (Pandoc meta blocks) =
+ let head = if writerStandalone options
+ then rtfHeader (writerHeader options) meta
+ else ""
+ toc = if writerTableOfContents options
+ then tableOfContents $ filter isHeaderBlock blocks
+ else ""
+ foot = if writerStandalone options then "\n}\n" else ""
+ body = writerIncludeBefore options ++
+ concatMap (blockToRTF 0 AlignDefault) blocks ++
+ writerIncludeAfter options
+ in head ++ toc ++ body ++ foot
+
+-- | Construct table of contents from list of header blocks.
+tableOfContents :: [Block] -> String
+tableOfContents headers =
+ let contentsTree = hierarchicalize headers
+ in concatMap (blockToRTF 0 AlignDefault) $
+ [Header 1 [Str "Contents"],
+ BulletList (map elementToListItem contentsTree)]
+
+elementToListItem :: Element -> [Block]
+elementToListItem (Blk _) = []
+elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
+ if null subsecs
+ then []
+ else [BulletList (map elementToListItem subsecs)]
+
+-- | Convert unicode characters (> 127) into rich text format representation.
+handleUnicode :: String -> String
+handleUnicode [] = []
+handleUnicode (c:cs) =
+ if ord c > 127
+ then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs
+ else c:(handleUnicode cs)
+
+-- | Escape special characters.
+escapeSpecial :: String -> String
+escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}"))
+
+-- | Escape strings as needed for rich text format.
+stringToRTF :: String -> String
+stringToRTF = handleUnicode . escapeSpecial
+
+-- | Escape things as needed for code block in RTF.
+codeStringToRTF :: String -> String
+codeStringToRTF str = joinWithSep "\\line\n" $ lines (stringToRTF str)
+
+-- | Deal with raw LaTeX.
+latexToRTF :: String -> String
+latexToRTF str = "{\\cf1 " ++ (stringToRTF str) ++ "\\cf0 } "
+
+-- | Make a paragraph with first-line indent, block indent, and space after.
+rtfParSpaced :: Int -- ^ space after (in twips)
+ -> Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfParSpaced spaceAfter indent firstLineIndent alignment content =
+ let alignString = case alignment of
+ AlignLeft -> "\\ql "
+ AlignRight -> "\\qr "
+ AlignCenter -> "\\qc "
+ AlignDefault -> "\\ql "
+ in "{\\pard " ++ alignString ++
+ "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++
+ " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n"
+
+-- | Default paragraph.
+rtfPar :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfPar = rtfParSpaced 180
+
+-- | Compact paragraph (e.g. for compact list items).
+rtfCompact :: Int -- ^ block indent (in twips)
+ -> Int -- ^ first line indent (relative to block) (in twips)
+ -> Alignment -- ^ alignment
+ -> String -- ^ string with content
+ -> String
+rtfCompact = rtfParSpaced 0
+
+-- number of twips to indent
+indentIncrement = 720
+listIncrement = 360
+
+-- | Returns appropriate bullet list marker for indent level.
+bulletMarker :: Int -> String
+bulletMarker indent = case indent `mod` 720 of
+ 0 -> "\\bullet "
+ otherwise -> "\\endash "
+
+-- | Returns appropriate (list of) ordered list markers for indent level.
+orderedMarkers :: Int -> ListAttributes -> [String]
+orderedMarkers indent (start, style, delim) =
+ if style == DefaultStyle && delim == DefaultDelim
+ then case indent `mod` 720 of
+ 0 -> orderedListMarkers (start, Decimal, Period)
+ otherwise -> orderedListMarkers (start, LowerAlpha, Period)
+ else orderedListMarkers (start, style, delim)
+
+-- | Returns RTF header.
+rtfHeader :: String -- ^ header text
+ -> Meta -- ^ bibliographic information
+ -> String
+rtfHeader headerText (Meta title authors date) =
+ let titletext = if null title
+ then ""
+ else rtfPar 0 0 AlignCenter $
+ "\\b \\fs36 " ++ inlineListToRTF title
+ authorstext = if null authors
+ then ""
+ else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $
+ map stringToRTF authors))
+ datetext = if date == ""
+ then ""
+ else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in
+ let spacer = if null (titletext ++ authorstext ++ datetext)
+ then ""
+ else rtfPar 0 0 AlignDefault "" in
+ headerText ++ titletext ++ authorstext ++ datetext ++ spacer
+
+-- | Convert Pandoc block element to RTF.
+blockToRTF :: Int -- ^ indent level
+ -> Alignment -- ^ alignment
+ -> Block -- ^ block to convert
+ -> String
+blockToRTF _ _ Null = ""
+blockToRTF indent alignment (Plain lst) =
+ rtfCompact indent 0 alignment $ inlineListToRTF lst
+blockToRTF indent alignment (Para lst) =
+ rtfPar indent 0 alignment $ inlineListToRTF lst
+blockToRTF indent alignment (BlockQuote lst) =
+ concatMap (blockToRTF (indent + indentIncrement) alignment) lst
+blockToRTF indent _ (CodeBlock str) =
+ rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
+blockToRTF _ _ (RawHtml str) = ""
+blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
+ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
+blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
+ zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst
+blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $
+ concatMap (definitionListItemToRTF alignment indent) lst
+blockToRTF indent _ HorizontalRule =
+ rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash"
+blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $
+ "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst
+blockToRTF indent alignment (Table caption aligns sizes headers rows) =
+ tableRowToRTF True indent aligns sizes headers ++
+ concatMap (tableRowToRTF False indent aligns sizes) rows ++
+ rtfPar indent 0 alignment (inlineListToRTF caption)
+
+tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String
+tableRowToRTF header indent aligns sizes cols =
+ let columns = concat $ zipWith (tableItemToRTF indent) aligns cols
+ totalTwips = 6 * 1440 -- 6 inches
+ rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips))
+ 0 sizes
+ cellDefs = map (\edge -> (if header
+ then "\\clbrdrb\\brdrs"
+ else "") ++ "\\cellx" ++ show edge)
+ rightEdges
+ start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++
+ "\\trkeep\\intbl\n{\n"
+ end = "}\n\\intbl\\row}\n"
+ in start ++ columns ++ end
+
+tableItemToRTF :: Int -> Alignment -> [Block] -> String
+tableItemToRTF indent alignment item =
+ let contents = concatMap (blockToRTF indent alignment) item
+ in "{\\intbl " ++ contents ++ "\\cell}\n"
+
+-- | Ensure that there's the same amount of space after compact
+-- lists as after regular lists.
+spaceAtEnd :: String -> String
+spaceAtEnd str =
+ if isSuffixOf "\\par}\n" str
+ then (take ((length str) - 6) str) ++ "\\sa180\\par}\n"
+ else str
+
+-- | Convert list item (list of blocks) to RTF.
+listItemToRTF :: Alignment -- ^ alignment
+ -> Int -- ^ indent level
+ -> String -- ^ list start marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> [Char]
+listItemToRTF alignment indent marker [] =
+ rtfCompact (indent + listIncrement) (0 - listIncrement) alignment
+ (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ")
+listItemToRTF alignment indent marker list =
+ let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in
+ -- insert the list marker into the (processed) first block
+ let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of
+ Just (before, matched, after, _) ->
+ before ++ "\\fi" ++ show (0 - listIncrement) ++
+ " " ++ marker ++ "\\tx" ++
+ show listIncrement ++ "\\tab" ++ after
+ Nothing -> first in
+ modFirst ++ concat rest
+
+-- | Convert definition list item (label, list of blocks) to RTF.
+definitionListItemToRTF :: Alignment -- ^ alignment
+ -> Int -- ^ indent level
+ -> ([Inline],[Block]) -- ^ list item (list of blocks)
+ -> [Char]
+definitionListItemToRTF alignment indent (label, items) =
+ let labelText = blockToRTF indent alignment (Plain label)
+ itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items
+ in labelText ++ itemsText
+
+-- | Convert list of inline items to RTF.
+inlineListToRTF :: [Inline] -- ^ list of inlines to convert
+ -> String
+inlineListToRTF lst = concatMap inlineToRTF lst
+
+-- | Convert inline item to RTF.
+inlineToRTF :: Inline -- ^ inline to convert
+ -> String
+inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "} "
+inlineToRTF (Quoted SingleQuote lst) =
+ "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
+inlineToRTF (Quoted DoubleQuote lst) =
+ "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
+inlineToRTF Apostrophe = "\\u8217'"
+inlineToRTF Ellipses = "\\u8230?"
+inlineToRTF EmDash = "\\u8212-"
+inlineToRTF EnDash = "\\u8211-"
+inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} "
+inlineToRTF (Str str) = stringToRTF str
+inlineToRTF (TeX str) = latexToRTF str
+inlineToRTF (HtmlInline str) = ""
+inlineToRTF (LineBreak) = "\\line "
+inlineToRTF Space = " "
+inlineToRTF (Link text (src, tit)) =
+ "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++
+ "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n"
+inlineToRTF (Image alternate (source, tit)) =
+ "{\\cf1 [image: " ++ source ++ "]\\cf0}"
+inlineToRTF (Note contents) =
+ "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++
+ (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}"