diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/Blocks.hs | 145 | ||||
-rw-r--r-- | src/Text/Pandoc/CharacterReferences.hs | 327 | ||||
-rw-r--r-- | src/Text/Pandoc/Definition.hs | 116 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 496 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 651 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 909 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 640 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 792 | ||||
-rw-r--r-- | src/Text/Pandoc/UTF8.hs | 45 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ConTeXt.hs | 248 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 299 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 458 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 310 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 293 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 373 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 325 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RTF.hs | 286 |
17 files changed, 0 insertions, 6713 deletions
diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs deleted file mode 100644 index cfc22cb3e..000000000 --- a/src/Text/Pandoc/Blocks.hs +++ /dev/null @@ -1,145 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Blocks - Copyright : Copyright (C) 2007 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for the manipulation of fixed-width blocks of text. -These are used in the construction of plain-text tables. --} - -module Text.Pandoc.Blocks - ( - TextBlock (..), - docToBlock, - blockToDoc, - widthOfBlock, - heightOfBlock, - hcatBlocks, - hsepBlocks, - centerAlignBlock, - leftAlignBlock, - rightAlignBlock - ) -where -import Text.PrettyPrint -import Data.List ( intersperse ) - --- | A fixed-width block of text. Parameters are width of block, --- height of block, and list of lines. -data TextBlock = TextBlock Int Int [String] -instance Show TextBlock where - show x = show $ blockToDoc x - --- | Break lines in a list of lines so that none are greater than --- a given width. -breakLines :: Int -- ^ Maximum length of lines. - -> [String] -- ^ List of lines. - -> [String] -breakLines width [] = [] -breakLines width (l:ls) = - if length l > width - then (take width l):(breakLines width ((drop width l):ls)) - else l:(breakLines width ls) - --- | Convert a @Doc@ element into a @TextBlock@ with a specified width. -docToBlock :: Int -- ^ Width of text block. - -> Doc -- ^ @Doc@ to convert. - -> TextBlock -docToBlock width doc = - let rendered = renderStyle (style {lineLength = width, - ribbonsPerLine = 1}) doc - lns = breakLines width $ lines rendered - in TextBlock width (length lns) lns - --- | Convert a @TextBlock@ to a @Doc@ element. -blockToDoc :: TextBlock -> Doc -blockToDoc (TextBlock _ _ lns) = - if null lns - then empty - else vcat $ map text lns - --- | Returns width of a @TextBlock@ (number of columns). -widthOfBlock :: TextBlock -> Int -widthOfBlock (TextBlock width _ _) = width - --- | Returns height of a @TextBlock@ (number of rows). -heightOfBlock :: TextBlock -> Int -heightOfBlock (TextBlock _ height _) = height - --- | Pads a string out to a given width using spaces. -hPad :: Int -- ^ Desired width. - -> String -- ^ String to pad. - -> String -hPad width line = - let lineLength = length line - in if lineLength <= width - then line ++ replicate (width - lineLength) ' ' - else take width line - --- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in --- which they appear side by side. -hcatBlocks :: [TextBlock] -> TextBlock -hcatBlocks [] = TextBlock 0 0 [] -hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd. -hcatBlocks ((TextBlock width1 height1 lns1):xs) = - let (TextBlock width2 height2 lns2) = hcatBlocks xs - height = max height1 height2 - width = width1 + width2 - lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) "" - lns2' = lns2 ++ replicate (height - height2) "" - lns = zipWith (++) lns1' lns2' - in TextBlock width height lns - --- | Like @hcatBlocks@, but inserts space between the @TextBlock@s. -hsepBlocks :: [TextBlock] -> TextBlock -hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "])) - -isWhitespace x = x `elem` " \t" - --- | Left-aligns the contents of a @TextBlock@ within the block. -leftAlignBlock :: TextBlock -> TextBlock -leftAlignBlock (TextBlock width height lns) = - TextBlock width height $ map (dropWhile isWhitespace) lns - --- | Right-aligns the contents of a @TextBlock@ within the block. -rightAlignBlock :: TextBlock -> TextBlock -rightAlignBlock (TextBlock width height lns) = - let rightAlignLine ln = - let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln - in reverse (rest ++ spaces) - in TextBlock width height $ map rightAlignLine lns - --- | Centers the contents of a @TextBlock@ within the block. -centerAlignBlock :: TextBlock -> TextBlock -centerAlignBlock (TextBlock width height lns) = - let centerAlignLine ln = - let ln' = hPad width ln - (startSpaces, rest) = span isWhitespace ln' - endSpaces = takeWhile isWhitespace (reverse ln') - numSpaces = length (startSpaces ++ endSpaces) - startSpaces' = replicate (quot numSpaces 2) ' ' - in startSpaces' ++ rest - in TextBlock width height $ map centerAlignLine lns - diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs deleted file mode 100644 index 466f5d8f4..000000000 --- a/src/Text/Pandoc/CharacterReferences.hs +++ /dev/null @@ -1,327 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.CharacterReferences - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for parsing character references. --} -module Text.Pandoc.CharacterReferences ( - characterReference, - decodeCharacterReferences, - ) where -import Data.Char ( chr ) -import Text.ParserCombinators.Parsec -import qualified Data.Map as Map - --- | Parse character entity. -characterReference :: GenParser Char st Char -characterReference = try $ do - st <- char '&' - character <- numRef <|> entity - end <- char ';' - return character - -numRef :: GenParser Char st Char -numRef = do - char '#' - num <- hexNum <|> decNum - return $ chr $ num - -hexNum :: GenParser Char st Int -hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++) - -decNum :: GenParser Char st Int -decNum = many1 digit >>= return . read - -entity :: GenParser Char st Char -entity = do - body <- many1 alphaNum - return $ Map.findWithDefault '?' body entityTable - --- | Convert entities in a string to characters. -decodeCharacterReferences :: String -> String -decodeCharacterReferences str = - case parse (many (characterReference <|> anyChar)) str str of - Left err -> error $ "\nError: " ++ show err - Right result -> result - -entityTable :: Map.Map String Char -entityTable = Map.fromList entityTableList - -entityTableList :: [(String, Char)] -entityTableList = [ - ("quot", chr 34), - ("amp", chr 38), - ("lt", chr 60), - ("gt", chr 62), - ("nbsp", chr 160), - ("iexcl", chr 161), - ("cent", chr 162), - ("pound", chr 163), - ("curren", chr 164), - ("yen", chr 165), - ("brvbar", chr 166), - ("sect", chr 167), - ("uml", chr 168), - ("copy", chr 169), - ("ordf", chr 170), - ("laquo", chr 171), - ("not", chr 172), - ("shy", chr 173), - ("reg", chr 174), - ("macr", chr 175), - ("deg", chr 176), - ("plusmn", chr 177), - ("sup2", chr 178), - ("sup3", chr 179), - ("acute", chr 180), - ("micro", chr 181), - ("para", chr 182), - ("middot", chr 183), - ("cedil", chr 184), - ("sup1", chr 185), - ("ordm", chr 186), - ("raquo", chr 187), - ("frac14", chr 188), - ("frac12", chr 189), - ("frac34", chr 190), - ("iquest", chr 191), - ("Agrave", chr 192), - ("Aacute", chr 193), - ("Acirc", chr 194), - ("Atilde", chr 195), - ("Auml", chr 196), - ("Aring", chr 197), - ("AElig", chr 198), - ("Ccedil", chr 199), - ("Egrave", chr 200), - ("Eacute", chr 201), - ("Ecirc", chr 202), - ("Euml", chr 203), - ("Igrave", chr 204), - ("Iacute", chr 205), - ("Icirc", chr 206), - ("Iuml", chr 207), - ("ETH", chr 208), - ("Ntilde", chr 209), - ("Ograve", chr 210), - ("Oacute", chr 211), - ("Ocirc", chr 212), - ("Otilde", chr 213), - ("Ouml", chr 214), - ("times", chr 215), - ("Oslash", chr 216), - ("Ugrave", chr 217), - ("Uacute", chr 218), - ("Ucirc", chr 219), - ("Uuml", chr 220), - ("Yacute", chr 221), - ("THORN", chr 222), - ("szlig", chr 223), - ("agrave", chr 224), - ("aacute", chr 225), - ("acirc", chr 226), - ("atilde", chr 227), - ("auml", chr 228), - ("aring", chr 229), - ("aelig", chr 230), - ("ccedil", chr 231), - ("egrave", chr 232), - ("eacute", chr 233), - ("ecirc", chr 234), - ("euml", chr 235), - ("igrave", chr 236), - ("iacute", chr 237), - ("icirc", chr 238), - ("iuml", chr 239), - ("eth", chr 240), - ("ntilde", chr 241), - ("ograve", chr 242), - ("oacute", chr 243), - ("ocirc", chr 244), - ("otilde", chr 245), - ("ouml", chr 246), - ("divide", chr 247), - ("oslash", chr 248), - ("ugrave", chr 249), - ("uacute", chr 250), - ("ucirc", chr 251), - ("uuml", chr 252), - ("yacute", chr 253), - ("thorn", chr 254), - ("yuml", chr 255), - ("OElig", chr 338), - ("oelig", chr 339), - ("Scaron", chr 352), - ("scaron", chr 353), - ("Yuml", chr 376), - ("fnof", chr 402), - ("circ", chr 710), - ("tilde", chr 732), - ("Alpha", chr 913), - ("Beta", chr 914), - ("Gamma", chr 915), - ("Delta", chr 916), - ("Epsilon", chr 917), - ("Zeta", chr 918), - ("Eta", chr 919), - ("Theta", chr 920), - ("Iota", chr 921), - ("Kappa", chr 922), - ("Lambda", chr 923), - ("Mu", chr 924), - ("Nu", chr 925), - ("Xi", chr 926), - ("Omicron", chr 927), - ("Pi", chr 928), - ("Rho", chr 929), - ("Sigma", chr 931), - ("Tau", chr 932), - ("Upsilon", chr 933), - ("Phi", chr 934), - ("Chi", chr 935), - ("Psi", chr 936), - ("Omega", chr 937), - ("alpha", chr 945), - ("beta", chr 946), - ("gamma", chr 947), - ("delta", chr 948), - ("epsilon", chr 949), - ("zeta", chr 950), - ("eta", chr 951), - ("theta", chr 952), - ("iota", chr 953), - ("kappa", chr 954), - ("lambda", chr 955), - ("mu", chr 956), - ("nu", chr 957), - ("xi", chr 958), - ("omicron", chr 959), - ("pi", chr 960), - ("rho", chr 961), - ("sigmaf", chr 962), - ("sigma", chr 963), - ("tau", chr 964), - ("upsilon", chr 965), - ("phi", chr 966), - ("chi", chr 967), - ("psi", chr 968), - ("omega", chr 969), - ("thetasym", chr 977), - ("upsih", chr 978), - ("piv", chr 982), - ("ensp", chr 8194), - ("emsp", chr 8195), - ("thinsp", chr 8201), - ("zwnj", chr 8204), - ("zwj", chr 8205), - ("lrm", chr 8206), - ("rlm", chr 8207), - ("ndash", chr 8211), - ("mdash", chr 8212), - ("lsquo", chr 8216), - ("rsquo", chr 8217), - ("sbquo", chr 8218), - ("ldquo", chr 8220), - ("rdquo", chr 8221), - ("bdquo", chr 8222), - ("dagger", chr 8224), - ("Dagger", chr 8225), - ("bull", chr 8226), - ("hellip", chr 8230), - ("permil", chr 8240), - ("prime", chr 8242), - ("Prime", chr 8243), - ("lsaquo", chr 8249), - ("rsaquo", chr 8250), - ("oline", chr 8254), - ("frasl", chr 8260), - ("euro", chr 8364), - ("image", chr 8465), - ("weierp", chr 8472), - ("real", chr 8476), - ("trade", chr 8482), - ("alefsym", chr 8501), - ("larr", chr 8592), - ("uarr", chr 8593), - ("rarr", chr 8594), - ("darr", chr 8595), - ("harr", chr 8596), - ("crarr", chr 8629), - ("lArr", chr 8656), - ("uArr", chr 8657), - ("rArr", chr 8658), - ("dArr", chr 8659), - ("hArr", chr 8660), - ("forall", chr 8704), - ("part", chr 8706), - ("exist", chr 8707), - ("empty", chr 8709), - ("nabla", chr 8711), - ("isin", chr 8712), - ("notin", chr 8713), - ("ni", chr 8715), - ("prod", chr 8719), - ("sum", chr 8721), - ("minus", chr 8722), - ("lowast", chr 8727), - ("radic", chr 8730), - ("prop", chr 8733), - ("infin", chr 8734), - ("ang", chr 8736), - ("and", chr 8743), - ("or", chr 8744), - ("cap", chr 8745), - ("cup", chr 8746), - ("int", chr 8747), - ("there4", chr 8756), - ("sim", chr 8764), - ("cong", chr 8773), - ("asymp", chr 8776), - ("ne", chr 8800), - ("equiv", chr 8801), - ("le", chr 8804), - ("ge", chr 8805), - ("sub", chr 8834), - ("sup", chr 8835), - ("nsub", chr 8836), - ("sube", chr 8838), - ("supe", chr 8839), - ("oplus", chr 8853), - ("otimes", chr 8855), - ("perp", chr 8869), - ("sdot", chr 8901), - ("lceil", chr 8968), - ("rceil", chr 8969), - ("lfloor", chr 8970), - ("rfloor", chr 8971), - ("lang", chr 9001), - ("rang", chr 9002), - ("loz", chr 9674), - ("spades", chr 9824), - ("clubs", chr 9827), - ("hearts", chr 9829), - ("diams", chr 9830) - ] diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs deleted file mode 100644 index 7d1125c5a..000000000 --- a/src/Text/Pandoc/Definition.hs +++ /dev/null @@ -1,116 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Definition - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Definition of 'Pandoc' data structure for format-neutral representation -of documents. --} -module Text.Pandoc.Definition where - -data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show) - --- | Bibliographic information for the document: title (list of 'Inline'), --- authors (list of strings), date (string). -data Meta = Meta [Inline] -- title - [String] -- authors - String -- date - deriving (Eq, Show, Read) - --- | Alignment of a table column. -data Alignment = AlignLeft - | AlignRight - | AlignCenter - | AlignDefault deriving (Eq, Show, Read) - --- | List attributes. -type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) - --- | Style of list numbers. -data ListNumberStyle = DefaultStyle - | Decimal - | LowerRoman - | UpperRoman - | LowerAlpha - | UpperAlpha deriving (Eq, Show, Read) - --- | Delimiter of list numbers. -data ListNumberDelim = DefaultDelim - | Period - | OneParen - | TwoParens deriving (Eq, Show, Read) - --- | Block element. -data Block - = Plain [Inline] -- ^ Plain text, not a paragraph - | Para [Inline] -- ^ Paragraph - | CodeBlock String -- ^ Code block (literal) - | RawHtml String -- ^ Raw HTML block (literal) - | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes - -- and a list of items, each a list of blocks) - | BulletList [[Block]] -- ^ Bullet list (list of items, each - -- a list of blocks) - | DefinitionList [([Inline],[Block])] -- ^ Definition list - -- (list of items, each a pair of an inline list, - -- the term, and a block list) - | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) - | HorizontalRule -- ^ Horizontal rule - | Table [Inline] [Alignment] [Float] [[Block]] [[[Block]]] -- ^ Table, - -- with caption, column alignments, - -- relative column widths, column headers - -- (each a list of blocks), and rows - -- (each a list of lists of blocks) - | Null -- ^ Nothing - deriving (Eq, Read, Show) - --- | Type of quotation marks to use in Quoted inline. -data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read) - -type Target = (String, String) -- ^ Link target (URL, title) - --- | Inline elements. -data Inline - = Str String -- ^ Text (string) - | Emph [Inline] -- ^ Emphasized text (list of inlines) - | Strong [Inline] -- ^ Strongly emphasized text (list of inlines) - | Strikeout [Inline] -- ^ Strikeout text (list of inlines) - | Superscript [Inline] -- ^ Superscripted text (list of inlines) - | Subscript [Inline] -- ^ Subscripted text (list of inlines) - | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) - | Code String -- ^ Inline code (literal) - | Space -- ^ Inter-word space - | EmDash -- ^ Em dash - | EnDash -- ^ En dash - | Apostrophe -- ^ Apostrophe - | Ellipses -- ^ Ellipses - | LineBreak -- ^ Hard line break - | TeX String -- ^ LaTeX code (literal) - | HtmlInline String -- ^ HTML code (literal) - | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target - | Image [Inline] Target -- ^ Image: alt text (list of inlines), target - -- and target - | Note [Block] -- ^ Footnote or endnote - deriving (Show, Eq, Read) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs deleted file mode 100644 index 70a071152..000000000 --- a/src/Text/Pandoc/Readers/HTML.hs +++ /dev/null @@ -1,496 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of HTML to 'Pandoc' document. --} -module Text.Pandoc.Readers.HTML ( - readHtml, - rawHtmlInline, - rawHtmlBlock, - anyHtmlBlockTag, - anyHtmlInlineTag, - anyHtmlTag, - anyHtmlEndTag, - htmlEndTag, - extractTagType, - htmlBlockElement - ) where - -import Text.ParserCombinators.Parsec -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.CharacterReferences ( characterReference, - decodeCharacterReferences ) -import Data.Maybe ( fromMaybe ) -import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf ) -import Data.Char ( toUpper, toLower, isAlphaNum ) - --- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: ParserState -- ^ Parser state - -> String -- ^ String to parse - -> Pandoc -readHtml = readWith parseHtml - --- --- Constants --- - -eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", - "map", "area", "object", "script"] - -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", - "br", "cite", "code", "dfn", "em", "font", "i", "img", - "input", "kbd", "label", "q", "s", "samp", "select", - "small", "span", "strike", "strong", "sub", "sup", - "textarea", "tt", "u", "var"] ++ eitherBlockOrInline - -blockHtmlTags = ["address", "blockquote", "center", "dir", "div", - "dl", "fieldset", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "hr", "isindex", "menu", "noframes", - "noscript", "ol", "p", "pre", "table", "ul", "dd", - "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr"] ++ eitherBlockOrInline - --- --- HTML utility functions --- - --- | Read blocks until end tag. -blocksTilEnd tag = do - blocks <- manyTill (block >>~ spaces) (htmlEndTag tag) - return $ filter (/= Null) blocks - --- | Read inlines until end tag. -inlinesTilEnd tag = manyTill inline (htmlEndTag tag) - --- | Parse blocks between open and close tag. -blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag - --- | Parse inlines between open and close tag. -inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag - --- | Extract type from a tag: e.g. @br@ from @\<br\>@ -extractTagType :: String -> String -extractTagType ('<':rest) = - let isSpaceOrSlash c = c `elem` "/ \n\t" in - map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest -extractTagType _ = "" - --- | Parse any HTML tag (opening or self-closing) and return text of tag -anyHtmlTag = try $ do - char '<' - spaces - tag <- many1 alphaNum - attribs <- many htmlAttribute - spaces - ender <- option "" (string "/") - let ender' = if null ender then "" else " /" - spaces - char '>' - return $ "<" ++ tag ++ - concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">" - -anyHtmlEndTag = try $ do - char '<' - spaces - char '/' - spaces - tagType <- many1 alphaNum - spaces - char '>' - return $ "</" ++ tagType ++ ">" - -htmlTag :: String -> GenParser Char st (String, [(String, String)]) -htmlTag tag = try $ do - char '<' - spaces - stringAnyCase tag - attribs <- many htmlAttribute - spaces - optional (string "/") - spaces - char '>' - return (tag, (map (\(name, content, raw) -> (name, content)) attribs)) - --- parses a quoted html attribute value -quoted quoteChar = do - result <- between (char quoteChar) (char quoteChar) - (many (noneOf [quoteChar])) - return (result, [quoteChar]) - -htmlAttribute = htmlRegularAttribute <|> htmlMinimizedAttribute - --- minimized boolean attribute -htmlMinimizedAttribute = try $ do - many1 space - name <- many1 (choice [letter, oneOf ".-_:"]) - return (name, name, name) - -htmlRegularAttribute = try $ do - many1 space - name <- many1 (choice [letter, oneOf ".-_:"]) - spaces - char '=' - spaces - (content, quoteStr) <- choice [ (quoted '\''), - (quoted '"'), - (do - a <- many (alphaNum <|> (oneOf "-._:")) - return (a,"")) ] - return (name, content, - (name ++ "=" ++ quoteStr ++ content ++ quoteStr)) - --- | Parse an end tag of type 'tag' -htmlEndTag tag = try $ do - char '<' - spaces - char '/' - spaces - stringAnyCase tag - spaces - char '>' - return $ "</" ++ tag ++ ">" - --- | Returns @True@ if the tag is (or can be) an inline tag. -isInline tag = (extractTagType tag) `elem` inlineHtmlTags - --- | Returns @True@ if the tag is (or can be) a block tag. -isBlock tag = (extractTagType tag) `elem` blockHtmlTags - -anyHtmlBlockTag = try $ do - tag <- anyHtmlTag <|> anyHtmlEndTag - if isBlock tag then return tag else fail "inline tag" - -anyHtmlInlineTag = try $ do - tag <- anyHtmlTag <|> anyHtmlEndTag - if isInline tag then return tag else fail "not an inline tag" - --- | Parses material between script tags. --- Scripts must be treated differently, because they can contain '<>' etc. -htmlScript = try $ do - open <- string "<script" - rest <- manyTill anyChar (htmlEndTag "script") - return $ open ++ rest ++ "</script>" - -htmlBlockElement = choice [ htmlScript, htmlComment, xmlDec, definition ] - -rawHtmlBlock = try $ do - notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") - body <- htmlBlockElement <|> anyHtmlTag <|> anyHtmlEndTag - sp <- many space - state <- getState - if stateParseRaw state then return (RawHtml (body ++ sp)) else return Null - --- | Parses an HTML comment. -htmlComment = try $ do - string "<!--" - comment <- manyTill anyChar (try (string "-->")) - return $ "<!--" ++ comment ++ "-->" - --- --- parsing documents --- - -xmlDec = try $ do - string "<?" - rest <- manyTill anyChar (char '>') - return $ "<?" ++ rest ++ ">" - -definition = try $ do - string "<!" - rest <- manyTill anyChar (char '>') - return $ "<!" ++ rest ++ ">" - -nonTitleNonHead = try $ notFollowedBy' (htmlTag "title" <|> htmlTag "/head") >> - ((rawHtmlBlock >> return ' ') <|> anyChar) - -parseTitle = try $ do - (tag, _) <- htmlTag "title" - contents <- inlinesTilEnd tag - spaces - return contents - --- parse header and return meta-information (for now, just title) -parseHead = try $ do - htmlTag "head" - spaces - skipMany nonTitleNonHead - contents <- option [] parseTitle - skipMany nonTitleNonHead - htmlTag "/head" - return (contents, [], "") - -skipHtmlTag tag = optional (htmlTag tag) - --- h1 class="title" representation of title in body -bodyTitle = try $ do - (tag, attribs) <- htmlTag "h1" - cl <- case (extractAttribute "class" attribs) of - Just "title" -> return "" - otherwise -> fail "not title" - inlinesTilEnd "h1" - -parseHtml = do - sepEndBy (choice [xmlDec, definition, htmlComment]) spaces - skipHtmlTag "html" - spaces - (title, authors, date) <- option ([], [], "") parseHead - spaces - skipHtmlTag "body" - spaces - optional bodyTitle -- skip title in body, because it's represented in meta - blocks <- parseBlocks - spaces - optional (htmlEndTag "body") - spaces - optional (htmlEndTag "html" >> many anyChar) -- ignore anything after </html> - eof - return $ Pandoc (Meta title authors date) blocks - --- --- parsing blocks --- - -parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null)) - -block = choice [ codeBlock - , header - , hrule - , list - , blockQuote - , para - , plain - , rawHtmlBlock ] <?> "block" - --- --- header blocks --- - -header = choice (map headerLevel (enumFromTo 1 5)) <?> "header" - -headerLevel n = try $ do - let level = "h" ++ show n - (tag, attribs) <- htmlTag level - contents <- inlinesTilEnd level - return $ Header n (normalizeSpaces contents) - --- --- hrule block --- - -hrule = try $ do - (tag, attribs) <- htmlTag "hr" - state <- getState - if not (null attribs) && stateParseRaw state - then unexpected "attributes in hr" -- parse as raw in this case - else return HorizontalRule - --- --- code blocks --- - --- Note: HTML tags in code blocks (e.g. for syntax highlighting) are --- skipped, because they are not portable to output formats other than HTML. -codeBlock = try $ do - htmlTag "pre" - result <- manyTill - (many1 (satisfy (/= '<')) <|> - ((anyHtmlTag <|> anyHtmlEndTag) >> return "")) - (htmlEndTag "pre") - let result' = concat result - -- drop leading newline if any - let result'' = if "\n" `isPrefixOf` result' - then drop 1 result' - else result' - -- drop trailing newline if any - let result''' = if "\n" `isSuffixOf` result'' - then init result'' - else result'' - return $ CodeBlock $ decodeCharacterReferences result''' - --- --- block quotes --- - -blockQuote = try $ htmlTag "blockquote" >> spaces >> - blocksTilEnd "blockquote" >>= (return . BlockQuote) - --- --- list blocks --- - -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -orderedList = try $ do - (_, attribs) <- htmlTag "ol" - (start, style) <- option (1, DefaultStyle) $ - do failIfStrict - let sta = fromMaybe "1" $ - lookup "start" attribs - let sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - let sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> DefaultStyle - return (read sta, sty') - spaces - items <- sepEndBy1 (blocksIn "li") spaces - htmlEndTag "ol" - return $ OrderedList (start, style, DefaultDelim) items - -bulletList = try $ do - htmlTag "ul" - spaces - items <- sepEndBy1 (blocksIn "li") spaces - htmlEndTag "ul" - return $ BulletList items - -definitionList = try $ do - failIfStrict -- def lists not part of standard markdown - tag <- htmlTag "dl" - spaces - items <- sepEndBy1 definitionListItem spaces - htmlEndTag "dl" - return $ DefinitionList items - -definitionListItem = try $ do - terms <- sepEndBy1 (inlinesIn "dt") spaces - defs <- sepEndBy1 (blocksIn "dd") spaces - let term = joinWithSep [LineBreak] terms - return (term, concat defs) - --- --- paragraph block --- - -para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= - return . Para . normalizeSpaces - --- --- plain block --- - -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- inline --- - -inline = choice [ charRef - , strong - , emph - , superscript - , subscript - , strikeout - , spanStrikeout - , code - , str - , linebreak - , whitespace - , link - , image - , rawHtmlInline - ] <?> "inline" - -code = try $ do - htmlTag "code" - result <- manyTill anyChar (htmlEndTag "code") - -- remove internal line breaks, leading and trailing space, - -- and decode character references - return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ - joinWithSep " " $ lines result - -rawHtmlInline = do - result <- htmlScript <|> htmlComment <|> anyHtmlInlineTag - state <- getState - if stateParseRaw state then return (HtmlInline result) else return (Str "") - -betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= - return . normalizeSpaces - -emph = (betweenTags "em" <|> betweenTags "it") >>= return . Emph - -strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong - -superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript - -subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript - -strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= - return . Strikeout - -spanStrikeout = try $ do - failIfStrict -- strict markdown has no strikeout, so treat as raw HTML - (tag, attributes) <- htmlTag "span" - result <- case (extractAttribute "class" attributes) of - Just "strikeout" -> inlinesTilEnd "span" - _ -> fail "not a strikeout" - return $ Strikeout result - -whitespace = many1 space >> return Space - --- hard line break -linebreak = htmlTag "br" >> optional newline >> return LineBreak - -str = many1 (noneOf "<& \t\n") >>= return . Str - --- --- links and images --- - --- extract contents of attribute (attribute names are case-insensitive) -extractAttribute name [] = Nothing -extractAttribute name ((attrName, contents):rest) = - let name' = map toLower name - attrName' = map toLower attrName - in if attrName' == name' - then Just (decodeCharacterReferences contents) - else extractAttribute name rest - -link = try $ do - (tag, attributes) <- htmlTag "a" - url <- case (extractAttribute "href" attributes) of - Just url -> return url - Nothing -> fail "no href" - let title = fromMaybe "" $ extractAttribute "title" attributes - label <- inlinesTilEnd "a" - return $ Link (normalizeSpaces label) (url, title) - -image = try $ do - (tag, attributes) <- htmlTag "img" - url <- case (extractAttribute "src" attributes) of - Just url -> return url - Nothing -> fail "no src" - let title = fromMaybe "" $ extractAttribute "title" attributes - let alt = fromMaybe "" (extractAttribute "alt" attributes) - return $ Image [Str alt] (url, title) - diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs deleted file mode 100644 index 37cc2bfe4..000000000 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ /dev/null @@ -1,651 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of LaTeX to 'Pandoc' document. --} -module Text.Pandoc.Readers.LaTeX ( - readLaTeX, - rawLaTeXInline, - rawLaTeXEnvironment - ) where - -import Text.ParserCombinators.Parsec -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Data.Maybe ( fromMaybe ) -import Data.Char ( chr ) -import Data.List ( isPrefixOf, isSuffixOf ) - --- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse - -> Pandoc -readLaTeX = readWith parseLaTeX - --- characters with special meaning -specialChars = "\\`$%^&_~#{}\n \t|<>'\"-" - --- --- utility functions --- - --- | Returns text between brackets and its matching pair. -bracketedText openB closeB = do - result <- charsInBalanced' openB closeB - return $ [openB] ++ result ++ [closeB] - --- | Returns an option or argument of a LaTeX command. -optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']' - --- | True if the string begins with '{'. -isArg ('{':rest) = True -isArg other = False - --- | Returns list of options and arguments of a LaTeX command. -commandArgs = many optOrArg - --- | Parses LaTeX command, returns (name, star, list of options or arguments). -command = do - char '\\' - name <- many1 letter - star <- option "" (string "*") -- some commands have starred versions - args <- commandArgs - return (name, star, args) - -begin name = try $ do - string $ "\\begin{" ++ name ++ "}" - optional commandArgs - spaces - return name - -end name = try $ do - string $ "\\end{" ++ name ++ "}" - spaces - return name - --- | Returns a list of block elements containing the contents of an --- environment. -environment name = try $ begin name >> spaces >> manyTill block (end name) - -anyEnvironment = try $ do - string "\\begin{" - name <- many letter - star <- option "" (string "*") -- some environments have starred variants - char '}' - optional commandArgs - spaces - contents <- manyTill block (end (name ++ star)) - return $ BlockQuote contents - --- --- parsing documents --- - --- | Process LaTeX preamble, extracting metadata. -processLaTeXPreamble = try $ manyTill - (choice [bibliographic, comment, unknownCommand, nullBlock]) - (try (string "\\begin{document}")) >> - spaces - --- | Parse LaTeX and return 'Pandoc'. -parseLaTeX = do - optional processLaTeXPreamble -- preamble might not be present (fragment) - spaces - blocks <- parseBlocks - spaces - optional $ try (string "\\end{document}" >> many anyChar) - -- might not be present (fragment) - spaces - eof - state <- getState - let blocks' = filter (/= Null) blocks - let title' = stateTitle state - let authors' = stateAuthors state - let date' = stateDate state - return $ Pandoc (Meta title' authors' date') blocks' - --- --- parsing blocks --- - -parseBlocks = spaces >> many block - -block = choice [ hrule - , codeBlock - , header - , list - , blockQuote - , mathBlock - , comment - , bibliographic - , para - , specialEnvironment - , itemBlock - , unknownEnvironment - , unknownCommand ] <?> "block" - --- --- header blocks --- - -header = try $ do - char '\\' - subs <- many (try (string "sub")) - string "section" - optional (char '*') - char '{' - title <- manyTill inline (char '}') - spaces - return $ Header (length subs + 1) (normalizeSpaces title) - --- --- hrule block --- - -hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", - "\\newpage" ] >> spaces >> return HorizontalRule - --- --- code blocks --- - -codeBlock = codeBlock1 <|> codeBlock2 - -codeBlock1 = try $ do - string "\\begin{verbatim}" -- don't use begin function because it - -- gobbles whitespace - optional blanklines -- we want to gobble blank lines, but not - -- leading space - contents <- manyTill anyChar (try (string "\\end{verbatim}")) - spaces - return $ CodeBlock (stripTrailingNewlines contents) - -codeBlock2 = try $ do - string "\\begin{Verbatim}" -- used by fancyvrb package - option "" blanklines - contents <- manyTill anyChar (try (string "\\end{Verbatim}")) - spaces - return $ CodeBlock (stripTrailingNewlines contents) - --- --- block quotes --- - -blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= - return . BlockQuote - --- --- math block --- - -mathBlock = mathBlockWith (begin "equation") (end "equation") <|> - mathBlockWith (begin "displaymath") (end "displaymath") <|> - mathBlockWith (string "\\[") (string "\\]") <?> "math block" - -mathBlockWith start end = try $ do - start - spaces - result <- manyTill anyChar end - spaces - return $ BlockQuote [Para [TeX ("$" ++ result ++ "$")]] - --- --- list blocks --- - -list = bulletList <|> orderedList <|> definitionList <?> "list" - -listItem = try $ do - ("item", _, args) <- command - spaces - state <- getState - let oldParserContext = stateParserContext state - updateState (\state -> state {stateParserContext = ListItemState}) - blocks <- many block - updateState (\state -> state {stateParserContext = oldParserContext}) - opt <- case args of - ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> - parseFromString (many inline) $ tail $ init x - _ -> return [] - return (opt, blocks) - -orderedList = try $ do - string "\\begin{enumerate}" - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ do failIfStrict - char '[' - res <- anyOrderedListMarker - char ']' - return res - spaces - option "" $ try $ do string "\\setlength{\\itemindent}" - char '{' - manyTill anyChar (char '}') - spaces - start <- option 1 $ try $ do failIfStrict - string "\\setcounter{enum" - many1 (oneOf "iv") - string "}{" - num <- many1 digit - char '}' - spaces - return $ (read num) + 1 - items <- many listItem - end "enumerate" - spaces - return $ OrderedList (start, style, delim) $ map snd items - -bulletList = try $ do - begin "itemize" - spaces - items <- many listItem - end "itemize" - spaces - return (BulletList $ map snd items) - -definitionList = try $ do - begin "description" - spaces - items <- many listItem - end "description" - spaces - return (DefinitionList items) - --- --- paragraph block --- - -para = many1 inline >>~ spaces >>= return . Para . normalizeSpaces - --- --- title authors date --- - -bibliographic = choice [ maketitle, title, authors, date ] - -maketitle = try (string "\\maketitle") >> spaces >> return Null - -title = try $ do - string "\\title{" - tit <- manyTill inline (char '}') - spaces - updateState (\state -> state { stateTitle = tit }) - return Null - -authors = try $ do - string "\\author{" - authors <- manyTill anyChar (char '}') - spaces - let authors' = map removeLeadingTrailingSpace $ lines $ - substitute "\\\\" "\n" authors - updateState (\state -> state { stateAuthors = authors' }) - return Null - -date = try $ do - string "\\date{" - date' <- manyTill anyChar (char '}') - spaces - updateState (\state -> state { stateDate = date' }) - return Null - --- --- item block --- for use in unknown environments that aren't being parsed as raw latex --- - --- this forces items to be parsed in different blocks -itemBlock = try $ do - ("item", _, args) <- command - state <- getState - if (stateParserContext state == ListItemState) - then fail "item should be handled by list block" - else if null args - then return Null - else return $ Plain [Str (stripFirstAndLast (head args))] - --- --- raw LaTeX --- - -specialEnvironment = do -- these are always parsed as raw - lookAhead (choice (map (\name -> begin name) ["tabular", "figure", - "tabbing", "eqnarry", "picture", "table", "verse", "theorem"])) - rawLaTeXEnvironment - --- | Parse any LaTeX environment and return a Para block containing --- the whole literal environment as raw TeX. -rawLaTeXEnvironment :: GenParser Char st Block -rawLaTeXEnvironment = try $ do - string "\\begin{" - name <- many1 letter - star <- option "" (string "*") -- for starred variants - let name' = name ++ star - char '}' - args <- option [] commandArgs - let argStr = concat args - contents <- manyTill (choice [ (many1 (noneOf "\\")), - (do - (Para [TeX str]) <- rawLaTeXEnvironment - return str), - string "\\" ]) - (end name') - spaces - return $ Para [TeX $ "\\begin{" ++ name' ++ "}" ++ argStr ++ - concat contents ++ "\\end{" ++ name' ++ "}"] - -unknownEnvironment = try $ do - state <- getState - result <- if stateParseRaw state -- check whether we should include raw TeX - then rawLaTeXEnvironment -- if so, get whole raw environment - else anyEnvironment -- otherwise just the contents - return result - -unknownCommand = try $ do - notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", - "document"] - (name, star, args) <- command - spaces - let argStr = concat args - state <- getState - if name == "item" && (stateParserContext state) == ListItemState - then fail "should not be parsed as raw" - else string "" - if stateParseRaw state - then return $ Plain [TeX ("\\" ++ name ++ star ++ argStr)] - else return $ Plain [Str (joinWithSep " " args)] - --- latex comment -comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null - --- --- inline --- - -inline = choice [ str - , endline - , whitespace - , quoted - , apostrophe - , spacer - , strong - , math - , ellipses - , emDash - , enDash - , hyphen - , emph - , strikeout - , superscript - , subscript - , ref - , lab - , code - , url - , link - , image - , footnote - , linebreak - , accentedChar - , specialChar - , rawLaTeXInline - , escapedChar - , unescapedChar - ] <?> "inline" - -accentedChar = normalAccentedChar <|> specialAccentedChar - -normalAccentedChar = try $ do - char '\\' - accent <- oneOf "'`^\"~" - character <- (try $ char '{' >> letter >>~ char '}') <|> letter - let table = fromMaybe [] $ lookup character accentTable - let result = case lookup accent table of - Just num -> chr num - Nothing -> '?' - return $ Str [result] - --- an association list of letters and association list of accents --- and decimal character numbers. -accentTable = - [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]), - ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]), - ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]), - ('N', [('~', 209)]), - ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]), - ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]), - ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]), - ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]), - ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]), - ('n', [('~', 241)]), - ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), - ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] - -specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, - oslash, pound, euro, copyright, sect ] - -ccedil = try $ do - char '\\' - letter <- oneOfStrings ["cc", "cC"] - let num = if letter == "cc" then 231 else 199 - return $ Str [chr num] - -aring = try $ do - char '\\' - letter <- oneOfStrings ["aa", "AA"] - let num = if letter == "aa" then 229 else 197 - return $ Str [chr num] - -iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> - return (Str [chr 239]) - -icirc = try (string "\\^") >> oneOfStrings ["\\i", "{\\i}"] >> - return (Str [chr 238]) - -szlig = try (string "\\ss") >> return (Str [chr 223]) - -oslash = try $ do - char '\\' - letter <- choice [char 'o', char 'O'] - let num = if letter == 'o' then 248 else 216 - return $ Str [chr num] - -aelig = try $ do - char '\\' - letter <- oneOfStrings ["ae", "AE"] - let num = if letter == "ae" then 230 else 198 - return $ Str [chr num] - -pound = try (string "\\pounds") >> return (Str [chr 163]) - -euro = try (string "\\euro") >> return (Str [chr 8364]) - -copyright = try (string "\\copyright") >> return (Str [chr 169]) - -sect = try (string "\\S") >> return (Str [chr 167]) - -escapedChar = do - result <- escaped (oneOf " $%&_#{}\n") - return $ if result == Str "\n" then Str " " else result - --- ignore standalone, nonescaped special characters -unescapedChar = oneOf "`$^&_#{}|<>" >> return (Str "") - -specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ] - -backslash = try (string "\\textbackslash") >> return (Str "\\") - -tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~") - -caret = try (string "\\^{}") >> return (Str "^") - -bar = try (string "\\textbar") >> return (Str "\\") - -lt = try (string "\\textless") >> return (Str "<") - -gt = try (string "\\textgreater") >> return (Str ">") - -doubleQuote = char '"' >> return (Str "\"") - -code = code1 <|> code2 - -code1 = try $ do - string "\\verb" - marker <- anyChar - result <- manyTill anyChar (char marker) - return $ Code $ removeLeadingTrailingSpace result - -code2 = try $ do - string "\\texttt{" - result <- manyTill (noneOf "\\\n~$%^&{}") (char '}') - return $ Code result - -emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> - manyTill inline (char '}') >>= return . Emph - -strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>= - return . Strikeout - -superscript = try $ string "\\textsuperscript{" >> - manyTill inline (char '}') >>= return . Superscript - --- note: \textsubscript isn't a standard latex command, but we use --- a defined version in pandoc. -subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>= - return . Subscript - -apostrophe = char '\'' >> return Apostrophe - -quoted = doubleQuoted <|> singleQuoted - -singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>= - return . Quoted DoubleQuote . normalizeSpaces - -singleQuoteStart = char '`' - -singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum - -doubleQuoteStart = string "``" - -doubleQuoteEnd = try $ string "''" - -ellipses = try $ string "\\ldots" >> optional (try (string "{}")) >> - return Ellipses - -enDash = try (string "--") >> return EnDash - -emDash = try (string "---") >> return EmDash - -hyphen = char '-' >> return (Str "-") - -lab = try $ do - string "\\label{" - result <- manyTill anyChar (char '}') - return $ Str $ "(" ++ result ++ ")" - -ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str - -strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= - return . Strong - -whitespace = many1 (oneOf "~ \t") >> return Space - --- hard line break -linebreak = try (string "\\\\") >> return LineBreak - -spacer = try (string "\\,") >> return (Str "") - -str = many1 (noneOf specialChars) >>= return . Str - --- endline internal to paragraph -endline = try $ newline >> notFollowedBy blankline >> return Space - --- math -math = math1 <|> math2 <?> "math" - -math1 = try $ do - char '$' - result <- many (noneOf "$") - char '$' - return $ TeX ("$" ++ result ++ "$") - -math2 = try $ do - string "\\(" - result <- many (noneOf "$") - string "\\)" - return $ TeX ("$" ++ result ++ "$") - --- --- links and images --- - -url = try $ do - string "\\url" - url <- charsInBalanced '{' '}' - return $ Link [Code url] (url, "") - -link = try $ do - string "\\href{" - url <- manyTill anyChar (char '}') - char '{' - label <- manyTill inline (char '}') - return $ Link (normalizeSpaces label) (url, "") - -image = try $ do - ("includegraphics", _, args) <- command - let args' = filter isArg args -- filter out options - let src = if null args' then - ("", "") - else - (stripFirstAndLast (head args'), "") - return $ Image [Str "image"] src - -footnote = try $ do - (name, _, (contents:[])) <- command - if ((name == "footnote") || (name == "thanks")) - then string "" - else fail "not a footnote or thanks command" - let contents' = stripFirstAndLast contents - -- parse the extracted block, which may contain various block elements: - rest <- getInput - setInput $ contents' - blocks <- parseBlocks - setInput rest - return $ Note blocks - --- | Parse any LaTeX command and return it in a raw TeX inline element. -rawLaTeXInline :: GenParser Char ParserState Inline -rawLaTeXInline = try $ do - (name, star, args) <- command - state <- getState - if ((name == "begin") || (name == "end") || (name == "item")) - then fail "not an inline command" - else string "" - return $ TeX ("\\" ++ name ++ star ++ concat args) - diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs deleted file mode 100644 index df84c0ac7..000000000 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ /dev/null @@ -1,909 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of markdown-formatted plain text to 'Pandoc' document. --} -module Text.Pandoc.Readers.Markdown ( - readMarkdown - ) where - -import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy ) -import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) -import Network.URI ( isURI ) -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) -import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, - anyHtmlInlineTag, anyHtmlTag, - anyHtmlEndTag, htmlEndTag, extractTagType, - htmlBlockElement ) -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Text.ParserCombinators.Parsec - --- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ParserState -> String -> Pandoc -readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n") - --- --- Constants and data structure definitions --- - -spaceChars = " \t" -bulletListMarkers = "*+-" -hruleChars = "*-_" -setextHChars = "=-" - --- treat these as potentially non-text when parsing inline: -specialChars = "\\[]*_~`<>$!^-.&'\"" - --- --- auxiliary functions --- - -indentSpaces = try $ do - state <- getState - let tabStop = stateTabStop state - try (count tabStop (char ' ')) <|> - (many (char ' ') >> string "\t") <?> "indentation" - -nonindentSpaces = do - state <- getState - let tabStop = stateTabStop state - sps <- many (char ' ') - if length sps < tabStop - then return sps - else unexpected "indented line" - --- | Fail unless we're at beginning of a line. -failUnlessBeginningOfLine = do - pos <- getPosition - if sourceColumn pos == 1 then return () else fail "not beginning of line" - --- | Fail unless we're in "smart typography" mode. -failUnlessSmart = do - state <- getState - if stateSmart state then return () else fail "Smart typography feature" - --- | Parse an inline Str element with a given content. -inlineString str = try $ do - (Str res) <- inline - if res == str then return res else fail $ "unexpected Str content" - --- | Parse a sequence of inline elements between a string --- @opener@ and a string @closer@, including inlines --- between balanced pairs of @opener@ and a @closer@. -inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] -inlinesInBalanced opener closer = try $ do - string opener - result <- manyTill ( (do lookAhead (inlineString opener) - -- because it might be a link... - bal <- inlinesInBalanced opener closer - return $ [Str opener] ++ bal ++ [Str closer]) - <|> (count 1 inline)) - (try (string closer)) - return $ concat result - --- --- document structure --- - -titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline - -authorsLine = try $ do - char '%' - skipSpaces - authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") - newline - return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors - -dateLine = try $ do - char '%' - skipSpaces - date <- many (noneOf "\n") - newline - return $ decodeCharacterReferences $ removeTrailingSpace date - -titleBlock = try $ do - failIfStrict - title <- option [] titleLine - author <- option [] authorsLine - date <- option "" dateLine - optional blanklines - return (title, author, date) - -parseMarkdown = do - -- markdown allows raw HTML - updateState (\state -> state { stateParseRaw = True }) - startPos <- getPosition - -- go through once just to get list of reference keys - -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= - return . concat - setInput docMinusKeys - setPosition startPos - st <- getState - -- go through again for notes unless strict... - if stateStrict st - then return () - else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= - return . concat - st <- getState - let reversedNotes = stateNotes st - updateState $ \st -> st { stateNotes = reverse reversedNotes } - setInput docMinusNotes - setPosition startPos - -- now parse it for real... - (title, author, date) <- option ([],[],"") titleBlock - blocks <- parseBlocks - return $ Pandoc (Meta title author date) $ filter (/= Null) blocks - --- --- initial pass for references and notes --- - -referenceKey = try $ do - startPos <- getPosition - nonindentSpaces - label <- reference - char ':' - skipSpaces - optional (char '<') - src <- many (noneOf "> \n\t") - optional (char '>') - tit <- option "" referenceTitle - blanklines - endPos <- getPosition - let newkey = (label, (removeTrailingSpace src, tit)) - st <- getState - let oldkeys = stateKeys st - updateState $ \st -> st { stateKeys = newkey : oldkeys } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -referenceTitle = try $ do - (many1 spaceChar >> option '\n' newline) <|> newline - skipSpaces - tit <- (charsInBalanced '(' ')' >>= return . unwords . words) - <|> do delim <- char '\'' <|> char '"' - manyTill anyChar (try (char delim >> skipSpaces >> - notFollowedBy (noneOf ")\n"))) - return $ decodeCharacterReferences tit - -noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']') - -rawLine = do - notFollowedBy blankline - notFollowedBy' noteMarker - contents <- many1 nonEndline - end <- option "" (newline >> optional indentSpaces >> return "\n") - return $ contents ++ end - -rawLines = many1 rawLine >>= return . concat - -noteBlock = try $ do - startPos <- getPosition - ref <- noteMarker - char ':' - optional blankline - optional indentSpaces - raw <- sepBy rawLines (try (blankline >> indentSpaces)) - optional blanklines - endPos <- getPosition - -- parse the extracted text, which may contain various block elements: - contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" - let newnote = (ref, contents) - st <- getState - let oldnotes = stateNotes st - updateState $ \st -> st { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - --- --- parsing blocks --- - -parseBlocks = manyTill block eof - -block = choice [ header - , table - , codeBlock - , hrule - , list - , blockQuote - , htmlBlock - , rawLaTeXEnvironment' - , para - , plain - , nullBlock ] <?> "block" - --- --- header blocks --- - -header = atxHeader <|> setextHeader <?> "header" - -atxHeader = try $ do - level <- many1 (char '#') >>= return . length - notFollowedBy (char '.' <|> char ')') -- this would be a list - skipSpaces - text <- manyTill inline atxClosing >>= return . normalizeSpaces - return $ Header level text - -atxClosing = try $ skipMany (char '#') >> blanklines - -setextHeader = try $ do - -- first, see if this block has any chance of being a setextHeader: - lookAhead (anyLine >> oneOf setextHChars) - text <- many1Till inline newline >>= return . normalizeSpaces - level <- choice $ zipWith - (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev) - setextHChars [1..(length setextHChars)] - return $ Header level text - --- --- hrule block --- - -hrule = try $ do - skipSpaces - start <- oneOf hruleChars - count 2 (skipSpaces >> char start) - skipMany (skipSpaces >> char start) - newline - optional blanklines - return HorizontalRule - --- --- code blocks --- - -indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") - -codeBlock = do - contents <- many1 (indentedLine <|> - try (do b <- blanklines - l <- indentedLine - return $ b ++ l)) - optional blanklines - return $ CodeBlock $ stripTrailingNewlines $ concat contents - --- --- block quotes --- - -emacsBoxQuote = try $ do - failIfStrict - string ",----" - manyTill anyChar newline - raw <- manyTill - (try (char '|' >> optional (char ' ') >> manyTill anyChar newline)) - (try (string "`----")) - blanklines - return raw - -emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') - -emailBlockQuote = try $ do - emailBlockQuoteStart - raw <- sepBy (many (nonEndline <|> - (try (endline >> notFollowedBy emailBlockQuoteStart >> - return '\n')))) - (try (newline >> emailBlockQuoteStart)) - newline <|> (eof >> return '\n') - optional blanklines - return raw - -blockQuote = do - raw <- emailBlockQuote <|> emacsBoxQuote - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n" - return $ BlockQuote contents - --- --- list blocks --- - -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -bulletListStart = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - notFollowedBy' hrule -- because hrules start out just like lists - oneOf bulletListMarkers - spaceChar - skipSpaces - -anyOrderedListStart = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - notFollowedBy $ string "p." >> spaceChar >> digit -- page number - state <- getState - if stateStrict state - then do many1 digit - char '.' - spaceChar - return (1, DefaultStyle, DefaultDelim) - else anyOrderedListMarker >>~ spaceChar - -orderedListStart style delim = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - state <- getState - num <- if stateStrict state - then do many1 digit - char '.' - return 1 - else orderedListMarker style delim - if delim == Period && (style == UpperAlpha || (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000])) - then char '\t' <|> (spaceChar >> spaceChar) - else spaceChar - skipSpaces - --- parse a line of a list item (start = parser for beginning of list item) -listLine start = try $ do - notFollowedBy' start - notFollowedBy blankline - notFollowedBy' (do indentSpaces - many (spaceChar) - bulletListStart <|> (anyOrderedListStart >> return ())) - line <- manyTill anyChar newline - return $ line ++ "\n" - --- parse raw text for one list item, excluding start marker and continuations -rawListItem start = try $ do - start - result <- many1 (listLine start) - blanks <- many blankline - return $ concat result ++ blanks - --- continuation of a list item - indented and separated by blankline --- or (in compact lists) endline. --- note: nested lists are parsed as continuations -listContinuation start = try $ do - lookAhead indentSpaces - result <- many1 (listContinuationLine start) - blanks <- many blankline - return $ concat result ++ blanks - -listContinuationLine start = try $ do - notFollowedBy blankline - notFollowedBy' start - optional indentSpaces - result <- manyTill anyChar newline - return $ result ++ "\n" - -listItem start = try $ do - first <- rawListItem start - continuations <- many (listContinuation start) - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let oldContext = stateParserContext state - setState $ state {stateParserContext = ListItemState} - -- parse the extracted block, which may contain various block elements: - let raw = concat (first:continuations) - contents <- parseFromString parseBlocks raw - updateState (\st -> st {stateParserContext = oldContext}) - return contents - -orderedList = try $ do - (start, style, delim) <- lookAhead anyOrderedListStart - items <- many1 (listItem (orderedListStart style delim)) - return $ OrderedList (start, style, delim) $ compactify items - -bulletList = many1 (listItem bulletListStart) >>= - return . BulletList . compactify - --- definition lists - -definitionListItem = try $ do - notFollowedBy blankline - notFollowedBy' indentSpaces - -- first, see if this has any chance of being a definition list: - lookAhead (anyLine >> char ':') - term <- manyTill inline newline - raw <- many1 defRawBlock - state <- getState - let oldContext = stateParserContext state - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ concat raw - updateState (\st -> st {stateParserContext = oldContext}) - return ((normalizeSpaces term), contents) - -defRawBlock = try $ do - char ':' - state <- getState - let tabStop = stateTabStop state - try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") - firstline <- anyLine - rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) - trailing <- option "" blanklines - return $ firstline ++ "\n" ++ unlines rawlines ++ trailing - -definitionList = do - failIfStrict - items <- many1 definitionListItem - let (terms, defs) = unzip items - let defs' = compactify defs - let items' = zip terms defs' - return $ DefinitionList items' - --- --- paragraph block --- - -para = try $ do - result <- many1 inline - newline - blanklines <|> do st <- getState - if stateStrict st - then lookAhead (blockQuote <|> header) >> return "" - else lookAhead emacsBoxQuote >> return "" - return $ Para $ normalizeSpaces result - -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- raw html --- - -htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element" - -htmlBlock = do - st <- getState - if stateStrict st - then try $ do failUnlessBeginningOfLine - first <- htmlElement - finalSpace <- many (oneOf spaceChars) - finalNewlines <- many newline - return $ RawHtml $ first ++ finalSpace ++ finalNewlines - else rawHtmlBlocks - --- True if tag is self-closing -isSelfClosing tag = - isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag - -strictHtmlBlock = try $ do - tag <- anyHtmlBlockTag - let tag' = extractTagType tag - if isSelfClosing tag || tag' == "hr" - then return tag - else do contents <- many (notFollowedBy' (htmlEndTag tag') >> - (htmlElement <|> (count 1 anyChar))) - end <- htmlEndTag tag' - return $ tag ++ concat contents ++ end - -rawHtmlBlocks = do - htmlBlocks <- many1 rawHtmlBlock - let combined = concatMap (\(RawHtml str) -> str) htmlBlocks - let combined' = if not (null combined) && last combined == '\n' - then init combined -- strip extra newline - else combined - return $ RawHtml combined' - --- --- LaTeX --- - -rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment - --- --- Tables --- - --- Parse a dashed line with optional trailing spaces; return its length --- and the length including trailing space. -dashedLine ch = do - dashes <- many1 (char ch) - sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) - --- Parse a table header with dashed lines of '-' preceded by --- one line of text. -simpleTableHeader = try $ do - rawContent <- anyLine - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeads = tail $ splitByIndices (init indices) rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths - return (rawHeads, aligns, indices) - --- Parse a table footer - dashed lines followed by blank line. -tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines - --- Parse a table separator - dashed line. -tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" - --- Parse a raw line and split it into chunks by indices. -rawTableLine indices = do - notFollowedBy' (blanklines <|> tableFooter) - line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ - splitByIndices (init indices) line - --- Parse a table line and return a list of lists of blocks (columns). -tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) - --- Parse a multiline table row and return a list of blocks (columns). -multilineRow indices = do - colLines <- many1 (rawTableLine indices) - optional blanklines - let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Float] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths = zipWith (-) indices (0:indices) - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - --- Parses a table caption: inlines beginning with 'Table:' --- and followed by blank lines. -tableCaption = try $ do - nonindentSpaces - string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result - --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith headerParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines <- many1Till (lineParser indices) footerParser - caption <- option [] tableCaption - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines - --- Parse a simple table with '---' header and one line per row. -simpleTable = tableWith simpleTableHeader tableLine blanklines - --- Parse a multiline table: starts with row of '-' on top, then header --- (which may be multiline), then the rows, --- which may be multiline, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). -multilineTable = tableWith multilineTableHeader multilineRow tableFooter - -multilineTableHeader = try $ do - tableSep - rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeadsList = transpose $ map - (\ln -> tail $ splitByIndices (init indices) ln) - rawContent - let rawHeads = map (joinWithSep " ") rawHeadsList - let aligns = zipWith alignType rawHeadsList lengths - return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) - --- Returns an alignment type for a table, based on a list of strings --- (the rows of the column header) and a number (the length of the --- dashed line under the rows. -alignType :: [String] -> Int -> Alignment -alignType [] len = AlignDefault -alignType strLst len = - let str = head $ sortBy (comparing length) $ - map removeTrailingSpace strLst - leftSpace = if null str then False else (str !! 0) `elem` " \t" - rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" - in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault - -table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table" - --- --- inline --- - -inline = choice [ str - , smartPunctuation - , whitespace - , endline - , code - , charRef - , strong - , emph - , note - , inlineNote - , link - , image - , math - , strikeout - , superscript - , subscript - , autoLink - , rawHtmlInline' - , rawLaTeXInline' - , escapedChar - , symbol - , ltSign ] <?> "inline" - -escapedChar = do - char '\\' - state <- getState - result <- option '\\' $ if stateStrict state - then oneOf "\\`*_{}[]()>#+-.!~" - else satisfy (not . isAlphaNum) - return $ Str [result] - -ltSign = do - st <- getState - if stateStrict st - then char '<' - else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html - return $ Str ['<'] - -specialCharsMinusLt = filter (/= '<') specialChars - -symbol = do - result <- oneOf specialCharsMinusLt - return $ Str [result] - --- parses inline code, between n `s and n `s -code = try $ do - starts <- many1 (char '`') - skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> - (char '\n' >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> - notFollowedBy (char '`'))) - return $ Code $ removeLeadingTrailingSpace $ concat result - -mathWord = many1 ((noneOf " \t\n\\$") <|> - (try (char '\\') >>~ notFollowedBy (char '$'))) - -math = try $ do - failIfStrict - char '$' - notFollowedBy space - words <- sepBy1 mathWord (many1 space) - char '$' - return $ TeX ("$" ++ (joinWithSep " " words) ++ "$") - -emph = ((enclosed (char '*') (char '*') inline) <|> - (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= - return . Emph . normalizeSpaces - -strong = ((enclosed (string "**") (try $ string "**") inline) <|> - (enclosed (string "__") (try $ string "__") inline)) >>= - return . Strong . normalizeSpaces - -strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= - return . Strikeout . normalizeSpaces - -superscript = failIfStrict >> enclosed (char '^') (char '^') - (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Superscript - -subscript = failIfStrict >> enclosed (char '~') (char '~') - (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Subscript - -smartPunctuation = failUnlessSmart >> - choice [ quoted, apostrophe, dash, ellipses ] - -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe - -quoted = doubleQuoted <|> singleQuoted - -withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = stateQuoteContext oldState - setState oldState { stateQuoteContext = context } - result <- parser - newState <- getState - setState newState { stateQuoteContext = oldQuoteContext } - return result - -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= - return . Quoted DoubleQuote . normalizeSpaces - -failIfInQuoteContext context = do - st <- getState - if stateQuoteContext st == context - then fail "already inside quotes" - else return () - -singleQuoteStart = do - failIfInQuoteContext InSingleQuote - char '\8216' <|> - do char '\'' - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) -- possess/contraction - return '\'' - -singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum - -doubleQuoteStart = failIfInQuoteContext InDoubleQuote >> - (char '"' <|> char '\8220') >> - notFollowedBy (oneOf " \t\n") - -doubleQuoteEnd = char '"' <|> char '\8221' - -ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses - -dash = enDash <|> emDash - -enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash - -emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >> - skipSpaces >> return EmDash - -whitespace = do - sps <- many1 (oneOf spaceChars) - if length sps >= 2 - then option Space (endline >> return LineBreak) - else return Space <?> "whitespace" - -nonEndline = satisfy (/='\n') - -strChar = noneOf (specialChars ++ spaceChars ++ "\n") - -str = many1 strChar >>= return . Str - --- an endline character that can be treated as a space, not a structural break -endline = try $ do - newline - notFollowedBy blankline - st <- getState - if stateStrict st - then do notFollowedBy emailBlockQuoteStart - notFollowedBy (char '#') -- atx header - else return () - -- parse potential list-starts differently if in a list: - if stateParserContext st == ListItemState - then notFollowedBy' (bulletListStart <|> - (anyOrderedListStart >> return ())) - else return () - return Space - --- --- links --- - --- a reference label for a link -reference = notFollowedBy' (string "[^") >> -- footnote reference - inlinesInBalanced "[" "]" >>= (return . normalizeSpaces) - --- source for a link, with optional title -source = try $ do - char '(' - optional (char '<') - src <- many (noneOf ")> \t\n") - optional (char '>') - tit <- option "" linkTitle - skipSpaces - char ')' - return (removeTrailingSpace src, tit) - -linkTitle = try $ do - (many1 spaceChar >> option '\n' newline) <|> newline - skipSpaces - delim <- char '\'' <|> char '"' - tit <- manyTill anyChar (try (char delim >> skipSpaces >> - notFollowedBy (noneOf ")\n"))) - return $ decodeCharacterReferences tit - -link = try $ do - label <- reference - src <- source <|> referenceLink label - return $ Link label src - --- a link like [this][ref] or [this][] or [this] -referenceLink label = do - ref <- option [] (try (optional (char ' ') >> - optional (newline >> skipSpaces) >> reference)) - let ref' = if null ref then label else ref - state <- getState - case lookupKeySrc (stateKeys state) ref' of - Nothing -> fail "no corresponding key" - Just target -> return target - -emailAddress = try $ do - name <- many1 (alphaNum <|> char '+') - char '@' - first <- many1 alphaNum - rest <- many1 (char '.' >> many1 alphaNum) - return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest) - -uri = try $ do - str <- many1 (noneOf "\n\t >") - if isURI str - then return str - else fail "not a URI" - -autoLink = try $ do - char '<' - src <- uri <|> emailAddress - char '>' - let src' = if "mailto:" `isPrefixOf` src - then drop 7 src - else src - st <- getState - return $ if stateStrict st - then Link [Str src'] (src, "") - else Link [Code src'] (src, "") - -image = try $ do - char '!' - (Link label src) <- link - return $ Image label src - -note = try $ do - failIfStrict - ref <- noteMarker - state <- getState - let notes = stateNotes state - case lookup ref notes of - Nothing -> fail "note not found" - Just contents -> return $ Note contents - -inlineNote = try $ do - failIfStrict - char '^' - contents <- inlinesInBalanced "[" "]" - return $ Note [Para contents] - -rawLaTeXInline' = failIfStrict >> rawLaTeXInline - -rawHtmlInline' = do - st <- getState - result <- choice $ if stateStrict st - then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else [htmlBlockElement, anyHtmlInlineTag] - return $ HtmlInline result - diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs deleted file mode 100644 index 1239eb688..000000000 --- a/src/Text/Pandoc/Readers/RST.hs +++ /dev/null @@ -1,640 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion from reStructuredText to 'Pandoc' document. --} -module Text.Pandoc.Readers.RST ( - readRST - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.ParserCombinators.Parsec -import Data.List ( findIndex, delete ) - --- | Parse reStructuredText string and return Pandoc document. -readRST :: ParserState -> String -> Pandoc -readRST state str = (readWith parseRST) state (str ++ "\n\n") - --- --- Constants and data structure definitions ---- - -bulletListMarkers = "*+-" -underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~" - --- treat these as potentially non-text when parsing inline: -specialChars = "\\`|*_<>$:[-" - --- --- parsing documents --- - -isAnonKey (ref, src) = ref == [Str "_"] - -isHeader :: Int -> Block -> Bool -isHeader n (Header x _) = x == n -isHeader _ _ = False - --- | Promote all headers in a list of blocks. (Part of --- title transformation for RST.) -promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level text):rest) = - (Header (level - num) text):(promoteHeaders num rest) -promoteHeaders num (other:rest) = other:(promoteHeaders num rest) -promoteHeaders num [] = [] - --- | If list of blocks starts with a header (or a header and subheader) --- of level that are not found elsewhere, return it as a title and --- promote all the other headers. -titleTransform :: [Block] -- ^ list of blocks - -> ([Block], [Inline]) -- ^ modified list of blocks, title -titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle - if (any (isHeader 1) rest) || (any (isHeader 2) rest) - then ((Header 1 head1):(Header 2 head2):rest, []) - else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) -titleTransform ((Header 1 head1):rest) = -- title, no subtitle - if (any (isHeader 1) rest) - then ((Header 1 head1):rest, []) - else ((promoteHeaders 1 rest), head1) -titleTransform blocks = (blocks, []) - -parseRST = do - startPos <- getPosition - -- go through once just to get list of reference keys - -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat - setInput docMinusKeys - setPosition startPos - st <- getState - let reversedKeys = stateKeys st - updateState $ \st -> st { stateKeys = reverse reversedKeys } - -- now parse it for real... - blocks <- parseBlocks - let blocks' = filter (/= Null) blocks - state <- getState - let (blocks'', title) = if stateStandalone state - then titleTransform blocks' - else (blocks', []) - let authors = stateAuthors state - let date = stateDate state - let title' = if (null title) then (stateTitle state) else title - return $ Pandoc (Meta title' authors date) blocks'' - --- --- parsing blocks --- - -parseBlocks = manyTill block eof - -block = choice [ codeBlock - , rawHtmlBlock - , rawLaTeXBlock - , fieldList - , blockQuote - , imageBlock - , unknownDirective - , header - , hrule - , list - , lineBlock - , para - , plain - , nullBlock ] <?> "block" - --- --- field list --- - -fieldListItem indent = try $ do - string indent - char ':' - name <- many1 alphaNum - string ": " - skipSpaces - first <- manyTill anyChar newline - rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> - indentedBlock - return (name, joinWithSep " " (first:(lines rest))) - -fieldList = try $ do - indent <- lookAhead $ many (oneOf " \t") - items <- many1 $ fieldListItem indent - blanklines - let authors = case lookup "Authors" items of - Just auth -> [auth] - Nothing -> map snd (filter (\(x,y) -> x == "Author") items) - if null authors - then return () - else updateState $ \st -> st {stateAuthors = authors} - case (lookup "Date" items) of - Just dat -> updateState $ \st -> st {stateDate = dat} - Nothing -> return () - case (lookup "Title" items) of - Just tit -> parseFromString (many inline) tit >>= - \t -> updateState $ \st -> st {stateTitle = t} - Nothing -> return () - let remaining = filter (\(x,y) -> (x /= "Authors") && (x /= "Author") && - (x /= "Date") && (x /= "Title")) items - if null remaining - then return Null - else do terms <- mapM (return . (:[]) . Str . fst) remaining - defs <- mapM (parseFromString (many block) . snd) - remaining - return $ DefinitionList $ zip terms defs - --- --- line block --- - -lineBlockLine = try $ do - string "| " - white <- many (oneOf " \t") - line <- manyTill inline newline - return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] - -lineBlock = try $ do - lines <- many1 lineBlockLine - blanklines - return $ Para (concat lines) - --- --- paragraph block --- - -para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" - -codeBlockStart = string "::" >> blankline >> blankline - --- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock = try $ do - result <- many1 (notFollowedBy' codeBlockStart >> inline) - lookAhead (string "::") - return $ Para $ if last result == Space - then normalizeSpaces result - else (normalizeSpaces result) ++ [Str ":"] - --- regular paragraph -paraNormal = try $ do - result <- many1 inline - newline - blanklines - return $ Para $ normalizeSpaces result - -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- image block --- - -imageBlock = try $ do - string ".. image:: " - src <- manyTill anyChar newline - fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t") - many1 $ fieldListItem indent - optional blanklines - case lookup "alt" fields of - Just alt -> return $ Plain [Image [Str alt] (src, alt)] - Nothing -> return $ Plain [Image [Str "image"] (src, "")] --- --- header blocks --- - -header = doubleHeader <|> singleHeader <?> "header" - --- a header with lines on top and bottom -doubleHeader = try $ do - c <- oneOf underlineChars - rest <- many (char c) -- the top line - let lenTop = length (c:rest) - skipSpaces - newline - txt <- many1 (notFollowedBy blankline >> inline) - pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else return () - blankline -- spaces and newline - count lenTop (char c) -- the bottom line - blanklines - -- check to see if we've had this kind of header before. - -- if so, get appropriate level. if not, add to list. - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return $ Header level (normalizeSpaces txt) - --- a header with line on the bottom only -singleHeader = try $ do - notFollowedBy' whitespace - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition - let len = (sourceColumn pos) - 1 - blankline - c <- oneOf underlineChars - rest <- count (len - 1) (char c) - many (char c) - blanklines - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return $ Header level (normalizeSpaces txt) - --- --- hrule block --- - -hrule = try $ do - chr <- oneOf underlineChars - count 3 (char chr) - skipMany (char chr) - blankline - blanklines - return HorizontalRule - --- --- code blocks --- - --- read a line indented by a given string -indentedLine indents = try $ do - string indents - result <- manyTill anyChar newline - return $ result ++ "\n" - --- two or more indented lines, possibly separated by blank lines. --- any amount of indentation will work. -indentedBlock = do - indents <- lookAhead $ many1 (oneOf " \t") - lns <- many $ choice $ [ indentedLine indents, - try $ do b <- blanklines - l <- indentedLine indents - return (b ++ l) ] - optional blanklines - return $ concat lns - -codeBlock = try $ do - codeBlockStart - result <- indentedBlock - return $ CodeBlock $ stripTrailingNewlines result - --- --- raw html --- - -rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> - indentedBlock >>= return . RawHtml - --- --- raw latex --- - -rawLaTeXBlock = try $ do - string ".. raw:: latex" - blanklines - result <- indentedBlock - return $ Para [(TeX result)] - --- --- block quotes --- - -blockQuote = do - raw <- indentedBlock - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" - return $ BlockQuote contents - --- --- list blocks --- - -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -definitionListItem = try $ do - term <- many1Till inline endline - raw <- indentedBlock - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" - return (normalizeSpaces term, contents) - -definitionList = many1 definitionListItem >>= return . DefinitionList - --- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart = try $ do - notFollowedBy' hrule -- because hrules start out just like lists - marker <- oneOf bulletListMarkers - white <- many1 spaceChar - return $ length (marker:white) - --- parses ordered list start and returns its length (inc following whitespace) -orderedListStart style delim = try $ do - (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) - white <- many1 spaceChar - return $ markerLen + length white - --- parse a line of a list item -listLine markerLength = try $ do - notFollowedBy blankline - indentWith markerLength - line <- manyTill anyChar newline - return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith num = do - state <- getState - let tabStop = stateTabStop state - if (num < tabStop) - then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] - --- parse raw text for one list item, excluding start marker and continuations -rawListItem start = do - markerLength <- start - firstLine <- manyTill anyChar newline - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) - --- continuation of a list item - indented and separated by blankline or --- (in compact lists) endline. --- Note: nested lists are parsed as continuations. -listContinuation markerLength = try $ do - blanks <- many1 blankline - result <- many1 (listLine markerLength) - return $ blanks ++ concat result - -listItem start = try $ do - (markerLength, first) <- rawListItem start - rest <- many (listContinuation markerLength) - blanks <- choice [ try (many blankline >>~ lookAhead start), - many1 blankline ] -- whole list must end with blank. - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let oldContext = stateParserContext state - setState $ state {stateParserContext = ListItemState} - -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks - updateState (\st -> st {stateParserContext = oldContext}) - return parsed - -orderedList = do - (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) - items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify items - return $ OrderedList (start, style, delim) items' - -bulletList = many1 (listItem bulletListStart) >>= - return . BulletList . compactify - --- --- unknown directive (e.g. comment) --- - -unknownDirective = try $ do - string ".. " - manyTill anyChar newline - many (string " :" >> many1 (noneOf "\n:") >> char ':' >> - many1 (noneOf "\n") >> newline) - optional blanklines - return Null - --- --- reference key --- - -referenceKey = do - startPos <- getPosition - key <- choice [imageKey, anonymousKey, regularKeyQuoted, regularKey] - st <- getState - let oldkeys = stateKeys st - updateState $ \st -> st { stateKeys = key : oldkeys } - optional blanklines - endPos <- getPosition - -- return enough blanks to replace key - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -targetURI = do - skipSpaces - optional newline - contents <- many1 (try (many spaceChar >> newline >> - many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") - blanklines - return contents - -imageKey = try $ do - string ".. |" - ref <- manyTill inline (char '|') - skipSpaces - string "image::" - src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - -anonymousKey = try $ do - oneOfStrings [".. __:", "__"] - src <- targetURI - state <- getState - return ([Str "_"], (removeLeadingTrailingSpace src, "")) - -regularKeyQuoted = try $ do - string ".. _`" - ref <- manyTill inline (char '`') - char ':' - src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - -regularKey = try $ do - string ".. _" - ref <- manyTill inline (char ':') - src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - - -- - -- inline - -- - -inline = choice [ link - , str - , whitespace - , endline - , strong - , emph - , code - , image - , hyphens - , superscript - , subscript - , escapedChar - , symbol ] <?> "inline" - -hyphens = do - result <- many1 (char '-') - option Space endline - -- don't want to treat endline after hyphen or dash as a space - return $ Str result - -escapedChar = escaped anyChar - -symbol = do - result <- oneOf specialChars - return $ Str [result] - --- parses inline code, between codeStart and codeEnd -code = try $ do - string "``" - result <- manyTill anyChar (try (string "``")) - return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result - -emph = enclosed (char '*') (char '*') inline >>= - return . Emph . normalizeSpaces - -strong = enclosed (string "**") (try $ string "**") inline >>= - return . Strong . normalizeSpaces - -interpreted role = try $ do - optional $ try $ string "\\ " - result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar - nextChar <- lookAhead anyChar - try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") - return [Str result] - -superscript = interpreted "sup" >>= (return . Superscript) - -subscript = interpreted "sub" >>= (return . Subscript) - -whitespace = many1 spaceChar >> return Space <?> "whitespace" - -str = notFollowedBy' oneWordReference >> - many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str - --- an endline character that can be treated as a space, not a structural break -endline = try $ do - newline - notFollowedBy blankline - -- parse potential list-starts at beginning of line differently in a list: - st <- getState - if (stateParserContext st) == ListItemState - then notFollowedBy (anyOrderedListMarker >> spaceChar) >> - notFollowedBy' bulletListStart - else return () - return Space - --- --- links --- - -link = choice [explicitLink, referenceLink, autoLink] <?> "link" - -explicitLink = try $ do - char '`' - notFollowedBy (char '`') -- `` is marks start of inline code - label <- manyTill inline (try (do {spaces; char '<'})) - src <- manyTill (noneOf ">\n ") (char '>') - skipSpaces - string "`_" - return $ Link (normalizeSpaces label) (removeLeadingTrailingSpace src, "") - -reference = try $ do - char '`' - notFollowedBy (char '`') - label <- many1Till inline (char '`') - char '_' - return label - -oneWordReference = do - raw <- many1 alphaNum - char '_' - notFollowedBy alphaNum -- because this_is_not a link - return [Str raw] - -referenceLink = try $ do - label <- reference <|> oneWordReference - key <- option label (do{char '_'; return [Str "_"]}) -- anonymous link - state <- getState - let keyTable = stateKeys state - src <- case lookupKeySrc keyTable key of - Nothing -> fail "no corresponding key" - Just target -> return target - -- if anonymous link, remove first anon key so it won't be used again - let keyTable' = if (key == [Str "_"]) -- anonymous link? - then delete ([Str "_"], src) keyTable -- remove first anon key - else keyTable - setState $ state { stateKeys = keyTable' } - return $ Link (normalizeSpaces label) src - -uriScheme = oneOfStrings [ "http://", "https://", "ftp://", "file://", - "mailto:", "news:", "telnet:" ] - -uri = try $ do - scheme <- uriScheme - identifier <- many1 (noneOf " \t\n") - return $ scheme ++ identifier - -autoURI = do - src <- uri - return $ Link [Str src] (src, "") - -emailChar = alphaNum <|> oneOf "-+_." - -emailAddress = try $ do - firstLetter <- alphaNum - restAddr <- many emailChar - let addr = firstLetter:restAddr - char '@' - dom <- domain - return $ addr ++ '@':dom - -domainChar = alphaNum <|> char '-' - -domain = do - first <- many1 domainChar - dom <- many1 (try (do{ char '.'; many1 domainChar })) - return $ joinWithSep "." (first:dom) - -autoEmail = do - src <- emailAddress - return $ Link [Str src] ("mailto:" ++ src, "") - -autoLink = autoURI <|> autoEmail - --- For now, we assume that all substitution references are for images. -image = try $ do - char '|' - ref <- manyTill inline (char '|') - state <- getState - let keyTable = stateKeys state - src <- case lookupKeySrc keyTable ref of - Nothing -> fail "no corresponding key" - Just target -> return target - return $ Image (normalizeSpaces ref) src - diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs deleted file mode 100644 index f27c3ae75..000000000 --- a/src/Text/Pandoc/Shared.hs +++ /dev/null @@ -1,792 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Utility functions and definitions used by the various Pandoc modules. --} -module Text.Pandoc.Shared ( - -- * List processing - splitBy, - splitByIndices, - substitute, - joinWithSep, - -- * Text processing - backslashEscapes, - escapeStringUsing, - stripTrailingNewlines, - removeLeadingTrailingSpace, - removeLeadingSpace, - removeTrailingSpace, - stripFirstAndLast, - camelCaseToHyphenated, - toRomanNumeral, - wrapped, - wrapIfNeeded, - -- * Parsing - (>>~), - anyLine, - many1Till, - notFollowedBy', - oneOfStrings, - spaceChar, - skipSpaces, - blankline, - blanklines, - enclosed, - stringAnyCase, - parseFromString, - lineClump, - charsInBalanced, - charsInBalanced', - romanNumeral, - withHorizDisplacement, - nullBlock, - failIfStrict, - escaped, - anyOrderedListMarker, - orderedListMarker, - charRef, - readWith, - testStringWith, - ParserState (..), - defaultParserState, - HeaderType (..), - ParserContext (..), - QuoteContext (..), - NoteTable, - KeyTable, - lookupKeySrc, - refsMatch, - -- * Native format prettyprinting - prettyPandoc, - -- * Pandoc block and inline list processing - orderedListMarkers, - normalizeSpaces, - compactify, - Element (..), - hierarchicalize, - isHeaderBlock, - -- * Writer options - WriterOptions (..), - defaultWriterOptions - ) where - -import Text.Pandoc.Definition -import Text.ParserCombinators.Parsec -import Text.PrettyPrint.HughesPJ ( Doc, fsep ) -import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) -import Data.List ( find, isPrefixOf ) -import Control.Monad ( join ) - --- --- List processing --- - --- | Split list by groups of one or more sep. -splitBy :: (Eq a) => a -> [a] -> [[a]] -splitBy _ [] = [] -splitBy sep lst = - let (first, rest) = break (== sep) lst - rest' = dropWhile (== sep) rest - in first:(splitBy sep rest') - --- | Split list into chunks divided at specified indices. -splitByIndices :: [Int] -> [a] -> [[a]] -splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = - let (first, rest) = splitAt x lst in - first:(splitByIndices (map (\y -> y - x) xs) rest) - --- | Replace each occurrence of one sublist in a list with another. -substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] -substitute _ _ [] = [] -substitute [] _ lst = lst -substitute target replacement lst = - if target `isPrefixOf` lst - then replacement ++ (substitute target replacement $ drop (length target) lst) - else (head lst):(substitute target replacement $ tail lst) - --- | Joins a list of lists, separated by another list. -joinWithSep :: [a] -- ^ List to use as separator - -> [[a]] -- ^ Lists to join - -> [a] -joinWithSep _ [] = [] -joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst - --- --- Text processing --- - --- | Returns an association list of backslash escapes for the --- designated characters. -backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, String)] -backslashEscapes = map (\ch -> (ch, ['\\',ch])) - --- | Escape a string of characters, using an association list of --- characters and strings. -escapeStringUsing :: [(Char, String)] -> String -> String -escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = - case (lookup x escapeTable) of - Just str -> str ++ rest - Nothing -> x:rest - where rest = escapeStringUsing escapeTable xs - --- | Strip trailing newlines from string. -stripTrailingNewlines :: String -> String -stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse - --- | Remove leading and trailing space (including newlines) from string. -removeLeadingTrailingSpace :: String -> String -removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace - --- | Remove leading space (including newlines) from string. -removeLeadingSpace :: String -> String -removeLeadingSpace = dropWhile (`elem` " \n\t") - --- | Remove trailing space (including newlines) from string. -removeTrailingSpace :: String -> String -removeTrailingSpace = reverse . removeLeadingSpace . reverse - --- | Strip leading and trailing characters from string -stripFirstAndLast :: String -> String -stripFirstAndLast str = - drop 1 $ take ((length str) - 1) str - --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). -camelCaseToHyphenated :: String -> String -camelCaseToHyphenated [] = "" -camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = - a:'-':(toLower b):(camelCaseToHyphenated rest) -camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) - --- | Convert number < 4000 to uppercase roman numeral. -toRomanNumeral :: Int -> String -toRomanNumeral x = - if x >= 4000 || x < 0 - then "?" - else case x of - _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000) - _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900) - _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500) - _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400) - _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100) - _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90) - _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50) - _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40) - _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10) - _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5) - _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5) - _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4) - _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) - _ -> "" - --- | Wrap inlines to line length. -wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc -wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= - return . fsep - -wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) -> - [Inline] -> m Doc -wrapIfNeeded opts = if writerWrapText opts - then wrapped - else ($) - --- --- Parsing --- - --- | Like >>, but returns the operation on the left. --- (Suggested by Tillmann Rendel on Haskell-cafe list.) -(>>~) :: (Monad m) => m a -> m b -> m a -a >>~ b = a >>= \x -> b >> return x - --- | Parse any line of text -anyLine :: GenParser Char st [Char] -anyLine = manyTill anyChar newline - --- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] -many1Till p end = do - first <- p - rest <- manyTill p end - return (first:rest) - --- | A more general form of @notFollowedBy@. This one allows any --- type of parser to be specified, and succeeds only if that parser fails. --- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () -notFollowedBy' p = try $ join $ do a <- try p - return (unexpected (show a)) - <|> - return (return ()) --- (This version due to Andrew Pimlott on the Haskell mailing list.) - --- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings - --- | Parses a space or tab. -spaceChar :: CharParser st Char -spaceChar = char ' ' <|> char '\t' - --- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () -skipSpaces = skipMany spaceChar - --- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char -blankline = try $ skipSpaces >> newline - --- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] -blanklines = many1 blankline - --- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] -enclosed start end parser = try $ - start >> notFollowedBy space >> many1Till parser end - --- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = do - firstChar <- char (toUpper x) <|> char (toLower x) - rest <- stringAnyCase xs - return (firstChar:rest) - --- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a -parseFromString parser str = do - oldPos <- getPosition - oldInput <- getInput - setInput str - result <- parser - setInput oldInput - setPosition oldPos - return result - --- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = blanklines - <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) - --- | Parse a string of characters between an open character --- and a close character, including text between balanced --- pairs of open and close, which must be different. For example, --- @charsInBalanced '(' ')'@ will parse "(hello (there))" --- and return "hello (there)". Stop if a blank line is --- encountered. -charsInBalanced :: Char -> Char -> GenParser Char st String -charsInBalanced open close = try $ do - char open - raw <- many $ (many1 (noneOf [open, close, '\n'])) - <|> (do res <- charsInBalanced open close - return $ [open] ++ res ++ [close]) - <|> try (string "\n" >>~ notFollowedBy' blanklines) - char close - return $ concat raw - --- | Like @charsInBalanced@, but allow blank lines in the content. -charsInBalanced' :: Char -> Char -> GenParser Char st String -charsInBalanced' open close = try $ do - char open - raw <- many $ (many1 (noneOf [open, close])) - <|> (do res <- charsInBalanced' open close - return $ [open] ++ res ++ [close]) - char close - return $ concat raw - --- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Bool -- ^ Uppercase if true - -> GenParser Char st Int -romanNumeral upperCase = do - let charAnyCase c = char (if upperCase then toUpper c else c) - let one = charAnyCase 'i' - let five = charAnyCase 'v' - let ten = charAnyCase 'x' - let fifty = charAnyCase 'l' - let hundred = charAnyCase 'c' - let fivehundred = charAnyCase 'd' - let thousand = charAnyCase 'm' - thousands <- many thousand >>= (return . (1000 *) . length) - ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 - fivehundreds <- many fivehundred >>= (return . (500 *) . length) - fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 - hundreds <- many hundred >>= (return . (100 *) . length) - nineties <- option 0 $ try $ ten >> hundred >> return 90 - fifties <- many fifty >>= (return . (50 *) . length) - forties <- option 0 $ try $ ten >> fifty >> return 40 - tens <- many ten >>= (return . (10 *) . length) - nines <- option 0 $ try $ one >> ten >> return 9 - fives <- many five >>= (return . (5 *) . length) - fours <- option 0 $ try $ one >> five >> return 4 - ones <- many one >>= (return . length) - let total = thousands + ninehundreds + fivehundreds + fourhundreds + - hundreds + nineties + fifties + forties + tens + nines + - fives + fours + ones - if total == 0 - then fail "not a roman numeral" - else return total - --- | Applies a parser, returns tuple of its results and its horizontal --- displacement (the difference between the source column at the end --- and the source column at the beginning). Vertical displacement --- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) -withHorizDisplacement parser = do - pos1 <- getPosition - result <- parser - pos2 <- getPosition - return (result, sourceColumn pos2 - sourceColumn pos1) - --- | Parses a character and returns 'Null' (so that the parser can move on --- if it gets stuck). -nullBlock :: GenParser Char st Block -nullBlock = anyChar >> return Null - --- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser Char ParserState () -failIfStrict = do - state <- getState - if stateStrict state then fail "strict mode" else return () - --- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Inline -escaped parser = try $ do - char '\\' - result <- parser - return (Str [result]) - --- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) -upperRoman = do - num <- romanNumeral True - return (UpperRoman, num) - --- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) -lowerRoman = do - num <- romanNumeral False - return (LowerRoman, num) - --- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) -decimal = do - num <- many1 digit - return (Decimal, read num) - --- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) -defaultNum = do - char '#' - return (DefaultStyle, 1) - --- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) -lowerAlpha = do - ch <- oneOf ['a'..'z'] - return (LowerAlpha, ord ch - ord 'a' + 1) - --- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) -upperAlpha = do - ch <- oneOf ['A'..'Z'] - return (UpperAlpha, ord ch - ord 'A' + 1) - --- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) -romanOne = (char 'i' >> return (LowerRoman, 1)) <|> - (char 'I' >> return (UpperRoman, 1)) - --- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char st ListAttributes -anyOrderedListMarker = choice $ - [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], - numParser <- [decimal, defaultNum, romanOne, - lowerAlpha, lowerRoman, upperAlpha, upperRoman]] - --- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inPeriod num = try $ do - (style, start) <- num - char '.' - let delim = if style == DefaultStyle - then DefaultDelim - else Period - return (start, style, delim) - --- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inOneParen num = try $ do - (style, start) <- num - char ')' - return (start, style, OneParen) - --- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inTwoParens num = try $ do - char '(' - (style, start) <- num - char ')' - return (start, style, TwoParens) - --- | Parses an ordered list marker with a given style and delimiter, --- returns number. -orderedListMarker :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char st Int -orderedListMarker style delim = do - let num = case style of - DefaultStyle -> decimal <|> defaultNum - Decimal -> decimal - UpperRoman -> upperRoman - LowerRoman -> lowerRoman - UpperAlpha -> upperAlpha - LowerAlpha -> lowerAlpha - let context = case delim of - DefaultDelim -> inPeriod - Period -> inPeriod - OneParen -> inOneParen - TwoParens -> inTwoParens - (start, _, _) <- context num - return start - --- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline -charRef = do - c <- characterReference - return $ Str [c] - --- | Parse a string with a given parser and state. -readWith :: GenParser Char ParserState a -- ^ parser - -> ParserState -- ^ initial state - -> String -- ^ input string - -> a -readWith parser state input = - case runParser parser state "source" input of - Left err -> error $ "\nError:\n" ++ show err - Right result -> result - --- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a - -> String - -> IO () -testStringWith parser str = putStrLn $ show $ - readWith parser defaultParserState str - --- | Parsing options. -data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? - stateParserContext :: ParserContext, -- ^ Inside list? - stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateKeys :: KeyTable, -- ^ List of reference keys - stateNotes :: NoteTable, -- ^ List of notes - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ Parse bibliographic info? - stateTitle :: [Inline], -- ^ Title of document - stateAuthors :: [String], -- ^ Authors of document - stateDate :: String, -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax? - stateSmart :: Bool, -- ^ Use smart typography? - stateColumns :: Int, -- ^ Number of columns in terminal - stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used - } - deriving Show - -defaultParserState :: ParserState -defaultParserState = - ParserState { stateParseRaw = False, - stateParserContext = NullState, - stateQuoteContext = NoQuote, - stateKeys = [], - stateNotes = [], - stateTabStop = 4, - stateStandalone = False, - stateTitle = [], - stateAuthors = [], - stateDate = [], - stateStrict = False, - stateSmart = False, - stateColumns = 80, - stateHeaderTable = [] } - -data HeaderType - = SingleHeader Char -- ^ Single line of characters underneath - | DoubleHeader Char -- ^ Lines of characters above and below - deriving (Eq, Show) - -data ParserContext - = ListItemState -- ^ Used when running parser on list item contents - | NullState -- ^ Default state - deriving (Eq, Show) - -data QuoteContext - = InSingleQuote -- ^ Used when parsing inside single quotes - | InDoubleQuote -- ^ Used when parsing inside double quotes - | NoQuote -- ^ Used when not parsing inside quotes - deriving (Eq, Show) - -type NoteTable = [(String, [Block])] - -type KeyTable = [([Inline], Target)] - --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable -- ^ Key table - -> [Inline] -- ^ Key - -> Maybe Target -lookupKeySrc table key = case find (refsMatch key . fst) table of - Nothing -> Nothing - Just (_, src) -> Just src - --- | Returns @True@ if keys match (case insensitive). -refsMatch :: [Inline] -> [Inline] -> Bool -refsMatch ((Str x):restx) ((Str y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Emph x):restx) ((Emph y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strong x):restx) ((Strong y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Superscript x):restx) ((Superscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Subscript x):restx) ((Subscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = - t == u && refsMatch x y && refsMatch restx resty -refsMatch ((Code x):restx) ((Code y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((TeX x):restx) ((TeX y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty -refsMatch [] x = null x -refsMatch x [] = null x - --- --- Native format prettyprinting --- - --- | Indent string as a block. -indentBy :: Int -- ^ Number of spaces to indent the block - -> Int -- ^ Number of spaces (rel to block) to indent first line - -> String -- ^ Contents of block to indent - -> String -indentBy _ _ [] = "" -indentBy num first str = - let (firstLine:restLines) = lines str - firstLineIndent = num + first - in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ - (joinWithSep "\n" $ map ((replicate num ' ') ++ ) restLines) - --- | Prettyprint list of Pandoc blocks elements. -prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks - -> [Block] -- ^ List of blocks - -> String -prettyBlockList indent [] = indentBy indent 0 "[]" -prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ - (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" - --- | Prettyprint Pandoc block element. -prettyBlock :: Block -> String -prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ - (prettyBlockList 2 blocks) -prettyBlock (OrderedList attribs blockLists) = - "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ - (joinWithSep ", " $ map (\blocks -> prettyBlockList 2 blocks) - blockLists)) ++ " ]" -prettyBlock (BulletList blockLists) = "BulletList\n" ++ - indentBy 2 0 ("[ " ++ (joinWithSep ", " - (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" -prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++ - indentBy 2 0 ("[" ++ (joinWithSep ",\n" - (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++ - indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]" -prettyBlock (Table caption aligns widths header rows) = - "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ - show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ - (joinWithSep ",\n" (map prettyRow rows)) ++ " ]" - where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", " - (map (\blocks -> prettyBlockList 2 blocks) - cols))) ++ " ]" -prettyBlock block = show block - --- | Prettyprint Pandoc document. -prettyPandoc :: Pandoc -> String -prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ - ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" - --- --- Pandoc block and inline list processing --- - --- | Generate infinite lazy list of markers for an ordered list, --- depending on list attributes. -orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] -orderedListMarkers (start, numstyle, numdelim) = - let singleton c = [c] - nums = case numstyle of - DefaultStyle -> map show [start..] - Decimal -> map show [start..] - UpperAlpha -> drop (start - 1) $ cycle $ - map singleton ['A'..'Z'] - LowerAlpha -> drop (start - 1) $ cycle $ - map singleton ['a'..'z'] - UpperRoman -> map toRomanNumeral [start..] - LowerRoman -> map (map toLower . toRomanNumeral) [start..] - inDelim str = case numdelim of - DefaultDelim -> str ++ "." - Period -> str ++ "." - OneParen -> str ++ ")" - TwoParens -> "(" ++ str ++ ")" - in map inDelim nums - --- | Normalize a list of inline elements: remove leading and trailing --- @Space@ elements, collapse double @Space@s into singles, and --- remove empty Str elements. -normalizeSpaces :: [Inline] -> [Inline] -normalizeSpaces [] = [] -normalizeSpaces list = - let removeDoubles [] = [] - removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) - removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest) - removeDoubles ((Str ""):rest) = removeDoubles rest - removeDoubles (x:rest) = x:(removeDoubles rest) - removeLeading (Space:xs) = removeLeading xs - removeLeading x = x - removeTrailing [] = [] - removeTrailing lst = if (last lst == Space) - then init lst - else lst - in removeLeading $ removeTrailing $ removeDoubles list - --- | Change final list item from @Para@ to @Plain@ if the list should --- be compact. -compactify :: [[Block]] -- ^ List of list items (each a list of blocks) - -> [[Block]] -compactify [] = [] -compactify items = - let final = last items - others = init items - in case final of - [Para a] -> if any containsPara others - then items - else others ++ [[Plain a]] - _ -> items - -containsPara :: [Block] -> Bool -containsPara [] = False -containsPara ((Para _):_) = True -containsPara ((BulletList items):rest) = any containsPara items || - containsPara rest -containsPara ((OrderedList _ items):rest) = any containsPara items || - containsPara rest -containsPara ((DefinitionList items):rest) = any containsPara (map snd items) || - containsPara rest -containsPara (_:rest) = containsPara rest - --- | Data structure for defining hierarchical Pandoc documents -data Element = Blk Block - | Sec [Inline] [Element] deriving (Eq, Read, Show) - --- | Returns @True@ on Header block with at least the specified level -headerAtLeast :: Int -> Block -> Bool -headerAtLeast level (Header x _) = x <= level -headerAtLeast _ _ = False - --- | Convert list of Pandoc blocks into (hierarchical) list of Elements -hierarchicalize :: [Block] -> [Element] -hierarchicalize [] = [] -hierarchicalize (block:rest) = - case block of - (Header level title) -> - let (thisSection, rest') = break (headerAtLeast level) rest - in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') - x -> (Blk x):(hierarchicalize rest) - --- | True if block is a Header block. -isHeaderBlock :: Block -> Bool -isHeaderBlock (Header _ _) = True -isHeaderBlock _ = False - --- --- Writer options --- - --- | Options for writers -data WriterOptions = WriterOptions - { writerStandalone :: Bool -- ^ Include header and footer - , writerHeader :: String -- ^ Header for the document - , writerTitlePrefix :: String -- ^ Prefix for HTML titles - , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs - , writerTableOfContents :: Bool -- ^ Include table of contents - , writerS5 :: Bool -- ^ We're writing S5 - , writerUseASCIIMathML :: Bool -- ^ Use ASCIIMathML - , writerASCIIMathMLURL :: Maybe String -- ^ URL to asciiMathML.js - , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) - , writerIncremental :: Bool -- ^ Incremental S5 lists - , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerIncludeBefore :: String -- ^ String to include before the body - , writerIncludeAfter :: String -- ^ String to include after the body - , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax - , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , writerWrapText :: Bool -- ^ Wrap text to line length - } deriving Show - --- | Default writer options. -defaultWriterOptions :: WriterOptions -defaultWriterOptions = - WriterOptions { writerStandalone = False - , writerHeader = "" - , writerTitlePrefix = "" - , writerTabStop = 4 - , writerTableOfContents = False - , writerS5 = False - , writerUseASCIIMathML = False - , writerASCIIMathMLURL = Nothing - , writerIgnoreNotes = False - , writerIncremental = False - , writerNumberSections = False - , writerIncludeBefore = "" - , writerIncludeAfter = "" - , writerStrictMarkdown = False - , writerReferenceLinks = False - , writerWrapText = True - } diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs deleted file mode 100644 index 16bdb9218..000000000 --- a/src/Text/Pandoc/UTF8.hs +++ /dev/null @@ -1,45 +0,0 @@ --- | Functions for converting Unicode strings to UTF-8 and vice versa. --- --- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>. --- (c) 2003, OGI School of Science & Engineering, Oregon Health and --- Science University. --- --- Modified by Martin Norbaeck --- to pass illegal UTF-8 sequences through unchanged. -module Text.Pandoc.UTF8 ( - fromUTF8, - toUTF8 - ) where - --- From the Char module supplied with HBC. - --- | Take a UTF-8 string and decode it into a Unicode string. -fromUTF8 :: String -> String -fromUTF8 "" = "" -fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker) -fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' && - '\x80' <= c' && c' <= '\xbf' = - toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs -fromUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' && - '\x80' <= c' && c' <= '\xbf' && - '\x80' <= c'' && c'' <= '\xbf' = - toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs -fromUTF8 (c:cs) = c : fromUTF8 cs - --- | Take a Unicode string and encode it as a UTF-8 string. -toUTF8 :: String -> String -toUTF8 "" = "" -toUTF8 (c:cs) = - if c > '\x0000' && c < '\x0080' then - c : toUTF8 cs - else if c < toEnum 0x0800 then - let i = fromEnum c - in toEnum (0xc0 + i `div` 0x40) : - toEnum (0x80 + i `mod` 0x40) : - toUTF8 cs - else - let i = fromEnum c - in toEnum (0xe0 + i `div` 0x1000) : - toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : - toEnum (0x80 + i `mod` 0x40) : - toUTF8 cs diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs deleted file mode 100644 index 13912a9f3..000000000 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ /dev/null @@ -1,248 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' format into ConTeXt. --} -module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Printf ( printf ) -import Data.List ( (\\), intersperse ) -import Control.Monad.State - -type WriterState = Int -- number of next URL reference - --- | Convert Pandoc to ConTeXt. -writeConTeXt :: WriterOptions -> Pandoc -> String -writeConTeXt options document = evalState (pandocToConTeXt options document) 1 - -pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String -pandocToConTeXt options (Pandoc meta blocks) = do - main <- blockListToConTeXt blocks - let body = writerIncludeBefore options ++ main ++ writerIncludeAfter options - head <- if writerStandalone options - then contextHeader options meta - else return "" - let toc = if writerTableOfContents options - then "\\placecontent\n\n" - else "" - let foot = if writerStandalone options - then "\n\\stoptext\n" - else "" - return $ head ++ toc ++ body ++ foot - --- | Insert bibliographic information into ConTeXt header. -contextHeader :: WriterOptions -- ^ Options, including ConTeXt header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState String -contextHeader options (Meta title authors date) = do - titletext <- if null title - then return "" - else inlineListToConTeXt title - let authorstext = if null authors - then "" - else if length authors == 1 - then stringToConTeXt $ head authors - else stringToConTeXt $ (joinWithSep ", " $ - init authors) ++ " & " ++ last authors - let datetext = if date == "" - then "" - else stringToConTeXt date - let titleblock = "\\doctitle{" ++ titletext ++ "}\n\ - \ \\author{" ++ authorstext ++ "}\n\ - \ \\date{" ++ datetext ++ "}\n\n" - let setupheads = if (writerNumberSections options) - then "\\setupheads[sectionnumber=yes, style=\\bf]\n" - else "\\setupheads[sectionnumber=no, style=\\bf]\n" - let header = writerHeader options - return $ header ++ setupheads ++ titleblock ++ "\\starttext\n\\maketitle\n\n" - --- escape things as needed for ConTeXt - -escapeCharForConTeXt :: Char -> String -escapeCharForConTeXt ch = - case ch of - '{' -> "\\letteropenbrace{}" - '}' -> "\\letterclosebrace{}" - '\\' -> "\\letterbackslash{}" - '$' -> "\\$" - '|' -> "\\letterbar{}" - '^' -> "\\letterhat{}" - '%' -> "\\%" - '~' -> "\\lettertilde{}" - '&' -> "\\&" - '#' -> "\\#" - '<' -> "\\letterless{}" - '>' -> "\\lettermore{}" - '_' -> "\\letterunderscore{}" - x -> [x] - --- | Escape string for ConTeXt -stringToConTeXt :: String -> String -stringToConTeXt = concatMap escapeCharForConTeXt - --- | Convert Pandoc block element to ConTeXt. -blockToConTeXt :: Block -> State WriterState String -blockToConTeXt Null = return "" -blockToConTeXt (Plain lst) = inlineListToConTeXt lst >>= return . (++ "\n") -blockToConTeXt (Para lst) = inlineListToConTeXt lst >>= return . (++ "\n\n") -blockToConTeXt (BlockQuote lst) = do - contents <- blockListToConTeXt lst - return $ "\\startblockquote\n" ++ contents ++ "\\stopblockquote\n\n" -blockToConTeXt (CodeBlock str) = - return $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n" -blockToConTeXt (RawHtml str) = return "" -blockToConTeXt (BulletList lst) = do - contents <- mapM listItemToConTeXt lst - return $ "\\startltxitem\n" ++ concat contents ++ "\\stopltxitem\n" -blockToConTeXt (OrderedList attribs lst) = case attribs of - (1, DefaultStyle, DefaultDelim) -> do - contents <- mapM listItemToConTeXt lst - return $ "\\startltxenum\n" ++ concat contents ++ "\\stopltxenum\n" - _ -> do - let markers = take (length lst) $ orderedListMarkers attribs - contents <- zipWithM orderedListItemToConTeXt markers lst - let markerWidth = maximum $ map length markers - let markerWidth' = if markerWidth < 3 - then "" - else "[width=" ++ - show ((markerWidth + 2) `div` 2) ++ "em]" - return $ "\\startitemize" ++ markerWidth' ++ "\n" ++ concat contents ++ - "\\stopitemize\n" -blockToConTeXt (DefinitionList lst) = - mapM defListItemToConTeXt lst >>= return . (++ "\n") . concat -blockToConTeXt HorizontalRule = return "\\thinrule\n\n" -blockToConTeXt (Header level lst) = do - contents <- inlineListToConTeXt lst - return $ if level > 0 && level <= 3 - then "\\" ++ concat (replicate (level - 1) "sub") ++ - "section{" ++ contents ++ "}\n\n" - else contents ++ "\n\n" -blockToConTeXt (Table caption aligns widths heads rows) = do - let colWidths = map printDecimal widths - let colDescriptor colWidth alignment = (case alignment of - AlignLeft -> 'l' - AlignRight -> 'r' - AlignCenter -> 'c' - AlignDefault -> 'l'): - "p(" ++ colWidth ++ "\\textwidth)|" - let colDescriptors = "|" ++ (concat $ - zipWith colDescriptor colWidths aligns) - headers <- tableRowToConTeXt heads - captionText <- inlineListToConTeXt caption - let captionText' = if null caption then "none" else captionText - rows' <- mapM tableRowToConTeXt rows - return $ "\\placetable[here]{" ++ captionText' ++ "}\n\\starttable[" ++ - colDescriptors ++ "]\n" ++ "\\HL\n" ++ headers ++ "\\HL\n" ++ - concat rows' ++ "\\HL\n\\stoptable\n\n" - -printDecimal :: Float -> String -printDecimal = printf "%.2f" - -tableRowToConTeXt cols = do - cols' <- mapM blockListToConTeXt cols - return $ "\\NC " ++ (concat $ intersperse "\\NC " cols') ++ "\\NC\\AR\n" - -listItemToConTeXt list = do - contents <- blockListToConTeXt list - return $ "\\item " ++ contents - -orderedListItemToConTeXt marker list = do - contents <- blockListToConTeXt list - return $ "\\sym{" ++ marker ++ "} " ++ contents - -defListItemToConTeXt (term, def) = do - term' <- inlineListToConTeXt term - def' <- blockListToConTeXt def - return $ "\\startdescr{" ++ term' ++ "}\n" ++ - def' ++ "\n\\stopdescr\n" - --- | Convert list of block elements to ConTeXt. -blockListToConTeXt :: [Block] -> State WriterState String -blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . concat - --- | Convert list of inline elements to ConTeXt. -inlineListToConTeXt :: [Inline] -- ^ Inlines to convert - -> State WriterState String -inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . concat - -isQuoted :: Inline -> Bool -isQuoted (Quoted _ _) = True -isQuoted Apostrophe = True -isQuoted _ = False - --- | Convert inline element to ConTeXt -inlineToConTeXt :: Inline -- ^ Inline to convert - -> State WriterState String -inlineToConTeXt (Emph lst) = do - contents <- inlineListToConTeXt lst - return $ "{\\em " ++ contents ++ "}" -inlineToConTeXt (Strong lst) = do - contents <- inlineListToConTeXt lst - return $ "{\\bf " ++ contents ++ "}" -inlineToConTeXt (Strikeout lst) = do - contents <- inlineListToConTeXt lst - return $ "\\overstrikes{" ++ contents ++ "}" -inlineToConTeXt (Superscript lst) = do - contents <- inlineListToConTeXt lst - return $ "\\high{" ++ contents ++ "}" -inlineToConTeXt (Subscript lst) = do - contents <- inlineListToConTeXt lst - return $ "\\low{" ++ contents ++ "}" -inlineToConTeXt (Code str) = return $ "\\type{" ++ str ++ "}" -inlineToConTeXt (Quoted SingleQuote lst) = do - contents <- inlineListToConTeXt lst - return $ "\\quote{" ++ contents ++ "}" -inlineToConTeXt (Quoted DoubleQuote lst) = do - contents <- inlineListToConTeXt lst - return $ "\\quotation{" ++ contents ++ "}" -inlineToConTeXt Apostrophe = return "'" -inlineToConTeXt EmDash = return "---" -inlineToConTeXt EnDash = return "--" -inlineToConTeXt Ellipses = return "\\ldots{}" -inlineToConTeXt (Str str) = return $ stringToConTeXt str -inlineToConTeXt (TeX str) = return str -inlineToConTeXt (HtmlInline str) = return "" -inlineToConTeXt (LineBreak) = return "\\crlf\n" -inlineToConTeXt Space = return " " -inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own - inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links... -inlineToConTeXt (Link text (src, _)) = do - next <- get - put (next + 1) - let ref = show next - label <- inlineListToConTeXt text - return $ "\\useurl[" ++ ref ++ "][" ++ src ++ "][][" ++ label ++ - "]\\from[" ++ ref ++ "]" -inlineToConTeXt (Image alternate (src, tit)) = do - alt <- inlineListToConTeXt alternate - return $ "\\placefigure\n[]\n[fig:" ++ alt ++ "]\n{" ++ - tit ++ "}\n{\\externalfigure[" ++ src ++ "]}" -inlineToConTeXt (Note contents) = do - contents' <- blockListToConTeXt contents - return $ "\\footnote{" ++ contents' ++ "}" - diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs deleted file mode 100644 index 13dc8585d..000000000 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ /dev/null @@ -1,299 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to Docbook XML. --} -module Text.Pandoc.Writers.Docbook ( writeDocbook) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Data.List ( isPrefixOf, drop ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) - --- --- code to format XML --- - --- | Escape one character as needed for XML. -escapeCharForXML :: Char -> String -escapeCharForXML x = case x of - '&' -> "&" - '<' -> "<" - '>' -> ">" - '"' -> """ - '\160' -> " " - c -> [c] - --- | True if the character needs to be escaped. -needsEscaping :: Char -> Bool -needsEscaping c = c `elem` "&<>\"\160" - --- | Escape string as needed for XML. Entity references are not preserved. -escapeStringForXML :: String -> String -escapeStringForXML "" = "" -escapeStringForXML str = - case break needsEscaping str of - (okay, "") -> okay - (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs - --- | Return a text object with a string of formatted XML attributes. -attributeList :: [(String, String)] -> Doc -attributeList = text . concatMap - (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++ - escapeStringForXML b ++ "\"") - --- | Put the supplied contents between start and end tags of tagType, --- with specified attributes and (if specified) indentation. -inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc -inTags isIndented tagType attribs contents = - let openTag = char '<' <> text tagType <> attributeList attribs <> - char '>' - closeTag = text "</" <> text tagType <> char '>' - in if isIndented - then openTag $$ nest 2 contents $$ closeTag - else openTag <> contents <> closeTag - --- | Return a self-closing tag of tagType with specified attributes -selfClosingTag :: String -> [(String, String)] -> Doc -selfClosingTag tagType attribs = - char '<' <> text tagType <> attributeList attribs <> text " />" - --- | Put the supplied contents between start and end tags of tagType. -inTagsSimple :: String -> Doc -> Doc -inTagsSimple tagType = inTags False tagType [] - --- | Put the supplied contents in indented block btw start and end tags. -inTagsIndented :: String -> Doc -> Doc -inTagsIndented tagType = inTags True tagType [] - --- --- Docbook writer --- - --- | Convert list of authors to a docbook <author> section -authorToDocbook :: [Char] -> Doc -authorToDocbook name = inTagsIndented "author" $ - if ',' `elem` name - then -- last name first - let (lastname, rest) = break (==',') name - firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) - else -- last name last - let namewords = words name - lengthname = length namewords - (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (joinWithSep " " (take (n-1) namewords), last namewords) - in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) - --- | Convert Pandoc document to string in Docbook format. -writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc (Meta title authors date) blocks) = - let head = if writerStandalone opts - then text (writerHeader opts) - else empty - meta = if writerStandalone opts - then inTagsIndented "articleinfo" $ - (inTagsSimple "title" (wrap opts title)) $$ - (vcat (map authorToDocbook authors)) $$ - (inTagsSimple "date" (text $ escapeStringForXML date)) - else empty - elements = hierarchicalize blocks - before = writerIncludeBefore opts - after = writerIncludeAfter opts - body = (if null before then empty else text before) $$ - vcat (map (elementToDocbook opts) elements) $$ - (if null after then empty else text after) - body' = if writerStandalone opts - then inTagsIndented "article" (meta $$ body) - else body - in render $ head $$ body' $$ text "" - --- | Convert an Element to Docbook. -elementToDocbook :: WriterOptions -> Element -> Doc -elementToDocbook opts (Blk block) = blockToDocbook opts block -elementToDocbook opts (Sec title elements) = - -- Docbook doesn't allow sections with no content, so insert some if needed - let elements' = if null elements - then [Blk (Para [])] - else elements - in inTagsIndented "section" $ - inTagsSimple "title" (wrap opts title) $$ - vcat (map (elementToDocbook opts) elements') - --- | Convert a list of Pandoc blocks to Docbook. -blocksToDocbook :: WriterOptions -> [Block] -> Doc -blocksToDocbook opts = vcat . map (blockToDocbook opts) - --- | Auxiliary function to convert Plain block to Para. -plainToPara (Plain x) = Para x -plainToPara x = x - --- | Convert a list of pairs of terms and definitions into a list of --- Docbook varlistentrys. -deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc -deflistItemsToDocbook opts items = - vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items - --- | Convert a term and a list of blocks into a Docbook varlistentry. -deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc -deflistItemToDocbook opts term def = - let def' = map plainToPara def - in inTagsIndented "varlistentry" $ - inTagsIndented "term" (inlinesToDocbook opts term) $$ - inTagsIndented "listitem" (blocksToDocbook opts def') - --- | Convert a list of lists of blocks to a list of Docbook list items. -listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items - --- | Convert a list of blocks into a Docbook list item. -listItemToDocbook :: WriterOptions -> [Block] -> Doc -listItemToDocbook opts item = - inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item - --- | Convert a Pandoc block element to Docbook. -blockToDocbook :: WriterOptions -> Block -> Doc -blockToDocbook opts Null = empty -blockToDocbook opts (Plain lst) = wrap opts lst -blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst -blockToDocbook opts (BlockQuote blocks) = - inTagsIndented "blockquote" $ blocksToDocbook opts blocks -blockToDocbook opts (CodeBlock str) = - text "<screen>\n" <> text (escapeStringForXML str) <> text "\n</screen>" -blockToDocbook opts (BulletList lst) = - inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst -blockToDocbook opts (OrderedList _ []) = empty -blockToDocbook opts (OrderedList (start, numstyle, numdelim) (first:rest)) = - let attribs = case numstyle of - DefaultStyle -> [] - Decimal -> [("numeration", "arabic")] - UpperAlpha -> [("numeration", "upperalpha")] - LowerAlpha -> [("numeration", "loweralpha")] - UpperRoman -> [("numeration", "upperroman")] - LowerRoman -> [("numeration", "lowerroman")] - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest - in inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = - inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst -blockToDocbook opts (RawHtml str) = text str -- raw XML block -blockToDocbook opts HorizontalRule = empty -- not semantic -blockToDocbook opts (Table caption aligns widths headers rows) = - let alignStrings = map alignmentToString aligns - captionDoc = if null caption - then empty - else inTagsIndented "caption" - (inlinesToDocbook opts caption) - tableType = if isEmpty captionDoc then "informaltable" else "table" - in inTagsIndented tableType $ captionDoc $$ - (colHeadsToDocbook opts alignStrings widths headers) $$ - (vcat $ map (tableRowToDocbook opts alignStrings) rows) - -colHeadsToDocbook opts alignStrings widths headers = - let heads = zipWith3 (\align width item -> - tableItemToDocbook opts "th" align width item) - alignStrings widths headers - in inTagsIndented "tr" $ vcat heads - -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -tableRowToDocbook opts aligns cols = inTagsIndented "tr" $ - vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols - -tableItemToDocbook opts tag align width item = - let attrib = [("align", align)] ++ - if width /= 0 - then [("style", "{width: " ++ - show (truncate (100*width)) ++ "%;}")] - else [] - in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item - --- | Take list of inline elements and return wrapped doc. -wrap :: WriterOptions -> [Inline] -> Doc -wrap opts lst = if writerWrapText opts - then fsep $ map (inlinesToDocbook opts) (splitBy Space lst) - else inlinesToDocbook opts lst - --- | Convert a list of inline elements to Docbook. -inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst - --- | Convert an inline element to Docbook. -inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook opts (Str str) = text $ escapeStringForXML str -inlineToDocbook opts (Emph lst) = - inTagsSimple "emphasis" $ inlinesToDocbook opts lst -inlineToDocbook opts (Strong lst) = - inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Strikeout lst) = - inTags False "emphasis" [("role", "strikethrough")] $ - inlinesToDocbook opts lst -inlineToDocbook opts (Superscript lst) = - inTagsSimple "superscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (Subscript lst) = - inTagsSimple "subscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (Quoted _ lst) = - inTagsSimple "quote" $ inlinesToDocbook opts lst -inlineToDocbook opts Apostrophe = char '\'' -inlineToDocbook opts Ellipses = text "…" -inlineToDocbook opts EmDash = text "—" -inlineToDocbook opts EnDash = text "–" -inlineToDocbook opts (Code str) = - inTagsSimple "literal" $ text (escapeStringForXML str) -inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str) -inlineToDocbook opts (HtmlInline str) = empty -inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>" -inlineToDocbook opts Space = char ' ' -inlineToDocbook opts (Link txt (src, tit)) = - if isPrefixOf "mailto:" src - then let src' = drop 7 src - emailLink = inTagsSimple "email" $ text $ - escapeStringForXML $ src' - in if txt == [Code src'] - then emailLink - else inlinesToDocbook opts txt <+> char '(' <> emailLink <> - char ')' - else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt -inlineToDocbook opts (Image alt (src, tit)) = - let titleDoc = if null tit - then empty - else inTagsIndented "objectinfo" $ - inTagsIndented "title" (text $ escapeStringForXML tit) - in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ - titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] -inlineToDocbook opts (Note contents) = - inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs deleted file mode 100644 index 7ec95d8ef..000000000 --- a/src/Text/Pandoc/Writers/HTML.hs +++ /dev/null @@ -1,458 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to HTML. --} -module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where -import Text.Pandoc.Definition -import Text.Pandoc.ASCIIMathML -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Text.Pandoc.Shared -import Text.Regex ( mkRegex, matchRegex ) -import Numeric ( showHex ) -import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, intersperse ) -import qualified Data.Set as S -import Control.Monad.State -import Text.XHtml.Transitional - -data WriterState = WriterState - { stNotes :: [Html] -- ^ List of notes - , stIds :: [String] -- ^ List of header identifiers - , stMath :: Bool -- ^ Math is used in document - , stCSS :: S.Set String -- ^ CSS to include in header - } deriving Show - -defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stIds = [], - stMath = False, stCSS = S.empty} - --- Helpers to render HTML with the appropriate function. -render opts = if writerWrapText opts then renderHtml else showHtml -renderFragment opts = if writerWrapText opts - then renderHtmlFragment - else showHtmlFragment - --- | Convert Pandoc document to Html string. -writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts = - if writerStandalone opts - then render opts . writeHtml opts - else renderFragment opts . writeHtml opts - --- | Convert Pandoc document to Html structure. -writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts (Pandoc (Meta tit authors date) blocks) = - let titlePrefix = writerTitlePrefix opts - topTitle = evalState (inlineListToHtml opts tit) defaultWriterState - topTitle' = if null titlePrefix - then topTitle - else titlePrefix +++ " - " +++ topTitle - metadata = thetitle topTitle' +++ - meta ! [httpequiv "Content-Type", - content "text/html; charset=UTF-8"] +++ - meta ! [name "generator", content "pandoc"] +++ - (toHtmlFromList $ - map (\a -> meta ! [name "author", content a]) authors) +++ - (if null date - then noHtml - else meta ! [name "date", content date]) - titleHeader = if writerStandalone opts && not (null tit) && - not (writerS5 opts) - then h1 ! [theclass "title"] $ topTitle - else noHtml - headerBlocks = filter isHeaderBlock blocks - ids = uniqueIdentifiers $ - map (\(Header _ lst) -> lst) headerBlocks - toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks ids - else noHtml - (blocks', newstate) = - runState (blockListToHtml opts blocks) - (defaultWriterState {stIds = ids}) - cssLines = stCSS newstate - css = if S.null cssLines - then noHtml - else style ! [thetype "text/css"] $ primHtml $ - '\n':(unlines $ S.toList cssLines) - math = if stMath newstate - then case writerASCIIMathMLURL opts of - Just path -> script ! [src path, - thetype "text/javascript"] $ - noHtml - Nothing -> primHtml asciiMathMLScript - else noHtml - head = header $ metadata +++ math +++ css +++ - primHtml (writerHeader opts) - notes = reverse (stNotes newstate) - before = primHtml $ writerIncludeBefore opts - after = primHtml $ writerIncludeAfter opts - thebody = before +++ titleHeader +++ toc +++ blocks' +++ - footnoteSection opts notes +++ after - in if writerStandalone opts - then head +++ body thebody - else thebody - --- | Construct table of contents from list of header blocks and identifiers. --- Assumes there are as many identifiers as header blocks. -tableOfContents :: WriterOptions -> [Block] -> [String] -> Html -tableOfContents _ [] _ = noHtml -tableOfContents opts headers ids = - let opts' = opts { writerIgnoreNotes = True } - contentsTree = hierarchicalize headers - contents = evalState (mapM (elementToListItem opts') contentsTree) - (defaultWriterState {stIds = ids}) - in thediv ! [identifier "toc"] $ unordList contents - --- | Converts an Element to a list item for a table of contents, --- retrieving the appropriate identifier from state. -elementToListItem :: WriterOptions -> Element -> State WriterState Html -elementToListItem opts (Blk _) = return noHtml -elementToListItem opts (Sec headerText subsecs) = do - st <- get - let ids = stIds st - let (id, rest) = if null ids - then ("", []) - else (head ids, tail ids) - put $ st {stIds = rest} - txt <- inlineListToHtml opts headerText - subHeads <- mapM (elementToListItem opts) subsecs - let subList = if null subHeads - then noHtml - else unordList subHeads - return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++ - subList - --- | Convert list of Note blocks to a footnote <div>. --- Assumes notes are sorted. -footnoteSection :: WriterOptions -> [Html] -> Html -footnoteSection opts notes = - if null notes - then noHtml - else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) - --- | Obfuscate a "mailto:" link using Javascript. -obfuscateLink :: WriterOptions -> String -> String -> Html -obfuscateLink opts text src = - let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$" - src' = map toLower src - in case (matchRegex emailRegex src') of - (Just [name, domain]) -> - let domain' = substitute "." " dot " domain - at' = obfuscateChar '@' - (linkText, altText) = - if text == drop 7 src' -- autolink - then ("'<code>'+e+'</code>'", name ++ " at " ++ domain') - else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++ - domain' ++ ")") - in if writerStrictMarkdown opts - then -- need to use primHtml or &'s are escaped to & in URL - primHtml $ "<a href=\"" ++ (obfuscateString src') - ++ "\">" ++ (obfuscateString text) ++ "</a>" - else (script ! [thetype "text/javascript"] $ - primHtml ("\n<!--\nh='" ++ - obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ - obfuscateString name ++ "';e=n+a+h;\n" ++ - "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++ - linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ - noscript (primHtml $ obfuscateString altText) - _ -> anchor ! [href src] $ primHtml text -- malformed email - --- | Obfuscate character as entity. -obfuscateChar :: Char -> String -obfuscateChar char = - let num = ord char - numstr = if even num then show num else "x" ++ showHex num "" - in "&#" ++ numstr ++ ";" - --- | Obfuscate string using entities. -obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . decodeCharacterReferences - --- | True if character is a punctuation character (unicode). -isPunctuation :: Char -> Bool -isPunctuation c = - let c' = ord c - in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || - c' >= 0xE000 && c' <= 0xE0FF - then True - else False - --- | Add CSS for document header. -addToCSS :: String -> State WriterState () -addToCSS item = do - st <- get - let current = stCSS st - put $ st {stCSS = S.insert item current} - --- | Convert Pandoc inline list to plain text identifier. -inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier [] = "" -inlineListToIdentifier (x:xs) = - xAsText ++ inlineListToIdentifier xs - where xAsText = case x of - Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ - concat $ intersperse "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier lst - Strikeout lst -> inlineListToIdentifier lst - Superscript lst -> inlineListToIdentifier lst - Subscript lst -> inlineListToIdentifier lst - Strong lst -> inlineListToIdentifier lst - Quoted _ lst -> inlineListToIdentifier lst - Code s -> s - Space -> "-" - EmDash -> "-" - EnDash -> "-" - Apostrophe -> "" - Ellipses -> "" - LineBreak -> "-" - TeX _ -> "" - HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier lst - Image lst _ -> inlineListToIdentifier lst - Note _ -> "" - --- | Return unique identifiers for list of inline lists. -uniqueIdentifiers :: [[Inline]] -> [String] -uniqueIdentifiers ls = - let addIdentifier (nonuniqueIds, uniqueIds) l = - let new = inlineListToIdentifier l - matches = length $ filter (== new) nonuniqueIds - new' = new ++ if matches > 0 then ("-" ++ show matches) else "" - in (new:nonuniqueIds, new':uniqueIds) - in reverse $ snd $ foldl addIdentifier ([],[]) ls - --- | Convert Pandoc block element to HTML. -blockToHtml :: WriterOptions -> Block -> State WriterState Html -blockToHtml opts Null = return $ noHtml -blockToHtml opts (Plain lst) = inlineListToHtml opts lst -blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) -blockToHtml opts (RawHtml str) = return $ primHtml str -blockToHtml opts (HorizontalRule) = return $ hr -blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n") - -- the final \n for consistency with Markdown.pl -blockToHtml opts (BlockQuote blocks) = - -- in S5, treat list in blockquote specially - -- if default is incremental, make it nonincremental; - -- otherwise incremental - if writerS5 opts - then let inc = not (writerIncremental opts) in - case blocks of - [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) - (BulletList lst) - [OrderedList attribs lst] -> - blockToHtml (opts {writerIncremental = inc}) - (OrderedList attribs lst) - otherwise -> blockListToHtml opts blocks >>= - (return . blockquote) - else blockListToHtml opts blocks >>= (return . blockquote) -blockToHtml opts (Header level lst) = do - contents <- inlineListToHtml opts lst - st <- get - let ids = stIds st - let (id, rest) = if null ids - then ("", []) - else (head ids, tail ids) - put $ st {stIds = rest} - let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts) - then [] - else [identifier id] - let contents' = if writerTableOfContents opts - then anchor ! [href ("#TOC-" ++ id)] $ contents - else contents - return $ case level of - 1 -> h1 contents' ! attribs - 2 -> h2 contents' ! attribs - 3 -> h3 contents' ! attribs - 4 -> h4 contents' ! attribs - 5 -> h5 contents' ! attribs - 6 -> h6 contents' ! attribs - _ -> paragraph contents' ! attribs -blockToHtml opts (BulletList lst) = do - contents <- mapM (blockListToHtml opts) lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ unordList ! attribs $ contents -blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do - contents <- mapM (blockListToHtml opts) lst - let numstyle' = camelCaseToHyphenated $ show numstyle - let attribs = (if writerIncremental opts - then [theclass "incremental"] - else []) ++ - (if startnum /= 1 - then [start startnum] - else []) ++ - (if numstyle /= DefaultStyle - then [theclass numstyle'] - else []) - if numstyle /= DefaultStyle - then addToCSS $ "ol." ++ numstyle' ++ - " { list-style-type: " ++ - numstyle' ++ "; }" - else return () - return $ ordList ! attribs $ contents -blockToHtml opts (DefinitionList lst) = do - contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term - def' <- blockListToHtml opts def - return $ (term', def')) lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ defList ! attribs $ contents -blockToHtml opts (Table capt aligns widths headers rows) = do - let alignStrings = map alignmentToString aligns - captionDoc <- if null capt - then return noHtml - else inlineListToHtml opts capt >>= return . caption - colHeads <- colHeadsToHtml opts alignStrings - widths headers - rows' <- mapM (tableRowToHtml opts alignStrings) rows - return $ table $ captionDoc +++ colHeads +++ rows' - -colHeadsToHtml opts alignStrings widths headers = do - heads <- sequence $ zipWith3 - (\align width item -> tableItemToHtml opts th align width item) - alignStrings widths headers - return $ tr $ toHtmlFromList heads - -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -tableRowToHtml opts aligns cols = - (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>= - return . tr . toHtmlFromList - -tableItemToHtml opts tag align' width item = do - contents <- blockListToHtml opts item - let attrib = [align align'] ++ - if width /= 0 - then [thestyle ("width: " ++ show (truncate (100*width)) ++ - "%;")] - else [] - return $ tag ! attrib $ contents - -blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html -blockListToHtml opts lst = - mapM (blockToHtml opts) lst >>= return . toHtmlFromList - --- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html -inlineListToHtml opts lst = - mapM (inlineToHtml opts) lst >>= return . toHtmlFromList - --- | Convert Pandoc inline element to HTML. -inlineToHtml :: WriterOptions -> Inline -> State WriterState Html -inlineToHtml opts inline = - case inline of - (Str str) -> return $ stringToHtml str - (Space) -> return $ stringToHtml " " - (LineBreak) -> return $ br - (EmDash) -> return $ primHtmlChar "mdash" - (EnDash) -> return $ primHtmlChar "ndash" - (Ellipses) -> return $ primHtmlChar "hellip" - (Apostrophe) -> return $ primHtmlChar "rsquo" - (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize - (Strong lst) -> inlineListToHtml opts lst >>= return . strong - (Code str) -> return $ thecode << str - (Strikeout lst) -> addToCSS - ".strikeout { text-decoration: line-through; }" >> - inlineListToHtml opts lst >>= - return . (thespan ! [theclass "strikeout"]) - (Superscript lst) -> inlineListToHtml opts lst >>= return . sup - (Subscript lst) -> inlineListToHtml opts lst >>= return . sub - (Quoted quoteType lst) -> - let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (primHtmlChar "lsquo", - primHtmlChar "rsquo") - DoubleQuote -> (primHtmlChar "ldquo", - primHtmlChar "rdquo") - in do contents <- inlineListToHtml opts lst - return $ leftQuote +++ contents +++ rightQuote - (TeX str) -> (if writerUseASCIIMathML opts - then modify (\st -> st {stMath = True}) - else return ()) >> return (stringToHtml str) - (HtmlInline str) -> return $ primHtml str - (Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src -> - return $ obfuscateLink opts str src - (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do - linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts (show linkText) src - (Link txt (src,tit)) -> do - linkText <- inlineListToHtml opts txt - return $ anchor ! ([href src] ++ - if null tit then [] else [title tit]) $ - linkText - (Image txt (source,tit)) -> do - alternate <- inlineListToHtml opts txt - let alternate' = renderFragment opts alternate - let attributes = [src source] ++ - (if null tit - then [] - else [title tit]) ++ - if null txt - then [] - else [alt alternate'] - return $ image ! attributes - -- note: null title included, as in Markdown.pl - (Note contents) -> do - st <- get - let notes = stNotes st - let number = (length notes) + 1 - let ref = show number - htmlContents <- blockListToNote opts ref contents - -- push contents onto front of notes - put $ st {stNotes = (htmlContents:notes)} - return $ anchor ! [href ("#fn" ++ ref), - theclass "footnoteRef", - identifier ("fnref" ++ ref)] << - sup << ref - -blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html -blockListToNote opts ref blocks = - -- If last block is Para or Plain, include the backlink at the end of - -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [HtmlInline $ " <a href=\"#fnref" ++ ref ++ - "\" class=\"footnoteBackLink\"" ++ - " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] - blocks' = if null blocks - then [] - else let lastBlock = last blocks - otherBlocks = init blocks - in case lastBlock of - (Para lst) -> otherBlocks ++ - [Para (lst ++ backlink)] - (Plain lst) -> otherBlocks ++ - [Plain (lst ++ backlink)] - _ -> otherBlocks ++ [lastBlock, - Plain backlink] - in do contents <- blockListToHtml opts blocks' - return $ li ! [identifier ("fn" ++ ref)] $ contents - diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs deleted file mode 100644 index f64e06e24..000000000 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ /dev/null @@ -1,310 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' format into LaTeX. --} -module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Printf ( printf ) -import Data.List ( (\\), isInfixOf, isSuffixOf, intersperse ) -import Data.Char ( toLower ) -import qualified Data.Set as S -import Control.Monad.State -import Text.PrettyPrint.HughesPJ hiding ( Str ) - -data WriterState = - WriterState { stIncludes :: S.Set String -- strings to include in header - , stInNote :: Bool -- @True@ if we're in a note - , stOLLevel :: Int } -- level of ordered list nesting - --- | Add line to header. -addToHeader :: String -> State WriterState () -addToHeader str = do - st <- get - let includes = stIncludes st - put st {stIncludes = S.insert str includes} - --- | Convert Pandoc to LaTeX. -writeLaTeX :: WriterOptions -> Pandoc -> String -writeLaTeX options document = - render $ evalState (pandocToLaTeX options document) $ - WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1 } - -pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToLaTeX options (Pandoc meta blocks) = do - main <- blockListToLaTeX blocks - head <- if writerStandalone options - then latexHeader options meta - else return empty - let before = if null (writerIncludeBefore options) - then empty - else text (writerIncludeBefore options) - let after = if null (writerIncludeAfter options) - then empty - else text (writerIncludeAfter options) - let body = before $$ main $$ after - let toc = if writerTableOfContents options - then text "\\tableofcontents\n" - else empty - let foot = if writerStandalone options - then text "\\end{document}" - else empty - return $ head $$ toc $$ body $$ foot - --- | Insert bibliographic information into LaTeX header. -latexHeader :: WriterOptions -- ^ Options, including LaTeX header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState Doc -latexHeader options (Meta title authors date) = do - titletext <- if null title - then return empty - else inlineListToLaTeX title >>= return . inCmd "title" - headerIncludes <- get >>= return . S.toList . stIncludes - let extras = text $ unlines headerIncludes - let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes - then text "\\VerbatimFootnotes % allows verbatim text in footnotes" - else empty - let authorstext = text $ "\\author{" ++ - joinWithSep "\\\\" (map stringToLaTeX authors) ++ "}" - let datetext = if date == "" - then empty - else text $ "\\date{" ++ stringToLaTeX date ++ "}" - let maketitle = if null title then empty else text "\\maketitle" - let secnumline = if (writerNumberSections options) - then empty - else text "\\setcounter{secnumdepth}{0}" - let baseHeader = text $ writerHeader options - let header = baseHeader $$ extras - return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$ - datetext $$ text "\\begin{document}" $$ maketitle $$ text "" - --- escape things as needed for LaTeX - -stringToLaTeX :: String -> String -stringToLaTeX = escapeStringUsing latexEscapes - where latexEscapes = backslashEscapes "{}$%&_#" ++ - [ ('^', "\\^{}") - , ('\\', "\\textbackslash{}") - , ('~', "\\ensuremath{\\sim}") - , ('|', "\\textbar{}") - , ('<', "\\textless{}") - , ('>', "\\textgreater{}") - ] - --- | Puts contents into LaTeX command. -inCmd :: String -> Doc -> Doc -inCmd cmd contents = char '\\' <> text cmd <> braces contents - --- | Remove all code elements from list of inline elements --- (because it's illegal to have verbatim inside some command arguments) -deVerb :: [Inline] -> [Inline] -deVerb [] = [] -deVerb ((Code str):rest) = - (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest) -deVerb (other:rest) = other:(deVerb rest) - --- | Convert Pandoc block element to LaTeX. -blockToLaTeX :: Block -- ^ Block to convert - -> State WriterState Doc -blockToLaTeX Null = return empty -blockToLaTeX (Plain lst) = wrapped inlineListToLaTeX lst >>= return -blockToLaTeX (Para lst) = - wrapped inlineListToLaTeX lst >>= return . (<> char '\n') -blockToLaTeX (BlockQuote lst) = do - contents <- blockListToLaTeX lst - return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}" -blockToLaTeX (CodeBlock str) = do - st <- get - env <- if stInNote st - then do addToHeader "\\usepackage{fancyvrb}" - return "Verbatim" - else return "verbatim" - return $ text ("\\begin{" ++ env ++ "}\n") <> text str <> - text ("\n\\end{" ++ env ++ "}") -blockToLaTeX (RawHtml str) = return empty -blockToLaTeX (BulletList lst) = do - items <- mapM listItemToLaTeX lst - return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}" -blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do - st <- get - let oldlevel = stOLLevel st - put $ st {stOLLevel = oldlevel + 1} - items <- mapM listItemToLaTeX lst - modify (\st -> st {stOLLevel = oldlevel}) - exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim - then do addToHeader "\\usepackage{enumerate}" - return $ char '[' <> - text (head (orderedListMarkers (1, numstyle, - numdelim))) <> char ']' - else return empty - let resetcounter = if start /= 1 && oldlevel <= 4 - then text $ "\\setcounter{enum" ++ - map toLower (toRomanNumeral oldlevel) ++ - "}{" ++ show (start - 1) ++ "}" - else empty - return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$ - vcat items $$ text "\\end{enumerate}" -blockToLaTeX (DefinitionList lst) = do - items <- mapM defListItemToLaTeX lst - return $ text "\\begin{description}" $$ vcat items $$ - text "\\end{description}" -blockToLaTeX HorizontalRule = return $ text $ - "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n" -blockToLaTeX (Header level lst) = do - txt <- inlineListToLaTeX (deVerb lst) - return $ if (level > 0) && (level <= 3) - then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++ - "section{") <> txt <> text "}\n" - else txt <> char '\n' -blockToLaTeX (Table caption aligns widths heads rows) = do - headers <- tableRowToLaTeX heads - captionText <- inlineListToLaTeX caption - rows' <- mapM tableRowToLaTeX rows - let colWidths = map (printf "%.2f") widths - let colDescriptors = concat $ zipWith - (\width align -> ">{\\PBS" ++ - (case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright") ++ - "\\hspace{0pt}}p{" ++ width ++ - "\\columnwidth}") - colWidths aligns - let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$ - headers $$ text "\\hline" $$ vcat rows' $$ - text "\\end{tabular}" - let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}" - addToHeader "\\usepackage{array}\n\ - \% This is needed because raggedright in table elements redefines \\\\:\n\ - \\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n\ - \\\let\\PBS=\\PreserveBackslash" - return $ if isEmpty captionText - then centered tableBody <> char '\n' - else text "\\begin{table}[h]" $$ centered tableBody $$ - inCmd "caption" captionText $$ text "\\end{table}\n" - -blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat - -tableRowToLaTeX cols = mapM blockListToLaTeX cols >>= - return . ($$ text "\\\\") . foldl (\row item -> row $$ - (if isEmpty row then empty else text " & ") <> item) empty - -listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item " $$) . - (nest 2) - -defListItemToLaTeX (term, def) = do - term' <- inlineListToLaTeX $ deVerb term - def' <- blockListToLaTeX def - return $ text "\\item[" <> term' <> text "]" $$ def' - --- | Convert list of inline elements to LaTeX. -inlineListToLaTeX :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc -inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat - -isQuoted :: Inline -> Bool -isQuoted (Quoted _ _) = True -isQuoted Apostrophe = True -isQuoted _ = False - --- | Convert inline element to LaTeX -inlineToLaTeX :: Inline -- ^ Inline to convert - -> State WriterState Doc -inlineToLaTeX (Emph lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph" -inlineToLaTeX (Strong lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf" -inlineToLaTeX (Strikeout lst) = do - contents <- inlineListToLaTeX $ deVerb lst - addToHeader "\\usepackage[normalem]{ulem}" - return $ inCmd "sout" contents -inlineToLaTeX (Superscript lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript" -inlineToLaTeX (Subscript lst) = do - contents <- inlineListToLaTeX $ deVerb lst - -- oddly, latex includes \textsuperscript but not \textsubscript - -- so we have to define it: - addToHeader "\\newcommand{\\textsubscript}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}" - return $ inCmd "textsubscript" contents -inlineToLaTeX (Code str) = do - st <- get - if stInNote st - then do addToHeader "\\usepackage{fancyvrb}" - else return () - let chr = ((enumFromTo '!' '~') \\ str) !! 0 - return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] -inlineToLaTeX (Quoted SingleQuote lst) = do - contents <- inlineListToLaTeX lst - let s1 = if (not (null lst)) && (isQuoted (head lst)) - then text "\\," - else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) - then text "\\," - else empty - return $ char '`' <> s1 <> contents <> s2 <> char '\'' -inlineToLaTeX (Quoted DoubleQuote lst) = do - contents <- inlineListToLaTeX lst - let s1 = if (not (null lst)) && (isQuoted (head lst)) - then text "\\," - else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) - then text "\\," - else empty - return $ text "``" <> s1 <> contents <> s2 <> text "''" -inlineToLaTeX Apostrophe = return $ char '\'' -inlineToLaTeX EmDash = return $ text "---" -inlineToLaTeX EnDash = return $ text "--" -inlineToLaTeX Ellipses = return $ text "\\ldots{}" -inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str -inlineToLaTeX (TeX str) = return $ text str -inlineToLaTeX (HtmlInline str) = return empty -inlineToLaTeX (LineBreak) = return $ text "\\\\" -inlineToLaTeX Space = return $ char ' ' -inlineToLaTeX (Link txt (src, _)) = do - addToHeader "\\usepackage[breaklinks=true]{hyperref}" - case txt of - [Code x] | x == src -> -- autolink - do addToHeader "\\usepackage{url}" - return $ text $ "\\url{" ++ x ++ "}" - _ -> do contents <- inlineListToLaTeX $ deVerb txt - return $ text ("\\href{" ++ src ++ "}{") <> contents <> - char '}' -inlineToLaTeX (Image alternate (source, tit)) = do - addToHeader "\\usepackage{graphicx}" - return $ text $ "\\includegraphics{" ++ source ++ "}" -inlineToLaTeX (Note contents) = do - st <- get - put (st {stInNote = True}) - contents' <- blockListToLaTeX contents - modify (\st -> st {stInNote = False}) - let rawnote = stripTrailingNewlines $ render contents' - -- note: a \n before } is needed when note ends with a Verbatim environment - let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote - return $ text "%\n\\footnote{" <> - text rawnote <> (if optNewline then char '\n' else empty) <> char '}' diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs deleted file mode 100644 index 8e14c2bf0..000000000 --- a/src/Text/Pandoc/Writers/Man.hs +++ /dev/null @@ -1,293 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to groff man page format. - --} -module Text.Pandoc.Writers.Man ( writeMan) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Printf ( printf ) -import Data.List ( isPrefixOf, drop, nub, intersperse ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State - -type Notes = [[Block]] -type Preprocessors = [String] -- e.g. "t" for tbl -type WriterState = (Notes, Preprocessors) - --- | Convert Pandoc to Man. -writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = render $ evalState (pandocToMan opts document) ([],[]) - --- | Return groff man representation of document. -pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToMan opts (Pandoc meta blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - let before' = if null before then empty else text before - let after' = if null after then empty else text after - (head, foot) <- metaToMan opts meta - body <- blockListToMan opts blocks - (notes, preprocessors) <- get - let preamble = if null preprocessors || not (writerStandalone opts) - then empty - else text $ ".\\\" " ++ concat (nub preprocessors) - notes' <- notesToMan opts (reverse notes) - return $ preamble $$ head $$ before' $$ body $$ notes' $$ foot $$ after' - --- | Insert bibliographic information into Man header and footer. -metaToMan :: WriterOptions -- ^ Options, including Man header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState (Doc, Doc) -metaToMan options (Meta title authors date) = do - titleText <- inlineListToMan options title - let (cmdName, rest) = break (== ' ') $ render titleText - let (title', section) = case reverse cmdName of - (')':d:'(':xs) | d `elem` ['0'..'9'] -> - (text (reverse xs), char d) - xs -> (text (reverse xs), doubleQuotes empty) - let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $ - splitBy '|' rest - let head = (text ".TH") <+> title' <+> section <+> - doubleQuotes (text date) <+> hsep extras - let foot = case length authors of - 0 -> empty - 1 -> text ".SH AUTHOR" $$ (text $ joinWithSep ", " authors) - 2 -> text ".SH AUTHORS" $$ (text $ joinWithSep ", " authors) - return $ if writerStandalone options - then (head, foot) - else (empty, empty) - --- | Return man representation of notes. -notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToMan opts notes = - if null notes - then return empty - else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= - return . (text ".SH NOTES" $$) . vcat - --- | Return man representation of a note. -noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToMan opts num note = do - contents <- blockListToMan opts note - let marker = text "\n.SS [" <> text (show num) <> char ']' - return $ marker $$ contents - --- | Association list of characters to escape. -manEscapes :: [(Char, String)] -manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "\".@\\" - --- | Escape special characters for Man. -escapeString :: String -> String -escapeString = escapeStringUsing manEscapes - --- | Escape a literal (code) section for Man. -escapeCode :: String -> String -escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ") - --- | Convert Pandoc block element to man. -blockToMan :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc -blockToMan opts Null = return empty -blockToMan opts (Plain inlines) = - wrapIfNeeded opts (inlineListToMan opts) inlines -blockToMan opts (Para inlines) = do - contents <- wrapIfNeeded opts (inlineListToMan opts) inlines - return $ text ".PP" $$ contents -blockToMan opts (RawHtml str) = return $ text str -blockToMan opts HorizontalRule = return $ text $ ".PP\n * * * * *" -blockToMan opts (Header level inlines) = do - contents <- inlineListToMan opts inlines - let heading = case level of - 1 -> ".SH " - _ -> ".SS " - return $ text heading <> contents -blockToMan opts (CodeBlock str) = return $ - text ".PP" $$ text "\\f[CR]" $$ - text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]" -blockToMan opts (BlockQuote blocks) = do - contents <- blockListToMan opts blocks - return $ text ".RS" $$ contents $$ text ".RE" -blockToMan opts (Table caption alignments widths headers rows) = - let aligncode AlignLeft = "l" - aligncode AlignRight = "r" - aligncode AlignCenter = "c" - aligncode AlignDefault = "l" - in do - caption' <- inlineListToMan opts caption - modify (\(notes, preprocessors) -> (notes, "t":preprocessors)) - let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths - -- 78n default width - 8n indent = 70n - let coldescriptions = text $ joinWithSep " " - (zipWith (\align width -> aligncode align ++ width) - alignments iwidths) ++ "." - colheadings <- mapM (blockListToMan opts) headers - let makeRow cols = text "T{" $$ - (vcat $ intersperse (text "T}@T{") cols) $$ - text "T}" - let colheadings' = makeRow colheadings - body <- mapM (\row -> do - cols <- mapM (blockListToMan opts) row - return $ makeRow cols) rows - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "tab(@);" $$ coldescriptions $$ - colheadings' $$ char '_' $$ vcat body $$ text ".TE" - -blockToMan opts (BulletList items) = do - contents <- mapM (bulletListItemToMan opts) items - return (vcat contents) -blockToMan opts (OrderedList attribs items) = do - let markers = take (length items) $ orderedListMarkers attribs - let indent = 1 + (maximum $ map length markers) - contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ - zip markers items - return (vcat contents) -blockToMan opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMan opts) items - return (vcat contents) - --- | Convert bullet list item (list of blocks) to man. -bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToMan opts [] = return empty -bulletListItemToMan opts ((Para first):rest) = - bulletListItemToMan opts ((Plain first):rest) -bulletListItemToMan opts ((Plain first):rest) = do - first' <- blockToMan opts (Plain first) - rest' <- blockListToMan opts rest - let first'' = text ".IP \\[bu] 2" $$ first' - let rest'' = if null rest - then empty - else text ".RS 2" $$ rest' $$ text ".RE" - return (first'' $$ rest'') -bulletListItemToMan opts (first:rest) = do - first' <- blockToMan opts first - rest' <- blockListToMan opts rest - return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" - --- | Convert ordered list item (a list of blocks) to man. -orderedListItemToMan :: WriterOptions -- ^ options - -> String -- ^ order marker for list item - -> Int -- ^ number of spaces to indent - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToMan _ _ _ [] = return empty -orderedListItemToMan opts num indent ((Para first):rest) = - orderedListItemToMan opts num indent ((Plain first):rest) -orderedListItemToMan opts num indent (first:rest) = do - first' <- blockToMan opts first - rest' <- blockListToMan opts rest - let num' = printf ("%" ++ show (indent - 1) ++ "s") num - let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' - let rest'' = if null rest - then empty - else text ".RS 4" $$ rest' $$ text ".RE" - return $ first'' $$ rest'' - --- | Convert definition list item (label, list of blocks) to man. -definitionListItemToMan :: WriterOptions - -> ([Inline],[Block]) - -> State WriterState Doc -definitionListItemToMan opts (label, items) = do - labelText <- inlineListToMan opts label - contents <- if null items - then return empty - else do - let (first, rest) = case items of - ((Para x):y) -> (Plain x,y) - (x:y) -> (x,y) - rest' <- mapM (\item -> blockToMan opts item) - rest >>= (return . vcat) - first' <- blockToMan opts first - return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ text ".TP\n.B " <> labelText $+$ contents - --- | Convert list of Pandoc block elements to man. -blockListToMan :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToMan opts blocks = - mapM (blockToMan opts) blocks >>= (return . vcat) - --- | Convert list of Pandoc inline elements to man. -inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) - --- | Convert Pandoc inline element to man. -inlineToMan :: WriterOptions -> Inline -> State WriterState Doc -inlineToMan opts (Emph lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[I]" <> contents <> text "\\f[]" -inlineToMan opts (Strong lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[B]" <> contents <> text "\\f[]" -inlineToMan opts (Strikeout lst) = do - contents <- inlineListToMan opts lst - return $ text "[STRIKEOUT:" <> contents <> char ']' -inlineToMan opts (Superscript lst) = do - contents <- inlineListToMan opts lst - return $ char '^' <> contents <> char '^' -inlineToMan opts (Subscript lst) = do - contents <- inlineListToMan opts lst - return $ char '~' <> contents <> char '~' -inlineToMan opts (Quoted SingleQuote lst) = do - contents <- inlineListToMan opts lst - return $ char '`' <> contents <> char '\'' -inlineToMan opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMan opts lst - return $ text "\\[lq]" <> contents <> text "\\[rq]" -inlineToMan opts EmDash = return $ text "\\[em]" -inlineToMan opts EnDash = return $ text "\\[en]" -inlineToMan opts Apostrophe = return $ char '\'' -inlineToMan opts Ellipses = return $ text "\\&..." -inlineToMan opts (Code str) = - return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]" -inlineToMan opts (Str str) = return $ text $ escapeString str -inlineToMan opts (TeX str) = return $ text $ escapeCode str -inlineToMan opts (HtmlInline str) = return $ text $ escapeCode str -inlineToMan opts (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n" -inlineToMan opts Space = return $ char ' ' -inlineToMan opts (Link txt (src, _)) = do - linktext <- inlineListToMan opts txt - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ if txt == [Code srcSuffix] - then char '<' <> text srcSuffix <> char '>' - else linktext <> text " (" <> text src <> char ')' -inlineToMan opts (Image alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || - (alternate == [Str source]) -- to prevent autolinks - then [Str "image"] - else alternate - linkPart <- inlineToMan opts (Link txt (source, tit)) - return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' -inlineToMan opts (Note contents) = do - modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state - (notes, _) <- get - let ref = show $ (length notes) - return $ char '[' <> text ref <> char ']' - diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs deleted file mode 100644 index 4cecaae5d..000000000 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ /dev/null @@ -1,373 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to markdown-formatted plain text. - -Markdown: <http://daringfireball.net/projects/markdown/> --} -module Text.Pandoc.Writers.Markdown ( writeMarkdown) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Blocks -import Text.ParserCombinators.Parsec ( parse, (<|>), GenParser ) -import Data.List ( group, isPrefixOf, drop, find, intersperse ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State - -type Notes = [[Block]] -type Refs = KeyTable -type WriterState = (Notes, Refs) - --- | Convert Pandoc to Markdown. -writeMarkdown :: WriterOptions -> Pandoc -> String -writeMarkdown opts document = - render $ evalState (pandocToMarkdown opts document) ([],[]) - --- | Return markdown representation of document. -pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToMarkdown opts (Pandoc meta blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - let before' = if null before then empty else text before - let after' = if null after then empty else text after - metaBlock <- metaToMarkdown opts meta - let head = if writerStandalone opts - then metaBlock $+$ text (writerHeader opts) - else empty - let headerBlocks = filter isHeaderBlock blocks - let toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks - else empty - body <- blockListToMarkdown opts blocks - (notes, _) <- get - notes' <- notesToMarkdown opts (reverse notes) - (_, refs) <- get -- note that the notes may contain refs - refs' <- keyTableToMarkdown opts (reverse refs) - return $ head $+$ before' $+$ toc $+$ body $+$ text "" $+$ - notes' $+$ text "" $+$ refs' $+$ after' - --- | Return markdown representation of reference key table. -keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc -keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat - --- | Return markdown representation of a reference key. -keyToMarkdown :: WriterOptions - -> ([Inline], (String, String)) - -> State WriterState Doc -keyToMarkdown opts (label, (src, tit)) = do - label' <- inlineListToMarkdown opts label - let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\"" - return $ text " " <> char '[' <> label' <> char ']' <> text ": " <> - text src <> tit' - --- | Return markdown representation of notes. -notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToMarkdown opts notes = - mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= - return . vcat - --- | Return markdown representation of a note. -noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToMarkdown opts num blocks = do - contents <- blockListToMarkdown opts blocks - let marker = text "[^" <> text (show num) <> text "]:" - return $ hang marker (writerTabStop opts) contents - --- | Escape special characters for Markdown. -escapeString :: String -> String -escapeString = escapeStringUsing markdownEscapes - where markdownEscapes = ('\160', " "):(backslashEscapes "`<\\*_^~") - --- | Convert bibliographic information into Markdown header. -metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc -metaToMarkdown opts (Meta title authors date) = do - title' <- titleToMarkdown opts title - authors' <- authorsToMarkdown authors - date' <- dateToMarkdown date - return $ title' $+$ authors' $+$ date' - -titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -titleToMarkdown opts [] = return empty -titleToMarkdown opts lst = do - contents <- inlineListToMarkdown opts lst - return $ text "% " <> contents - -authorsToMarkdown :: [String] -> State WriterState Doc -authorsToMarkdown [] = return empty -authorsToMarkdown lst = return $ - text "% " <> text (joinWithSep ", " (map escapeString lst)) - -dateToMarkdown :: String -> State WriterState Doc -dateToMarkdown [] = return empty -dateToMarkdown str = return $ text "% " <> text (escapeString str) - --- | Construct table of contents from list of header blocks. -tableOfContents :: WriterOptions -> [Block] -> Doc -tableOfContents opts headers = - let opts' = opts { writerIgnoreNotes = True } - contents = BulletList $ map elementToListItem $ hierarchicalize headers - in evalState (blockToMarkdown opts' contents) ([],[]) - --- | Converts an Element to a list item for a table of contents, -elementToListItem :: Element -> [Block] -elementToListItem (Blk _) = [] -elementToListItem (Sec headerText subsecs) = [Plain headerText] ++ - if null subsecs - then [] - else [BulletList $ map elementToListItem subsecs] - --- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char st Char -olMarker = do (start, style, delim) <- anyOrderedListMarker - if delim == Period && - (style == UpperAlpha || (style == UpperRoman && - start `elem` [1, 5, 10, 50, 100, 500, 1000])) - then spaceChar >> spaceChar - else spaceChar - --- | True if string begins with an ordered list marker -beginsWithOrderedListMarker :: String -> Bool -beginsWithOrderedListMarker str = - case parse olMarker "para start" str of - Left _ -> False - Right _ -> True - -wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -wrappedMarkdown opts inlines = do - let chunks = splitBy LineBreak inlines - let chunks' = if null chunks - then [] - else (map (++ [Str " "]) $ init chunks) ++ [last chunks] - lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks' - return $ vcat lns - --- | Convert Pandoc block element to markdown. -blockToMarkdown :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc -blockToMarkdown opts Null = return empty -blockToMarkdown opts (Plain inlines) = - wrappedMarkdown opts inlines -blockToMarkdown opts (Para inlines) = do - contents <- wrappedMarkdown opts inlines - -- escape if para starts with ordered list marker - let esc = if (not (writerStrictMarkdown opts)) && - beginsWithOrderedListMarker (render contents) - then char '\\' - else empty - return $ esc <> contents <> text "\n" -blockToMarkdown opts (RawHtml str) = return $ text str -blockToMarkdown opts HorizontalRule = return $ text "\n* * * * *\n" -blockToMarkdown opts (Header level inlines) = do - contents <- inlineListToMarkdown opts inlines - return $ text ((replicate level '#') ++ " ") <> contents <> text "\n" -blockToMarkdown opts (CodeBlock str) = return $ - (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" -blockToMarkdown opts (BlockQuote blocks) = do - contents <- blockListToMarkdown opts blocks - return $ (vcat $ map (text . ("> " ++)) $ lines $ render contents) <> - text "\n" -blockToMarkdown opts (Table caption aligns widths headers rows) = do - caption' <- inlineListToMarkdown opts caption - let caption'' = if null caption - then empty - else text "" $+$ (text "Table: " <> caption') - headers' <- mapM (blockListToMarkdown opts) headers - let widthsInChars = map (floor . (78 *)) widths - let alignHeader alignment = case alignment of - AlignLeft -> leftAlignBlock - AlignCenter -> centerAlignBlock - AlignRight -> rightAlignBlock - AlignDefault -> leftAlignBlock - let makeRow = hsepBlocks . (zipWith alignHeader aligns) . - (zipWith docToBlock widthsInChars) - let head = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row - return $ makeRow cols) rows - let tableWidth = sum widthsInChars - let maxRowHeight = maximum $ map heightOfBlock (head:rows') - let isMultilineTable = maxRowHeight > 1 - let underline = hsep $ - map (\width -> text $ replicate width '-') widthsInChars - let border = if isMultilineTable - then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-' - else empty - let spacer = if isMultilineTable - then text "" - else empty - let body = vcat $ intersperse spacer $ map blockToDoc rows' - return $ (nest 2 $ border $+$ (blockToDoc head) $+$ underline $+$ body $+$ - border $+$ caption'') <> text "\n" -blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items - return $ (vcat contents) <> text "\n" -blockToMarkdown opts (OrderedList attribs items) = do - let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' - else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ - zip markers' items - return $ (vcat contents) <> text "\n" -blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items - return $ (vcat contents) <> text "\n" - --- | Convert bullet list item (list of blocks) to markdown. -bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToMarkdown opts items = do - contents <- blockListToMarkdown opts items - return $ hang (text "- ") (writerTabStop opts) contents - --- | Convert ordered list item (a list of blocks) to markdown. -orderedListItemToMarkdown :: WriterOptions -- ^ options - -> String -- ^ list item marker - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToMarkdown opts marker items = do - contents <- blockListToMarkdown opts items - -- The complexities here are needed to ensure that if the list - -- marker is 4 characters or longer, the second and following - -- lines are indented 4 spaces but the list item begins after the marker. - return $ sep [nest (min (3 - length marker) 0) (text marker), - nest (writerTabStop opts) contents] - --- | Convert definition list item (label, list of blocks) to markdown. -definitionListItemToMarkdown :: WriterOptions - -> ([Inline],[Block]) - -> State WriterState Doc -definitionListItemToMarkdown opts (label, items) = do - labelText <- inlineListToMarkdown opts label - let tabStop = writerTabStop opts - let leader = char ':' - contents <- mapM (\item -> blockToMarkdown opts item >>= - (\txt -> return (leader $$ nest tabStop txt))) - items >>= return . vcat - return $ labelText $+$ contents - --- | Convert list of Pandoc block elements to markdown. -blockListToMarkdown :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToMarkdown opts blocks = - mapM (blockToMarkdown opts) blocks >>= return . vcat - --- | Get reference for target; if none exists, create unique one and return. --- Prefer label if possible; otherwise, generate a unique key. -getReference :: [Inline] -> Target -> State WriterState [Inline] -getReference label (src, tit) = do - (_,refs) <- get - case find ((== (src, tit)) . snd) refs of - Just (ref, _) -> return ref - Nothing -> do - let label' = case find ((== label) . fst) refs of - Just _ -> -- label is used; generate numerical label - case find (\n -> not (any (== [Str (show n)]) - (map fst refs))) [1..10000] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" - Nothing -> label - modify (\(notes, refs) -> (notes, (label', (src,tit)):refs)) - return label' - --- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) lst >>= return . hcat - --- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc -inlineToMarkdown opts (Emph lst) = do - contents <- inlineListToMarkdown opts lst - return $ char '*' <> contents <> char '*' -inlineToMarkdown opts (Strong lst) = do - contents <- inlineListToMarkdown opts lst - return $ text "**" <> contents <> text "**" -inlineToMarkdown opts (Strikeout lst) = do - contents <- inlineListToMarkdown opts lst - return $ text "~~" <> contents <> text "~~" -inlineToMarkdown opts (Superscript lst) = do - contents <- inlineListToMarkdown opts lst - let contents' = text $ substitute " " "\\ " $ render contents - return $ char '^' <> contents' <> char '^' -inlineToMarkdown opts (Subscript lst) = do - contents <- inlineListToMarkdown opts lst - let contents' = text $ substitute " " "\\ " $ render contents - return $ char '~' <> contents' <> char '~' -inlineToMarkdown opts (Quoted SingleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ char '\'' <> contents <> char '\'' -inlineToMarkdown opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ char '"' <> contents <> char '"' -inlineToMarkdown opts EmDash = return $ text "--" -inlineToMarkdown opts EnDash = return $ char '-' -inlineToMarkdown opts Apostrophe = return $ char '\'' -inlineToMarkdown opts Ellipses = return $ text "..." -inlineToMarkdown opts (Code str) = - let tickGroups = filter (\s -> '`' `elem` s) $ group str - longest = if null tickGroups - then 0 - else maximum $ map length tickGroups - marker = replicate (longest + 1) '`' - spacer = if (longest == 0) then "" else " " in - return $ text (marker ++ spacer ++ str ++ spacer ++ marker) -inlineToMarkdown opts (Str str) = return $ text $ escapeString str -inlineToMarkdown opts (TeX str) = return $ text str -inlineToMarkdown opts (HtmlInline str) = return $ text str -inlineToMarkdown opts (LineBreak) = return $ text " \n" -inlineToMarkdown opts Space = return $ char ' ' -inlineToMarkdown opts (Link txt (src, tit)) = do - linktext <- inlineListToMarkdown opts txt - let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - let useRefLinks = writerReferenceLinks opts - let useAuto = null tit && txt == [Code srcSuffix] - ref <- if useRefLinks then getReference txt (src, tit) else return [] - reftext <- inlineListToMarkdown opts ref - return $ if useAuto - then char '<' <> text srcSuffix <> char '>' - else if useRefLinks - then let first = char '[' <> linktext <> char ']' - second = if txt == ref - then text "[]" - else char '[' <> reftext <> char ']' - in first <> second - else char '[' <> linktext <> char ']' <> - char '(' <> text src <> linktitle <> char ')' -inlineToMarkdown opts (Image alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || - (alternate == [Str source]) -- to prevent autolinks - then [Str "image"] - else alternate - linkPart <- inlineToMarkdown opts (Link txt (source, tit)) - return $ char '!' <> linkPart -inlineToMarkdown opts (Note contents) = do - modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state - (notes, _) <- get - let ref = show $ (length notes) - return $ text "[^" <> text ref <> char ']' diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs deleted file mode 100644 index ddcbf95c0..000000000 --- a/src/Text/Pandoc/Writers/RST.hs +++ /dev/null @@ -1,325 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to reStructuredText. - -reStructuredText: <http://docutils.sourceforge.net/rst.html> --} -module Text.Pandoc.Writers.RST ( writeRST) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Blocks -import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State - -type Notes = [[Block]] -type Refs = KeyTable -type WriterState = (Notes, Refs, Refs) -- first Refs is links, second pictures - --- | Convert Pandoc to RST. -writeRST :: WriterOptions -> Pandoc -> String -writeRST opts document = - render $ evalState (pandocToRST opts document) ([],[],[]) - --- | Return RST representation of document. -pandocToRST :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToRST opts (Pandoc meta blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - before' = if null before then empty else text before - after' = if null after then empty else text after - metaBlock <- metaToRST opts meta - let head = if (writerStandalone opts) - then metaBlock $+$ text (writerHeader opts) - else empty - body <- blockListToRST opts blocks - (notes, _, _) <- get - notes' <- notesToRST opts (reverse notes) - (_, refs, pics) <- get -- note that the notes may contain refs - refs' <- keyTableToRST opts (reverse refs) - pics' <- pictTableToRST opts (reverse pics) - return $ head $+$ before' $+$ body $+$ notes' $+$ text "" $+$ refs' $+$ - pics' $+$ after' - --- | Return RST representation of reference key table. -keyTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc -keyTableToRST opts refs = mapM (keyToRST opts) refs >>= return . vcat - --- | Return RST representation of a reference key. -keyToRST :: WriterOptions - -> ([Inline], (String, String)) - -> State WriterState Doc -keyToRST opts (label, (src, tit)) = do - label' <- inlineListToRST opts label - let label'' = if ':' `elem` (render label') - then char '`' <> label' <> char '`' - else label' - return $ text ".. _" <> label'' <> text ": " <> text src - --- | Return RST representation of notes. -notesToRST :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToRST opts notes = - mapM (\(num, note) -> noteToRST opts num note) (zip [1..] notes) >>= - return . vcat - --- | Return RST representation of a note. -noteToRST :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToRST opts num note = do - contents <- blockListToRST opts note - let marker = text ".. [" <> text (show num) <> text "] " - return $ hang marker 3 contents - --- | Return RST representation of picture reference table. -pictTableToRST :: WriterOptions -> KeyTable -> State WriterState Doc -pictTableToRST opts refs = mapM (pictToRST opts) refs >>= return . vcat - --- | Return RST representation of a picture substitution reference. -pictToRST :: WriterOptions - -> ([Inline], (String, String)) - -> State WriterState Doc -pictToRST opts (label, (src, _)) = do - label' <- inlineListToRST opts label - return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <> - text src - --- | Take list of inline elements and return wrapped doc. -wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc -wrappedRST opts inlines = mapM (wrapIfNeeded opts (inlineListToRST opts)) - (splitBy LineBreak inlines) >>= return . vcat - --- | Escape special characters for RST. -escapeString :: String -> String -escapeString = escapeStringUsing (backslashEscapes "`\\|*_") - --- | Convert bibliographic information into RST header. -metaToRST :: WriterOptions -> Meta -> State WriterState Doc -metaToRST opts (Meta title authors date) = do - title' <- titleToRST opts title - authors' <- authorsToRST authors - date' <- dateToRST date - let toc = if writerTableOfContents opts - then text "" $+$ text ".. contents::" - else empty - return $ title' $+$ authors' $+$ date' $+$ toc - -titleToRST :: WriterOptions -> [Inline] -> State WriterState Doc -titleToRST opts [] = return empty -titleToRST opts lst = do - contents <- inlineListToRST opts lst - let titleLength = length $ render contents - let border = text (replicate titleLength '=') - return $ border $+$ contents $+$ border <> text "\n" - -authorsToRST :: [String] -> State WriterState Doc -authorsToRST [] = return empty -authorsToRST (first:rest) = do - rest' <- authorsToRST rest - return $ (text ":Author: " <> text first) $+$ rest' - -dateToRST :: String -> State WriterState Doc -dateToRST [] = return empty -dateToRST str = return $ text ":Date: " <> text (escapeString str) - --- | Convert Pandoc block element to RST. -blockToRST :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc -blockToRST opts Null = return empty -blockToRST opts (Plain inlines) = wrappedRST opts inlines -blockToRST opts (Para [TeX str]) = - let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in - return $ hang (text "\n.. raw:: latex\n") 3 $ vcat $ map text (lines str') -blockToRST opts (Para inlines) = do - contents <- wrappedRST opts inlines - return $ contents <> text "\n" -blockToRST opts (RawHtml str) = - let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in - return $ hang (text "\n.. raw:: html\n") 3 $ vcat $ map text (lines str') -blockToRST opts HorizontalRule = return $ text "--------------\n" -blockToRST opts (Header level inlines) = do - contents <- inlineListToRST opts inlines - let headerLength = length $ render contents - let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate headerLength headerChar - return $ contents $+$ border <> text "\n" -blockToRST opts (CodeBlock str) = return $ (text "::\n") $+$ - (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" -blockToRST opts (BlockQuote blocks) = do - contents <- blockListToRST opts blocks - return $ (nest (writerTabStop opts) contents) <> text "\n" -blockToRST opts (Table caption aligns widths headers rows) = do - caption' <- inlineListToRST opts caption - let caption'' = if null caption - then empty - else text "" $+$ (text "Table: " <> caption') - headers' <- mapM (blockListToRST opts) headers - let widthsInChars = map (floor . (78 *)) widths - let alignHeader alignment = case alignment of - AlignLeft -> leftAlignBlock - AlignCenter -> centerAlignBlock - AlignRight -> rightAlignBlock - AlignDefault -> leftAlignBlock - let hpipeBlocks blocks = hcatBlocks [beg, middle, end] - where height = maximum (map heightOfBlock blocks) - sep = TextBlock 3 height (replicate height " | ") - beg = TextBlock 2 height (replicate height "| ") - end = TextBlock 2 height (replicate height " |") - middle = hcatBlocks $ intersperse sep blocks - let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars - let head = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM (blockListToRST opts) row - return $ makeRow cols) rows - let tableWidth = sum widthsInChars - let maxRowHeight = maximum $ map heightOfBlock (head:rows') - let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '+' - let body = vcat $ intersperse (border '-') $ map blockToDoc rows' - return $ border '-' $+$ blockToDoc head $+$ border '=' $+$ body $+$ - border '-' $$ caption'' $$ text "" -blockToRST opts (BulletList items) = do - contents <- mapM (bulletListItemToRST opts) items - -- ensure that sublists have preceding blank line - return $ text "" $+$ vcat contents <> text "\n" -blockToRST opts (OrderedList (start, style, delim) items) = do - let markers = if start == 1 && style == DefaultStyle && delim == DefaultDelim - then take (length items) $ repeat "#." - else take (length items) $ orderedListMarkers - (start, style, delim) - let maxMarkerLength = maximum $ map length markers - let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToRST opts item num) $ - zip markers' items - -- ensure that sublists have preceding blank line - return $ text "" $+$ vcat contents <> text "\n" -blockToRST opts (DefinitionList items) = do - contents <- mapM (definitionListItemToRST opts) items - return $ (vcat contents) <> text "\n" - --- | Convert bullet list item (list of blocks) to RST. -bulletListItemToRST :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToRST opts items = do - contents <- blockListToRST opts items - return $ hang (text "- ") 3 contents - --- | Convert ordered list item (a list of blocks) to RST. -orderedListItemToRST :: WriterOptions -- ^ options - -> String -- ^ marker for list item - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToRST opts marker items = do - contents <- blockListToRST opts items - return $ hang (text marker) (length marker + 1) contents - --- | Convert defintion list item (label, list of blocks) to RST. -definitionListItemToRST :: WriterOptions -> ([Inline], [Block]) -> State WriterState Doc -definitionListItemToRST opts (label, items) = do - label <- inlineListToRST opts label - contents <- blockListToRST opts items - return $ label $+$ nest (writerTabStop opts) contents - --- | Convert list of Pandoc block elements to RST. -blockListToRST :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToRST opts blocks = - mapM (blockToRST opts) blocks >>= return . vcat - --- | Convert list of Pandoc inline elements to RST. -inlineListToRST :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToRST opts lst = mapM (inlineToRST opts) lst >>= return . hcat - --- | Convert Pandoc inline element to RST. -inlineToRST :: WriterOptions -> Inline -> State WriterState Doc -inlineToRST opts (Emph lst) = do - contents <- inlineListToRST opts lst - return $ char '*' <> contents <> char '*' -inlineToRST opts (Strong lst) = do - contents <- inlineListToRST opts lst - return $ text "**" <> contents <> text "**" -inlineToRST opts (Strikeout lst) = do - contents <- inlineListToRST opts lst - return $ text "[STRIKEOUT:" <> contents <> char ']' -inlineToRST opts (Superscript lst) = do - contents <- inlineListToRST opts lst - return $ text "\\ :sup:`" <> contents <> text "`\\ " -inlineToRST opts (Subscript lst) = do - contents <- inlineListToRST opts lst - return $ text "\\ :sub:`" <> contents <> text "`\\ " -inlineToRST opts (Quoted SingleQuote lst) = do - contents <- inlineListToRST opts lst - return $ char '\'' <> contents <> char '\'' -inlineToRST opts (Quoted DoubleQuote lst) = do - contents <- inlineListToRST opts lst - return $ char '"' <> contents <> char '"' -inlineToRST opts EmDash = return $ text "--" -inlineToRST opts EnDash = return $ char '-' -inlineToRST opts Apostrophe = return $ char '\'' -inlineToRST opts Ellipses = return $ text "..." -inlineToRST opts (Code str) = return $ text $ "``" ++ str ++ "``" -inlineToRST opts (Str str) = return $ text $ escapeString str -inlineToRST opts (TeX str) = return $ text str -inlineToRST opts (HtmlInline str) = return empty -inlineToRST opts (LineBreak) = return $ char ' ' -- RST doesn't have linebreaks -inlineToRST opts Space = return $ char ' ' -inlineToRST opts (Link [Code str] (src, tit)) | src == str || - src == "mailto:" ++ str = do - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ text srcSuffix -inlineToRST opts (Link txt (src, tit)) = do - let useReferenceLinks = writerReferenceLinks opts - linktext <- inlineListToRST opts $ normalizeSpaces txt - if useReferenceLinks - then do (notes, refs, pics) <- get - let refs' = if (txt, (src, tit)) `elem` refs - then refs - else (txt, (src, tit)):refs - put (notes, refs', pics) - return $ char '`' <> linktext <> text "`_" - else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_" -inlineToRST opts (Image alternate (source, tit)) = do - (notes, refs, pics) <- get - let labelsUsed = map fst pics - let txt = if null alternate || alternate == [Str ""] || - alternate `elem` labelsUsed - then [Str $ "image" ++ show (length refs)] - else alternate - let pics' = if (txt, (source, tit)) `elem` pics - then pics - else (txt, (source, tit)):pics - put (notes, refs, pics') - label <- inlineListToRST opts txt - return $ char '|' <> label <> char '|' -inlineToRST opts (Note contents) = do - -- add to notes in state - modify (\(notes, refs, pics) -> (contents:notes, refs, pics)) - (notes, _, _) <- get - let ref = show $ (length notes) - return $ text " [" <> text ref <> text "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs deleted file mode 100644 index 3bd5c63b2..000000000 --- a/src/Text/Pandoc/Writers/RTF.hs +++ /dev/null @@ -1,286 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to RTF (rich text format). --} -module Text.Pandoc.Writers.RTF ( writeRTF ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Regex ( matchRegexAll, mkRegex ) -import Data.List ( isSuffixOf ) -import Data.Char ( ord ) - --- | Convert Pandoc to a string in rich text format. -writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc meta blocks) = - let head = if writerStandalone options - then rtfHeader (writerHeader options) meta - else "" - toc = if writerTableOfContents options - then tableOfContents $ filter isHeaderBlock blocks - else "" - foot = if writerStandalone options then "\n}\n" else "" - body = writerIncludeBefore options ++ - concatMap (blockToRTF 0 AlignDefault) blocks ++ - writerIncludeAfter options - in head ++ toc ++ body ++ foot - --- | Construct table of contents from list of header blocks. -tableOfContents :: [Block] -> String -tableOfContents headers = - let contentsTree = hierarchicalize headers - in concatMap (blockToRTF 0 AlignDefault) $ - [Header 1 [Str "Contents"], - BulletList (map elementToListItem contentsTree)] - -elementToListItem :: Element -> [Block] -elementToListItem (Blk _) = [] -elementToListItem (Sec sectext subsecs) = [Plain sectext] ++ - if null subsecs - then [] - else [BulletList (map elementToListItem subsecs)] - --- | Convert unicode characters (> 127) into rich text format representation. -handleUnicode :: String -> String -handleUnicode [] = [] -handleUnicode (c:cs) = - if ord c > 127 - then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs - else c:(handleUnicode cs) - --- | Escape special characters. -escapeSpecial :: String -> String -escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}")) - --- | Escape strings as needed for rich text format. -stringToRTF :: String -> String -stringToRTF = handleUnicode . escapeSpecial - --- | Escape things as needed for code block in RTF. -codeStringToRTF :: String -> String -codeStringToRTF str = joinWithSep "\\line\n" $ lines (stringToRTF str) - --- | Deal with raw LaTeX. -latexToRTF :: String -> String -latexToRTF str = "{\\cf1 " ++ (stringToRTF str) ++ "\\cf0 } " - --- | Make a paragraph with first-line indent, block indent, and space after. -rtfParSpaced :: Int -- ^ space after (in twips) - -> Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfParSpaced spaceAfter indent firstLineIndent alignment content = - let alignString = case alignment of - AlignLeft -> "\\ql " - AlignRight -> "\\qr " - AlignCenter -> "\\qc " - AlignDefault -> "\\ql " - in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ - " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" - --- | Default paragraph. -rtfPar :: Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfPar = rtfParSpaced 180 - --- | Compact paragraph (e.g. for compact list items). -rtfCompact :: Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfCompact = rtfParSpaced 0 - --- number of twips to indent -indentIncrement = 720 -listIncrement = 360 - --- | Returns appropriate bullet list marker for indent level. -bulletMarker :: Int -> String -bulletMarker indent = case indent `mod` 720 of - 0 -> "\\bullet " - otherwise -> "\\endash " - --- | Returns appropriate (list of) ordered list markers for indent level. -orderedMarkers :: Int -> ListAttributes -> [String] -orderedMarkers indent (start, style, delim) = - if style == DefaultStyle && delim == DefaultDelim - then case indent `mod` 720 of - 0 -> orderedListMarkers (start, Decimal, Period) - otherwise -> orderedListMarkers (start, LowerAlpha, Period) - else orderedListMarkers (start, style, delim) - --- | Returns RTF header. -rtfHeader :: String -- ^ header text - -> Meta -- ^ bibliographic information - -> String -rtfHeader headerText (Meta title authors date) = - let titletext = if null title - then "" - else rtfPar 0 0 AlignCenter $ - "\\b \\fs36 " ++ inlineListToRTF title - authorstext = if null authors - then "" - else rtfPar 0 0 AlignCenter (" " ++ (joinWithSep "\\" $ - map stringToRTF authors)) - datetext = if date == "" - then "" - else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in - let spacer = if null (titletext ++ authorstext ++ datetext) - then "" - else rtfPar 0 0 AlignDefault "" in - headerText ++ titletext ++ authorstext ++ datetext ++ spacer - --- | Convert Pandoc block element to RTF. -blockToRTF :: Int -- ^ indent level - -> Alignment -- ^ alignment - -> Block -- ^ block to convert - -> String -blockToRTF _ _ Null = "" -blockToRTF indent alignment (Plain lst) = - rtfCompact indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (Para lst) = - rtfPar indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (BlockQuote lst) = - concatMap (blockToRTF (indent + indentIncrement) alignment) lst -blockToRTF indent _ (CodeBlock str) = - rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawHtml str) = "" -blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ - concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ - zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ - concatMap (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule = - rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ - "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst -blockToRTF indent alignment (Table caption aligns sizes headers rows) = - tableRowToRTF True indent aligns sizes headers ++ - concatMap (tableRowToRTF False indent aligns sizes) rows ++ - rtfPar indent 0 alignment (inlineListToRTF caption) - -tableRowToRTF :: Bool -> Int -> [Alignment] -> [Float] -> [[Block]] -> String -tableRowToRTF header indent aligns sizes cols = - let columns = concat $ zipWith (tableItemToRTF indent) aligns cols - totalTwips = 6 * 1440 -- 6 inches - rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) - 0 sizes - cellDefs = map (\edge -> (if header - then "\\clbrdrb\\brdrs" - else "") ++ "\\cellx" ++ show edge) - rightEdges - start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ - "\\trkeep\\intbl\n{\n" - end = "}\n\\intbl\\row}\n" - in start ++ columns ++ end - -tableItemToRTF :: Int -> Alignment -> [Block] -> String -tableItemToRTF indent alignment item = - let contents = concatMap (blockToRTF indent alignment) item - in "{\\intbl " ++ contents ++ "\\cell}\n" - --- | Ensure that there's the same amount of space after compact --- lists as after regular lists. -spaceAtEnd :: String -> String -spaceAtEnd str = - if isSuffixOf "\\par}\n" str - then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" - else str - --- | Convert list item (list of blocks) to RTF. -listItemToRTF :: Alignment -- ^ alignment - -> Int -- ^ indent level - -> String -- ^ list start marker - -> [Block] -- ^ list item (list of blocks) - -> [Char] -listItemToRTF alignment indent marker [] = - rtfCompact (indent + listIncrement) (0 - listIncrement) alignment - (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF alignment indent marker list = - let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list in - -- insert the list marker into the (processed) first block - let modFirst = case matchRegexAll (mkRegex "\\\\fi-?[0-9]+") first of - Just (before, matched, after, _) -> - before ++ "\\fi" ++ show (0 - listIncrement) ++ - " " ++ marker ++ "\\tx" ++ - show listIncrement ++ "\\tab" ++ after - Nothing -> first in - modFirst ++ concat rest - --- | Convert definition list item (label, list of blocks) to RTF. -definitionListItemToRTF :: Alignment -- ^ alignment - -> Int -- ^ indent level - -> ([Inline],[Block]) -- ^ list item (list of blocks) - -> [Char] -definitionListItemToRTF alignment indent (label, items) = - let labelText = blockToRTF indent alignment (Plain label) - itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items - in labelText ++ itemsText - --- | Convert list of inline items to RTF. -inlineListToRTF :: [Inline] -- ^ list of inlines to convert - -> String -inlineListToRTF lst = concatMap inlineToRTF lst - --- | Convert inline item to RTF. -inlineToRTF :: Inline -- ^ inline to convert - -> String -inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "} " -inlineToRTF (Quoted SingleQuote lst) = - "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) = - "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" -inlineToRTF Apostrophe = "\\u8217'" -inlineToRTF Ellipses = "\\u8230?" -inlineToRTF EmDash = "\\u8212-" -inlineToRTF EnDash = "\\u8211-" -inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "} " -inlineToRTF (Str str) = stringToRTF str -inlineToRTF (TeX str) = latexToRTF str -inlineToRTF (HtmlInline str) = "" -inlineToRTF (LineBreak) = "\\line " -inlineToRTF Space = " " -inlineToRTF (Link text (src, tit)) = - "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ - "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" -inlineToRTF (Image alternate (source, tit)) = - "{\\cf1 [image: " ++ source ++ "]\\cf0}" -inlineToRTF (Note contents) = - "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" |